1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include <linux/cgroup.h>
30#include <linux/ctype.h>
31#include <linux/errno.h>
32#include <linux/fs.h>
33#include <linux/kernel.h>
34#include <linux/list.h>
35#include <linux/mm.h>
36#include <linux/mutex.h>
37#include <linux/mount.h>
38#include <linux/pagemap.h>
39#include <linux/proc_fs.h>
40#include <linux/rcupdate.h>
41#include <linux/sched.h>
42#include <linux/backing-dev.h>
43#include <linux/seq_file.h>
44#include <linux/slab.h>
45#include <linux/magic.h>
46#include <linux/spinlock.h>
47#include <linux/string.h>
48#include <linux/sort.h>
49#include <linux/kmod.h>
50#include <linux/module.h>
51#include <linux/delayacct.h>
52#include <linux/cgroupstats.h>
53#include <linux/hash.h>
54#include <linux/namei.h>
55#include <linux/smp_lock.h>
56#include <linux/pid_namespace.h>
57#include <linux/idr.h>
58#include <linux/vmalloc.h>
59#include <linux/eventfd.h>
60#include <linux/poll.h>
61
62#include <asm/atomic.h>
63
/*
 * Global cgroup mutex.  In this file it is taken by cgroup_lock(), by the
 * mount / remount / kill_sb paths and by cgroup_diput(); several helpers
 * (task_cgroup_from_root(), rebind_subsystems(), parse_cgroupfs_options())
 * BUG() when entered without it being locked.
 */
static DEFINE_MUTEX(cgroup_mutex);
65
66
67
68
69
70
71
/*
 * Table of cgroup subsystems.  SUBSYS(x) expands to "&x_subsys,", so each
 * SUBSYS() line produced by including <linux/cgroup_subsys.h> becomes one
 * array initialiser.  Code in this file treats bit i of a subsystem bitmask
 * as referring to subsys[i] and tolerates NULL slots when scanning.
 */
#define SUBSYS(_x) &_x ## _subsys,
static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
#include <linux/cgroup_subsys.h>
};
76
/* Size, including the terminating NUL, of cgroupfs_root.name. */
#define MAX_CGROUP_ROOT_NAMELEN 64

/*
 * One cgroupfs_root describes one hierarchy: the superblock it is mounted
 * on, the subsystems bound to it and its top-level cgroup.
 */
struct cgroupfs_root {
	/* Superblock of this hierarchy; set by cgroup_set_super(). */
	struct super_block *sb;

	/*
	 * Bitmask of subsystems requested for this hierarchy (bit i refers
	 * to subsys[i]); copied from the parsed mount options.
	 */
	unsigned long subsys_bits;

	/* Unique id allocated from hierarchy_ida by init_root_id(). */
	int hierarchy_id;

	/*
	 * Bitmask of subsystems currently bound; rebind_subsystems() diffs
	 * the requested mask against this one and then updates both.
	 */
	unsigned long actual_subsys_bits;

	/* Subsystems attached to this hierarchy, linked via ss->sibling. */
	struct list_head subsys_list;

	/* The root cgroup of this hierarchy. */
	struct cgroup top_cgroup;

	/* Starts at 1 (the top cgroup); decremented in cgroup_diput(). */
	int number_of_cgroups;

	/* Linkage on the global "roots" list of active hierarchies. */
	struct list_head root_list;

	/* ROOT_* flag bits (e.g. ROOT_NOPREFIX). */
	unsigned long flags;

	/* Copied from the "release_agent=" mount option; may be empty. */
	char release_agent_path[PATH_MAX];

	/* Copied from the "name=" mount option; may be empty. */
	char name[MAX_CGROUP_ROOT_NAMELEN];
};
120
121
122
123
124
125
/*
 * The built-in root that owns every subsystem not bound to a mounted
 * hierarchy: rebind_subsystems() only binds a subsystem whose ->root is
 * &rootnode and hands it back to rootnode when it is unbound.
 */
static struct cgroupfs_root rootnode;
127
128
129
130
131
132#define CSS_ID_MAX (65535)
133struct css_id {
134
135
136
137
138
139
140
141 struct cgroup_subsys_state *css;
142
143
144
145 unsigned short id;
146
147
148
149 unsigned short depth;
150
151
152
153 struct rcu_head rcu_head;
154
155
156
157 unsigned short stack[0];
158};
159
160
161
162
/*
 * State for one eventfd-based notification registered on a cgroup control
 * file.  No user of this structure is visible in this part of the file, so
 * the field notes are inferred from names and types - TODO confirm.
 */
struct cgroup_event {
	/* Cgroup the event is registered on. */
	struct cgroup *cgrp;

	/* Control file the event is attached to. */
	struct cftype *cft;

	/* eventfd context to signal. */
	struct eventfd_ctx *eventfd;

	/* Linkage, presumably on cgrp->event_list. */
	struct list_head list;

	/*
	 * Poll / wait-queue plumbing and a work item, presumably used to
	 * tear the event down asynchronously once the eventfd is closed.
	 */
	poll_table pt;
	wait_queue_head_t *wqh;
	wait_queue_t wait;
	struct work_struct remove;
};
189
190
191
/* List of active hierarchies (linked via root_list) and its length. */
static LIST_HEAD(roots);
static int root_count;

/*
 * Hierarchy id allocation state used by init_root_id() and
 * cgroup_drop_root(): the IDA itself, the id to try first on the next
 * allocation, and the spinlock held around IDA operations.
 */
static DEFINE_IDA(hierarchy_ida);
static int next_hierarchy_id;
static DEFINE_SPINLOCK(hierarchy_id_lock);

/* Shorthand for the top cgroup of the built-in rootnode hierarchy. */
#define dummytop (&rootnode.top_cgroup)
201
202
203
204
205
206
/*
 * Not referenced in the visible part of this file.
 * NOTE(review): by its name, presumably non-zero once some subsystem needs
 * fork/exit callbacks - confirm against the code that sets it.
 */
static int need_forkexit_callback __read_mostly;
208
209#ifdef CONFIG_PROVE_LOCKING
210int cgroup_lock_is_held(void)
211{
212 return lockdep_is_held(&cgroup_mutex);
213}
214#else
215int cgroup_lock_is_held(void)
216{
217 return mutex_is_locked(&cgroup_mutex);
218}
219#endif
220
221EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
222
223
224inline int cgroup_is_removed(const struct cgroup *cgrp)
225{
226 return test_bit(CGRP_REMOVED, &cgrp->flags);
227}
228
229
/* Bit numbers for cgroupfs_root.flags. */
enum {
	ROOT_NOPREFIX,	/* set by the "noprefix" mount option */
};
233
234static int cgroup_is_releasable(const struct cgroup *cgrp)
235{
236 const int bits =
237 (1 << CGRP_RELEASABLE) |
238 (1 << CGRP_NOTIFY_ON_RELEASE);
239 return (cgrp->flags & bits) == bits;
240}
241
242static int notify_on_release(const struct cgroup *cgrp)
243{
244 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
245}
246
247
248
249
250
/*
 * for_each_subsys() iterates @_ss over every subsystem attached to the
 * hierarchy @_root.  @_root is parenthesised so that any pointer
 * expression can be passed safely.
 */
#define for_each_subsys(_root, _ss) \
	list_for_each_entry(_ss, &(_root)->subsys_list, sibling)

/* for_each_active_root() iterates @_root over every entry on "roots". */
#define for_each_active_root(_root) \
	list_for_each_entry(_root, &roots, root_list)
257
258
259
/*
 * Release-agent machinery.  Only the declarations are visible here:
 * a list (presumably of cgroups awaiting release handling - confirm),
 * the spinlock declared alongside it, and the work item that runs
 * cgroup_release_agent().
 */
static LIST_HEAD(release_list);
static DEFINE_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
/* Called from __put_css_set() when a cgroup's css_set count drops to 0. */
static void check_for_release(struct cgroup *cgrp);
265
266
/*
 * Link object joining one css_set to one cgroup.  Each link sits on two
 * lists at once, giving a many-to-many relation between the two.
 */
struct cg_cgroup_link {
	/*
	 * Linkage on cgrp->css_sets (and, before use, on the temporary
	 * list built by allocate_cg_links()).
	 */
	struct list_head cgrp_link_list;
	struct cgroup *cgrp;

	/* Linkage on cg->cg_links. */
	struct list_head cg_link_list;
	struct css_set *cg;
};
281
282
283
284
285
286
287
288
/*
 * The initial css_set; task_cgroup_from_root() maps a task still using it
 * to the top cgroup of whichever root is asked about.
 */
static struct css_set init_css_set;
static struct cg_cgroup_link init_css_set_link;

static int cgroup_init_idr(struct cgroup_subsys *ss,
			   struct cgroup_subsys_state *css);

/*
 * css_set_lock is held around lookups and updates of css_set_table and of
 * the css_set <-> cgroup link lists in this file; css_set_count is the
 * number of css_sets currently hashed.
 */
static DEFINE_RWLOCK(css_set_lock);
static int css_set_count;
300
301
302
303
304
305
306#define CSS_SET_HASH_BITS 7
307#define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS)
308static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
309
310static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
311{
312 int i;
313 int index;
314 unsigned long tmp = 0UL;
315
316 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
317 tmp += (unsigned long)css[i];
318 tmp = (tmp >> 16) ^ tmp;
319
320 index = hash_long(tmp, CSS_SET_HASH_BITS);
321
322 return &css_set_table[index];
323}
324
325static void free_css_set_rcu(struct rcu_head *obj)
326{
327 struct css_set *cg = container_of(obj, struct css_set, rcu_head);
328 kfree(cg);
329}
330
331
332
333
334
/*
 * Not referenced in the visible part of this file.
 * NOTE(review): by its name, presumably non-zero once tasks are being
 * linked onto their css_set's task list - confirm against its users.
 */
static int use_task_css_set_links __read_mostly;
336
337static void __put_css_set(struct css_set *cg, int taskexit)
338{
339 struct cg_cgroup_link *link;
340 struct cg_cgroup_link *saved_link;
341
342
343
344
345
346 if (atomic_add_unless(&cg->refcount, -1, 1))
347 return;
348 write_lock(&css_set_lock);
349 if (!atomic_dec_and_test(&cg->refcount)) {
350 write_unlock(&css_set_lock);
351 return;
352 }
353
354
355 hlist_del(&cg->hlist);
356 css_set_count--;
357
358 list_for_each_entry_safe(link, saved_link, &cg->cg_links,
359 cg_link_list) {
360 struct cgroup *cgrp = link->cgrp;
361 list_del(&link->cg_link_list);
362 list_del(&link->cgrp_link_list);
363 if (atomic_dec_and_test(&cgrp->count) &&
364 notify_on_release(cgrp)) {
365 if (taskexit)
366 set_bit(CGRP_RELEASABLE, &cgrp->flags);
367 check_for_release(cgrp);
368 }
369
370 kfree(link);
371 }
372
373 write_unlock(&css_set_lock);
374 call_rcu(&cg->rcu_head, free_css_set_rcu);
375}
376
377
378
379
/* Take an additional reference on @cg. */
static inline void get_css_set(struct css_set *cg)
{
	atomic_inc(&cg->refcount);
}

/* Drop a reference on @cg (taskexit == 0 variant of __put_css_set()). */
static inline void put_css_set(struct css_set *cg)
{
	__put_css_set(cg, 0);
}

/*
 * Drop a reference on @cg with taskexit == 1, which lets __put_css_set()
 * mark emptied notify_on_release cgroups as CGRP_RELEASABLE.
 */
static inline void put_css_set_taskexit(struct css_set *cg)
{
	__put_css_set(cg, 1);
}
394
395
396
397
398
399
400
401
402
403
404
405static bool compare_css_sets(struct css_set *cg,
406 struct css_set *old_cg,
407 struct cgroup *new_cgrp,
408 struct cgroup_subsys_state *template[])
409{
410 struct list_head *l1, *l2;
411
412 if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
413
414 return false;
415 }
416
417
418
419
420
421
422
423
424
425
426 l1 = &cg->cg_links;
427 l2 = &old_cg->cg_links;
428 while (1) {
429 struct cg_cgroup_link *cgl1, *cgl2;
430 struct cgroup *cg1, *cg2;
431
432 l1 = l1->next;
433 l2 = l2->next;
434
435 if (l1 == &cg->cg_links) {
436 BUG_ON(l2 != &old_cg->cg_links);
437 break;
438 } else {
439 BUG_ON(l2 == &old_cg->cg_links);
440 }
441
442 cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
443 cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
444 cg1 = cgl1->cgrp;
445 cg2 = cgl2->cgrp;
446
447 BUG_ON(cg1->root != cg2->root);
448
449
450
451
452
453
454
455
456 if (cg1->root == new_cgrp->root) {
457 if (cg1 != new_cgrp)
458 return false;
459 } else {
460 if (cg1 != cg2)
461 return false;
462 }
463 }
464 return true;
465}
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480static struct css_set *find_existing_css_set(
481 struct css_set *oldcg,
482 struct cgroup *cgrp,
483 struct cgroup_subsys_state *template[])
484{
485 int i;
486 struct cgroupfs_root *root = cgrp->root;
487 struct hlist_head *hhead;
488 struct hlist_node *node;
489 struct css_set *cg;
490
491
492
493
494
495
496 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
497 if (root->subsys_bits & (1UL << i)) {
498
499
500
501 template[i] = cgrp->subsys[i];
502 } else {
503
504
505 template[i] = oldcg->subsys[i];
506 }
507 }
508
509 hhead = css_set_hash(template);
510 hlist_for_each_entry(cg, node, hhead, hlist) {
511 if (!compare_css_sets(cg, oldcg, cgrp, template))
512 continue;
513
514
515 return cg;
516 }
517
518
519 return NULL;
520}
521
522static void free_cg_links(struct list_head *tmp)
523{
524 struct cg_cgroup_link *link;
525 struct cg_cgroup_link *saved_link;
526
527 list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
528 list_del(&link->cgrp_link_list);
529 kfree(link);
530 }
531}
532
533
534
535
536
537
538static int allocate_cg_links(int count, struct list_head *tmp)
539{
540 struct cg_cgroup_link *link;
541 int i;
542 INIT_LIST_HEAD(tmp);
543 for (i = 0; i < count; i++) {
544 link = kmalloc(sizeof(*link), GFP_KERNEL);
545 if (!link) {
546 free_cg_links(tmp);
547 return -ENOMEM;
548 }
549 list_add(&link->cgrp_link_list, tmp);
550 }
551 return 0;
552}
553
554
555
556
557
558
559
560static void link_css_set(struct list_head *tmp_cg_links,
561 struct css_set *cg, struct cgroup *cgrp)
562{
563 struct cg_cgroup_link *link;
564
565 BUG_ON(list_empty(tmp_cg_links));
566 link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
567 cgrp_link_list);
568 link->cg = cg;
569 link->cgrp = cgrp;
570 atomic_inc(&cgrp->count);
571 list_move(&link->cgrp_link_list, &cgrp->css_sets);
572
573
574
575
576 list_add_tail(&link->cg_link_list, &cg->cg_links);
577}
578
579
580
581
582
583
584
585
586static struct css_set *find_css_set(
587 struct css_set *oldcg, struct cgroup *cgrp)
588{
589 struct css_set *res;
590 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
591
592 struct list_head tmp_cg_links;
593
594 struct hlist_head *hhead;
595 struct cg_cgroup_link *link;
596
597
598
599 read_lock(&css_set_lock);
600 res = find_existing_css_set(oldcg, cgrp, template);
601 if (res)
602 get_css_set(res);
603 read_unlock(&css_set_lock);
604
605 if (res)
606 return res;
607
608 res = kmalloc(sizeof(*res), GFP_KERNEL);
609 if (!res)
610 return NULL;
611
612
613 if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
614 kfree(res);
615 return NULL;
616 }
617
618 atomic_set(&res->refcount, 1);
619 INIT_LIST_HEAD(&res->cg_links);
620 INIT_LIST_HEAD(&res->tasks);
621 INIT_HLIST_NODE(&res->hlist);
622
623
624
625 memcpy(res->subsys, template, sizeof(res->subsys));
626
627 write_lock(&css_set_lock);
628
629 list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
630 struct cgroup *c = link->cgrp;
631 if (c->root == cgrp->root)
632 c = cgrp;
633 link_css_set(&tmp_cg_links, res, c);
634 }
635
636 BUG_ON(!list_empty(&tmp_cg_links));
637
638 css_set_count++;
639
640
641 hhead = css_set_hash(res->subsys);
642 hlist_add_head(&res->hlist, hhead);
643
644 write_unlock(&css_set_lock);
645
646 return res;
647}
648
649
650
651
652
653static struct cgroup *task_cgroup_from_root(struct task_struct *task,
654 struct cgroupfs_root *root)
655{
656 struct css_set *css;
657 struct cgroup *res = NULL;
658
659 BUG_ON(!mutex_is_locked(&cgroup_mutex));
660 read_lock(&css_set_lock);
661
662
663
664
665
666 css = task->cgroups;
667 if (css == &init_css_set) {
668 res = &root->top_cgroup;
669 } else {
670 struct cg_cgroup_link *link;
671 list_for_each_entry(link, &css->cg_links, cg_link_list) {
672 struct cgroup *c = link->cgrp;
673 if (c->root == root) {
674 res = c;
675 break;
676 }
677 }
678 }
679 read_unlock(&css_set_lock);
680 BUG_ON(!res);
681 return res;
682}
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
/**
 * cgroup_lock - take the global cgroup mutex
 *
 * Exported so that code outside this file can serialise against cgroup
 * operations.  May sleep (mutex_lock()).
 */
void cgroup_lock(void)
{
	mutex_lock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_lock);
743
744
745
746
747
748
/**
 * cgroup_unlock - release the global cgroup mutex
 *
 * Undoes a previous cgroup_lock().
 */
void cgroup_unlock(void)
{
	mutex_unlock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_unlock);
754
755
756
757
758
759
760
761
/* Forward declarations for definitions that appear later in the file. */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp);
static const struct inode_operations cgroup_dir_inode_operations;
static const struct file_operations proc_cgroupstats_operations;

/*
 * Backing device info attached to every cgroupfs inode by
 * cgroup_new_inode(); the capability flag opts out of accounting and
 * writeback.
 */
static struct backing_dev_info cgroup_backing_dev_info = {
	.name		= "cgroup",
	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
};

static int alloc_css_id(struct cgroup_subsys *ss,
			struct cgroup *parent, struct cgroup *child);
775
776static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
777{
778 struct inode *inode = new_inode(sb);
779
780 if (inode) {
781 inode->i_mode = mode;
782 inode->i_uid = current_fsuid();
783 inode->i_gid = current_fsgid();
784 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
785 inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
786 }
787 return inode;
788}
789
790
791
792
793
794static int cgroup_call_pre_destroy(struct cgroup *cgrp)
795{
796 struct cgroup_subsys *ss;
797 int ret = 0;
798
799 for_each_subsys(cgrp->root, ss)
800 if (ss->pre_destroy) {
801 ret = ss->pre_destroy(ss, cgrp);
802 if (ret)
803 break;
804 }
805
806 return ret;
807}
808
809static void free_cgroup_rcu(struct rcu_head *obj)
810{
811 struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head);
812
813 kfree(cgrp);
814}
815
816static void cgroup_diput(struct dentry *dentry, struct inode *inode)
817{
818
819 if (S_ISDIR(inode->i_mode)) {
820 struct cgroup *cgrp = dentry->d_fsdata;
821 struct cgroup_subsys *ss;
822 BUG_ON(!(cgroup_is_removed(cgrp)));
823
824
825
826
827
828
829 synchronize_rcu();
830
831 mutex_lock(&cgroup_mutex);
832
833
834
835 for_each_subsys(cgrp->root, ss)
836 ss->destroy(ss, cgrp);
837
838 cgrp->root->number_of_cgroups--;
839 mutex_unlock(&cgroup_mutex);
840
841
842
843
844
845 deactivate_super(cgrp->root->sb);
846
847
848
849
850
851 BUG_ON(!list_empty(&cgrp->pidlists));
852
853 call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
854 }
855 iput(inode);
856}
857
858static void remove_dir(struct dentry *d)
859{
860 struct dentry *parent = dget(d->d_parent);
861
862 d_delete(d);
863 simple_rmdir(parent->d_inode, d);
864 dput(parent);
865}
866
867static void cgroup_clear_directory(struct dentry *dentry)
868{
869 struct list_head *node;
870
871 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
872 spin_lock(&dcache_lock);
873 node = dentry->d_subdirs.next;
874 while (node != &dentry->d_subdirs) {
875 struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
876 list_del_init(node);
877 if (d->d_inode) {
878
879
880 BUG_ON(d->d_inode->i_mode & S_IFDIR);
881 d = dget_locked(d);
882 spin_unlock(&dcache_lock);
883 d_delete(d);
884 simple_unlink(dentry->d_inode, d);
885 dput(d);
886 spin_lock(&dcache_lock);
887 }
888 node = dentry->d_subdirs.next;
889 }
890 spin_unlock(&dcache_lock);
891}
892
893
894
895
896static void cgroup_d_remove_dir(struct dentry *dentry)
897{
898 cgroup_clear_directory(dentry);
899
900 spin_lock(&dcache_lock);
901 list_del_init(&dentry->d_u.d_child);
902 spin_unlock(&dcache_lock);
903 remove_dir(dentry);
904}
905
906
907
908
909
910
911
912
913
914DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
915
916static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
917{
918 if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
919 wake_up_all(&cgroup_rmdir_waitq);
920}
921
/*
 * Take a reference on @css.
 * NOTE(review): by its name this is meant to hold off rmdir of the owning
 * cgroup until cgroup_release_and_wakeup_rmdir() is called - the rmdir
 * side is not visible in this part of the file, confirm there.
 */
void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
{
	css_get(css);
}

/*
 * Counterpart of cgroup_exclude_rmdir(): wake any waiter flagged on the
 * css's cgroup via CGRP_WAIT_ON_RMDIR, then drop the css reference.
 */
void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
{
	cgroup_wakeup_rmdir_waiter(css->cgroup);
	css_put(css);
}
932
933
934
935
936
937
/*
 * rebind_subsystems - make @root's bound subsystems equal @final_bits
 * @root:       hierarchy to change
 * @final_bits: bitmask of subsystems that should end up bound to @root
 *
 * Must be called with cgroup_mutex locked.  Returns -EBUSY, changing
 * nothing, when a subsystem to be added is already bound to a root other
 * than rootnode or when @root has more than its top cgroup.  Otherwise
 * subsystems are moved between rootnode and @root as needed and 0 is
 * returned.
 *
 * Module references: callers pin subsystem modules beforehand (see
 * parse_cgroupfs_options()).  The pin is kept for a newly added
 * subsystem, released for a removed one, and the extra pin is released
 * for a subsystem that was already bound and stays bound.
 */
static int rebind_subsystems(struct cgroupfs_root *root,
			      unsigned long final_bits)
{
	unsigned long added_bits, removed_bits;
	struct cgroup *cgrp = &root->top_cgroup;
	int i;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));

	removed_bits = root->actual_subsys_bits & ~final_bits;
	added_bits = final_bits & ~root->actual_subsys_bits;
	/* Check that any added subsystems are currently free */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		unsigned long bit = 1UL << i;
		struct cgroup_subsys *ss = subsys[i];
		if (!(bit & added_bits))
			continue;
		/* A requested subsystem must have a table entry. */
		BUG_ON(ss == NULL);
		if (ss->root != &rootnode) {
			/* Subsystem isn't free */
			return -EBUSY;
		}
	}

	/*
	 * Rebinding is refused once the hierarchy has any cgroup besides
	 * its top cgroup.
	 */
	if (root->number_of_cgroups > 1)
		return -EBUSY;

	/* Process each subsystem */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		unsigned long bit = 1UL << i;
		if (bit & added_bits) {
			/* We're binding this subsystem to this hierarchy */
			BUG_ON(ss == NULL);
			BUG_ON(cgrp->subsys[i]);
			BUG_ON(!dummytop->subsys[i]);
			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
			mutex_lock(&ss->hierarchy_mutex);
			cgrp->subsys[i] = dummytop->subsys[i];
			cgrp->subsys[i]->cgroup = cgrp;
			list_move(&ss->sibling, &root->subsys_list);
			ss->root = root;
			if (ss->bind)
				ss->bind(ss, cgrp);
			mutex_unlock(&ss->hierarchy_mutex);
			/* the module pin taken by the caller is kept */
		} else if (bit & removed_bits) {
			/* We're removing this subsystem */
			BUG_ON(ss == NULL);
			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
			mutex_lock(&ss->hierarchy_mutex);
			if (ss->bind)
				ss->bind(ss, dummytop);
			dummytop->subsys[i]->cgroup = dummytop;
			cgrp->subsys[i] = NULL;
			subsys[i]->root = &rootnode;
			list_move(&ss->sibling, &rootnode.subsys_list);
			mutex_unlock(&ss->hierarchy_mutex);
			/* subsystem is now free - drop the module pin */
			module_put(ss->module);
		} else if (bit & final_bits) {
			/* Subsystem state should already exist */
			BUG_ON(ss == NULL);
			BUG_ON(!cgrp->subsys[i]);
			/*
			 * Already bound and staying bound: drop the extra
			 * pin taken by the caller; one must still remain.
			 */
			module_put(ss->module);
#ifdef CONFIG_MODULE_UNLOAD
			BUG_ON(ss->module && !module_refcount(ss->module));
#endif
		} else {
			/* Subsystem state shouldn't exist */
			BUG_ON(cgrp->subsys[i]);
		}
	}
	root->subsys_bits = root->actual_subsys_bits = final_bits;
	synchronize_rcu();

	return 0;
}
1030
1031static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
1032{
1033 struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;
1034 struct cgroup_subsys *ss;
1035
1036 mutex_lock(&cgroup_mutex);
1037 for_each_subsys(root, ss)
1038 seq_printf(seq, ",%s", ss->name);
1039 if (test_bit(ROOT_NOPREFIX, &root->flags))
1040 seq_puts(seq, ",noprefix");
1041 if (strlen(root->release_agent_path))
1042 seq_printf(seq, ",release_agent=%s", root->release_agent_path);
1043 if (strlen(root->name))
1044 seq_printf(seq, ",name=%s", root->name);
1045 mutex_unlock(&cgroup_mutex);
1046 return 0;
1047}
1048
/* Result of parsing a cgroupfs mount option string. */
struct cgroup_sb_opts {
	/* Bitmask of requested subsystems (bit i refers to subsys[i]). */
	unsigned long subsys_bits;
	/* ROOT_* flag bits requested (e.g. ROOT_NOPREFIX). */
	unsigned long flags;
	/* kstrndup()ed "release_agent=" value, or NULL; caller frees. */
	char *release_agent;
	/* kstrndup()ed "name=" value, or NULL; caller frees. */
	char *name;
	/* User explicitly requested empty subsystem */
	bool none;

	/* Root prepared by cgroup_get_sb() for cgroup_set_super(). */
	struct cgroupfs_root *new_root;

};
1060
1061
1062
1063
1064
1065
1066
1067static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1068{
1069 char *token, *o = data ?: "all";
1070 unsigned long mask = (unsigned long)-1;
1071 int i;
1072 bool module_pin_failed = false;
1073
1074 BUG_ON(!mutex_is_locked(&cgroup_mutex));
1075
1076#ifdef CONFIG_CPUSETS
1077 mask = ~(1UL << cpuset_subsys_id);
1078#endif
1079
1080 memset(opts, 0, sizeof(*opts));
1081
1082 while ((token = strsep(&o, ",")) != NULL) {
1083 if (!*token)
1084 return -EINVAL;
1085 if (!strcmp(token, "all")) {
1086
1087 opts->subsys_bits = 0;
1088 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1089 struct cgroup_subsys *ss = subsys[i];
1090 if (ss == NULL)
1091 continue;
1092 if (!ss->disabled)
1093 opts->subsys_bits |= 1ul << i;
1094 }
1095 } else if (!strcmp(token, "none")) {
1096
1097 opts->none = true;
1098 } else if (!strcmp(token, "noprefix")) {
1099 set_bit(ROOT_NOPREFIX, &opts->flags);
1100 } else if (!strncmp(token, "release_agent=", 14)) {
1101
1102 if (opts->release_agent)
1103 return -EINVAL;
1104 opts->release_agent =
1105 kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
1106 if (!opts->release_agent)
1107 return -ENOMEM;
1108 } else if (!strncmp(token, "name=", 5)) {
1109 const char *name = token + 5;
1110
1111 if (!strlen(name))
1112 return -EINVAL;
1113
1114 for (i = 0; i < strlen(name); i++) {
1115 char c = name[i];
1116 if (isalnum(c))
1117 continue;
1118 if ((c == '.') || (c == '-') || (c == '_'))
1119 continue;
1120 return -EINVAL;
1121 }
1122
1123 if (opts->name)
1124 return -EINVAL;
1125 opts->name = kstrndup(name,
1126 MAX_CGROUP_ROOT_NAMELEN - 1,
1127 GFP_KERNEL);
1128 if (!opts->name)
1129 return -ENOMEM;
1130 } else {
1131 struct cgroup_subsys *ss;
1132 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1133 ss = subsys[i];
1134 if (ss == NULL)
1135 continue;
1136 if (!strcmp(token, ss->name)) {
1137 if (!ss->disabled)
1138 set_bit(i, &opts->subsys_bits);
1139 break;
1140 }
1141 }
1142 if (i == CGROUP_SUBSYS_COUNT)
1143 return -ENOENT;
1144 }
1145 }
1146
1147
1148
1149
1150
1151
1152
1153
1154 if (test_bit(ROOT_NOPREFIX, &opts->flags) &&
1155 (opts->subsys_bits & mask))
1156 return -EINVAL;
1157
1158
1159
1160 if (opts->subsys_bits && opts->none)
1161 return -EINVAL;
1162
1163
1164
1165
1166
1167 if (!opts->subsys_bits && !opts->name)
1168 return -EINVAL;
1169
1170
1171
1172
1173
1174
1175
1176 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
1177 unsigned long bit = 1UL << i;
1178
1179 if (!(bit & opts->subsys_bits))
1180 continue;
1181 if (!try_module_get(subsys[i]->module)) {
1182 module_pin_failed = true;
1183 break;
1184 }
1185 }
1186 if (module_pin_failed) {
1187
1188
1189
1190
1191
1192 for (i--; i >= CGROUP_BUILTIN_SUBSYS_COUNT; i--) {
1193
1194 unsigned long bit = 1UL << i;
1195
1196 if (!(bit & opts->subsys_bits))
1197 continue;
1198 module_put(subsys[i]->module);
1199 }
1200 return -ENOENT;
1201 }
1202
1203 return 0;
1204}
1205
1206static void drop_parsed_module_refcounts(unsigned long subsys_bits)
1207{
1208 int i;
1209 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
1210 unsigned long bit = 1UL << i;
1211
1212 if (!(bit & subsys_bits))
1213 continue;
1214 module_put(subsys[i]->module);
1215 }
1216}
1217
1218static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1219{
1220 int ret = 0;
1221 struct cgroupfs_root *root = sb->s_fs_info;
1222 struct cgroup *cgrp = &root->top_cgroup;
1223 struct cgroup_sb_opts opts;
1224
1225 lock_kernel();
1226 mutex_lock(&cgrp->dentry->d_inode->i_mutex);
1227 mutex_lock(&cgroup_mutex);
1228
1229
1230 ret = parse_cgroupfs_options(data, &opts);
1231 if (ret)
1232 goto out_unlock;
1233
1234
1235 if (opts.flags != root->flags ||
1236 (opts.name && strcmp(opts.name, root->name))) {
1237 ret = -EINVAL;
1238 drop_parsed_module_refcounts(opts.subsys_bits);
1239 goto out_unlock;
1240 }
1241
1242 ret = rebind_subsystems(root, opts.subsys_bits);
1243 if (ret) {
1244 drop_parsed_module_refcounts(opts.subsys_bits);
1245 goto out_unlock;
1246 }
1247
1248
1249 cgroup_populate_dir(cgrp);
1250
1251 if (opts.release_agent)
1252 strcpy(root->release_agent_path, opts.release_agent);
1253 out_unlock:
1254 kfree(opts.release_agent);
1255 kfree(opts.name);
1256 mutex_unlock(&cgroup_mutex);
1257 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
1258 unlock_kernel();
1259 return ret;
1260}
1261
/*
 * Superblock operations for cgroupfs, installed by cgroup_set_super().
 * statfs and drop_inode use generic VFS helpers; option display and
 * remount are implemented in this file.
 */
static const struct super_operations cgroup_ops = {
	.statfs = simple_statfs,
	.drop_inode = generic_delete_inode,
	.show_options = cgroup_show_options,
	.remount_fs = cgroup_remount,
};
1268
1269static void init_cgroup_housekeeping(struct cgroup *cgrp)
1270{
1271 INIT_LIST_HEAD(&cgrp->sibling);
1272 INIT_LIST_HEAD(&cgrp->children);
1273 INIT_LIST_HEAD(&cgrp->css_sets);
1274 INIT_LIST_HEAD(&cgrp->release_list);
1275 INIT_LIST_HEAD(&cgrp->pidlists);
1276 mutex_init(&cgrp->pidlist_mutex);
1277 INIT_LIST_HEAD(&cgrp->event_list);
1278 spin_lock_init(&cgrp->event_list_lock);
1279}
1280
1281static void init_cgroup_root(struct cgroupfs_root *root)
1282{
1283 struct cgroup *cgrp = &root->top_cgroup;
1284 INIT_LIST_HEAD(&root->subsys_list);
1285 INIT_LIST_HEAD(&root->root_list);
1286 root->number_of_cgroups = 1;
1287 cgrp->root = root;
1288 cgrp->top_cgroup = cgrp;
1289 init_cgroup_housekeeping(cgrp);
1290}
1291
1292static bool init_root_id(struct cgroupfs_root *root)
1293{
1294 int ret = 0;
1295
1296 do {
1297 if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL))
1298 return false;
1299 spin_lock(&hierarchy_id_lock);
1300
1301 ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
1302 &root->hierarchy_id);
1303 if (ret == -ENOSPC)
1304
1305 ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id);
1306 if (!ret) {
1307 next_hierarchy_id = root->hierarchy_id + 1;
1308 } else if (ret != -EAGAIN) {
1309
1310 BUG_ON(ret);
1311 }
1312 spin_unlock(&hierarchy_id_lock);
1313 } while (ret);
1314 return true;
1315}
1316
1317static int cgroup_test_super(struct super_block *sb, void *data)
1318{
1319 struct cgroup_sb_opts *opts = data;
1320 struct cgroupfs_root *root = sb->s_fs_info;
1321
1322
1323 if (opts->name && strcmp(opts->name, root->name))
1324 return 0;
1325
1326
1327
1328
1329
1330 if ((opts->subsys_bits || opts->none)
1331 && (opts->subsys_bits != root->subsys_bits))
1332 return 0;
1333
1334 return 1;
1335}
1336
1337static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
1338{
1339 struct cgroupfs_root *root;
1340
1341 if (!opts->subsys_bits && !opts->none)
1342 return NULL;
1343
1344 root = kzalloc(sizeof(*root), GFP_KERNEL);
1345 if (!root)
1346 return ERR_PTR(-ENOMEM);
1347
1348 if (!init_root_id(root)) {
1349 kfree(root);
1350 return ERR_PTR(-ENOMEM);
1351 }
1352 init_cgroup_root(root);
1353
1354 root->subsys_bits = opts->subsys_bits;
1355 root->flags = opts->flags;
1356 if (opts->release_agent)
1357 strcpy(root->release_agent_path, opts->release_agent);
1358 if (opts->name)
1359 strcpy(root->name, opts->name);
1360 return root;
1361}
1362
1363static void cgroup_drop_root(struct cgroupfs_root *root)
1364{
1365 if (!root)
1366 return;
1367
1368 BUG_ON(!root->hierarchy_id);
1369 spin_lock(&hierarchy_id_lock);
1370 ida_remove(&hierarchy_ida, root->hierarchy_id);
1371 spin_unlock(&hierarchy_id_lock);
1372 kfree(root);
1373}
1374
1375static int cgroup_set_super(struct super_block *sb, void *data)
1376{
1377 int ret;
1378 struct cgroup_sb_opts *opts = data;
1379
1380
1381 if (!opts->new_root)
1382 return -EINVAL;
1383
1384 BUG_ON(!opts->subsys_bits && !opts->none);
1385
1386 ret = set_anon_super(sb, NULL);
1387 if (ret)
1388 return ret;
1389
1390 sb->s_fs_info = opts->new_root;
1391 opts->new_root->sb = sb;
1392
1393 sb->s_blocksize = PAGE_CACHE_SIZE;
1394 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1395 sb->s_magic = CGROUP_SUPER_MAGIC;
1396 sb->s_op = &cgroup_ops;
1397
1398 return 0;
1399}
1400
1401static int cgroup_get_rootdir(struct super_block *sb)
1402{
1403 struct inode *inode =
1404 cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
1405 struct dentry *dentry;
1406
1407 if (!inode)
1408 return -ENOMEM;
1409
1410 inode->i_fop = &simple_dir_operations;
1411 inode->i_op = &cgroup_dir_inode_operations;
1412
1413 inc_nlink(inode);
1414 dentry = d_alloc_root(inode);
1415 if (!dentry) {
1416 iput(inode);
1417 return -ENOMEM;
1418 }
1419 sb->s_root = dentry;
1420 return 0;
1421}
1422
/*
 * ->get_sb() for cgroupfs: mount a hierarchy.
 *
 * The option string in @data is parsed, a candidate root is built from
 * it, and sget() either finds an existing superblock that matches
 * (cgroup_test_super()) or creates a new one around the candidate root
 * (cgroup_set_super()).
 *
 * For a new hierarchy the root directory is created, the name is checked
 * for uniqueness among active roots, the requested subsystems are bound,
 * the root is added to "roots", every existing css_set is linked to the
 * new top cgroup and the directory is populated.
 *
 * For an existing hierarchy the candidate root and the module references
 * taken while parsing are dropped again.
 *
 * Returns 0 on success or a negative errno.  opts.release_agent and
 * opts.name are freed on every path.
 */
static int cgroup_get_sb(struct file_system_type *fs_type,
			 int flags, const char *unused_dev_name,
			 void *data, struct vfsmount *mnt)
{
	struct cgroup_sb_opts opts;
	struct cgroupfs_root *root;
	int ret = 0;
	struct super_block *sb;
	struct cgroupfs_root *new_root;

	/* First find the desired set of subsystems */
	mutex_lock(&cgroup_mutex);
	ret = parse_cgroupfs_options(data, &opts);
	mutex_unlock(&cgroup_mutex);
	if (ret)
		goto out_err;

	/*
	 * Allocate a new cgroup root. We may not need it if we're
	 * reusing an existing hierarchy.
	 */
	new_root = cgroup_root_from_opts(&opts);
	if (IS_ERR(new_root)) {
		ret = PTR_ERR(new_root);
		goto drop_modules;
	}
	opts.new_root = new_root;

	/* Locate an existing or new sb for this hierarchy */
	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
	if (IS_ERR(sb)) {
		ret = PTR_ERR(sb);
		cgroup_drop_root(opts.new_root);
		goto drop_modules;
	}

	root = sb->s_fs_info;
	BUG_ON(!root);
	if (root == opts.new_root) {
		/* We used the new root structure, so this is a new hierarchy */
		struct list_head tmp_cg_links;
		struct cgroup *root_cgrp = &root->top_cgroup;
		struct inode *inode;
		struct cgroupfs_root *existing_root;
		int i;

		BUG_ON(sb->s_root != NULL);

		ret = cgroup_get_rootdir(sb);
		if (ret)
			goto drop_new_super;
		inode = sb->s_root->d_inode;

		mutex_lock(&inode->i_mutex);
		mutex_lock(&cgroup_mutex);

		if (strlen(root->name)) {
			/* Check for name clashes with existing mounts */
			for_each_active_root(existing_root) {
				if (!strcmp(existing_root->name, root->name)) {
					ret = -EBUSY;
					mutex_unlock(&cgroup_mutex);
					mutex_unlock(&inode->i_mutex);
					goto drop_new_super;
				}
			}
		}

		/*
		 * One link per existing css_set is needed to attach them
		 * all to the new top cgroup.  The links are allocated here,
		 * before css_set_lock is taken further down.
		 */
		ret = allocate_cg_links(css_set_count, &tmp_cg_links);
		if (ret) {
			mutex_unlock(&cgroup_mutex);
			mutex_unlock(&inode->i_mutex);
			goto drop_new_super;
		}

		ret = rebind_subsystems(root, root->subsys_bits);
		if (ret == -EBUSY) {
			mutex_unlock(&cgroup_mutex);
			mutex_unlock(&inode->i_mutex);
			free_cg_links(&tmp_cg_links);
			goto drop_new_super;
		}
		/*
		 * -EBUSY is the only error rebind_subsystems() returns;
		 * from here on the module references taken while parsing
		 * belong to the hierarchy.
		 */

		/* EBUSY should be the only error here */
		BUG_ON(ret);

		list_add(&root->root_list, &roots);
		root_count++;

		sb->s_root->d_fsdata = root_cgrp;
		root->top_cgroup.dentry = sb->s_root;

		/* Link the top cgroup in this hierarchy into all
		 * the css_set objects */
		write_lock(&css_set_lock);
		for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
			struct hlist_head *hhead = &css_set_table[i];
			struct hlist_node *node;
			struct css_set *cg;

			hlist_for_each_entry(cg, node, hhead, hlist)
				link_css_set(&tmp_cg_links, cg, root_cgrp);
		}
		write_unlock(&css_set_lock);

		/* frees whatever links were not consumed above */
		free_cg_links(&tmp_cg_links);

		BUG_ON(!list_empty(&root_cgrp->sibling));
		BUG_ON(!list_empty(&root_cgrp->children));
		BUG_ON(root->number_of_cgroups != 1);

		cgroup_populate_dir(root_cgrp);
		mutex_unlock(&cgroup_mutex);
		mutex_unlock(&inode->i_mutex);
	} else {
		/*
		 * We re-used an existing hierarchy - the new root (if
		 * any) is not needed
		 */
		cgroup_drop_root(opts.new_root);
		/* no subsys rebinding, so refcounts don't change */
		drop_parsed_module_refcounts(opts.subsys_bits);
	}

	simple_set_mnt(mnt, sb);
	kfree(opts.release_agent);
	kfree(opts.name);
	return 0;

 drop_new_super:
	deactivate_locked_super(sb);
 drop_modules:
	drop_parsed_module_refcounts(opts.subsys_bits);
 out_err:
	kfree(opts.release_agent);
	kfree(opts.name);

	return ret;
}
1574
1575static void cgroup_kill_sb(struct super_block *sb) {
1576 struct cgroupfs_root *root = sb->s_fs_info;
1577 struct cgroup *cgrp = &root->top_cgroup;
1578 int ret;
1579 struct cg_cgroup_link *link;
1580 struct cg_cgroup_link *saved_link;
1581
1582 BUG_ON(!root);
1583
1584 BUG_ON(root->number_of_cgroups != 1);
1585 BUG_ON(!list_empty(&cgrp->children));
1586 BUG_ON(!list_empty(&cgrp->sibling));
1587
1588 mutex_lock(&cgroup_mutex);
1589
1590
1591 ret = rebind_subsystems(root, 0);
1592
1593 BUG_ON(ret);
1594
1595
1596
1597
1598
1599 write_lock(&css_set_lock);
1600
1601 list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
1602 cgrp_link_list) {
1603 list_del(&link->cg_link_list);
1604 list_del(&link->cgrp_link_list);
1605 kfree(link);
1606 }
1607 write_unlock(&css_set_lock);
1608
1609 if (!list_empty(&root->root_list)) {
1610 list_del(&root->root_list);
1611 root_count--;
1612 }
1613
1614 mutex_unlock(&cgroup_mutex);
1615
1616 kill_litter_super(sb);
1617 cgroup_drop_root(root);
1618}
1619
/* The "cgroup" filesystem type: mount and unmount entry points. */
static struct file_system_type cgroup_fs_type = {
	.name = "cgroup",
	.get_sb = cgroup_get_sb,
	.kill_sb = cgroup_kill_sb,
};

/*
 * Not used in the visible part of this file.
 * NOTE(review): presumably the kobject for a cgroup sysfs entry - confirm
 * against the initialisation code.
 */
static struct kobject *cgroup_kobj;
1627
/*
 * d_fsdata of a cgroupfs dentry is interpreted in two ways.  For
 * directory dentries it holds the struct cgroup (see cgroup_diput() and
 * cgroup_get_sb()); __d_cft() reads the same field as a struct cftype,
 * presumably for control-file dentries - the code that sets that up is
 * not visible here.  The caller must know which kind of dentry it has.
 */
static inline struct cgroup *__d_cgrp(struct dentry *dentry)
{
	return dentry->d_fsdata;
}

static inline struct cftype *__d_cft(struct dentry *dentry)
{
	return dentry->d_fsdata;
}
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1649{
1650 char *start;
1651 struct dentry *dentry = rcu_dereference_check(cgrp->dentry,
1652 rcu_read_lock_held() ||
1653 cgroup_lock_is_held());
1654
1655 if (!dentry || cgrp == dummytop) {
1656
1657
1658
1659
1660 strcpy(buf, "/");
1661 return 0;
1662 }
1663
1664 start = buf + buflen;
1665
1666 *--start = '\0';
1667 for (;;) {
1668 int len = dentry->d_name.len;
1669
1670 if ((start -= len) < buf)
1671 return -ENAMETOOLONG;
1672 memcpy(start, dentry->d_name.name, len);
1673 cgrp = cgrp->parent;
1674 if (!cgrp)
1675 break;
1676
1677 dentry = rcu_dereference_check(cgrp->dentry,
1678 rcu_read_lock_held() ||
1679 cgroup_lock_is_held());
1680 if (!cgrp->parent)
1681 continue;
1682 if (--start < buf)
1683 return -ENAMETOOLONG;
1684 *start = '/';
1685 }
1686 memmove(buf, start, buf + buflen - start);
1687 return 0;
1688}
1689EXPORT_SYMBOL_GPL(cgroup_path);
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1700{
1701 int retval = 0;
1702 struct cgroup_subsys *ss, *failed_ss = NULL;
1703 struct cgroup *oldcgrp;
1704 struct css_set *cg;
1705 struct css_set *newcg;
1706 struct cgroupfs_root *root = cgrp->root;
1707
1708
1709 oldcgrp = task_cgroup_from_root(tsk, root);
1710 if (cgrp == oldcgrp)
1711 return 0;
1712
1713 for_each_subsys(root, ss) {
1714 if (ss->can_attach) {
1715 retval = ss->can_attach(ss, cgrp, tsk, false);
1716 if (retval) {
1717
1718
1719
1720
1721
1722
1723 failed_ss = ss;
1724 goto out;
1725 }
1726 }
1727 }
1728
1729 task_lock(tsk);
1730 cg = tsk->cgroups;
1731 get_css_set(cg);
1732 task_unlock(tsk);
1733
1734
1735
1736
1737 newcg = find_css_set(cg, cgrp);
1738 put_css_set(cg);
1739 if (!newcg) {
1740 retval = -ENOMEM;
1741 goto out;
1742 }
1743
1744 task_lock(tsk);
1745 if (tsk->flags & PF_EXITING) {
1746 task_unlock(tsk);
1747 put_css_set(newcg);
1748 retval = -ESRCH;
1749 goto out;
1750 }
1751 rcu_assign_pointer(tsk->cgroups, newcg);
1752 task_unlock(tsk);
1753
1754
1755 write_lock(&css_set_lock);
1756 if (!list_empty(&tsk->cg_list)) {
1757 list_del(&tsk->cg_list);
1758 list_add(&tsk->cg_list, &newcg->tasks);
1759 }
1760 write_unlock(&css_set_lock);
1761
1762 for_each_subsys(root, ss) {
1763 if (ss->attach)
1764 ss->attach(ss, cgrp, oldcgrp, tsk, false);
1765 }
1766 set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
1767 synchronize_rcu();
1768 put_css_set(cg);
1769
1770
1771
1772
1773
1774 cgroup_wakeup_rmdir_waiter(cgrp);
1775out:
1776 if (retval) {
1777 for_each_subsys(root, ss) {
1778 if (ss == failed_ss)
1779
1780
1781
1782
1783
1784
1785 break;
1786 if (ss->cancel_attach)
1787 ss->cancel_attach(ss, cgrp, tsk, false);
1788 }
1789 }
1790 return retval;
1791}
1792
1793
1794
1795
1796
1797
1798int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
1799{
1800 struct cgroupfs_root *root;
1801 int retval = 0;
1802
1803 cgroup_lock();
1804 for_each_active_root(root) {
1805 struct cgroup *from_cg = task_cgroup_from_root(from, root);
1806
1807 retval = cgroup_attach_task(from_cg, tsk);
1808 if (retval)
1809 break;
1810 }
1811 cgroup_unlock();
1812
1813 return retval;
1814}
1815EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
1816
1817
1818
1819
1820
1821static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
1822{
1823 struct task_struct *tsk;
1824 const struct cred *cred = current_cred(), *tcred;
1825 int ret;
1826
1827 if (pid) {
1828 rcu_read_lock();
1829 tsk = find_task_by_vpid(pid);
1830 if (!tsk || tsk->flags & PF_EXITING) {
1831 rcu_read_unlock();
1832 return -ESRCH;
1833 }
1834
1835 tcred = __task_cred(tsk);
1836 if (cred->euid &&
1837 cred->euid != tcred->uid &&
1838 cred->euid != tcred->suid) {
1839 rcu_read_unlock();
1840 return -EACCES;
1841 }
1842 get_task_struct(tsk);
1843 rcu_read_unlock();
1844 } else {
1845 tsk = current;
1846 get_task_struct(tsk);
1847 }
1848
1849 ret = cgroup_attach_task(cgrp, tsk);
1850 put_task_struct(tsk);
1851 return ret;
1852}
1853
1854static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
1855{
1856 int ret;
1857 if (!cgroup_lock_live_group(cgrp))
1858 return -ENODEV;
1859 ret = attach_task_by_pid(cgrp, pid);
1860 cgroup_unlock();
1861 return ret;
1862}
1863
1864
1865
1866
1867
1868
1869
1870
1871bool cgroup_lock_live_group(struct cgroup *cgrp)
1872{
1873 mutex_lock(&cgroup_mutex);
1874 if (cgroup_is_removed(cgrp)) {
1875 mutex_unlock(&cgroup_mutex);
1876 return false;
1877 }
1878 return true;
1879}
1880EXPORT_SYMBOL_GPL(cgroup_lock_live_group);
1881
1882static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
1883 const char *buffer)
1884{
1885 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
1886 if (!cgroup_lock_live_group(cgrp))
1887 return -ENODEV;
1888 strcpy(cgrp->root->release_agent_path, buffer);
1889 cgroup_unlock();
1890 return 0;
1891}
1892
1893static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
1894 struct seq_file *seq)
1895{
1896 if (!cgroup_lock_live_group(cgrp))
1897 return -ENODEV;
1898 seq_puts(seq, cgrp->root->release_agent_path);
1899 seq_putc(seq, '\n');
1900 cgroup_unlock();
1901 return 0;
1902}
1903
1904
1905#define CGROUP_LOCAL_BUFFER_SIZE 64
1906
1907static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
1908 struct file *file,
1909 const char __user *userbuf,
1910 size_t nbytes, loff_t *unused_ppos)
1911{
1912 char buffer[CGROUP_LOCAL_BUFFER_SIZE];
1913 int retval = 0;
1914 char *end;
1915
1916 if (!nbytes)
1917 return -EINVAL;
1918 if (nbytes >= sizeof(buffer))
1919 return -E2BIG;
1920 if (copy_from_user(buffer, userbuf, nbytes))
1921 return -EFAULT;
1922
1923 buffer[nbytes] = 0;
1924 if (cft->write_u64) {
1925 u64 val = simple_strtoull(strstrip(buffer), &end, 0);
1926 if (*end)
1927 return -EINVAL;
1928 retval = cft->write_u64(cgrp, cft, val);
1929 } else {
1930 s64 val = simple_strtoll(strstrip(buffer), &end, 0);
1931 if (*end)
1932 return -EINVAL;
1933 retval = cft->write_s64(cgrp, cft, val);
1934 }
1935 if (!retval)
1936 retval = nbytes;
1937 return retval;
1938}
1939
1940static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
1941 struct file *file,
1942 const char __user *userbuf,
1943 size_t nbytes, loff_t *unused_ppos)
1944{
1945 char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
1946 int retval = 0;
1947 size_t max_bytes = cft->max_write_len;
1948 char *buffer = local_buffer;
1949
1950 if (!max_bytes)
1951 max_bytes = sizeof(local_buffer) - 1;
1952 if (nbytes >= max_bytes)
1953 return -E2BIG;
1954
1955 if (nbytes >= sizeof(local_buffer)) {
1956 buffer = kmalloc(nbytes + 1, GFP_KERNEL);
1957 if (buffer == NULL)
1958 return -ENOMEM;
1959 }
1960 if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
1961 retval = -EFAULT;
1962 goto out;
1963 }
1964
1965 buffer[nbytes] = 0;
1966 retval = cft->write_string(cgrp, cft, strstrip(buffer));
1967 if (!retval)
1968 retval = nbytes;
1969out:
1970 if (buffer != local_buffer)
1971 kfree(buffer);
1972 return retval;
1973}
1974
1975static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
1976 size_t nbytes, loff_t *ppos)
1977{
1978 struct cftype *cft = __d_cft(file->f_dentry);
1979 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
1980
1981 if (cgroup_is_removed(cgrp))
1982 return -ENODEV;
1983 if (cft->write)
1984 return cft->write(cgrp, cft, file, buf, nbytes, ppos);
1985 if (cft->write_u64 || cft->write_s64)
1986 return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
1987 if (cft->write_string)
1988 return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
1989 if (cft->trigger) {
1990 int ret = cft->trigger(cgrp, (unsigned int)cft->private);
1991 return ret ? ret : nbytes;
1992 }
1993 return -EINVAL;
1994}
1995
1996static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
1997 struct file *file,
1998 char __user *buf, size_t nbytes,
1999 loff_t *ppos)
2000{
2001 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2002 u64 val = cft->read_u64(cgrp, cft);
2003 int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
2004
2005 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2006}
2007
2008static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
2009 struct file *file,
2010 char __user *buf, size_t nbytes,
2011 loff_t *ppos)
2012{
2013 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2014 s64 val = cft->read_s64(cgrp, cft);
2015 int len = sprintf(tmp, "%lld\n", (long long) val);
2016
2017 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2018}
2019
2020static ssize_t cgroup_file_read(struct file *file, char __user *buf,
2021 size_t nbytes, loff_t *ppos)
2022{
2023 struct cftype *cft = __d_cft(file->f_dentry);
2024 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2025
2026 if (cgroup_is_removed(cgrp))
2027 return -ENODEV;
2028
2029 if (cft->read)
2030 return cft->read(cgrp, cft, file, buf, nbytes, ppos);
2031 if (cft->read_u64)
2032 return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
2033 if (cft->read_s64)
2034 return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
2035 return -EINVAL;
2036}
2037
2038
2039
2040
2041
2042
/*
 * Per-open state stored as the seq_file's private data for files that
 * are served through read_map or read_seq_string.
 */
struct cgroup_seqfile_state {
	struct cftype *cft;	/* the control file being read */
	struct cgroup *cgroup;	/* the cgroup the file belongs to */
};
2047
2048static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
2049{
2050 struct seq_file *sf = cb->state;
2051 return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
2052}
2053
2054static int cgroup_seqfile_show(struct seq_file *m, void *arg)
2055{
2056 struct cgroup_seqfile_state *state = m->private;
2057 struct cftype *cft = state->cft;
2058 if (cft->read_map) {
2059 struct cgroup_map_cb cb = {
2060 .fill = cgroup_map_add,
2061 .state = m,
2062 };
2063 return cft->read_map(state->cgroup, cft, &cb);
2064 }
2065 return cft->read_seq_string(state->cgroup, cft, m);
2066}
2067
2068static int cgroup_seqfile_release(struct inode *inode, struct file *file)
2069{
2070 struct seq_file *seq = file->private_data;
2071 kfree(seq->private);
2072 return single_release(inode, file);
2073}
2074
2075static const struct file_operations cgroup_seqfile_operations = {
2076 .read = seq_read,
2077 .write = cgroup_file_write,
2078 .llseek = seq_lseek,
2079 .release = cgroup_seqfile_release,
2080};
2081
2082static int cgroup_file_open(struct inode *inode, struct file *file)
2083{
2084 int err;
2085 struct cftype *cft;
2086
2087 err = generic_file_open(inode, file);
2088 if (err)
2089 return err;
2090 cft = __d_cft(file->f_dentry);
2091
2092 if (cft->read_map || cft->read_seq_string) {
2093 struct cgroup_seqfile_state *state =
2094 kzalloc(sizeof(*state), GFP_USER);
2095 if (!state)
2096 return -ENOMEM;
2097 state->cft = cft;
2098 state->cgroup = __d_cgrp(file->f_dentry->d_parent);
2099 file->f_op = &cgroup_seqfile_operations;
2100 err = single_open(file, cgroup_seqfile_show, state);
2101 if (err < 0)
2102 kfree(state);
2103 } else if (cft->open)
2104 err = cft->open(inode, file);
2105 else
2106 err = 0;
2107
2108 return err;
2109}
2110
2111static int cgroup_file_release(struct inode *inode, struct file *file)
2112{
2113 struct cftype *cft = __d_cft(file->f_dentry);
2114 if (cft->release)
2115 return cft->release(inode, file);
2116 return 0;
2117}
2118
2119
2120
2121
2122static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
2123 struct inode *new_dir, struct dentry *new_dentry)
2124{
2125 if (!S_ISDIR(old_dentry->d_inode->i_mode))
2126 return -ENOTDIR;
2127 if (new_dentry->d_inode)
2128 return -EEXIST;
2129 if (old_dir != new_dir)
2130 return -EIO;
2131 return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
2132}
2133
2134static const struct file_operations cgroup_file_operations = {
2135 .read = cgroup_file_read,
2136 .write = cgroup_file_write,
2137 .llseek = generic_file_llseek,
2138 .open = cgroup_file_open,
2139 .release = cgroup_file_release,
2140};
2141
2142static const struct inode_operations cgroup_dir_inode_operations = {
2143 .lookup = simple_lookup,
2144 .mkdir = cgroup_mkdir,
2145 .rmdir = cgroup_rmdir,
2146 .rename = cgroup_rename,
2147};
2148
2149
2150
2151
2152static inline struct cftype *__file_cft(struct file *file)
2153{
2154 if (file->f_dentry->d_inode->i_fop != &cgroup_file_operations)
2155 return ERR_PTR(-EINVAL);
2156 return __d_cft(file->f_dentry);
2157}
2158
2159static int cgroup_create_file(struct dentry *dentry, mode_t mode,
2160 struct super_block *sb)
2161{
2162 static const struct dentry_operations cgroup_dops = {
2163 .d_iput = cgroup_diput,
2164 };
2165
2166 struct inode *inode;
2167
2168 if (!dentry)
2169 return -ENOENT;
2170 if (dentry->d_inode)
2171 return -EEXIST;
2172
2173 inode = cgroup_new_inode(mode, sb);
2174 if (!inode)
2175 return -ENOMEM;
2176
2177 if (S_ISDIR(mode)) {
2178 inode->i_op = &cgroup_dir_inode_operations;
2179 inode->i_fop = &simple_dir_operations;
2180
2181
2182 inc_nlink(inode);
2183
2184
2185
2186 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
2187 } else if (S_ISREG(mode)) {
2188 inode->i_size = 0;
2189 inode->i_fop = &cgroup_file_operations;
2190 }
2191 dentry->d_op = &cgroup_dops;
2192 d_instantiate(dentry, inode);
2193 dget(dentry);
2194 return 0;
2195}
2196
2197
2198
2199
2200
2201
2202
2203
2204static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
2205 mode_t mode)
2206{
2207 struct dentry *parent;
2208 int error = 0;
2209
2210 parent = cgrp->parent->dentry;
2211 error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
2212 if (!error) {
2213 dentry->d_fsdata = cgrp;
2214 inc_nlink(parent->d_inode);
2215 rcu_assign_pointer(cgrp->dentry, dentry);
2216 dget(dentry);
2217 }
2218 dput(dentry);
2219
2220 return error;
2221}
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232static mode_t cgroup_file_mode(const struct cftype *cft)
2233{
2234 mode_t mode = 0;
2235
2236 if (cft->mode)
2237 return cft->mode;
2238
2239 if (cft->read || cft->read_u64 || cft->read_s64 ||
2240 cft->read_map || cft->read_seq_string)
2241 mode |= S_IRUGO;
2242
2243 if (cft->write || cft->write_u64 || cft->write_s64 ||
2244 cft->write_string || cft->trigger)
2245 mode |= S_IWUSR;
2246
2247 return mode;
2248}
2249
2250int cgroup_add_file(struct cgroup *cgrp,
2251 struct cgroup_subsys *subsys,
2252 const struct cftype *cft)
2253{
2254 struct dentry *dir = cgrp->dentry;
2255 struct dentry *dentry;
2256 int error;
2257 mode_t mode;
2258
2259 char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
2260 if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
2261 strcpy(name, subsys->name);
2262 strcat(name, ".");
2263 }
2264 strcat(name, cft->name);
2265 BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
2266 dentry = lookup_one_len(name, dir, strlen(name));
2267 if (!IS_ERR(dentry)) {
2268 mode = cgroup_file_mode(cft);
2269 error = cgroup_create_file(dentry, mode | S_IFREG,
2270 cgrp->root->sb);
2271 if (!error)
2272 dentry->d_fsdata = (void *)cft;
2273 dput(dentry);
2274 } else
2275 error = PTR_ERR(dentry);
2276 return error;
2277}
2278EXPORT_SYMBOL_GPL(cgroup_add_file);
2279
2280int cgroup_add_files(struct cgroup *cgrp,
2281 struct cgroup_subsys *subsys,
2282 const struct cftype cft[],
2283 int count)
2284{
2285 int i, err;
2286 for (i = 0; i < count; i++) {
2287 err = cgroup_add_file(cgrp, subsys, &cft[i]);
2288 if (err)
2289 return err;
2290 }
2291 return 0;
2292}
2293EXPORT_SYMBOL_GPL(cgroup_add_files);
2294
2295
2296
2297
2298
2299
2300
2301int cgroup_task_count(const struct cgroup *cgrp)
2302{
2303 int count = 0;
2304 struct cg_cgroup_link *link;
2305
2306 read_lock(&css_set_lock);
2307 list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
2308 count += atomic_read(&link->cg->refcount);
2309 }
2310 read_unlock(&css_set_lock);
2311 return count;
2312}
2313
2314
2315
2316
2317
2318static void cgroup_advance_iter(struct cgroup *cgrp,
2319 struct cgroup_iter *it)
2320{
2321 struct list_head *l = it->cg_link;
2322 struct cg_cgroup_link *link;
2323 struct css_set *cg;
2324
2325
2326 do {
2327 l = l->next;
2328 if (l == &cgrp->css_sets) {
2329 it->cg_link = NULL;
2330 return;
2331 }
2332 link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
2333 cg = link->cg;
2334 } while (list_empty(&cg->tasks));
2335 it->cg_link = l;
2336 it->task = cg->tasks.next;
2337}
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348static void cgroup_enable_task_cg_lists(void)
2349{
2350 struct task_struct *p, *g;
2351 write_lock(&css_set_lock);
2352 use_task_css_set_links = 1;
2353 do_each_thread(g, p) {
2354 task_lock(p);
2355
2356
2357
2358
2359
2360 if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
2361 list_add(&p->cg_list, &p->cgroups->tasks);
2362 task_unlock(p);
2363 } while_each_thread(g, p);
2364 write_unlock(&css_set_lock);
2365}
2366
2367void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
2368{
2369
2370
2371
2372
2373
2374 if (!use_task_css_set_links)
2375 cgroup_enable_task_cg_lists();
2376
2377 read_lock(&css_set_lock);
2378 it->cg_link = &cgrp->css_sets;
2379 cgroup_advance_iter(cgrp, it);
2380}
2381
2382struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
2383 struct cgroup_iter *it)
2384{
2385 struct task_struct *res;
2386 struct list_head *l = it->task;
2387 struct cg_cgroup_link *link;
2388
2389
2390 if (!it->cg_link)
2391 return NULL;
2392 res = list_entry(l, struct task_struct, cg_list);
2393
2394 l = l->next;
2395 link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
2396 if (l == &link->cg->tasks) {
2397
2398
2399 cgroup_advance_iter(cgrp, it);
2400 } else {
2401 it->task = l;
2402 }
2403 return res;
2404}
2405
2406void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
2407{
2408 read_unlock(&css_set_lock);
2409}
2410
2411static inline int started_after_time(struct task_struct *t1,
2412 struct timespec *time,
2413 struct task_struct *t2)
2414{
2415 int start_diff = timespec_compare(&t1->start_time, time);
2416 if (start_diff > 0) {
2417 return 1;
2418 } else if (start_diff < 0) {
2419 return 0;
2420 } else {
2421
2422
2423
2424
2425
2426
2427
2428
2429 return t1 > t2;
2430 }
2431}
2432
2433
2434
2435
2436
2437
2438static inline int started_after(void *p1, void *p2)
2439{
2440 struct task_struct *t1 = p1;
2441 struct task_struct *t2 = p2;
2442 return started_after_time(t1, &t2->start_time, t2);
2443}
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472int cgroup_scan_tasks(struct cgroup_scanner *scan)
2473{
2474 int retval, i;
2475 struct cgroup_iter it;
2476 struct task_struct *p, *dropped;
2477
2478 struct task_struct *latest_task = NULL;
2479 struct ptr_heap tmp_heap;
2480 struct ptr_heap *heap;
2481 struct timespec latest_time = { 0, 0 };
2482
2483 if (scan->heap) {
2484
2485 heap = scan->heap;
2486 heap->gt = &started_after;
2487 } else {
2488
2489 heap = &tmp_heap;
2490 retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
2491 if (retval)
2492
2493 return retval;
2494 }
2495
2496 again:
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509 heap->size = 0;
2510 cgroup_iter_start(scan->cg, &it);
2511 while ((p = cgroup_iter_next(scan->cg, &it))) {
2512
2513
2514
2515
2516 if (scan->test_task && !scan->test_task(p, scan))
2517 continue;
2518
2519
2520
2521
2522 if (!started_after_time(p, &latest_time, latest_task))
2523 continue;
2524 dropped = heap_insert(heap, p);
2525 if (dropped == NULL) {
2526
2527
2528
2529
2530 get_task_struct(p);
2531 } else if (dropped != p) {
2532
2533
2534
2535
2536 get_task_struct(p);
2537 put_task_struct(dropped);
2538 }
2539
2540
2541
2542
2543 }
2544 cgroup_iter_end(scan->cg, &it);
2545
2546 if (heap->size) {
2547 for (i = 0; i < heap->size; i++) {
2548 struct task_struct *q = heap->ptrs[i];
2549 if (i == 0) {
2550 latest_time = q->start_time;
2551 latest_task = q;
2552 }
2553
2554 scan->process_task(q, scan);
2555 put_task_struct(q);
2556 }
2557
2558
2559
2560
2561
2562
2563
2564 goto again;
2565 }
2566 if (heap == &tmp_heap)
2567 heap_free(&tmp_heap);
2568 return 0;
2569}
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
2587static void *pidlist_allocate(int count)
2588{
2589 if (PIDLIST_TOO_LARGE(count))
2590 return vmalloc(count * sizeof(pid_t));
2591 else
2592 return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
2593}
/* Free a pid list with the deallocator matching how it was allocated. */
static void pidlist_free(void *p)
{
	if (!is_vmalloc_addr(p)) {
		kfree(p);
		return;
	}
	vfree(p);
}
2601static void *pidlist_resize(void *p, int newcount)
2602{
2603 void *newlist;
2604
2605 if (is_vmalloc_addr(p)) {
2606 newlist = vmalloc(newcount * sizeof(pid_t));
2607 if (!newlist)
2608 return NULL;
2609 memcpy(newlist, p, newcount * sizeof(pid_t));
2610 vfree(p);
2611 } else {
2612 newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL);
2613 }
2614 return newlist;
2615}
2616
2617
2618
2619
2620
2621
2622
2623
2624#define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new))
2625static int pidlist_uniq(pid_t **p, int length)
2626{
2627 int src, dest = 1;
2628 pid_t *list = *p;
2629 pid_t *newlist;
2630
2631
2632
2633
2634
2635 if (length == 0 || length == 1)
2636 return length;
2637
2638 for (src = 1; src < length; src++) {
2639
2640 while (list[src] == list[src-1]) {
2641 src++;
2642 if (src == length)
2643 goto after;
2644 }
2645
2646 list[dest] = list[src];
2647 dest++;
2648 }
2649after:
2650
2651
2652
2653
2654
2655 if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
2656 newlist = pidlist_resize(list, dest);
2657 if (newlist)
2658 *p = newlist;
2659 }
2660 return dest;
2661}
2662
/*
 * sort() comparison callback for pid_t values: negative, zero or positive
 * when *a is smaller than, equal to or greater than *b.
 */
static int cmppid(const void *a, const void *b)
{
	pid_t lhs = *(pid_t *)a;
	pid_t rhs = *(pid_t *)b;

	return lhs - rhs;
}
2667
2668
2669
2670
2671
2672
2673
2674static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
2675 enum cgroup_filetype type)
2676{
2677 struct cgroup_pidlist *l;
2678
2679 struct pid_namespace *ns = current->nsproxy->pid_ns;
2680
2681
2682
2683
2684
2685
2686
2687 mutex_lock(&cgrp->pidlist_mutex);
2688 list_for_each_entry(l, &cgrp->pidlists, links) {
2689 if (l->key.type == type && l->key.ns == ns) {
2690
2691 down_write(&l->mutex);
2692 mutex_unlock(&cgrp->pidlist_mutex);
2693 return l;
2694 }
2695 }
2696
2697 l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
2698 if (!l) {
2699 mutex_unlock(&cgrp->pidlist_mutex);
2700 return l;
2701 }
2702 init_rwsem(&l->mutex);
2703 down_write(&l->mutex);
2704 l->key.type = type;
2705 l->key.ns = get_pid_ns(ns);
2706 l->use_count = 0;
2707 l->list = NULL;
2708 l->owner = cgrp;
2709 list_add(&l->links, &cgrp->pidlists);
2710 mutex_unlock(&cgrp->pidlist_mutex);
2711 return l;
2712}
2713
2714
2715
2716
2717static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
2718 struct cgroup_pidlist **lp)
2719{
2720 pid_t *array;
2721 int length;
2722 int pid, n = 0;
2723 struct cgroup_iter it;
2724 struct task_struct *tsk;
2725 struct cgroup_pidlist *l;
2726
2727
2728
2729
2730
2731
2732
2733 length = cgroup_task_count(cgrp);
2734 array = pidlist_allocate(length);
2735 if (!array)
2736 return -ENOMEM;
2737
2738 cgroup_iter_start(cgrp, &it);
2739 while ((tsk = cgroup_iter_next(cgrp, &it))) {
2740 if (unlikely(n == length))
2741 break;
2742
2743 if (type == CGROUP_FILE_PROCS)
2744 pid = task_tgid_vnr(tsk);
2745 else
2746 pid = task_pid_vnr(tsk);
2747 if (pid > 0)
2748 array[n++] = pid;
2749 }
2750 cgroup_iter_end(cgrp, &it);
2751 length = n;
2752
2753 sort(array, length, sizeof(pid_t), cmppid, NULL);
2754 if (type == CGROUP_FILE_PROCS)
2755 length = pidlist_uniq(&array, length);
2756 l = cgroup_pidlist_find(cgrp, type);
2757 if (!l) {
2758 pidlist_free(array);
2759 return -ENOMEM;
2760 }
2761
2762 pidlist_free(l->list);
2763 l->list = array;
2764 l->length = length;
2765 l->use_count++;
2766 up_write(&l->mutex);
2767 *lp = l;
2768 return 0;
2769}
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
2781{
2782 int ret = -EINVAL;
2783 struct cgroup *cgrp;
2784 struct cgroup_iter it;
2785 struct task_struct *tsk;
2786
2787
2788
2789
2790
2791 if (dentry->d_sb->s_op != &cgroup_ops ||
2792 !S_ISDIR(dentry->d_inode->i_mode))
2793 goto err;
2794
2795 ret = 0;
2796 cgrp = dentry->d_fsdata;
2797
2798 cgroup_iter_start(cgrp, &it);
2799 while ((tsk = cgroup_iter_next(cgrp, &it))) {
2800 switch (tsk->state) {
2801 case TASK_RUNNING:
2802 stats->nr_running++;
2803 break;
2804 case TASK_INTERRUPTIBLE:
2805 stats->nr_sleeping++;
2806 break;
2807 case TASK_UNINTERRUPTIBLE:
2808 stats->nr_uninterruptible++;
2809 break;
2810 case TASK_STOPPED:
2811 stats->nr_stopped++;
2812 break;
2813 default:
2814 if (delayacct_is_task_waiting_on_io(tsk))
2815 stats->nr_io_wait++;
2816 break;
2817 }
2818 }
2819 cgroup_iter_end(cgrp, &it);
2820
2821err:
2822 return ret;
2823}
2824
2825
2826
2827
2828
2829
2830
2831
2832static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
2833{
2834
2835
2836
2837
2838
2839
2840 struct cgroup_pidlist *l = s->private;
2841 int index = 0, pid = *pos;
2842 int *iter;
2843
2844 down_read(&l->mutex);
2845 if (pid) {
2846 int end = l->length;
2847
2848 while (index < end) {
2849 int mid = (index + end) / 2;
2850 if (l->list[mid] == pid) {
2851 index = mid;
2852 break;
2853 } else if (l->list[mid] <= pid)
2854 index = mid + 1;
2855 else
2856 end = mid;
2857 }
2858 }
2859
2860 if (index >= l->length)
2861 return NULL;
2862
2863 iter = l->list + index;
2864 *pos = *iter;
2865 return iter;
2866}
2867
2868static void cgroup_pidlist_stop(struct seq_file *s, void *v)
2869{
2870 struct cgroup_pidlist *l = s->private;
2871 up_read(&l->mutex);
2872}
2873
2874static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
2875{
2876 struct cgroup_pidlist *l = s->private;
2877 pid_t *p = v;
2878 pid_t *end = l->list + l->length;
2879
2880
2881
2882
2883 p++;
2884 if (p >= end) {
2885 return NULL;
2886 } else {
2887 *pos = *p;
2888 return p;
2889 }
2890}
2891
/* seq_file show callback: print the pid that @v points at, one per line. */
static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
	int *pid = v;

	return seq_printf(s, "%d\n", *pid);
}
2896
2897
2898
2899
2900
2901static const struct seq_operations cgroup_pidlist_seq_operations = {
2902 .start = cgroup_pidlist_start,
2903 .stop = cgroup_pidlist_stop,
2904 .next = cgroup_pidlist_next,
2905 .show = cgroup_pidlist_show,
2906};
2907
2908static void cgroup_release_pid_array(struct cgroup_pidlist *l)
2909{
2910
2911
2912
2913
2914
2915
2916 mutex_lock(&l->owner->pidlist_mutex);
2917 down_write(&l->mutex);
2918 BUG_ON(!l->use_count);
2919 if (!--l->use_count) {
2920
2921 list_del(&l->links);
2922 mutex_unlock(&l->owner->pidlist_mutex);
2923 pidlist_free(l->list);
2924 put_pid_ns(l->key.ns);
2925 up_write(&l->mutex);
2926 kfree(l);
2927 return;
2928 }
2929 mutex_unlock(&l->owner->pidlist_mutex);
2930 up_write(&l->mutex);
2931}
2932
2933static int cgroup_pidlist_release(struct inode *inode, struct file *file)
2934{
2935 struct cgroup_pidlist *l;
2936 if (!(file->f_mode & FMODE_READ))
2937 return 0;
2938
2939
2940
2941
2942 l = ((struct seq_file *)file->private_data)->private;
2943 cgroup_release_pid_array(l);
2944 return seq_release(inode, file);
2945}
2946
2947static const struct file_operations cgroup_pidlist_operations = {
2948 .read = seq_read,
2949 .llseek = seq_lseek,
2950 .write = cgroup_file_write,
2951 .release = cgroup_pidlist_release,
2952};
2953
2954
2955
2956
2957
2958
2959
2960static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
2961{
2962 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2963 struct cgroup_pidlist *l;
2964 int retval;
2965
2966
2967 if (!(file->f_mode & FMODE_READ))
2968 return 0;
2969
2970
2971 retval = pidlist_array_load(cgrp, type, &l);
2972 if (retval)
2973 return retval;
2974
2975 file->f_op = &cgroup_pidlist_operations;
2976
2977 retval = seq_open(file, &cgroup_pidlist_seq_operations);
2978 if (retval) {
2979 cgroup_release_pid_array(l);
2980 return retval;
2981 }
2982 ((struct seq_file *)file->private_data)->private = l;
2983 return 0;
2984}
2985static int cgroup_tasks_open(struct inode *unused, struct file *file)
2986{
2987 return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
2988}
2989static int cgroup_procs_open(struct inode *unused, struct file *file)
2990{
2991 return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
2992}
2993
2994static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
2995 struct cftype *cft)
2996{
2997 return notify_on_release(cgrp);
2998}
2999
3000static int cgroup_write_notify_on_release(struct cgroup *cgrp,
3001 struct cftype *cft,
3002 u64 val)
3003{
3004 clear_bit(CGRP_RELEASABLE, &cgrp->flags);
3005 if (val)
3006 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3007 else
3008 clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3009 return 0;
3010}
3011
3012
3013
3014
3015
3016
3017static void cgroup_event_remove(struct work_struct *work)
3018{
3019 struct cgroup_event *event = container_of(work, struct cgroup_event,
3020 remove);
3021 struct cgroup *cgrp = event->cgrp;
3022
3023 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
3024
3025 eventfd_ctx_put(event->eventfd);
3026 kfree(event);
3027 dput(cgrp->dentry);
3028}
3029
3030
3031
3032
3033
3034
3035static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
3036 int sync, void *key)
3037{
3038 struct cgroup_event *event = container_of(wait,
3039 struct cgroup_event, wait);
3040 struct cgroup *cgrp = event->cgrp;
3041 unsigned long flags = (unsigned long)key;
3042
3043 if (flags & POLLHUP) {
3044 __remove_wait_queue(event->wqh, &event->wait);
3045 spin_lock(&cgrp->event_list_lock);
3046 list_del(&event->list);
3047 spin_unlock(&cgrp->event_list_lock);
3048
3049
3050
3051
3052 schedule_work(&event->remove);
3053 }
3054
3055 return 0;
3056}
3057
3058static void cgroup_event_ptable_queue_proc(struct file *file,
3059 wait_queue_head_t *wqh, poll_table *pt)
3060{
3061 struct cgroup_event *event = container_of(pt,
3062 struct cgroup_event, pt);
3063
3064 event->wqh = wqh;
3065 add_wait_queue(wqh, &event->wait);
3066}
3067
3068
3069
3070
3071
3072
3073
3074static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
3075 const char *buffer)
3076{
3077 struct cgroup_event *event = NULL;
3078 unsigned int efd, cfd;
3079 struct file *efile = NULL;
3080 struct file *cfile = NULL;
3081 char *endp;
3082 int ret;
3083
3084 efd = simple_strtoul(buffer, &endp, 10);
3085 if (*endp != ' ')
3086 return -EINVAL;
3087 buffer = endp + 1;
3088
3089 cfd = simple_strtoul(buffer, &endp, 10);
3090 if ((*endp != ' ') && (*endp != '\0'))
3091 return -EINVAL;
3092 buffer = endp + 1;
3093
3094 event = kzalloc(sizeof(*event), GFP_KERNEL);
3095 if (!event)
3096 return -ENOMEM;
3097 event->cgrp = cgrp;
3098 INIT_LIST_HEAD(&event->list);
3099 init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
3100 init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
3101 INIT_WORK(&event->remove, cgroup_event_remove);
3102
3103 efile = eventfd_fget(efd);
3104 if (IS_ERR(efile)) {
3105 ret = PTR_ERR(efile);
3106 goto fail;
3107 }
3108
3109 event->eventfd = eventfd_ctx_fileget(efile);
3110 if (IS_ERR(event->eventfd)) {
3111 ret = PTR_ERR(event->eventfd);
3112 goto fail;
3113 }
3114
3115 cfile = fget(cfd);
3116 if (!cfile) {
3117 ret = -EBADF;
3118 goto fail;
3119 }
3120
3121
3122 ret = file_permission(cfile, MAY_READ);
3123 if (ret < 0)
3124 goto fail;
3125
3126 event->cft = __file_cft(cfile);
3127 if (IS_ERR(event->cft)) {
3128 ret = PTR_ERR(event->cft);
3129 goto fail;
3130 }
3131
3132 if (!event->cft->register_event || !event->cft->unregister_event) {
3133 ret = -EINVAL;
3134 goto fail;
3135 }
3136
3137 ret = event->cft->register_event(cgrp, event->cft,
3138 event->eventfd, buffer);
3139 if (ret)
3140 goto fail;
3141
3142 if (efile->f_op->poll(efile, &event->pt) & POLLHUP) {
3143 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
3144 ret = 0;
3145 goto fail;
3146 }
3147
3148
3149
3150
3151
3152
3153 dget(cgrp->dentry);
3154
3155 spin_lock(&cgrp->event_list_lock);
3156 list_add(&event->list, &cgrp->event_list);
3157 spin_unlock(&cgrp->event_list_lock);
3158
3159 fput(cfile);
3160 fput(efile);
3161
3162 return 0;
3163
3164fail:
3165 if (cfile)
3166 fput(cfile);
3167
3168 if (event && event->eventfd && !IS_ERR(event->eventfd))
3169 eventfd_ctx_put(event->eventfd);
3170
3171 if (!IS_ERR_OR_NULL(efile))
3172 fput(efile);
3173
3174 kfree(event);
3175
3176 return ret;
3177}
3178
3179
3180
3181
3182
3183#define CGROUP_FILE_GENERIC_PREFIX "cgroup."
3184static struct cftype files[] = {
3185 {
3186 .name = "tasks",
3187 .open = cgroup_tasks_open,
3188 .write_u64 = cgroup_tasks_write,
3189 .release = cgroup_pidlist_release,
3190 .mode = S_IRUGO | S_IWUSR,
3191 },
3192 {
3193 .name = CGROUP_FILE_GENERIC_PREFIX "procs",
3194 .open = cgroup_procs_open,
3195
3196 .release = cgroup_pidlist_release,
3197 .mode = S_IRUGO,
3198 },
3199 {
3200 .name = "notify_on_release",
3201 .read_u64 = cgroup_read_notify_on_release,
3202 .write_u64 = cgroup_write_notify_on_release,
3203 },
3204 {
3205 .name = CGROUP_FILE_GENERIC_PREFIX "event_control",
3206 .write_string = cgroup_write_event_control,
3207 .mode = S_IWUGO,
3208 },
3209};
3210
3211static struct cftype cft_release_agent = {
3212 .name = "release_agent",
3213 .read_seq_string = cgroup_release_agent_show,
3214 .write_string = cgroup_release_agent_write,
3215 .max_write_len = PATH_MAX,
3216};
3217
3218static int cgroup_populate_dir(struct cgroup *cgrp)
3219{
3220 int err;
3221 struct cgroup_subsys *ss;
3222
3223
3224 cgroup_clear_directory(cgrp->dentry);
3225
3226 err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
3227 if (err < 0)
3228 return err;
3229
3230 if (cgrp == cgrp->top_cgroup) {
3231 if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0)
3232 return err;
3233 }
3234
3235 for_each_subsys(cgrp->root, ss) {
3236 if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
3237 return err;
3238 }
3239
3240 for_each_subsys(cgrp->root, ss) {
3241 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
3242
3243
3244
3245
3246
3247 if (css->id)
3248 rcu_assign_pointer(css->id->css, css);
3249 }
3250
3251 return 0;
3252}
3253
3254static void init_cgroup_css(struct cgroup_subsys_state *css,
3255 struct cgroup_subsys *ss,
3256 struct cgroup *cgrp)
3257{
3258 css->cgroup = cgrp;
3259 atomic_set(&css->refcnt, 1);
3260 css->flags = 0;
3261 css->id = NULL;
3262 if (cgrp == dummytop)
3263 set_bit(CSS_ROOT, &css->flags);
3264 BUG_ON(cgrp->subsys[ss->subsys_id]);
3265 cgrp->subsys[ss->subsys_id] = css;
3266}
3267
3268static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
3269{
3270
3271 int i;
3272
3273
3274
3275
3276
3277 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3278 struct cgroup_subsys *ss = subsys[i];
3279 if (ss == NULL)
3280 continue;
3281 if (ss->root == root)
3282 mutex_lock(&ss->hierarchy_mutex);
3283 }
3284}
3285
3286static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
3287{
3288 int i;
3289
3290 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3291 struct cgroup_subsys *ss = subsys[i];
3292 if (ss == NULL)
3293 continue;
3294 if (ss->root == root)
3295 mutex_unlock(&ss->hierarchy_mutex);
3296 }
3297}
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
3308 mode_t mode)
3309{
3310 struct cgroup *cgrp;
3311 struct cgroupfs_root *root = parent->root;
3312 int err = 0;
3313 struct cgroup_subsys *ss;
3314 struct super_block *sb = root->sb;
3315
3316 cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
3317 if (!cgrp)
3318 return -ENOMEM;
3319
3320
3321
3322
3323
3324
3325 atomic_inc(&sb->s_active);
3326
3327 mutex_lock(&cgroup_mutex);
3328
3329 init_cgroup_housekeeping(cgrp);
3330
3331 cgrp->parent = parent;
3332 cgrp->root = parent->root;
3333 cgrp->top_cgroup = parent->top_cgroup;
3334
3335 if (notify_on_release(parent))
3336 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3337
3338 for_each_subsys(root, ss) {
3339 struct cgroup_subsys_state *css = ss->create(ss, cgrp);
3340
3341 if (IS_ERR(css)) {
3342 err = PTR_ERR(css);
3343 goto err_destroy;
3344 }
3345 init_cgroup_css(css, ss, cgrp);
3346 if (ss->use_id) {
3347 err = alloc_css_id(ss, parent, cgrp);
3348 if (err)
3349 goto err_destroy;
3350 }
3351
3352 }
3353
3354 cgroup_lock_hierarchy(root);
3355 list_add(&cgrp->sibling, &cgrp->parent->children);
3356 cgroup_unlock_hierarchy(root);
3357 root->number_of_cgroups++;
3358
3359 err = cgroup_create_dir(cgrp, dentry, mode);
3360 if (err < 0)
3361 goto err_remove;
3362
3363
3364 BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
3365
3366 err = cgroup_populate_dir(cgrp);
3367
3368
3369 mutex_unlock(&cgroup_mutex);
3370 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
3371
3372 return 0;
3373
3374 err_remove:
3375
3376 cgroup_lock_hierarchy(root);
3377 list_del(&cgrp->sibling);
3378 cgroup_unlock_hierarchy(root);
3379 root->number_of_cgroups--;
3380
3381 err_destroy:
3382
3383 for_each_subsys(root, ss) {
3384 if (cgrp->subsys[ss->subsys_id])
3385 ss->destroy(ss, cgrp);
3386 }
3387
3388 mutex_unlock(&cgroup_mutex);
3389
3390
3391 deactivate_super(sb);
3392
3393 kfree(cgrp);
3394 return err;
3395}
3396
3397static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
3398{
3399 struct cgroup *c_parent = dentry->d_parent->d_fsdata;
3400
3401
3402 return cgroup_create(c_parent, dentry, mode | S_IFDIR);
3403}
3404
3405static int cgroup_has_css_refs(struct cgroup *cgrp)
3406{
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416 int i;
3417
3418
3419
3420
3421
3422 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3423 struct cgroup_subsys *ss = subsys[i];
3424 struct cgroup_subsys_state *css;
3425
3426 if (ss == NULL || ss->root != cgrp->root)
3427 continue;
3428 css = cgrp->subsys[ss->subsys_id];
3429
3430
3431
3432
3433
3434
3435 if (css && (atomic_read(&css->refcnt) > 1))
3436 return 1;
3437 }
3438 return 0;
3439}
3440
3441
3442
3443
3444
3445
3446
3447static int cgroup_clear_css_refs(struct cgroup *cgrp)
3448{
3449 struct cgroup_subsys *ss;
3450 unsigned long flags;
3451 bool failed = false;
3452 local_irq_save(flags);
3453 for_each_subsys(cgrp->root, ss) {
3454 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
3455 int refcnt;
3456 while (1) {
3457
3458 refcnt = atomic_read(&css->refcnt);
3459 if (refcnt > 1) {
3460 failed = true;
3461 goto done;
3462 }
3463 BUG_ON(!refcnt);
3464
3465
3466
3467
3468
3469
3470 if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt)
3471 break;
3472 cpu_relax();
3473 }
3474 }
3475 done:
3476 for_each_subsys(cgrp->root, ss) {
3477 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
3478 if (failed) {
3479
3480
3481
3482
3483 if (!atomic_read(&css->refcnt))
3484 atomic_set(&css->refcnt, 1);
3485 } else {
3486
3487 set_bit(CSS_REMOVED, &css->flags);
3488 }
3489 }
3490 local_irq_restore(flags);
3491 return !failed;
3492}
3493
3494static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
3495{
3496 struct cgroup *cgrp = dentry->d_fsdata;
3497 struct dentry *d;
3498 struct cgroup *parent;
3499 DEFINE_WAIT(wait);
3500 struct cgroup_event *event, *tmp;
3501 int ret;
3502
3503
3504again:
3505 mutex_lock(&cgroup_mutex);
3506 if (atomic_read(&cgrp->count) != 0) {
3507 mutex_unlock(&cgroup_mutex);
3508 return -EBUSY;
3509 }
3510 if (!list_empty(&cgrp->children)) {
3511 mutex_unlock(&cgroup_mutex);
3512 return -EBUSY;
3513 }
3514 mutex_unlock(&cgroup_mutex);
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525 set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
3526
3527
3528
3529
3530
3531 ret = cgroup_call_pre_destroy(cgrp);
3532 if (ret) {
3533 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
3534 return ret;
3535 }
3536
3537 mutex_lock(&cgroup_mutex);
3538 parent = cgrp->parent;
3539 if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
3540 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
3541 mutex_unlock(&cgroup_mutex);
3542 return -EBUSY;
3543 }
3544 prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
3545 if (!cgroup_clear_css_refs(cgrp)) {
3546 mutex_unlock(&cgroup_mutex);
3547
3548
3549
3550
3551 if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
3552 schedule();
3553 finish_wait(&cgroup_rmdir_waitq, &wait);
3554 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
3555 if (signal_pending(current))
3556 return -EINTR;
3557 goto again;
3558 }
3559
3560 finish_wait(&cgroup_rmdir_waitq, &wait);
3561 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
3562
3563 spin_lock(&release_list_lock);
3564 set_bit(CGRP_REMOVED, &cgrp->flags);
3565 if (!list_empty(&cgrp->release_list))
3566 list_del(&cgrp->release_list);
3567 spin_unlock(&release_list_lock);
3568
3569 cgroup_lock_hierarchy(cgrp->root);
3570
3571 list_del(&cgrp->sibling);
3572 cgroup_unlock_hierarchy(cgrp->root);
3573
3574 spin_lock(&cgrp->dentry->d_lock);
3575 d = dget(cgrp->dentry);
3576 spin_unlock(&d->d_lock);
3577
3578 cgroup_d_remove_dir(d);
3579 dput(d);
3580
3581 set_bit(CGRP_RELEASABLE, &parent->flags);
3582 check_for_release(parent);
3583
3584
3585
3586
3587
3588
3589 spin_lock(&cgrp->event_list_lock);
3590 list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
3591 list_del(&event->list);
3592 remove_wait_queue(event->wqh, &event->wait);
3593 eventfd_signal(event->eventfd, 1);
3594 schedule_work(&event->remove);
3595 }
3596 spin_unlock(&cgrp->event_list_lock);
3597
3598 mutex_unlock(&cgroup_mutex);
3599 return 0;
3600}
3601
3602static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
3603{
3604 struct cgroup_subsys_state *css;
3605
3606 printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
3607
3608
3609 list_add(&ss->sibling, &rootnode.subsys_list);
3610 ss->root = &rootnode;
3611 css = ss->create(ss, dummytop);
3612
3613 BUG_ON(IS_ERR(css));
3614 init_cgroup_css(css, ss, dummytop);
3615
3616
3617
3618
3619
3620 init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
3621
3622 need_forkexit_callback |= ss->fork || ss->exit;
3623
3624
3625
3626
3627 BUG_ON(!list_empty(&init_task.tasks));
3628
3629 mutex_init(&ss->hierarchy_mutex);
3630 lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
3631 ss->active = 1;
3632
3633
3634
3635 BUG_ON(ss->module);
3636}
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
3648{
3649 int i;
3650 struct cgroup_subsys_state *css;
3651
3652
3653 if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
3654 ss->create == NULL || ss->destroy == NULL)
3655 return -EINVAL;
3656
3657
3658
3659
3660
3661
3662
3663 if (ss->fork || ss->exit)
3664 return -EINVAL;
3665
3666
3667
3668
3669
3670 if (ss->module == NULL) {
3671
3672 BUG_ON(ss->subsys_id >= CGROUP_BUILTIN_SUBSYS_COUNT);
3673 BUG_ON(subsys[ss->subsys_id] != ss);
3674 return 0;
3675 }
3676
3677
3678
3679
3680
3681 mutex_lock(&cgroup_mutex);
3682
3683 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
3684 if (subsys[i] == NULL)
3685 break;
3686 }
3687 if (i == CGROUP_SUBSYS_COUNT) {
3688
3689 mutex_unlock(&cgroup_mutex);
3690 return -EBUSY;
3691 }
3692
3693 ss->subsys_id = i;
3694 subsys[i] = ss;
3695
3696
3697
3698
3699
3700 css = ss->create(ss, dummytop);
3701 if (IS_ERR(css)) {
3702
3703 subsys[i] = NULL;
3704 mutex_unlock(&cgroup_mutex);
3705 return PTR_ERR(css);
3706 }
3707
3708 list_add(&ss->sibling, &rootnode.subsys_list);
3709 ss->root = &rootnode;
3710
3711
3712 init_cgroup_css(css, ss, dummytop);
3713
3714 if (ss->use_id) {
3715 int ret = cgroup_init_idr(ss, css);
3716 if (ret) {
3717 dummytop->subsys[ss->subsys_id] = NULL;
3718 ss->destroy(ss, dummytop);
3719 subsys[i] = NULL;
3720 mutex_unlock(&cgroup_mutex);
3721 return ret;
3722 }
3723 }
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733 write_lock(&css_set_lock);
3734 for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
3735 struct css_set *cg;
3736 struct hlist_node *node, *tmp;
3737 struct hlist_head *bucket = &css_set_table[i], *new_bucket;
3738
3739 hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) {
3740
3741 if (cg->subsys[ss->subsys_id])
3742 continue;
3743
3744 hlist_del(&cg->hlist);
3745
3746 cg->subsys[ss->subsys_id] = css;
3747
3748 new_bucket = css_set_hash(cg->subsys);
3749 hlist_add_head(&cg->hlist, new_bucket);
3750 }
3751 }
3752 write_unlock(&css_set_lock);
3753
3754 mutex_init(&ss->hierarchy_mutex);
3755 lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
3756 ss->active = 1;
3757
3758
3759 mutex_unlock(&cgroup_mutex);
3760 return 0;
3761}
3762EXPORT_SYMBOL_GPL(cgroup_load_subsys);
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772void cgroup_unload_subsys(struct cgroup_subsys *ss)
3773{
3774 struct cg_cgroup_link *link;
3775 struct hlist_head *hhead;
3776
3777 BUG_ON(ss->module == NULL);
3778
3779
3780
3781
3782
3783
3784 BUG_ON(ss->root != &rootnode);
3785
3786 mutex_lock(&cgroup_mutex);
3787
3788 BUG_ON(ss->subsys_id < CGROUP_BUILTIN_SUBSYS_COUNT);
3789 subsys[ss->subsys_id] = NULL;
3790
3791
3792 list_del(&ss->sibling);
3793
3794
3795
3796
3797
3798 write_lock(&css_set_lock);
3799 list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) {
3800 struct css_set *cg = link->cg;
3801
3802 hlist_del(&cg->hlist);
3803 BUG_ON(!cg->subsys[ss->subsys_id]);
3804 cg->subsys[ss->subsys_id] = NULL;
3805 hhead = css_set_hash(cg->subsys);
3806 hlist_add_head(&cg->hlist, hhead);
3807 }
3808 write_unlock(&css_set_lock);
3809
3810
3811
3812
3813
3814
3815
3816 ss->destroy(ss, dummytop);
3817 dummytop->subsys[ss->subsys_id] = NULL;
3818
3819 mutex_unlock(&cgroup_mutex);
3820}
3821EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
3822
3823
3824
3825
3826
3827
3828
3829int __init cgroup_init_early(void)
3830{
3831 int i;
3832 atomic_set(&init_css_set.refcount, 1);
3833 INIT_LIST_HEAD(&init_css_set.cg_links);
3834 INIT_LIST_HEAD(&init_css_set.tasks);
3835 INIT_HLIST_NODE(&init_css_set.hlist);
3836 css_set_count = 1;
3837 init_cgroup_root(&rootnode);
3838 root_count = 1;
3839 init_task.cgroups = &init_css_set;
3840
3841 init_css_set_link.cg = &init_css_set;
3842 init_css_set_link.cgrp = dummytop;
3843 list_add(&init_css_set_link.cgrp_link_list,
3844 &rootnode.top_cgroup.css_sets);
3845 list_add(&init_css_set_link.cg_link_list,
3846 &init_css_set.cg_links);
3847
3848 for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
3849 INIT_HLIST_HEAD(&css_set_table[i]);
3850
3851
3852 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3853 struct cgroup_subsys *ss = subsys[i];
3854
3855 BUG_ON(!ss->name);
3856 BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
3857 BUG_ON(!ss->create);
3858 BUG_ON(!ss->destroy);
3859 if (ss->subsys_id != i) {
3860 printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
3861 ss->name, ss->subsys_id);
3862 BUG();
3863 }
3864
3865 if (ss->early_init)
3866 cgroup_init_subsys(ss);
3867 }
3868 return 0;
3869}
3870
3871
3872
3873
3874
3875
3876
3877int __init cgroup_init(void)
3878{
3879 int err;
3880 int i;
3881 struct hlist_head *hhead;
3882
3883 err = bdi_init(&cgroup_backing_dev_info);
3884 if (err)
3885 return err;
3886
3887
3888 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3889 struct cgroup_subsys *ss = subsys[i];
3890 if (!ss->early_init)
3891 cgroup_init_subsys(ss);
3892 if (ss->use_id)
3893 cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
3894 }
3895
3896
3897 hhead = css_set_hash(init_css_set.subsys);
3898 hlist_add_head(&init_css_set.hlist, hhead);
3899 BUG_ON(!init_root_id(&rootnode));
3900
3901 cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
3902 if (!cgroup_kobj) {
3903 err = -ENOMEM;
3904 goto out;
3905 }
3906
3907 err = register_filesystem(&cgroup_fs_type);
3908 if (err < 0) {
3909 kobject_put(cgroup_kobj);
3910 goto out;
3911 }
3912
3913 proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);
3914
3915out:
3916 if (err)
3917 bdi_destroy(&cgroup_backing_dev_info);
3918
3919 return err;
3920}
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935static int proc_cgroup_show(struct seq_file *m, void *v)
3936{
3937 struct pid *pid;
3938 struct task_struct *tsk;
3939 char *buf;
3940 int retval;
3941 struct cgroupfs_root *root;
3942
3943 retval = -ENOMEM;
3944 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
3945 if (!buf)
3946 goto out;
3947
3948 retval = -ESRCH;
3949 pid = m->private;
3950 tsk = get_pid_task(pid, PIDTYPE_PID);
3951 if (!tsk)
3952 goto out_free;
3953
3954 retval = 0;
3955
3956 mutex_lock(&cgroup_mutex);
3957
3958 for_each_active_root(root) {
3959 struct cgroup_subsys *ss;
3960 struct cgroup *cgrp;
3961 int count = 0;
3962
3963 seq_printf(m, "%d:", root->hierarchy_id);
3964 for_each_subsys(root, ss)
3965 seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
3966 if (strlen(root->name))
3967 seq_printf(m, "%sname=%s", count ? "," : "",
3968 root->name);
3969 seq_putc(m, ':');
3970 cgrp = task_cgroup_from_root(tsk, root);
3971 retval = cgroup_path(cgrp, buf, PAGE_SIZE);
3972 if (retval < 0)
3973 goto out_unlock;
3974 seq_puts(m, buf);
3975 seq_putc(m, '\n');
3976 }
3977
3978out_unlock:
3979 mutex_unlock(&cgroup_mutex);
3980 put_task_struct(tsk);
3981out_free:
3982 kfree(buf);
3983out:
3984 return retval;
3985}
3986
3987static int cgroup_open(struct inode *inode, struct file *file)
3988{
3989 struct pid *pid = PROC_I(inode)->pid;
3990 return single_open(file, proc_cgroup_show, pid);
3991}
3992
3993const struct file_operations proc_cgroup_operations = {
3994 .open = cgroup_open,
3995 .read = seq_read,
3996 .llseek = seq_lseek,
3997 .release = single_release,
3998};
3999
4000
4001static int proc_cgroupstats_show(struct seq_file *m, void *v)
4002{
4003 int i;
4004
4005 seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
4006
4007
4008
4009
4010
4011 mutex_lock(&cgroup_mutex);
4012 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4013 struct cgroup_subsys *ss = subsys[i];
4014 if (ss == NULL)
4015 continue;
4016 seq_printf(m, "%s\t%d\t%d\t%d\n",
4017 ss->name, ss->root->hierarchy_id,
4018 ss->root->number_of_cgroups, !ss->disabled);
4019 }
4020 mutex_unlock(&cgroup_mutex);
4021 return 0;
4022}
4023
4024static int cgroupstats_open(struct inode *inode, struct file *file)
4025{
4026 return single_open(file, proc_cgroupstats_show, NULL);
4027}
4028
4029static const struct file_operations proc_cgroupstats_operations = {
4030 .open = cgroupstats_open,
4031 .read = seq_read,
4032 .llseek = seq_lseek,
4033 .release = single_release,
4034};
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052void cgroup_fork(struct task_struct *child)
4053{
4054 task_lock(current);
4055 child->cgroups = current->cgroups;
4056 get_css_set(child->cgroups);
4057 task_unlock(current);
4058 INIT_LIST_HEAD(&child->cg_list);
4059}
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069void cgroup_fork_callbacks(struct task_struct *child)
4070{
4071 if (need_forkexit_callback) {
4072 int i;
4073
4074
4075
4076
4077
4078 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4079 struct cgroup_subsys *ss = subsys[i];
4080 if (ss->fork)
4081 ss->fork(ss, child);
4082 }
4083 }
4084}
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095void cgroup_post_fork(struct task_struct *child)
4096{
4097 if (use_task_css_set_links) {
4098 write_lock(&css_set_lock);
4099 task_lock(child);
4100 if (list_empty(&child->cg_list))
4101 list_add(&child->cg_list, &child->cgroups->tasks);
4102 task_unlock(child);
4103 write_unlock(&css_set_lock);
4104 }
4105}
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141void cgroup_exit(struct task_struct *tsk, int run_callbacks)
4142{
4143 int i;
4144 struct css_set *cg;
4145
4146 if (run_callbacks && need_forkexit_callback) {
4147
4148
4149
4150
4151 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4152 struct cgroup_subsys *ss = subsys[i];
4153 if (ss->exit)
4154 ss->exit(ss, tsk);
4155 }
4156 }
4157
4158
4159
4160
4161
4162
4163 if (!list_empty(&tsk->cg_list)) {
4164 write_lock(&css_set_lock);
4165 if (!list_empty(&tsk->cg_list))
4166 list_del(&tsk->cg_list);
4167 write_unlock(&css_set_lock);
4168 }
4169
4170
4171 task_lock(tsk);
4172 cg = tsk->cgroups;
4173 tsk->cgroups = &init_css_set;
4174 task_unlock(tsk);
4175 if (cg)
4176 put_css_set_taskexit(cg);
4177}
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
4190 char *nodename)
4191{
4192 struct dentry *dentry;
4193 int ret = 0;
4194 struct cgroup *parent, *child;
4195 struct inode *inode;
4196 struct css_set *cg;
4197 struct cgroupfs_root *root;
4198 struct cgroup_subsys *ss;
4199
4200
4201 BUG_ON(!subsys->active);
4202
4203
4204
4205 mutex_lock(&cgroup_mutex);
4206 again:
4207 root = subsys->root;
4208 if (root == &rootnode) {
4209 mutex_unlock(&cgroup_mutex);
4210 return 0;
4211 }
4212
4213
4214 if (!atomic_inc_not_zero(&root->sb->s_active)) {
4215
4216 mutex_unlock(&cgroup_mutex);
4217 return 0;
4218 }
4219
4220
4221 task_lock(tsk);
4222 parent = task_cgroup(tsk, subsys->subsys_id);
4223 cg = tsk->cgroups;
4224 get_css_set(cg);
4225 task_unlock(tsk);
4226
4227 mutex_unlock(&cgroup_mutex);
4228
4229
4230 inode = parent->dentry->d_inode;
4231
4232
4233
4234 mutex_lock(&inode->i_mutex);
4235 dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
4236 if (IS_ERR(dentry)) {
4237 printk(KERN_INFO
4238 "cgroup: Couldn't allocate dentry for %s: %ld\n", nodename,
4239 PTR_ERR(dentry));
4240 ret = PTR_ERR(dentry);
4241 goto out_release;
4242 }
4243
4244
4245 ret = vfs_mkdir(inode, dentry, 0755);
4246 child = __d_cgrp(dentry);
4247 dput(dentry);
4248 if (ret) {
4249 printk(KERN_INFO
4250 "Failed to create cgroup %s: %d\n", nodename,
4251 ret);
4252 goto out_release;
4253 }
4254
4255
4256
4257
4258 mutex_lock(&cgroup_mutex);
4259 if ((root != subsys->root) ||
4260 (parent != task_cgroup(tsk, subsys->subsys_id))) {
4261
4262 mutex_unlock(&inode->i_mutex);
4263 put_css_set(cg);
4264
4265 deactivate_super(root->sb);
4266
4267
4268
4269 printk(KERN_INFO
4270 "Race in cgroup_clone() - leaking cgroup %s\n",
4271 nodename);
4272 goto again;
4273 }
4274
4275
4276 for_each_subsys(root, ss) {
4277 if (ss->post_clone)
4278 ss->post_clone(ss, child);
4279 }
4280
4281
4282 ret = cgroup_attach_task(child, tsk);
4283 mutex_unlock(&cgroup_mutex);
4284
4285 out_release:
4286 mutex_unlock(&inode->i_mutex);
4287
4288 mutex_lock(&cgroup_mutex);
4289 put_css_set(cg);
4290 mutex_unlock(&cgroup_mutex);
4291 deactivate_super(root->sb);
4292 return ret;
4293}
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
4309{
4310 int ret;
4311 struct cgroup *target;
4312
4313 if (cgrp == dummytop)
4314 return 1;
4315
4316 target = task_cgroup_from_root(task, cgrp->root);
4317 while (cgrp != target && cgrp!= cgrp->top_cgroup)
4318 cgrp = cgrp->parent;
4319 ret = (cgrp == target);
4320 return ret;
4321}
4322
4323static void check_for_release(struct cgroup *cgrp)
4324{
4325
4326
4327 if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
4328 && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
4329
4330
4331
4332 int need_schedule_work = 0;
4333 spin_lock(&release_list_lock);
4334 if (!cgroup_is_removed(cgrp) &&
4335 list_empty(&cgrp->release_list)) {
4336 list_add(&cgrp->release_list, &release_list);
4337 need_schedule_work = 1;
4338 }
4339 spin_unlock(&release_list_lock);
4340 if (need_schedule_work)
4341 schedule_work(&release_agent_work);
4342 }
4343}
4344
4345
4346void __css_put(struct cgroup_subsys_state *css, int count)
4347{
4348 struct cgroup *cgrp = css->cgroup;
4349 int val;
4350 rcu_read_lock();
4351 val = atomic_sub_return(count, &css->refcnt);
4352 if (val == 1) {
4353 if (notify_on_release(cgrp)) {
4354 set_bit(CGRP_RELEASABLE, &cgrp->flags);
4355 check_for_release(cgrp);
4356 }
4357 cgroup_wakeup_rmdir_waiter(cgrp);
4358 }
4359 rcu_read_unlock();
4360 WARN_ON_ONCE(val < 1);
4361}
4362EXPORT_SYMBOL_GPL(__css_put);
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387static void cgroup_release_agent(struct work_struct *work)
4388{
4389 BUG_ON(work != &release_agent_work);
4390 mutex_lock(&cgroup_mutex);
4391 spin_lock(&release_list_lock);
4392 while (!list_empty(&release_list)) {
4393 char *argv[3], *envp[3];
4394 int i;
4395 char *pathbuf = NULL, *agentbuf = NULL;
4396 struct cgroup *cgrp = list_entry(release_list.next,
4397 struct cgroup,
4398 release_list);
4399 list_del_init(&cgrp->release_list);
4400 spin_unlock(&release_list_lock);
4401 pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
4402 if (!pathbuf)
4403 goto continue_free;
4404 if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
4405 goto continue_free;
4406 agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
4407 if (!agentbuf)
4408 goto continue_free;
4409
4410 i = 0;
4411 argv[i++] = agentbuf;
4412 argv[i++] = pathbuf;
4413 argv[i] = NULL;
4414
4415 i = 0;
4416
4417 envp[i++] = "HOME=/";
4418 envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
4419 envp[i] = NULL;
4420
4421
4422
4423
4424 mutex_unlock(&cgroup_mutex);
4425 call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
4426 mutex_lock(&cgroup_mutex);
4427 continue_free:
4428 kfree(pathbuf);
4429 kfree(agentbuf);
4430 spin_lock(&release_list_lock);
4431 }
4432 spin_unlock(&release_list_lock);
4433 mutex_unlock(&cgroup_mutex);
4434}
4435
4436static int __init cgroup_disable(char *str)
4437{
4438 int i;
4439 char *token;
4440
4441 while ((token = strsep(&str, ",")) != NULL) {
4442 if (!*token)
4443 continue;
4444
4445
4446
4447
4448 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4449 struct cgroup_subsys *ss = subsys[i];
4450
4451 if (!strcmp(token, ss->name)) {
4452 ss->disabled = 1;
4453 printk(KERN_INFO "Disabling %s control group"
4454 " subsystem\n", ss->name);
4455 break;
4456 }
4457 }
4458 }
4459 return 1;
4460}
4461__setup("cgroup_disable=", cgroup_disable);
4462
4463
4464
4465
4466
4467
4468
4469
4470unsigned short css_id(struct cgroup_subsys_state *css)
4471{
4472 struct css_id *cssid;
4473
4474
4475
4476
4477
4478
4479 cssid = rcu_dereference_check(css->id,
4480 rcu_read_lock_held() || atomic_read(&css->refcnt));
4481
4482 if (cssid)
4483 return cssid->id;
4484 return 0;
4485}
4486EXPORT_SYMBOL_GPL(css_id);
4487
4488unsigned short css_depth(struct cgroup_subsys_state *css)
4489{
4490 struct css_id *cssid;
4491
4492 cssid = rcu_dereference_check(css->id,
4493 rcu_read_lock_held() || atomic_read(&css->refcnt));
4494
4495 if (cssid)
4496 return cssid->depth;
4497 return 0;
4498}
4499EXPORT_SYMBOL_GPL(css_depth);
4500
4501
4502
4503
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513
4514bool css_is_ancestor(struct cgroup_subsys_state *child,
4515 const struct cgroup_subsys_state *root)
4516{
4517 struct css_id *child_id;
4518 struct css_id *root_id;
4519 bool ret = true;
4520
4521 rcu_read_lock();
4522 child_id = rcu_dereference(child->id);
4523 root_id = rcu_dereference(root->id);
4524 if (!child_id
4525 || !root_id
4526 || (child_id->depth < root_id->depth)
4527 || (child_id->stack[root_id->depth] != root_id->id))
4528 ret = false;
4529 rcu_read_unlock();
4530 return ret;
4531}
4532
4533static void __free_css_id_cb(struct rcu_head *head)
4534{
4535 struct css_id *id;
4536
4537 id = container_of(head, struct css_id, rcu_head);
4538 kfree(id);
4539}
4540
4541void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
4542{
4543 struct css_id *id = css->id;
4544
4545 if (!id)
4546 return;
4547
4548 BUG_ON(!ss->use_id);
4549
4550 rcu_assign_pointer(id->css, NULL);
4551 rcu_assign_pointer(css->id, NULL);
4552 spin_lock(&ss->id_lock);
4553 idr_remove(&ss->idr, id->id);
4554 spin_unlock(&ss->id_lock);
4555 call_rcu(&id->rcu_head, __free_css_id_cb);
4556}
4557EXPORT_SYMBOL_GPL(free_css_id);
4558
4559
4560
4561
4562
4563
4564static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
4565{
4566 struct css_id *newid;
4567 int myid, error, size;
4568
4569 BUG_ON(!ss->use_id);
4570
4571 size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
4572 newid = kzalloc(size, GFP_KERNEL);
4573 if (!newid)
4574 return ERR_PTR(-ENOMEM);
4575
4576 if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) {
4577 error = -ENOMEM;
4578 goto err_out;
4579 }
4580 spin_lock(&ss->id_lock);
4581
4582 error = idr_get_new_above(&ss->idr, newid, 1, &myid);
4583 spin_unlock(&ss->id_lock);
4584
4585
4586 if (error) {
4587 error = -ENOSPC;
4588 goto err_out;
4589 }
4590 if (myid > CSS_ID_MAX)
4591 goto remove_idr;
4592
4593 newid->id = myid;
4594 newid->depth = depth;
4595 return newid;
4596remove_idr:
4597 error = -ENOSPC;
4598 spin_lock(&ss->id_lock);
4599 idr_remove(&ss->idr, myid);
4600 spin_unlock(&ss->id_lock);
4601err_out:
4602 kfree(newid);
4603 return ERR_PTR(error);
4604
4605}
4606
4607static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
4608 struct cgroup_subsys_state *rootcss)
4609{
4610 struct css_id *newid;
4611
4612 spin_lock_init(&ss->id_lock);
4613 idr_init(&ss->idr);
4614
4615 newid = get_new_cssid(ss, 0);
4616 if (IS_ERR(newid))
4617 return PTR_ERR(newid);
4618
4619 newid->stack[0] = newid->id;
4620 newid->css = rootcss;
4621 rootcss->id = newid;
4622 return 0;
4623}
4624
4625static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
4626 struct cgroup *child)
4627{
4628 int subsys_id, i, depth = 0;
4629 struct cgroup_subsys_state *parent_css, *child_css;
4630 struct css_id *child_id, *parent_id;
4631
4632 subsys_id = ss->subsys_id;
4633 parent_css = parent->subsys[subsys_id];
4634 child_css = child->subsys[subsys_id];
4635 parent_id = parent_css->id;
4636 depth = parent_id->depth + 1;
4637
4638 child_id = get_new_cssid(ss, depth);
4639 if (IS_ERR(child_id))
4640 return PTR_ERR(child_id);
4641
4642 for (i = 0; i < depth; i++)
4643 child_id->stack[i] = parent_id->stack[i];
4644 child_id->stack[depth] = child_id->id;
4645
4646
4647
4648
4649 rcu_assign_pointer(child_css->id, child_id);
4650
4651 return 0;
4652}
4653
4654
4655
4656
4657
4658
4659
4660