1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include <linux/cgroup.h>
30#include <linux/cred.h>
31#include <linux/ctype.h>
32#include <linux/errno.h>
33#include <linux/fs.h>
34#include <linux/init_task.h>
35#include <linux/kernel.h>
36#include <linux/list.h>
37#include <linux/mm.h>
38#include <linux/mutex.h>
39#include <linux/mount.h>
40#include <linux/pagemap.h>
41#include <linux/proc_fs.h>
42#include <linux/rcupdate.h>
43#include <linux/sched.h>
44#include <linux/backing-dev.h>
45#include <linux/seq_file.h>
46#include <linux/slab.h>
47#include <linux/magic.h>
48#include <linux/spinlock.h>
49#include <linux/string.h>
50#include <linux/sort.h>
51#include <linux/kmod.h>
52#include <linux/module.h>
53#include <linux/delayacct.h>
54#include <linux/cgroupstats.h>
55#include <linux/hash.h>
56#include <linux/namei.h>
57#include <linux/pid_namespace.h>
58#include <linux/idr.h>
59#include <linux/vmalloc.h>
60#include <linux/eventfd.h>
61#include <linux/poll.h>
62#include <linux/flex_array.h>
63#include <linux/kthread.h>
64
65#include <linux/atomic.h>
66
67
/*
 * Bias applied to a css reference count.  css_unbias_refcnt() subtracts
 * this value from a negative count to recover the unbiased value.
 */
#define CSS_DEACT_BIAS INT_MIN
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
/*
 * cgroup_mutex is the lock taken by cgroup_lock()/cgroup_unlock().
 * cgroup_root_mutex is acquired after cgroup_mutex where both are needed
 * (see cgroup_remount()) and on its own in cgroup_show_options().
 */
static DEFINE_MUTEX(cgroup_mutex);
static DEFINE_MUTEX(cgroup_root_mutex);
88
89
90
91
92
93
94
/*
 * Table of subsystems indexed by subsystem id.  Each SUBSYS(x) line in
 * <linux/cgroup_subsys.h> expands to a designated initializer storing
 * &x_subsys at index x_subsys_id.  Slots may be NULL; code in this file
 * that walks the table checks for that.
 * NOTE(review): IS_SUBSYS_ENABLED is presumably consumed by the included
 * header so that only built-in subsystems are listed here - confirm.
 */
#define SUBSYS(_x) [_x ## _subsys_id] = &_x ## _subsys,
#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
#include <linux/cgroup_subsys.h>
};
100
/* Size of cgroupfs_root.name, including the terminating NUL. */
#define MAX_CGROUP_ROOT_NAMELEN 64
102
103
104
105
106
107
/*
 * A cgroupfs_root represents the root of one cgroup hierarchy and is
 * stored in the superblock's s_fs_info (see cgroup_set_super()).
 */
struct cgroupfs_root {
	/* Superblock of this hierarchy; set in cgroup_set_super() */
	struct super_block *sb;

	/* Bitmask of subsystems requested for this hierarchy */
	unsigned long subsys_mask;

	/* Id allocated from hierarchy_ida by init_root_id() */
	int hierarchy_id;

	/* Bitmask of subsystems actually bound; updated by rebind_subsystems() */
	unsigned long actual_subsys_mask;

	/* Subsystems bound to this hierarchy, linked via cgroup_subsys.sibling */
	struct list_head subsys_list;

	/* The top-level cgroup of this hierarchy */
	struct cgroup top_cgroup;

	/* Number of cgroups in the hierarchy; starts at 1 for top_cgroup */
	int number_of_cgroups;

	/* Node in the global "roots" list */
	struct list_head root_list;

	/* Cgroups of this hierarchy, linked via cgroup.allcg_node */
	struct list_head allcg_list;

	/* ROOT_* flag bits */
	unsigned long flags;

	/* Release agent path; an empty string means none is shown/configured */
	char release_agent_path[PATH_MAX];

	/* Hierarchy name; may be an empty string */
	char name[MAX_CGROUP_ROOT_NAMELEN];
};
147
148
149
150
151
152
/*
 * The default root.  Its top_cgroup is "dummytop"; rebind_subsystems()
 * hands subsystems back to it when they are removed from another hierarchy
 * and only binds subsystems that currently belong to it.
 */
static struct cgroupfs_root rootnode;
154
155
156
157
/*
 * Per-file bookkeeping entry.  For non-directory dentries it is what
 * dentry->d_fsdata points to (see __d_cfe()).
 */
struct cfent {
	struct list_head node;		/* entry in cgroup->files */
	struct dentry *dentry;		/* dentry of the file */
	struct cftype *type;		/* cftype describing the file */
};
163
164
165
166
167
/*
 * NOTE(review): CSS_ID_MAX is presumably the largest id value; it matches
 * the range of the 'unsigned short id' field below - confirm.
 */
#define CSS_ID_MAX (65535)
struct css_id {
	/* RCU-annotated pointer to the cgroup_subsys_state owning this id */
	struct cgroup_subsys_state __rcu *css;

	/* The id value itself */
	unsigned short id;

	/* NOTE(review): presumably the depth in the hierarchy - confirm */
	unsigned short depth;

	/* Used for RCU-deferred handling of this object */
	struct rcu_head rcu_head;

	/*
	 * Trailing variable-length array.
	 * NOTE(review): presumably the ids of the ancestors - confirm.
	 */
	unsigned short stack[0];
};
195
196
197
198
/*
 * State for one cgroup event.  The code using this structure is not in
 * this part of the file; field notes marked "presumably" are to be
 * confirmed against it.
 */
struct cgroup_event {
	/* Cgroup the event belongs to */
	struct cgroup *cgrp;

	/* Control file the event is associated with */
	struct cftype *cft;

	/* eventfd context; presumably signalled when the event fires */
	struct eventfd_ctx *eventfd;

	/* List node; presumably links into cgroup->event_list */
	struct list_head list;

	/*
	 * Poll/wait-queue state and a work item named "remove"; presumably
	 * used to detect the eventfd being closed and to tear the event down.
	 */
	poll_table pt;
	wait_queue_head_t *wqh;
	wait_queue_t wait;
	struct work_struct remove;
};
225
226
227
/* Active hierarchies, linked via cgroupfs_root.root_list, and their count */
static LIST_HEAD(roots);
static int root_count;

/*
 * Hierarchy id allocation: ids come from hierarchy_ida, the search starts
 * at next_hierarchy_id, and hierarchy_id_lock is held around both
 * (see init_root_id() and cgroup_drop_root()).
 */
static DEFINE_IDA(hierarchy_ida);
static int next_hierarchy_id;
static DEFINE_SPINLOCK(hierarchy_id_lock);
234
235
/* Shorthand for the top cgroup of the default root, rootnode */
#define dummytop (&rootnode.top_cgroup)
237
238
239
240
241
242
/*
 * NOTE(review): not referenced in this part of the file; presumably
 * non-zero when some subsystem needs fork/exit callbacks - confirm.
 */
static int need_forkexit_callback __read_mostly;
244
245#ifdef CONFIG_PROVE_LOCKING
246int cgroup_lock_is_held(void)
247{
248 return lockdep_is_held(&cgroup_mutex);
249}
250#else
251int cgroup_lock_is_held(void)
252{
253 return mutex_is_locked(&cgroup_mutex);
254}
255#endif
256
257EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
258
259static int css_unbias_refcnt(int refcnt)
260{
261 return refcnt >= 0 ? refcnt : refcnt - CSS_DEACT_BIAS;
262}
263
264
265static int css_refcnt(struct cgroup_subsys_state *css)
266{
267 int v = atomic_read(&css->refcnt);
268
269 return css_unbias_refcnt(v);
270}
271
272
273inline int cgroup_is_removed(const struct cgroup *cgrp)
274{
275 return test_bit(CGRP_REMOVED, &cgrp->flags);
276}
277
278
/* Bit numbers used in cgroupfs_root.flags and cgroup_sb_opts.flags */
enum {
	ROOT_NOPREFIX,	/* set by the "noprefix" mount option */
	ROOT_XATTR,	/* set by the "xattr" mount option */
};
283
284static int cgroup_is_releasable(const struct cgroup *cgrp)
285{
286 const int bits =
287 (1 << CGRP_RELEASABLE) |
288 (1 << CGRP_NOTIFY_ON_RELEASE);
289 return (cgrp->flags & bits) == bits;
290}
291
292static int notify_on_release(const struct cgroup *cgrp)
293{
294 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
295}
296
297static int clone_children(const struct cgroup *cgrp)
298{
299 return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
300}
301
302
303
304
305
/*
 * for_each_subsys - iterate over the subsystems bound to a hierarchy
 * @_root: pointer to the cgroupfs_root whose subsys_list is walked
 * @_ss: struct cgroup_subsys * used as the loop cursor
 *
 * @_root is parenthesized so that any pointer-valued expression may be
 * passed, not only a plain identifier or member access.
 */
#define for_each_subsys(_root, _ss) \
list_for_each_entry(_ss, &(_root)->subsys_list, sibling)
308
309
/*
 * for_each_active_root - iterate over every hierarchy on the "roots" list
 * @_root: struct cgroupfs_root * used as the loop cursor
 */
#define for_each_active_root(_root) \
list_for_each_entry(_root, &roots, root_list)
312
313static inline struct cgroup *__d_cgrp(struct dentry *dentry)
314{
315 return dentry->d_fsdata;
316}
317
318static inline struct cfent *__d_cfe(struct dentry *dentry)
319{
320 return dentry->d_fsdata;
321}
322
323static inline struct cftype *__d_cft(struct dentry *dentry)
324{
325 return __d_cfe(dentry)->type;
326}
327
328
329
/*
 * Release-agent machinery.  release_agent_work runs cgroup_release_agent();
 * check_for_release() is called from __put_css_set() when a cgroup's count
 * drops to zero and notify_on_release is set.
 * NOTE(review): release_list presumably holds cgroups queued via
 * cgroup->release_list and is protected by release_list_lock - confirm.
 */
static LIST_HEAD(release_list);
static DEFINE_RAW_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
335
336
/* Link object connecting one css_set to one cgroup */
struct cg_cgroup_link {
	/*
	 * Entry in the cgroup's css_sets list (see link_css_set()); also
	 * used to chain pre-allocated links on a temporary list.
	 */
	struct list_head cgrp_link_list;
	struct cgroup *cgrp;

	/* Entry in the css_set's cg_links list */
	struct list_head cg_link_list;
	struct css_set *cg;
};
351
352
353
354
355
356
357
358
/*
 * The initial css_set.  task_cgroup_from_root() maps a task using it
 * straight to the requested hierarchy's top cgroup.
 * NOTE(review): init_css_set_link is presumably its link to dummytop.
 */
static struct css_set init_css_set;
static struct cg_cgroup_link init_css_set_link;

static int cgroup_init_idr(struct cgroup_subsys *ss,
			   struct cgroup_subsys_state *css);
364
365
366
367
/*
 * css_set_lock is held for writing while css_sets and their links are
 * added or removed and css_set_count is changed, and for reading while
 * they are looked up (see __put_css_set() and find_css_set()).
 */
static DEFINE_RWLOCK(css_set_lock);
static int css_set_count;
370
371
372
373
374
375
/*
 * Hash table of css_sets, indexed by css_set_hash().  The table has
 * 2^CSS_SET_HASH_BITS buckets.
 */
#define CSS_SET_HASH_BITS 7
#define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS)
static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
379
380static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
381{
382 int i;
383 int index;
384 unsigned long tmp = 0UL;
385
386 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
387 tmp += (unsigned long)css[i];
388 tmp = (tmp >> 16) ^ tmp;
389
390 index = hash_long(tmp, CSS_SET_HASH_BITS);
391
392 return &css_set_table[index];
393}
394
395
396
397
398
/*
 * NOTE(review): not referenced in this part of the file; presumably
 * non-zero once tasks are linked onto their css_set's task list - confirm.
 */
static int use_task_css_set_links __read_mostly;
400
401static void __put_css_set(struct css_set *cg, int taskexit)
402{
403 struct cg_cgroup_link *link;
404 struct cg_cgroup_link *saved_link;
405
406
407
408
409
410 if (atomic_add_unless(&cg->refcount, -1, 1))
411 return;
412 write_lock(&css_set_lock);
413 if (!atomic_dec_and_test(&cg->refcount)) {
414 write_unlock(&css_set_lock);
415 return;
416 }
417
418
419 hlist_del(&cg->hlist);
420 css_set_count--;
421
422 list_for_each_entry_safe(link, saved_link, &cg->cg_links,
423 cg_link_list) {
424 struct cgroup *cgrp = link->cgrp;
425 list_del(&link->cg_link_list);
426 list_del(&link->cgrp_link_list);
427 if (atomic_dec_and_test(&cgrp->count) &&
428 notify_on_release(cgrp)) {
429 if (taskexit)
430 set_bit(CGRP_RELEASABLE, &cgrp->flags);
431 check_for_release(cgrp);
432 }
433
434 kfree(link);
435 }
436
437 write_unlock(&css_set_lock);
438 kfree_rcu(cg, rcu_head);
439}
440
441
442
443
/* Take an additional reference on @cg. */
static inline void get_css_set(struct css_set *cg)
{
	atomic_inc(&cg->refcount);
}
448
/* Drop a reference on @cg outside of the task-exit path. */
static inline void put_css_set(struct css_set *cg)
{
	const int taskexit = 0;

	__put_css_set(cg, taskexit);
}
453
/* Drop a reference on @cg, passing taskexit=1 to __put_css_set(). */
static inline void put_css_set_taskexit(struct css_set *cg)
{
	const int taskexit = 1;

	__put_css_set(cg, taskexit);
}
458
459
460
461
462
463
464
465
466
467
468
469static bool compare_css_sets(struct css_set *cg,
470 struct css_set *old_cg,
471 struct cgroup *new_cgrp,
472 struct cgroup_subsys_state *template[])
473{
474 struct list_head *l1, *l2;
475
476 if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
477
478 return false;
479 }
480
481
482
483
484
485
486
487
488
489
490 l1 = &cg->cg_links;
491 l2 = &old_cg->cg_links;
492 while (1) {
493 struct cg_cgroup_link *cgl1, *cgl2;
494 struct cgroup *cg1, *cg2;
495
496 l1 = l1->next;
497 l2 = l2->next;
498
499 if (l1 == &cg->cg_links) {
500 BUG_ON(l2 != &old_cg->cg_links);
501 break;
502 } else {
503 BUG_ON(l2 == &old_cg->cg_links);
504 }
505
506 cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
507 cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
508 cg1 = cgl1->cgrp;
509 cg2 = cgl2->cgrp;
510
511 BUG_ON(cg1->root != cg2->root);
512
513
514
515
516
517
518
519
520 if (cg1->root == new_cgrp->root) {
521 if (cg1 != new_cgrp)
522 return false;
523 } else {
524 if (cg1 != cg2)
525 return false;
526 }
527 }
528 return true;
529}
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544static struct css_set *find_existing_css_set(
545 struct css_set *oldcg,
546 struct cgroup *cgrp,
547 struct cgroup_subsys_state *template[])
548{
549 int i;
550 struct cgroupfs_root *root = cgrp->root;
551 struct hlist_head *hhead;
552 struct hlist_node *node;
553 struct css_set *cg;
554
555
556
557
558
559
560 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
561 if (root->subsys_mask & (1UL << i)) {
562
563
564
565 template[i] = cgrp->subsys[i];
566 } else {
567
568
569 template[i] = oldcg->subsys[i];
570 }
571 }
572
573 hhead = css_set_hash(template);
574 hlist_for_each_entry(cg, node, hhead, hlist) {
575 if (!compare_css_sets(cg, oldcg, cgrp, template))
576 continue;
577
578
579 return cg;
580 }
581
582
583 return NULL;
584}
585
586static void free_cg_links(struct list_head *tmp)
587{
588 struct cg_cgroup_link *link;
589 struct cg_cgroup_link *saved_link;
590
591 list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
592 list_del(&link->cgrp_link_list);
593 kfree(link);
594 }
595}
596
597
598
599
600
601
602static int allocate_cg_links(int count, struct list_head *tmp)
603{
604 struct cg_cgroup_link *link;
605 int i;
606 INIT_LIST_HEAD(tmp);
607 for (i = 0; i < count; i++) {
608 link = kmalloc(sizeof(*link), GFP_KERNEL);
609 if (!link) {
610 free_cg_links(tmp);
611 return -ENOMEM;
612 }
613 list_add(&link->cgrp_link_list, tmp);
614 }
615 return 0;
616}
617
618
619
620
621
622
623
624static void link_css_set(struct list_head *tmp_cg_links,
625 struct css_set *cg, struct cgroup *cgrp)
626{
627 struct cg_cgroup_link *link;
628
629 BUG_ON(list_empty(tmp_cg_links));
630 link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
631 cgrp_link_list);
632 link->cg = cg;
633 link->cgrp = cgrp;
634 atomic_inc(&cgrp->count);
635 list_move(&link->cgrp_link_list, &cgrp->css_sets);
636
637
638
639
640 list_add_tail(&link->cg_link_list, &cg->cg_links);
641}
642
643
644
645
646
647
648
649
650static struct css_set *find_css_set(
651 struct css_set *oldcg, struct cgroup *cgrp)
652{
653 struct css_set *res;
654 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
655
656 struct list_head tmp_cg_links;
657
658 struct hlist_head *hhead;
659 struct cg_cgroup_link *link;
660
661
662
663 read_lock(&css_set_lock);
664 res = find_existing_css_set(oldcg, cgrp, template);
665 if (res)
666 get_css_set(res);
667 read_unlock(&css_set_lock);
668
669 if (res)
670 return res;
671
672 res = kmalloc(sizeof(*res), GFP_KERNEL);
673 if (!res)
674 return NULL;
675
676
677 if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
678 kfree(res);
679 return NULL;
680 }
681
682 atomic_set(&res->refcount, 1);
683 INIT_LIST_HEAD(&res->cg_links);
684 INIT_LIST_HEAD(&res->tasks);
685 INIT_HLIST_NODE(&res->hlist);
686
687
688
689 memcpy(res->subsys, template, sizeof(res->subsys));
690
691 write_lock(&css_set_lock);
692
693 list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
694 struct cgroup *c = link->cgrp;
695 if (c->root == cgrp->root)
696 c = cgrp;
697 link_css_set(&tmp_cg_links, res, c);
698 }
699
700 BUG_ON(!list_empty(&tmp_cg_links));
701
702 css_set_count++;
703
704
705 hhead = css_set_hash(res->subsys);
706 hlist_add_head(&res->hlist, hhead);
707
708 write_unlock(&css_set_lock);
709
710 return res;
711}
712
713
714
715
716
717static struct cgroup *task_cgroup_from_root(struct task_struct *task,
718 struct cgroupfs_root *root)
719{
720 struct css_set *css;
721 struct cgroup *res = NULL;
722
723 BUG_ON(!mutex_is_locked(&cgroup_mutex));
724 read_lock(&css_set_lock);
725
726
727
728
729
730 css = task->cgroups;
731 if (css == &init_css_set) {
732 res = &root->top_cgroup;
733 } else {
734 struct cg_cgroup_link *link;
735 list_for_each_entry(link, &css->cg_links, cg_link_list) {
736 struct cgroup *c = link->cgrp;
737 if (c->root == root) {
738 res = c;
739 break;
740 }
741 }
742 }
743 read_unlock(&css_set_lock);
744 BUG_ON(!res);
745 return res;
746}
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
/**
 * cgroup_lock - acquire cgroup_mutex
 *
 * Exported (GPL) so that code outside this file can take the lock;
 * pair with cgroup_unlock().
 */
void cgroup_lock(void)
{
	mutex_lock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_lock);
807
808
809
810
811
812
/**
 * cgroup_unlock - release cgroup_mutex
 *
 * Counterpart of cgroup_lock(); exported (GPL).
 */
void cgroup_unlock(void)
{
	mutex_unlock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_unlock);
818
819
820
821
822
823
824
825
/* Forward declarations for functions and tables defined later in the file */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
			       unsigned long subsys_mask);
static const struct inode_operations cgroup_dir_inode_operations;
static const struct file_operations proc_cgroupstats_operations;

/* backing_dev_info attached to every inode made by cgroup_new_inode() */
static struct backing_dev_info cgroup_backing_dev_info = {
	.name = "cgroup",
	.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
};

static int alloc_css_id(struct cgroup_subsys *ss,
			struct cgroup *parent, struct cgroup *child);
841
842static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
843{
844 struct inode *inode = new_inode(sb);
845
846 if (inode) {
847 inode->i_ino = get_next_ino();
848 inode->i_mode = mode;
849 inode->i_uid = current_fsuid();
850 inode->i_gid = current_fsgid();
851 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
852 inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
853 }
854 return inode;
855}
856
857
858
859
860
861static int cgroup_call_pre_destroy(struct cgroup *cgrp)
862{
863 struct cgroup_subsys *ss;
864 int ret = 0;
865
866 for_each_subsys(cgrp->root, ss) {
867 if (!ss->pre_destroy)
868 continue;
869
870 ret = ss->pre_destroy(cgrp);
871 if (ret) {
872
873 WARN_ON_ONCE(!ss->__DEPRECATED_clear_css_refs);
874 break;
875 }
876 }
877
878 return ret;
879}
880
881static void cgroup_diput(struct dentry *dentry, struct inode *inode)
882{
883
884 if (S_ISDIR(inode->i_mode)) {
885 struct cgroup *cgrp = dentry->d_fsdata;
886 struct cgroup_subsys *ss;
887 BUG_ON(!(cgroup_is_removed(cgrp)));
888
889
890
891
892
893
894 synchronize_rcu();
895
896 mutex_lock(&cgroup_mutex);
897
898
899
900 for_each_subsys(cgrp->root, ss)
901 ss->destroy(cgrp);
902
903 cgrp->root->number_of_cgroups--;
904 mutex_unlock(&cgroup_mutex);
905
906
907
908
909
910 deactivate_super(cgrp->root->sb);
911
912
913
914
915
916 BUG_ON(!list_empty(&cgrp->pidlists));
917
918 simple_xattrs_free(&cgrp->xattrs);
919
920 kfree_rcu(cgrp, rcu_head);
921 } else {
922 struct cfent *cfe = __d_cfe(dentry);
923 struct cgroup *cgrp = dentry->d_parent->d_fsdata;
924 struct cftype *cft = cfe->type;
925
926 WARN_ONCE(!list_empty(&cfe->node) &&
927 cgrp != &cgrp->root->top_cgroup,
928 "cfe still linked for %s\n", cfe->type->name);
929 kfree(cfe);
930 simple_xattrs_free(&cft->xattrs);
931 }
932 iput(inode);
933}
934
/*
 * d_delete callback (installed in cgroup_get_rootdir()): always returns 1,
 * regardless of @d.
 */
static int cgroup_delete(const struct dentry *d)
{
	return 1;
}
939
940static void remove_dir(struct dentry *d)
941{
942 struct dentry *parent = dget(d->d_parent);
943
944 d_delete(d);
945 simple_rmdir(parent->d_inode, d);
946 dput(parent);
947}
948
949static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
950{
951 struct cfent *cfe;
952
953 lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex);
954 lockdep_assert_held(&cgroup_mutex);
955
956 list_for_each_entry(cfe, &cgrp->files, node) {
957 struct dentry *d = cfe->dentry;
958
959 if (cft && cfe->type != cft)
960 continue;
961
962 dget(d);
963 d_delete(d);
964 simple_unlink(cgrp->dentry->d_inode, d);
965 list_del_init(&cfe->node);
966 dput(d);
967
968 return 0;
969 }
970 return -ENOENT;
971}
972
973
974
975
976
977
978
979static void cgroup_clear_directory(struct dentry *dir, bool base_files,
980 unsigned long subsys_mask)
981{
982 struct cgroup *cgrp = __d_cgrp(dir);
983 struct cgroup_subsys *ss;
984
985 for_each_subsys(cgrp->root, ss) {
986 struct cftype_set *set;
987 if (!test_bit(ss->subsys_id, &subsys_mask))
988 continue;
989 list_for_each_entry(set, &ss->cftsets, node)
990 cgroup_rm_file(cgrp, set->cfts);
991 }
992 if (base_files) {
993 while (!list_empty(&cgrp->files))
994 cgroup_rm_file(cgrp, NULL);
995 }
996}
997
998
999
1000
1001static void cgroup_d_remove_dir(struct dentry *dentry)
1002{
1003 struct dentry *parent;
1004 struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
1005
1006 cgroup_clear_directory(dentry, true, root->subsys_mask);
1007
1008 parent = dentry->d_parent;
1009 spin_lock(&parent->d_lock);
1010 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1011 list_del_init(&dentry->d_u.d_child);
1012 spin_unlock(&dentry->d_lock);
1013 spin_unlock(&parent->d_lock);
1014 remove_dir(dentry);
1015}
1016
1017
1018
1019
1020
1021
1022
1023
1024
/*
 * Wait queue woken by cgroup_wakeup_rmdir_waiter() when a cgroup's
 * CGRP_WAIT_ON_RMDIR flag was set.
 */
static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
1026
1027static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
1028{
1029 if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
1030 wake_up_all(&cgroup_rmdir_waitq);
1031}
1032
/*
 * cgroup_exclude_rmdir - take a reference on @css
 *
 * The reference is dropped again by cgroup_release_and_wakeup_rmdir().
 */
void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
{
	css_get(css);
}
1037
1038void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
1039{
1040 cgroup_wakeup_rmdir_waiter(css->cgroup);
1041 css_put(css);
1042}
1043
1044
1045
1046
1047
1048
/*
 * rebind_subsystems - change the set of subsystems bound to @root
 * @root: hierarchy to update
 * @final_subsys_mask: bitmask of subsystems that must be bound afterwards
 *
 * Both cgroup_mutex and cgroup_root_mutex must be locked (BUG otherwise).
 *
 * Returns 0 on success.  Returns -EBUSY, before anything is modified, if
 * a subsystem to be added is currently bound to a hierarchy other than
 * rootnode, or if @root contains more than its top cgroup.
 */
static int rebind_subsystems(struct cgroupfs_root *root,
			     unsigned long final_subsys_mask)
{
	unsigned long added_mask, removed_mask;
	struct cgroup *cgrp = &root->top_cgroup;
	int i;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));
	BUG_ON(!mutex_is_locked(&cgroup_root_mutex));

	removed_mask = root->actual_subsys_mask & ~final_subsys_mask;
	added_mask = final_subsys_mask & ~root->actual_subsys_mask;
	/* Check that any added subsystems are currently free */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		unsigned long bit = 1UL << i;
		struct cgroup_subsys *ss = subsys[i];
		if (!(bit & added_mask))
			continue;
		/* A subsystem selected in the mask must exist in the table */
		BUG_ON(ss == NULL);
		if (ss->root != &rootnode) {
			/* Subsystem isn't free */
			return -EBUSY;
		}
	}

	/* Subsystems can only be changed while the hierarchy is otherwise empty */
	if (root->number_of_cgroups > 1)
		return -EBUSY;

	/* Process each subsystem */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		unsigned long bit = 1UL << i;
		if (bit & added_mask) {
			/* We're binding this subsystem to this hierarchy */
			BUG_ON(ss == NULL);
			BUG_ON(cgrp->subsys[i]);
			BUG_ON(!dummytop->subsys[i]);
			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
			cgrp->subsys[i] = dummytop->subsys[i];
			cgrp->subsys[i]->cgroup = cgrp;
			list_move(&ss->sibling, &root->subsys_list);
			ss->root = root;
			if (ss->bind)
				ss->bind(cgrp);
			/* The module reference is kept while the subsystem is bound */
		} else if (bit & removed_mask) {
			/* We're removing this subsystem: hand it back to rootnode */
			BUG_ON(ss == NULL);
			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
			if (ss->bind)
				ss->bind(dummytop);
			dummytop->subsys[i]->cgroup = dummytop;
			cgrp->subsys[i] = NULL;
			subsys[i]->root = &rootnode;
			list_move(&ss->sibling, &rootnode.subsys_list);
			/* The subsystem is no longer bound here: drop a module reference */
			module_put(ss->module);
		} else if (bit & final_subsys_mask) {
			/* Subsystem state should already exist */
			BUG_ON(ss == NULL);
			BUG_ON(!cgrp->subsys[i]);
			/*
			 * Already bound: drop one module reference.
			 * NOTE(review): presumably the extra one pinned by
			 * parse_cgroupfs_options() - confirm.
			 */
			module_put(ss->module);
#ifdef CONFIG_MODULE_UNLOAD
			BUG_ON(ss->module && !module_refcount(ss->module));
#endif
		} else {
			/* Subsystem state shouldn't exist */
			BUG_ON(cgrp->subsys[i]);
		}
	}
	root->subsys_mask = root->actual_subsys_mask = final_subsys_mask;
	synchronize_rcu();

	return 0;
}
1138
1139static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
1140{
1141 struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
1142 struct cgroup_subsys *ss;
1143
1144 mutex_lock(&cgroup_root_mutex);
1145 for_each_subsys(root, ss)
1146 seq_printf(seq, ",%s", ss->name);
1147 if (test_bit(ROOT_NOPREFIX, &root->flags))
1148 seq_puts(seq, ",noprefix");
1149 if (test_bit(ROOT_XATTR, &root->flags))
1150 seq_puts(seq, ",xattr");
1151 if (strlen(root->release_agent_path))
1152 seq_printf(seq, ",release_agent=%s", root->release_agent_path);
1153 if (clone_children(&root->top_cgroup))
1154 seq_puts(seq, ",clone_children");
1155 if (strlen(root->name))
1156 seq_printf(seq, ",name=%s", root->name);
1157 mutex_unlock(&cgroup_root_mutex);
1158 return 0;
1159}
1160
/* Parsed mount options, filled in by parse_cgroupfs_options() */
struct cgroup_sb_opts {
	unsigned long subsys_mask;	/* subsystems requested */
	unsigned long flags;		/* ROOT_* flag bits */
	char *release_agent;		/* kmalloc'ed copy of release_agent=, or NULL */
	bool clone_children;		/* "clone_children" was given */
	char *name;			/* kmalloc'ed copy of name=, or NULL */
	/* User explicitly requested empty subsystem ("none") */
	bool none;

	/* Candidate root; cgroup_set_super() installs it in a new superblock */
	struct cgroupfs_root *new_root;

};
1173
1174
1175
1176
1177
1178
1179
1180static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1181{
1182 char *token, *o = data;
1183 bool all_ss = false, one_ss = false;
1184 unsigned long mask = (unsigned long)-1;
1185 int i;
1186 bool module_pin_failed = false;
1187
1188 BUG_ON(!mutex_is_locked(&cgroup_mutex));
1189
1190#ifdef CONFIG_CPUSETS
1191 mask = ~(1UL << cpuset_subsys_id);
1192#endif
1193
1194 memset(opts, 0, sizeof(*opts));
1195
1196 while ((token = strsep(&o, ",")) != NULL) {
1197 if (!*token)
1198 return -EINVAL;
1199 if (!strcmp(token, "none")) {
1200
1201 opts->none = true;
1202 continue;
1203 }
1204 if (!strcmp(token, "all")) {
1205
1206 if (one_ss)
1207 return -EINVAL;
1208 all_ss = true;
1209 continue;
1210 }
1211 if (!strcmp(token, "noprefix")) {
1212 set_bit(ROOT_NOPREFIX, &opts->flags);
1213 continue;
1214 }
1215 if (!strcmp(token, "clone_children")) {
1216 opts->clone_children = true;
1217 continue;
1218 }
1219 if (!strcmp(token, "xattr")) {
1220 set_bit(ROOT_XATTR, &opts->flags);
1221 continue;
1222 }
1223 if (!strncmp(token, "release_agent=", 14)) {
1224
1225 if (opts->release_agent)
1226 return -EINVAL;
1227 opts->release_agent =
1228 kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
1229 if (!opts->release_agent)
1230 return -ENOMEM;
1231 continue;
1232 }
1233 if (!strncmp(token, "name=", 5)) {
1234 const char *name = token + 5;
1235
1236 if (!strlen(name))
1237 return -EINVAL;
1238
1239 for (i = 0; i < strlen(name); i++) {
1240 char c = name[i];
1241 if (isalnum(c))
1242 continue;
1243 if ((c == '.') || (c == '-') || (c == '_'))
1244 continue;
1245 return -EINVAL;
1246 }
1247
1248 if (opts->name)
1249 return -EINVAL;
1250 opts->name = kstrndup(name,
1251 MAX_CGROUP_ROOT_NAMELEN - 1,
1252 GFP_KERNEL);
1253 if (!opts->name)
1254 return -ENOMEM;
1255
1256 continue;
1257 }
1258
1259 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1260 struct cgroup_subsys *ss = subsys[i];
1261 if (ss == NULL)
1262 continue;
1263 if (strcmp(token, ss->name))
1264 continue;
1265 if (ss->disabled)
1266 continue;
1267
1268
1269 if (all_ss)
1270 return -EINVAL;
1271 set_bit(i, &opts->subsys_mask);
1272 one_ss = true;
1273
1274 break;
1275 }
1276 if (i == CGROUP_SUBSYS_COUNT)
1277 return -ENOENT;
1278 }
1279
1280
1281
1282
1283
1284
1285 if (all_ss || (!one_ss && !opts->none && !opts->name)) {
1286 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1287 struct cgroup_subsys *ss = subsys[i];
1288 if (ss == NULL)
1289 continue;
1290 if (ss->disabled)
1291 continue;
1292 set_bit(i, &opts->subsys_mask);
1293 }
1294 }
1295
1296
1297
1298
1299
1300
1301
1302
1303 if (test_bit(ROOT_NOPREFIX, &opts->flags) &&
1304 (opts->subsys_mask & mask))
1305 return -EINVAL;
1306
1307
1308
1309 if (opts->subsys_mask && opts->none)
1310 return -EINVAL;
1311
1312
1313
1314
1315
1316 if (!opts->subsys_mask && !opts->name)
1317 return -EINVAL;
1318
1319
1320
1321
1322
1323
1324
1325 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1326 unsigned long bit = 1UL << i;
1327
1328 if (!(bit & opts->subsys_mask))
1329 continue;
1330 if (!try_module_get(subsys[i]->module)) {
1331 module_pin_failed = true;
1332 break;
1333 }
1334 }
1335 if (module_pin_failed) {
1336
1337
1338
1339
1340
1341 for (i--; i >= 0; i--) {
1342
1343 unsigned long bit = 1UL << i;
1344
1345 if (!(bit & opts->subsys_mask))
1346 continue;
1347 module_put(subsys[i]->module);
1348 }
1349 return -ENOENT;
1350 }
1351
1352 return 0;
1353}
1354
1355static void drop_parsed_module_refcounts(unsigned long subsys_mask)
1356{
1357 int i;
1358 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1359 unsigned long bit = 1UL << i;
1360
1361 if (!(bit & subsys_mask))
1362 continue;
1363 module_put(subsys[i]->module);
1364 }
1365}
1366
1367static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1368{
1369 int ret = 0;
1370 struct cgroupfs_root *root = sb->s_fs_info;
1371 struct cgroup *cgrp = &root->top_cgroup;
1372 struct cgroup_sb_opts opts;
1373 unsigned long added_mask, removed_mask;
1374
1375 mutex_lock(&cgrp->dentry->d_inode->i_mutex);
1376 mutex_lock(&cgroup_mutex);
1377 mutex_lock(&cgroup_root_mutex);
1378
1379
1380 ret = parse_cgroupfs_options(data, &opts);
1381 if (ret)
1382 goto out_unlock;
1383
1384
1385 if (opts.subsys_mask != root->actual_subsys_mask || opts.release_agent)
1386 pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n",
1387 task_tgid_nr(current), current->comm);
1388
1389 added_mask = opts.subsys_mask & ~root->subsys_mask;
1390 removed_mask = root->subsys_mask & ~opts.subsys_mask;
1391
1392
1393 if (opts.flags != root->flags ||
1394 (opts.name && strcmp(opts.name, root->name))) {
1395 ret = -EINVAL;
1396 drop_parsed_module_refcounts(opts.subsys_mask);
1397 goto out_unlock;
1398 }
1399
1400 ret = rebind_subsystems(root, opts.subsys_mask);
1401 if (ret) {
1402 drop_parsed_module_refcounts(opts.subsys_mask);
1403 goto out_unlock;
1404 }
1405
1406
1407 cgroup_clear_directory(cgrp->dentry, false, removed_mask);
1408
1409 cgroup_populate_dir(cgrp, false, added_mask);
1410
1411 if (opts.release_agent)
1412 strcpy(root->release_agent_path, opts.release_agent);
1413 out_unlock:
1414 kfree(opts.release_agent);
1415 kfree(opts.name);
1416 mutex_unlock(&cgroup_root_mutex);
1417 mutex_unlock(&cgroup_mutex);
1418 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
1419 return ret;
1420}
1421
/* Superblock operations; installed on new superblocks by cgroup_set_super() */
static const struct super_operations cgroup_ops = {
	.statfs = simple_statfs,
	.drop_inode = generic_delete_inode,
	.show_options = cgroup_show_options,
	.remount_fs = cgroup_remount,
};
1428
1429static void init_cgroup_housekeeping(struct cgroup *cgrp)
1430{
1431 INIT_LIST_HEAD(&cgrp->sibling);
1432 INIT_LIST_HEAD(&cgrp->children);
1433 INIT_LIST_HEAD(&cgrp->files);
1434 INIT_LIST_HEAD(&cgrp->css_sets);
1435 INIT_LIST_HEAD(&cgrp->release_list);
1436 INIT_LIST_HEAD(&cgrp->pidlists);
1437 mutex_init(&cgrp->pidlist_mutex);
1438 INIT_LIST_HEAD(&cgrp->event_list);
1439 spin_lock_init(&cgrp->event_list_lock);
1440 simple_xattrs_init(&cgrp->xattrs);
1441}
1442
1443static void init_cgroup_root(struct cgroupfs_root *root)
1444{
1445 struct cgroup *cgrp = &root->top_cgroup;
1446
1447 INIT_LIST_HEAD(&root->subsys_list);
1448 INIT_LIST_HEAD(&root->root_list);
1449 INIT_LIST_HEAD(&root->allcg_list);
1450 root->number_of_cgroups = 1;
1451 cgrp->root = root;
1452 cgrp->top_cgroup = cgrp;
1453 list_add_tail(&cgrp->allcg_node, &root->allcg_list);
1454 init_cgroup_housekeeping(cgrp);
1455}
1456
1457static bool init_root_id(struct cgroupfs_root *root)
1458{
1459 int ret = 0;
1460
1461 do {
1462 if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL))
1463 return false;
1464 spin_lock(&hierarchy_id_lock);
1465
1466 ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
1467 &root->hierarchy_id);
1468 if (ret == -ENOSPC)
1469
1470 ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id);
1471 if (!ret) {
1472 next_hierarchy_id = root->hierarchy_id + 1;
1473 } else if (ret != -EAGAIN) {
1474
1475 BUG_ON(ret);
1476 }
1477 spin_unlock(&hierarchy_id_lock);
1478 } while (ret);
1479 return true;
1480}
1481
1482static int cgroup_test_super(struct super_block *sb, void *data)
1483{
1484 struct cgroup_sb_opts *opts = data;
1485 struct cgroupfs_root *root = sb->s_fs_info;
1486
1487
1488 if (opts->name && strcmp(opts->name, root->name))
1489 return 0;
1490
1491
1492
1493
1494
1495 if ((opts->subsys_mask || opts->none)
1496 && (opts->subsys_mask != root->subsys_mask))
1497 return 0;
1498
1499 return 1;
1500}
1501
1502static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
1503{
1504 struct cgroupfs_root *root;
1505
1506 if (!opts->subsys_mask && !opts->none)
1507 return NULL;
1508
1509 root = kzalloc(sizeof(*root), GFP_KERNEL);
1510 if (!root)
1511 return ERR_PTR(-ENOMEM);
1512
1513 if (!init_root_id(root)) {
1514 kfree(root);
1515 return ERR_PTR(-ENOMEM);
1516 }
1517 init_cgroup_root(root);
1518
1519 root->subsys_mask = opts->subsys_mask;
1520 root->flags = opts->flags;
1521 if (opts->release_agent)
1522 strcpy(root->release_agent_path, opts->release_agent);
1523 if (opts->name)
1524 strcpy(root->name, opts->name);
1525 if (opts->clone_children)
1526 set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags);
1527 return root;
1528}
1529
1530static void cgroup_drop_root(struct cgroupfs_root *root)
1531{
1532 if (!root)
1533 return;
1534
1535 BUG_ON(!root->hierarchy_id);
1536 spin_lock(&hierarchy_id_lock);
1537 ida_remove(&hierarchy_ida, root->hierarchy_id);
1538 spin_unlock(&hierarchy_id_lock);
1539 kfree(root);
1540}
1541
1542static int cgroup_set_super(struct super_block *sb, void *data)
1543{
1544 int ret;
1545 struct cgroup_sb_opts *opts = data;
1546
1547
1548 if (!opts->new_root)
1549 return -EINVAL;
1550
1551 BUG_ON(!opts->subsys_mask && !opts->none);
1552
1553 ret = set_anon_super(sb, NULL);
1554 if (ret)
1555 return ret;
1556
1557 sb->s_fs_info = opts->new_root;
1558 opts->new_root->sb = sb;
1559
1560 sb->s_blocksize = PAGE_CACHE_SIZE;
1561 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1562 sb->s_magic = CGROUP_SUPER_MAGIC;
1563 sb->s_op = &cgroup_ops;
1564
1565 return 0;
1566}
1567
1568static int cgroup_get_rootdir(struct super_block *sb)
1569{
1570 static const struct dentry_operations cgroup_dops = {
1571 .d_iput = cgroup_diput,
1572 .d_delete = cgroup_delete,
1573 };
1574
1575 struct inode *inode =
1576 cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
1577
1578 if (!inode)
1579 return -ENOMEM;
1580
1581 inode->i_fop = &simple_dir_operations;
1582 inode->i_op = &cgroup_dir_inode_operations;
1583
1584 inc_nlink(inode);
1585 sb->s_root = d_make_root(inode);
1586 if (!sb->s_root)
1587 return -ENOMEM;
1588
1589 sb->s_d_op = &cgroup_dops;
1590 return 0;
1591}
1592
/*
 * cgroup_mount - ->mount handler of the "cgroup" filesystem type
 * @fs_type: filesystem type being mounted
 * @flags: mount flags (not consulted here)
 * @unused_dev_name: device name (unused)
 * @data: option string handed to parse_cgroupfs_options()
 *
 * Parses the options, builds a candidate cgroupfs_root and asks sget() for
 * a superblock.  If the superblock ends up using the candidate root (i.e.
 * it is a new hierarchy) the root directory is created, subsystems are
 * bound, every existing css_set is linked to the new top cgroup and the
 * control files are populated.  Otherwise the candidate root is dropped
 * and the already existing superblock is reused.
 *
 * Returns a referenced root dentry on success or an ERR_PTR() on failure.
 */
static struct dentry *cgroup_mount(struct file_system_type *fs_type,
			 int flags, const char *unused_dev_name,
			 void *data)
{
	struct cgroup_sb_opts opts;
	struct cgroupfs_root *root;
	int ret = 0;
	struct super_block *sb;
	struct cgroupfs_root *new_root;
	struct inode *inode;

	/* Option parsing is done with cgroup_mutex held. */
	mutex_lock(&cgroup_mutex);
	ret = parse_cgroupfs_options(data, &opts);
	mutex_unlock(&cgroup_mutex);
	if (ret)
		goto out_err;

	/*
	 * Build a candidate root from the parsed options.  It is only kept
	 * if sget() below ends up creating a new superblock around it.
	 */
	new_root = cgroup_root_from_opts(&opts);
	if (IS_ERR(new_root)) {
		ret = PTR_ERR(new_root);
		goto drop_modules;
	}
	opts.new_root = new_root;

	/* Locate an existing superblock or set up a new one. */
	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, 0, &opts);
	if (IS_ERR(sb)) {
		ret = PTR_ERR(sb);
		cgroup_drop_root(opts.new_root);
		goto drop_modules;
	}

	root = sb->s_fs_info;
	BUG_ON(!root);
	if (root == opts.new_root) {
		/* The superblock uses our candidate root: new hierarchy. */
		struct list_head tmp_cg_links;
		struct cgroup *root_cgrp = &root->top_cgroup;
		struct cgroupfs_root *existing_root;
		const struct cred *cred;
		int i;

		BUG_ON(sb->s_root != NULL);

		ret = cgroup_get_rootdir(sb);
		if (ret)
			goto drop_new_super;
		inode = sb->s_root->d_inode;

		/* Lock order: i_mutex, then cgroup_mutex, then cgroup_root_mutex. */
		mutex_lock(&inode->i_mutex);
		mutex_lock(&cgroup_mutex);
		mutex_lock(&cgroup_root_mutex);

		/* Refuse a named hierarchy whose name is already in use. */
		ret = -EBUSY;
		if (strlen(root->name))
			for_each_active_root(existing_root)
				if (!strcmp(existing_root->name, root->name))
					goto unlock_drop;

		/*
		 * Allocate one link per existing css_set up front so that
		 * the linking loop below, which runs under css_set_lock,
		 * does not have to allocate.
		 */
		ret = allocate_cg_links(css_set_count, &tmp_cg_links);
		if (ret)
			goto unlock_drop;

		ret = rebind_subsystems(root, root->subsys_mask);
		if (ret == -EBUSY) {
			free_cg_links(&tmp_cg_links);
			goto unlock_drop;
		}

		/* -EBUSY is the only failure handled; anything else is fatal. */
		BUG_ON(ret);

		list_add(&root->root_list, &roots);
		root_count++;

		sb->s_root->d_fsdata = root_cgrp;
		root->top_cgroup.dentry = sb->s_root;

		/* Link every css_set in the hash table to the new top cgroup. */
		write_lock(&css_set_lock);
		for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
			struct hlist_head *hhead = &css_set_table[i];
			struct hlist_node *node;
			struct css_set *cg;

			hlist_for_each_entry(cg, node, hhead, hlist)
				link_css_set(&tmp_cg_links, cg, root_cgrp);
		}
		write_unlock(&css_set_lock);

		/* Release whatever preallocated links were not consumed. */
		free_cg_links(&tmp_cg_links);

		BUG_ON(!list_empty(&root_cgrp->sibling));
		BUG_ON(!list_empty(&root_cgrp->children));
		BUG_ON(root->number_of_cgroups != 1);

		/* Populate the control files using init_cred's credentials. */
		cred = override_creds(&init_cred);
		cgroup_populate_dir(root_cgrp, true, root->subsys_mask);
		revert_creds(cred);
		mutex_unlock(&cgroup_root_mutex);
		mutex_unlock(&cgroup_mutex);
		mutex_unlock(&inode->i_mutex);
	} else {
		/*
		 * An existing superblock is being reused: the candidate
		 * root and the module references taken while parsing the
		 * options are not needed.
		 */
		cgroup_drop_root(opts.new_root);
		/* no subsys rebinding, so refcounts don't change */
		drop_parsed_module_refcounts(opts.subsys_mask);
	}

	kfree(opts.release_agent);
	kfree(opts.name);
	return dget(sb->s_root);

	/* Error unwinding, in reverse order of acquisition. */
 unlock_drop:
	mutex_unlock(&cgroup_root_mutex);
	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&inode->i_mutex);
 drop_new_super:
	deactivate_locked_super(sb);
 drop_modules:
	drop_parsed_module_refcounts(opts.subsys_mask);
 out_err:
	kfree(opts.release_agent);
	kfree(opts.name);
	return ERR_PTR(ret);
}
1741
1742static void cgroup_kill_sb(struct super_block *sb) {
1743 struct cgroupfs_root *root = sb->s_fs_info;
1744 struct cgroup *cgrp = &root->top_cgroup;
1745 int ret;
1746 struct cg_cgroup_link *link;
1747 struct cg_cgroup_link *saved_link;
1748
1749 BUG_ON(!root);
1750
1751 BUG_ON(root->number_of_cgroups != 1);
1752 BUG_ON(!list_empty(&cgrp->children));
1753 BUG_ON(!list_empty(&cgrp->sibling));
1754
1755 mutex_lock(&cgroup_mutex);
1756 mutex_lock(&cgroup_root_mutex);
1757
1758
1759 ret = rebind_subsystems(root, 0);
1760
1761 BUG_ON(ret);
1762
1763
1764
1765
1766
1767 write_lock(&css_set_lock);
1768
1769 list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
1770 cgrp_link_list) {
1771 list_del(&link->cg_link_list);
1772 list_del(&link->cgrp_link_list);
1773 kfree(link);
1774 }
1775 write_unlock(&css_set_lock);
1776
1777 if (!list_empty(&root->root_list)) {
1778 list_del(&root->root_list);
1779 root_count--;
1780 }
1781
1782 mutex_unlock(&cgroup_root_mutex);
1783 mutex_unlock(&cgroup_mutex);
1784
1785 simple_xattrs_free(&cgrp->xattrs);
1786
1787 kill_litter_super(sb);
1788 cgroup_drop_root(root);
1789}
1790
/* Filesystem type "cgroup": mounted via cgroup_mount(), torn down via cgroup_kill_sb(). */
static struct file_system_type cgroup_fs_type = {
	.name = "cgroup",
	.mount = cgroup_mount,
	.kill_sb = cgroup_kill_sb,
};

/* kobject pointer for cgroup; it is not assigned anywhere in this part of the file. */
static struct kobject *cgroup_kobj;
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1810{
1811 char *start;
1812 struct dentry *dentry = rcu_dereference_check(cgrp->dentry,
1813 cgroup_lock_is_held());
1814
1815 if (!dentry || cgrp == dummytop) {
1816
1817
1818
1819
1820 strcpy(buf, "/");
1821 return 0;
1822 }
1823
1824 start = buf + buflen;
1825
1826 *--start = '\0';
1827 for (;;) {
1828 int len = dentry->d_name.len;
1829
1830 if ((start -= len) < buf)
1831 return -ENAMETOOLONG;
1832 memcpy(start, dentry->d_name.name, len);
1833 cgrp = cgrp->parent;
1834 if (!cgrp)
1835 break;
1836
1837 dentry = rcu_dereference_check(cgrp->dentry,
1838 cgroup_lock_is_held());
1839 if (!cgrp->parent)
1840 continue;
1841 if (--start < buf)
1842 return -ENAMETOOLONG;
1843 *start = '/';
1844 }
1845 memmove(buf, start, buf + buflen - start);
1846 return 0;
1847}
1848EXPORT_SYMBOL_GPL(cgroup_path);
1849
1850
1851
1852
/*
 * One task that is being migrated, together with the bookkeeping the
 * attach paths need for it.
 */
struct task_and_cgroup {
	/* the task being migrated */
	struct task_struct *task;
	/* cgroup the task was in before the move (from task_cgroup_from_root()) */
	struct cgroup *cgrp;
	/* css_set the task is switched to (result of find_css_set()) */
	struct css_set *cg;
};
1858
/*
 * Set of tasks handed to the subsystem can_attach/attach/cancel_attach
 * callbacks.  It describes either a single task (@single, when @tc_array
 * is NULL) or an array of tasks.
 */
struct cgroup_taskset {
	/* the only entry when tc_array is NULL */
	struct task_and_cgroup single;
	/* array of struct task_and_cgroup, or NULL for a single-task set */
	struct flex_array *tc_array;
	/* number of valid entries in tc_array */
	int tc_array_len;
	/* index of the next entry cgroup_taskset_next() will return */
	int idx;
	/* cgrp field of the entry most recently returned by the iterators */
	struct cgroup *cur_cgrp;
};
1866
1867
1868
1869
1870
1871
1872
1873struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
1874{
1875 if (tset->tc_array) {
1876 tset->idx = 0;
1877 return cgroup_taskset_next(tset);
1878 } else {
1879 tset->cur_cgrp = tset->single.cgrp;
1880 return tset->single.task;
1881 }
1882}
1883EXPORT_SYMBOL_GPL(cgroup_taskset_first);
1884
1885
1886
1887
1888
1889
1890
1891
1892struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
1893{
1894 struct task_and_cgroup *tc;
1895
1896 if (!tset->tc_array || tset->idx >= tset->tc_array_len)
1897 return NULL;
1898
1899 tc = flex_array_get(tset->tc_array, tset->idx++);
1900 tset->cur_cgrp = tc->cgrp;
1901 return tc->task;
1902}
1903EXPORT_SYMBOL_GPL(cgroup_taskset_next);
1904
1905
1906
1907
1908
1909
1910
1911
1912
/**
 * cgroup_taskset_cur_cgroup - return the cgroup matching the current task
 * @tset: taskset of interest
 *
 * Returns the cgroup recorded by the last cgroup_taskset_first() or
 * cgroup_taskset_next() call, i.e. the cgrp field of the task those
 * functions returned.
 */
struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset)
{
	return tset->cur_cgrp;
}
EXPORT_SYMBOL_GPL(cgroup_taskset_cur_cgroup);
1918
1919
1920
1921
1922
1923int cgroup_taskset_size(struct cgroup_taskset *tset)
1924{
1925 return tset->tc_array ? tset->tc_array_len : 1;
1926}
1927EXPORT_SYMBOL_GPL(cgroup_taskset_size);
1928
1929
1930
1931
1932
1933
1934
1935
1936
/*
 * cgroup_task_migrate - switch a task over to a new css_set
 * @cgrp: destination cgroup (not referenced in the body)
 * @oldcgrp: cgroup the task is leaving; gets CGRP_RELEASABLE set
 * @tsk: task being moved
 * @newcg: css_set to install in @tsk; the reference held by the caller is
 *         handed over to the task
 *
 * Cannot fail.  The callers in this file obtain @newcg from find_css_set()
 * beforehand.
 */
static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
				struct task_struct *tsk, struct css_set *newcg)
{
	struct css_set *oldcg;

	/*
	 * The task is not expected to be exiting here; callers filter out
	 * PF_EXITING tasks before migrating.
	 */
	WARN_ON_ONCE(tsk->flags & PF_EXITING);
	oldcg = tsk->cgroups;

	/* Publish the new css_set pointer under the task lock. */
	task_lock(tsk);
	rcu_assign_pointer(tsk->cgroups, newcg);
	task_unlock(tsk);

	/* Update the css_set linked lists if we're using them */
	write_lock(&css_set_lock);
	if (!list_empty(&tsk->cg_list))
		list_move(&tsk->cg_list, &newcg->tasks);
	write_unlock(&css_set_lock);

	/*
	 * Flag the old cgroup as releasable before dropping the task's
	 * reference on the old css_set.
	 */
	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
	put_css_set(oldcg);
}
1968
1969
1970
1971
1972
1973
1974
1975
1976
/**
 * cgroup_attach_task - attach a single task to a cgroup
 * @cgrp: the cgroup the task is attaching to
 * @tsk: the task to be attached
 *
 * Asks every subsystem of the hierarchy for permission via ->can_attach,
 * migrates the task and then notifies the subsystems via ->attach.  If a
 * step fails, ->cancel_attach is invoked on the subsystems whose
 * ->can_attach had already succeeded.
 *
 * Returns 0 on success (including when @tsk is already in @cgrp), -ESRCH
 * if the task is exiting, -ENOMEM if no css_set could be obtained, or the
 * error returned by a ->can_attach callback.
 *
 * NOTE(review): callers in this file hold cgroup_mutex and, for
 * attach_task_by_pid(), threadgroup_lock() on @tsk - confirm for others.
 */
int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
	int retval = 0;
	struct cgroup_subsys *ss, *failed_ss = NULL;
	struct cgroup *oldcgrp;
	struct cgroupfs_root *root = cgrp->root;
	struct cgroup_taskset tset = { };
	struct css_set *newcg;

	/* An exiting task is not migrated. */
	if (tsk->flags & PF_EXITING)
		return -ESRCH;

	/* Nothing to do if the task is already in that cgroup */
	oldcgrp = task_cgroup_from_root(tsk, root);
	if (cgrp == oldcgrp)
		return 0;

	/* Single-task set: tc_array stays NULL. */
	tset.single.task = tsk;
	tset.single.cgrp = oldcgrp;

	for_each_subsys(root, ss) {
		if (ss->can_attach) {
			retval = ss->can_attach(cgrp, &tset);
			if (retval) {
				/*
				 * Remember which subsystem refused so that
				 * the rollback below only cancels the ones
				 * that came before it.
				 */
				failed_ss = ss;
				goto out;
			}
		}
	}

	newcg = find_css_set(tsk->cgroups, cgrp);
	if (!newcg) {
		/* failed_ss is NULL here, so every subsystem is cancelled. */
		retval = -ENOMEM;
		goto out;
	}

	cgroup_task_migrate(cgrp, oldcgrp, tsk, newcg);

	for_each_subsys(root, ss) {
		if (ss->attach)
			ss->attach(cgrp, &tset);
	}

	synchronize_rcu();

	/*
	 * Wake up a waiter that may be blocked in rmdir of this cgroup
	 * now that the attach has finished.
	 */
	cgroup_wakeup_rmdir_waiter(cgrp);
out:
	if (retval) {
		for_each_subsys(root, ss) {
			if (ss == failed_ss)
				/*
				 * This subsystem's ->can_attach failed, so
				 * neither it nor any later subsystem needs
				 * a ->cancel_attach call.
				 */
				break;
			if (ss->cancel_attach)
				ss->cancel_attach(cgrp, &tset);
		}
	}
	return retval;
}
2051
2052
2053
2054
2055
2056
2057int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
2058{
2059 struct cgroupfs_root *root;
2060 int retval = 0;
2061
2062 cgroup_lock();
2063 for_each_active_root(root) {
2064 struct cgroup *from_cg = task_cgroup_from_root(from, root);
2065
2066 retval = cgroup_attach_task(from_cg, tsk);
2067 if (retval)
2068 break;
2069 }
2070 cgroup_unlock();
2071
2072 return retval;
2073}
2074EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2085{
2086 int retval, i, group_size;
2087 struct cgroup_subsys *ss, *failed_ss = NULL;
2088
2089 struct cgroupfs_root *root = cgrp->root;
2090
2091 struct task_struct *tsk;
2092 struct task_and_cgroup *tc;
2093 struct flex_array *group;
2094 struct cgroup_taskset tset = { };
2095
2096
2097
2098
2099
2100
2101
2102
2103 group_size = get_nr_threads(leader);
2104
2105 group = flex_array_alloc(sizeof(*tc), group_size, GFP_KERNEL);
2106 if (!group)
2107 return -ENOMEM;
2108
2109 retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL);
2110 if (retval)
2111 goto out_free_group_list;
2112
2113 tsk = leader;
2114 i = 0;
2115
2116
2117
2118
2119
2120 rcu_read_lock();
2121 do {
2122 struct task_and_cgroup ent;
2123
2124
2125 if (tsk->flags & PF_EXITING)
2126 continue;
2127
2128
2129 BUG_ON(i >= group_size);
2130 ent.task = tsk;
2131 ent.cgrp = task_cgroup_from_root(tsk, root);
2132
2133 if (ent.cgrp == cgrp)
2134 continue;
2135
2136
2137
2138
2139 retval = flex_array_put(group, i, &ent, GFP_ATOMIC);
2140 BUG_ON(retval != 0);
2141 i++;
2142 } while_each_thread(leader, tsk);
2143 rcu_read_unlock();
2144
2145 group_size = i;
2146 tset.tc_array = group;
2147 tset.tc_array_len = group_size;
2148
2149
2150 retval = 0;
2151 if (!group_size)
2152 goto out_free_group_list;
2153
2154
2155
2156
2157 for_each_subsys(root, ss) {
2158 if (ss->can_attach) {
2159 retval = ss->can_attach(cgrp, &tset);
2160 if (retval) {
2161 failed_ss = ss;
2162 goto out_cancel_attach;
2163 }
2164 }
2165 }
2166
2167
2168
2169
2170
2171 for (i = 0; i < group_size; i++) {
2172 tc = flex_array_get(group, i);
2173 tc->cg = find_css_set(tc->task->cgroups, cgrp);
2174 if (!tc->cg) {
2175 retval = -ENOMEM;
2176 goto out_put_css_set_refs;
2177 }
2178 }
2179
2180
2181
2182
2183
2184
2185 for (i = 0; i < group_size; i++) {
2186 tc = flex_array_get(group, i);
2187 cgroup_task_migrate(cgrp, tc->cgrp, tc->task, tc->cg);
2188 }
2189
2190
2191
2192
2193
2194 for_each_subsys(root, ss) {
2195 if (ss->attach)
2196 ss->attach(cgrp, &tset);
2197 }
2198
2199
2200
2201
2202 synchronize_rcu();
2203 cgroup_wakeup_rmdir_waiter(cgrp);
2204 retval = 0;
2205out_put_css_set_refs:
2206 if (retval) {
2207 for (i = 0; i < group_size; i++) {
2208 tc = flex_array_get(group, i);
2209 if (!tc->cg)
2210 break;
2211 put_css_set(tc->cg);
2212 }
2213 }
2214out_cancel_attach:
2215 if (retval) {
2216 for_each_subsys(root, ss) {
2217 if (ss == failed_ss)
2218 break;
2219 if (ss->cancel_attach)
2220 ss->cancel_attach(cgrp, &tset);
2221 }
2222 }
2223out_free_group_list:
2224 flex_array_free(group);
2225 return retval;
2226}
2227
2228
2229
2230
2231
2232
/*
 * Find the task_struct of the task to attach by vpid and pass it along to
 * cgroup_attach_task() or, if @threadgroup is set, to cgroup_attach_proc()
 * for its whole thread group.  A @pid of 0 selects the current task.
 *
 * Takes the cgroup lock via cgroup_lock_live_group() and releases it
 * before returning.  Returns 0 on success, -ENODEV if @cgrp has been
 * removed, -ESRCH if no such task exists, -EACCES if the caller may not
 * move the task, -EINVAL for kthreadd or PF_THREAD_BOUND tasks, or the
 * error from the attach function.
 */
static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
{
	struct task_struct *tsk;
	const struct cred *cred = current_cred(), *tcred;
	int ret;

	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;

retry_find_task:
	rcu_read_lock();
	if (pid) {
		tsk = find_task_by_vpid(pid);
		if (!tsk) {
			rcu_read_unlock();
			ret= -ESRCH;
			goto out_unlock_cgroup;
		}
		/*
		 * Only a caller whose euid is root, or matches the target's
		 * uid or saved uid, may move the target.
		 */
		tcred = __task_cred(tsk);
		if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
		    !uid_eq(cred->euid, tcred->uid) &&
		    !uid_eq(cred->euid, tcred->suid)) {
			rcu_read_unlock();
			ret = -EACCES;
			goto out_unlock_cgroup;
		}
	} else
		tsk = current;

	if (threadgroup)
		tsk = tsk->group_leader;

	/*
	 * kthreadd itself and tasks flagged PF_THREAD_BOUND are refused.
	 */
	if (tsk == kthreadd_task || (tsk->flags & PF_THREAD_BOUND)) {
		ret = -EINVAL;
		rcu_read_unlock();
		goto out_unlock_cgroup;
	}

	/* Pin the task so it can be used after leaving the RCU section. */
	get_task_struct(tsk);
	rcu_read_unlock();

	threadgroup_lock(tsk);
	if (threadgroup) {
		if (!thread_group_leader(tsk)) {
			/*
			 * The task picked as group leader is no longer the
			 * leader by the time the threadgroup lock is held;
			 * drop everything and look the task up again.
			 */
			threadgroup_unlock(tsk);
			put_task_struct(tsk);
			goto retry_find_task;
		}
		ret = cgroup_attach_proc(cgrp, tsk);
	} else
		ret = cgroup_attach_task(cgrp, tsk);
	threadgroup_unlock(tsk);

	put_task_struct(tsk);
out_unlock_cgroup:
	cgroup_unlock();
	return ret;
}
2307
/* u64 write handler: attach the single task identified by @pid to @cgrp. */
static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
{
	return attach_task_by_pid(cgrp, pid, false);
}
2312
/* u64 write handler: attach the whole thread group identified by @tgid to @cgrp. */
static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
{
	return attach_task_by_pid(cgrp, tgid, true);
}
2317
2318
2319
2320
2321
2322
2323
2324
2325bool cgroup_lock_live_group(struct cgroup *cgrp)
2326{
2327 mutex_lock(&cgroup_mutex);
2328 if (cgroup_is_removed(cgrp)) {
2329 mutex_unlock(&cgroup_mutex);
2330 return false;
2331 }
2332 return true;
2333}
2334EXPORT_SYMBOL_GPL(cgroup_lock_live_group);
2335
2336static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
2337 const char *buffer)
2338{
2339 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
2340 if (strlen(buffer) >= PATH_MAX)
2341 return -EINVAL;
2342 if (!cgroup_lock_live_group(cgrp))
2343 return -ENODEV;
2344 mutex_lock(&cgroup_root_mutex);
2345 strcpy(cgrp->root->release_agent_path, buffer);
2346 mutex_unlock(&cgroup_root_mutex);
2347 cgroup_unlock();
2348 return 0;
2349}
2350
2351static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
2352 struct seq_file *seq)
2353{
2354 if (!cgroup_lock_live_group(cgrp))
2355 return -ENODEV;
2356 seq_puts(seq, cgrp->root->release_agent_path);
2357 seq_putc(seq, '\n');
2358 cgroup_unlock();
2359 return 0;
2360}
2361
2362
2363#define CGROUP_LOCAL_BUFFER_SIZE 64
2364
2365static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
2366 struct file *file,
2367 const char __user *userbuf,
2368 size_t nbytes, loff_t *unused_ppos)
2369{
2370 char buffer[CGROUP_LOCAL_BUFFER_SIZE];
2371 int retval = 0;
2372 char *end;
2373
2374 if (!nbytes)
2375 return -EINVAL;
2376 if (nbytes >= sizeof(buffer))
2377 return -E2BIG;
2378 if (copy_from_user(buffer, userbuf, nbytes))
2379 return -EFAULT;
2380
2381 buffer[nbytes] = 0;
2382 if (cft->write_u64) {
2383 u64 val = simple_strtoull(strstrip(buffer), &end, 0);
2384 if (*end)
2385 return -EINVAL;
2386 retval = cft->write_u64(cgrp, cft, val);
2387 } else {
2388 s64 val = simple_strtoll(strstrip(buffer), &end, 0);
2389 if (*end)
2390 return -EINVAL;
2391 retval = cft->write_s64(cgrp, cft, val);
2392 }
2393 if (!retval)
2394 retval = nbytes;
2395 return retval;
2396}
2397
2398static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
2399 struct file *file,
2400 const char __user *userbuf,
2401 size_t nbytes, loff_t *unused_ppos)
2402{
2403 char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
2404 int retval = 0;
2405 size_t max_bytes = cft->max_write_len;
2406 char *buffer = local_buffer;
2407
2408 if (!max_bytes)
2409 max_bytes = sizeof(local_buffer) - 1;
2410 if (nbytes >= max_bytes)
2411 return -E2BIG;
2412
2413 if (nbytes >= sizeof(local_buffer)) {
2414 buffer = kmalloc(nbytes + 1, GFP_KERNEL);
2415 if (buffer == NULL)
2416 return -ENOMEM;
2417 }
2418 if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
2419 retval = -EFAULT;
2420 goto out;
2421 }
2422
2423 buffer[nbytes] = 0;
2424 retval = cft->write_string(cgrp, cft, strstrip(buffer));
2425 if (!retval)
2426 retval = nbytes;
2427out:
2428 if (buffer != local_buffer)
2429 kfree(buffer);
2430 return retval;
2431}
2432
2433static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
2434 size_t nbytes, loff_t *ppos)
2435{
2436 struct cftype *cft = __d_cft(file->f_dentry);
2437 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2438
2439 if (cgroup_is_removed(cgrp))
2440 return -ENODEV;
2441 if (cft->write)
2442 return cft->write(cgrp, cft, file, buf, nbytes, ppos);
2443 if (cft->write_u64 || cft->write_s64)
2444 return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
2445 if (cft->write_string)
2446 return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
2447 if (cft->trigger) {
2448 int ret = cft->trigger(cgrp, (unsigned int)cft->private);
2449 return ret ? ret : nbytes;
2450 }
2451 return -EINVAL;
2452}
2453
2454static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
2455 struct file *file,
2456 char __user *buf, size_t nbytes,
2457 loff_t *ppos)
2458{
2459 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2460 u64 val = cft->read_u64(cgrp, cft);
2461 int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
2462
2463 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2464}
2465
2466static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
2467 struct file *file,
2468 char __user *buf, size_t nbytes,
2469 loff_t *ppos)
2470{
2471 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2472 s64 val = cft->read_s64(cgrp, cft);
2473 int len = sprintf(tmp, "%lld\n", (long long) val);
2474
2475 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2476}
2477
2478static ssize_t cgroup_file_read(struct file *file, char __user *buf,
2479 size_t nbytes, loff_t *ppos)
2480{
2481 struct cftype *cft = __d_cft(file->f_dentry);
2482 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2483
2484 if (cgroup_is_removed(cgrp))
2485 return -ENODEV;
2486
2487 if (cft->read)
2488 return cft->read(cgrp, cft, file, buf, nbytes, ppos);
2489 if (cft->read_u64)
2490 return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
2491 if (cft->read_s64)
2492 return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
2493 return -EINVAL;
2494}
2495
2496
2497
2498
2499
2500
/*
 * Per-open state of a seq_file backed control file.  Allocated in
 * cgroup_file_open(), stored as the seq_file's private data and freed in
 * cgroup_seqfile_release().
 */
struct cgroup_seqfile_state {
	/* control file type whose read_map/read_seq_string is invoked */
	struct cftype *cft;
	/* cgroup that owns the opened file */
	struct cgroup *cgroup;
};
2505
2506static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
2507{
2508 struct seq_file *sf = cb->state;
2509 return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
2510}
2511
2512static int cgroup_seqfile_show(struct seq_file *m, void *arg)
2513{
2514 struct cgroup_seqfile_state *state = m->private;
2515 struct cftype *cft = state->cft;
2516 if (cft->read_map) {
2517 struct cgroup_map_cb cb = {
2518 .fill = cgroup_map_add,
2519 .state = m,
2520 };
2521 return cft->read_map(state->cgroup, cft, &cb);
2522 }
2523 return cft->read_seq_string(state->cgroup, cft, m);
2524}
2525
2526static int cgroup_seqfile_release(struct inode *inode, struct file *file)
2527{
2528 struct seq_file *seq = file->private_data;
2529 kfree(seq->private);
2530 return single_release(inode, file);
2531}
2532
/*
 * File operations installed by cgroup_file_open() on control files that
 * are read through a seq_file; writes still use the common write path.
 */
static const struct file_operations cgroup_seqfile_operations = {
	.read = seq_read,
	.write = cgroup_file_write,
	.llseek = seq_lseek,
	.release = cgroup_seqfile_release,
};
2539
2540static int cgroup_file_open(struct inode *inode, struct file *file)
2541{
2542 int err;
2543 struct cftype *cft;
2544
2545 err = generic_file_open(inode, file);
2546 if (err)
2547 return err;
2548 cft = __d_cft(file->f_dentry);
2549
2550 if (cft->read_map || cft->read_seq_string) {
2551 struct cgroup_seqfile_state *state =
2552 kzalloc(sizeof(*state), GFP_USER);
2553 if (!state)
2554 return -ENOMEM;
2555 state->cft = cft;
2556 state->cgroup = __d_cgrp(file->f_dentry->d_parent);
2557 file->f_op = &cgroup_seqfile_operations;
2558 err = single_open(file, cgroup_seqfile_show, state);
2559 if (err < 0)
2560 kfree(state);
2561 } else if (cft->open)
2562 err = cft->open(inode, file);
2563 else
2564 err = 0;
2565
2566 return err;
2567}
2568
2569static int cgroup_file_release(struct inode *inode, struct file *file)
2570{
2571 struct cftype *cft = __d_cft(file->f_dentry);
2572 if (cft->release)
2573 return cft->release(inode, file);
2574 return 0;
2575}
2576
2577
2578
2579
2580static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
2581 struct inode *new_dir, struct dentry *new_dentry)
2582{
2583 if (!S_ISDIR(old_dentry->d_inode->i_mode))
2584 return -ENOTDIR;
2585 if (new_dentry->d_inode)
2586 return -EEXIST;
2587 if (old_dir != new_dir)
2588 return -EIO;
2589 return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
2590}
2591
2592static struct simple_xattrs *__d_xattrs(struct dentry *dentry)
2593{
2594 if (S_ISDIR(dentry->d_inode->i_mode))
2595 return &__d_cgrp(dentry)->xattrs;
2596 else
2597 return &__d_cft(dentry)->xattrs;
2598}
2599
2600static inline int xattr_enabled(struct dentry *dentry)
2601{
2602 struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
2603 return test_bit(ROOT_XATTR, &root->flags);
2604}
2605
2606static bool is_valid_xattr(const char *name)
2607{
2608 if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
2609 !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
2610 return true;
2611 return false;
2612}
2613
2614static int cgroup_setxattr(struct dentry *dentry, const char *name,
2615 const void *val, size_t size, int flags)
2616{
2617 if (!xattr_enabled(dentry))
2618 return -EOPNOTSUPP;
2619 if (!is_valid_xattr(name))
2620 return -EINVAL;
2621 return simple_xattr_set(__d_xattrs(dentry), name, val, size, flags);
2622}
2623
2624static int cgroup_removexattr(struct dentry *dentry, const char *name)
2625{
2626 if (!xattr_enabled(dentry))
2627 return -EOPNOTSUPP;
2628 if (!is_valid_xattr(name))
2629 return -EINVAL;
2630 return simple_xattr_remove(__d_xattrs(dentry), name);
2631}
2632
2633static ssize_t cgroup_getxattr(struct dentry *dentry, const char *name,
2634 void *buf, size_t size)
2635{
2636 if (!xattr_enabled(dentry))
2637 return -EOPNOTSUPP;
2638 if (!is_valid_xattr(name))
2639 return -EINVAL;
2640 return simple_xattr_get(__d_xattrs(dentry), name, buf, size);
2641}
2642
2643static ssize_t cgroup_listxattr(struct dentry *dentry, char *buf, size_t size)
2644{
2645 if (!xattr_enabled(dentry))
2646 return -EOPNOTSUPP;
2647 return simple_xattr_list(__d_xattrs(dentry), buf, size);
2648}
2649
/*
 * Default file operations of cgroup control files (installed by
 * cgroup_create_file() on regular files).
 */
static const struct file_operations cgroup_file_operations = {
	.read = cgroup_file_read,
	.write = cgroup_file_write,
	.llseek = generic_file_llseek,
	.open = cgroup_file_open,
	.release = cgroup_file_release,
};
2657
/* Inode operations of cgroup control files: extended attributes only. */
static const struct inode_operations cgroup_file_inode_operations = {
	.setxattr = cgroup_setxattr,
	.getxattr = cgroup_getxattr,
	.listxattr = cgroup_listxattr,
	.removexattr = cgroup_removexattr,
};
2664
/*
 * Inode operations of cgroup directories: lookup, creation and removal of
 * cgroups, in-place rename, and extended attributes.
 */
static const struct inode_operations cgroup_dir_inode_operations = {
	.lookup = cgroup_lookup,
	.mkdir = cgroup_mkdir,
	.rmdir = cgroup_rmdir,
	.rename = cgroup_rename,
	.setxattr = cgroup_setxattr,
	.getxattr = cgroup_getxattr,
	.listxattr = cgroup_listxattr,
	.removexattr = cgroup_removexattr,
};
2675
2676static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
2677{
2678 if (dentry->d_name.len > NAME_MAX)
2679 return ERR_PTR(-ENAMETOOLONG);
2680 d_add(dentry, NULL);
2681 return NULL;
2682}
2683
2684
2685
2686
2687static inline struct cftype *__file_cft(struct file *file)
2688{
2689 if (file->f_dentry->d_inode->i_fop != &cgroup_file_operations)
2690 return ERR_PTR(-EINVAL);
2691 return __d_cft(file->f_dentry);
2692}
2693
2694static int cgroup_create_file(struct dentry *dentry, umode_t mode,
2695 struct super_block *sb)
2696{
2697 struct inode *inode;
2698
2699 if (!dentry)
2700 return -ENOENT;
2701 if (dentry->d_inode)
2702 return -EEXIST;
2703
2704 inode = cgroup_new_inode(mode, sb);
2705 if (!inode)
2706 return -ENOMEM;
2707
2708 if (S_ISDIR(mode)) {
2709 inode->i_op = &cgroup_dir_inode_operations;
2710 inode->i_fop = &simple_dir_operations;
2711
2712
2713 inc_nlink(inode);
2714
2715
2716
2717 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
2718 } else if (S_ISREG(mode)) {
2719 inode->i_size = 0;
2720 inode->i_fop = &cgroup_file_operations;
2721 inode->i_op = &cgroup_file_inode_operations;
2722 }
2723 d_instantiate(dentry, inode);
2724 dget(dentry);
2725 return 0;
2726}
2727
2728
2729
2730
2731
2732
2733
2734
2735static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
2736 umode_t mode)
2737{
2738 struct dentry *parent;
2739 int error = 0;
2740
2741 parent = cgrp->parent->dentry;
2742 error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
2743 if (!error) {
2744 dentry->d_fsdata = cgrp;
2745 inc_nlink(parent->d_inode);
2746 rcu_assign_pointer(cgrp->dentry, dentry);
2747 dget(dentry);
2748 }
2749 dput(dentry);
2750
2751 return error;
2752}
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763static umode_t cgroup_file_mode(const struct cftype *cft)
2764{
2765 umode_t mode = 0;
2766
2767 if (cft->mode)
2768 return cft->mode;
2769
2770 if (cft->read || cft->read_u64 || cft->read_s64 ||
2771 cft->read_map || cft->read_seq_string)
2772 mode |= S_IRUGO;
2773
2774 if (cft->write || cft->write_u64 || cft->write_s64 ||
2775 cft->write_string || cft->trigger)
2776 mode |= S_IWUSR;
2777
2778 return mode;
2779}
2780
2781static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
2782 struct cftype *cft)
2783{
2784 struct dentry *dir = cgrp->dentry;
2785 struct cgroup *parent = __d_cgrp(dir);
2786 struct dentry *dentry;
2787 struct cfent *cfe;
2788 int error;
2789 umode_t mode;
2790 char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
2791
2792 simple_xattrs_init(&cft->xattrs);
2793
2794
2795 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
2796 return 0;
2797 if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
2798 return 0;
2799
2800 if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
2801 strcpy(name, subsys->name);
2802 strcat(name, ".");
2803 }
2804 strcat(name, cft->name);
2805
2806 BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
2807
2808 cfe = kzalloc(sizeof(*cfe), GFP_KERNEL);
2809 if (!cfe)
2810 return -ENOMEM;
2811
2812 dentry = lookup_one_len(name, dir, strlen(name));
2813 if (IS_ERR(dentry)) {
2814 error = PTR_ERR(dentry);
2815 goto out;
2816 }
2817
2818 mode = cgroup_file_mode(cft);
2819 error = cgroup_create_file(dentry, mode | S_IFREG, cgrp->root->sb);
2820 if (!error) {
2821 cfe->type = (void *)cft;
2822 cfe->dentry = dentry;
2823 dentry->d_fsdata = cfe;
2824 list_add_tail(&cfe->node, &parent->files);
2825 cfe = NULL;
2826 }
2827 dput(dentry);
2828out:
2829 kfree(cfe);
2830 return error;
2831}
2832
2833static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
2834 struct cftype cfts[], bool is_add)
2835{
2836 struct cftype *cft;
2837 int err, ret = 0;
2838
2839 for (cft = cfts; cft->name[0] != '\0'; cft++) {
2840 if (is_add)
2841 err = cgroup_add_file(cgrp, subsys, cft);
2842 else
2843 err = cgroup_rm_file(cgrp, cft);
2844 if (err) {
2845 pr_warning("cgroup_addrm_files: failed to %s %s, err=%d\n",
2846 is_add ? "add" : "remove", cft->name, err);
2847 ret = err;
2848 }
2849 }
2850 return ret;
2851}
2852
/* Serializes cftype set updates; held from cgroup_cfts_prepare() to the end of cgroup_cfts_commit(). */
static DEFINE_MUTEX(cgroup_cft_mutex);

/*
 * Begin a cftype set update: takes cgroup_cft_mutex and then
 * cgroup_mutex.  Both are released by cgroup_cfts_commit().
 */
static void cgroup_cfts_prepare(void)
	__acquires(&cgroup_cft_mutex) __acquires(&cgroup_mutex)
{
	/*
	 * cgroup_cft_mutex is taken first and stays held across the whole
	 * update, while cgroup_cfts_commit() drops and re-takes
	 * cgroup_mutex around each directory's i_mutex.
	 */
	mutex_lock(&cgroup_cft_mutex);
	mutex_lock(&cgroup_mutex);
}
2868
/*
 * Finish a cftype set update started with cgroup_cfts_prepare().
 * @ss: subsystem the files belong to
 * @cfts: files to add or remove, or NULL to only drop the locks
 * @is_add: true to add the files, false to remove them
 *
 * Applies the change to every cgroup of the hierarchy @ss is attached to
 * and releases both cgroup_mutex and cgroup_cft_mutex.
 */
static void cgroup_cfts_commit(struct cgroup_subsys *ss,
			       struct cftype *cfts, bool is_add)
	__releases(&cgroup_mutex) __releases(&cgroup_cft_mutex)
{
	LIST_HEAD(pending);
	struct cgroup *cgrp, *n;

	/* %NULL @cfts indicates abort and don't bother if @ss isn't attached */
	if (cfts && ss->root != &rootnode) {
		/* Pin and queue every cgroup of the hierarchy. */
		list_for_each_entry(cgrp, &ss->root->allcg_list, allcg_node) {
			dget(cgrp->dentry);
			list_add_tail(&cgrp->cft_q_node, &pending);
		}
	}

	mutex_unlock(&cgroup_mutex);

	/*
	 * cgroup_mutex was dropped above so that each directory's i_mutex
	 * can be taken before it.  The queued cgroups stay pinned by the
	 * dentry references; those removed in the meantime are skipped.
	 */
	list_for_each_entry_safe(cgrp, n, &pending, cft_q_node) {
		struct inode *inode = cgrp->dentry->d_inode;

		mutex_lock(&inode->i_mutex);
		mutex_lock(&cgroup_mutex);
		if (!cgroup_is_removed(cgrp))
			cgroup_addrm_files(cgrp, ss, cfts, is_add);
		mutex_unlock(&cgroup_mutex);
		mutex_unlock(&inode->i_mutex);

		list_del_init(&cgrp->cft_q_node);
		dput(cgrp->dentry);
	}

	mutex_unlock(&cgroup_cft_mutex);
}
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
2922{
2923 struct cftype_set *set;
2924
2925 set = kzalloc(sizeof(*set), GFP_KERNEL);
2926 if (!set)
2927 return -ENOMEM;
2928
2929 cgroup_cfts_prepare();
2930 set->cfts = cfts;
2931 list_add_tail(&set->node, &ss->cftsets);
2932 cgroup_cfts_commit(ss, cfts, true);
2933
2934 return 0;
2935}
2936EXPORT_SYMBOL_GPL(cgroup_add_cftypes);
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
2952{
2953 struct cftype_set *set;
2954
2955 cgroup_cfts_prepare();
2956
2957 list_for_each_entry(set, &ss->cftsets, node) {
2958 if (set->cfts == cfts) {
2959 list_del_init(&set->node);
2960 cgroup_cfts_commit(ss, cfts, false);
2961 return 0;
2962 }
2963 }
2964
2965 cgroup_cfts_commit(ss, NULL, false);
2966 return -ENOENT;
2967}
2968
2969
2970
2971
2972
2973
2974
2975int cgroup_task_count(const struct cgroup *cgrp)
2976{
2977 int count = 0;
2978 struct cg_cgroup_link *link;
2979
2980 read_lock(&css_set_lock);
2981 list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
2982 count += atomic_read(&link->cg->refcount);
2983 }
2984 read_unlock(&css_set_lock);
2985 return count;
2986}
2987
2988
2989
2990
2991
2992static void cgroup_advance_iter(struct cgroup *cgrp,
2993 struct cgroup_iter *it)
2994{
2995 struct list_head *l = it->cg_link;
2996 struct cg_cgroup_link *link;
2997 struct css_set *cg;
2998
2999
3000 do {
3001 l = l->next;
3002 if (l == &cgrp->css_sets) {
3003 it->cg_link = NULL;
3004 return;
3005 }
3006 link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
3007 cg = link->cg;
3008 } while (list_empty(&cg->tasks));
3009 it->cg_link = l;
3010 it->task = cg->tasks.next;
3011}
3012
3013
3014
3015
3016
3017
3018
/*
 * Turn on the per-css_set task lists: sets use_task_css_set_links and
 * links every existing, non-exiting task that is not yet on a list onto
 * the task list of its css_set.  Called from cgroup_iter_start() the
 * first time task iteration is requested.
 */
static void cgroup_enable_task_cg_lists(void)
{
	struct task_struct *p, *g;
	write_lock(&css_set_lock);
	use_task_css_set_links = 1;
	/*
	 * Walk all threads with tasklist_lock read-held, nested inside
	 * css_set_lock, so the set of tasks is stable while the lists are
	 * being built.
	 */
	read_lock(&tasklist_lock);
	do_each_thread(g, p) {
		task_lock(p);
		/*
		 * Skip tasks that are exiting and tasks that are already
		 * linked; everything else is added to its css_set's list.
		 */
		if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
			list_add(&p->cg_list, &p->cgroups->tasks);
		task_unlock(p);
	} while_each_thread(g, p);
	read_unlock(&tasklist_lock);
	write_unlock(&css_set_lock);
}
3046
/*
 * cgroup_iter_start - begin iterating over the tasks of @cgrp
 * @cgrp: cgroup whose tasks are to be walked
 * @it: iterator state to initialize
 *
 * Returns with css_set_lock read-held; the caller must pair this with
 * cgroup_iter_end().
 */
void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
	__acquires(css_set_lock)
{
	/*
	 * The first call to cgroup_iter_start() after boot finds the task
	 * lists disabled and enables them before iterating.
	 */
	if (!use_task_css_set_links)
		cgroup_enable_task_cg_lists();

	read_lock(&css_set_lock);
	/* Start at the list head; the advance moves to the first usable css_set. */
	it->cg_link = &cgrp->css_sets;
	cgroup_advance_iter(cgrp, it);
}
3062
3063struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
3064 struct cgroup_iter *it)
3065{
3066 struct task_struct *res;
3067 struct list_head *l = it->task;
3068 struct cg_cgroup_link *link;
3069
3070
3071 if (!it->cg_link)
3072 return NULL;
3073 res = list_entry(l, struct task_struct, cg_list);
3074
3075 l = l->next;
3076 link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
3077 if (l == &link->cg->tasks) {
3078
3079
3080 cgroup_advance_iter(cgrp, it);
3081 } else {
3082 it->task = l;
3083 }
3084 return res;
3085}
3086
/*
 * cgroup_iter_end - finish a task iteration
 * @cgrp: cgroup that was iterated (not referenced)
 * @it: iterator state (not referenced)
 *
 * Drops the css_set_lock read lock taken by cgroup_iter_start().
 */
void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
	__releases(css_set_lock)
{
	read_unlock(&css_set_lock);
}
3092
3093static inline int started_after_time(struct task_struct *t1,
3094 struct timespec *time,
3095 struct task_struct *t2)
3096{
3097 int start_diff = timespec_compare(&t1->start_time, time);
3098 if (start_diff > 0) {
3099 return 1;
3100 } else if (start_diff < 0) {
3101 return 0;
3102 } else {
3103
3104
3105
3106
3107
3108
3109
3110
3111 return t1 > t2;
3112 }
3113}
3114
3115
3116
3117
3118
3119
3120static inline int started_after(void *p1, void *p2)
3121{
3122 struct task_struct *t1 = p1;
3123 struct task_struct *t2 = p2;
3124 return started_after_time(t1, &t2->start_time, t2);
3125}
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
/*
 * cgroup_scan_tasks - call scan->process_task() on the tasks of scan->cg.
 * @scan: describes the cgroup, the optional filter (test_task), the
 *        callback (process_task) and an optional caller-supplied heap.
 *
 * Tasks are collected in batches into a pointer heap ordered by
 * started_after() while the cgroup iterator is active; the callback is only
 * invoked after cgroup_iter_end(), i.e. without css_set_lock held.  Each
 * pass admits only tasks that sort after the (latest_time, latest_task)
 * marker recorded from the previous batch, and passes repeat until one
 * collects nothing.
 *
 * When scan->heap is NULL a temporary heap is set up with
 * heap_init(PAGE_SIZE) and freed before returning.
 *
 * Returns 0 on success or the error returned by heap_init().
 */
int cgroup_scan_tasks(struct cgroup_scanner *scan)
{
	int retval, i;
	struct cgroup_iter it;
	struct task_struct *p, *dropped;
	/* Marker for the newest task handled so far; nothing handled yet. */
	struct task_struct *latest_task = NULL;
	struct ptr_heap tmp_heap;
	struct ptr_heap *heap;
	struct timespec latest_time = { 0, 0 };

	if (scan->heap) {
		/* The caller supplied a heap; only install our comparator. */
		heap = scan->heap;
		heap->gt = &started_after;
	} else {
		/* No heap supplied: use a temporary one. */
		heap = &tmp_heap;
		retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
		if (retval)
			/* Cannot allocate the heap. */
			return retval;
	}

 again:
	/* Start every pass with an empty heap. */
	heap->size = 0;
	cgroup_iter_start(scan->cg, &it);
	while ((p = cgroup_iter_next(scan->cg, &it))) {
		/* Apply the optional caller filter. */
		if (scan->test_task && !scan->test_task(p, scan))
			continue;
		/* Only consider tasks that sort after the previous batch. */
		if (!started_after_time(p, &latest_time, latest_task))
			continue;
		dropped = heap_insert(heap, p);
		if (dropped == NULL) {
			/* p went in and nothing fell out: pin p. */
			get_task_struct(p);
		} else if (dropped != p) {
			/* p displaced another task: pin p, unpin the other. */
			get_task_struct(p);
			put_task_struct(dropped);
		}
		/* Otherwise heap_insert() handed p back; no refcount change. */
	}
	cgroup_iter_end(scan->cg, &it);

	if (heap->size) {
		for (i = 0; i < heap->size; i++) {
			struct task_struct *q = heap->ptrs[i];
			if (i == 0) {
				/* Element 0 becomes the marker for the next pass. */
				latest_time = q->start_time;
				latest_task = q;
			}
			/* Process the task, then drop the reference taken above. */
			scan->process_task(q, scan);
			put_task_struct(q);
		}
		/* Something was processed, so rescan for tasks not yet seen. */
		goto again;
	}
	if (heap == &tmp_heap)
		heap_free(&tmp_heap);
	return 0;
}
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
/*
 * Which pid listing a pidlist represents.  pidlist_array_load() records
 * thread-group ids for CGROUP_FILE_PROCS and per-task pids for
 * CGROUP_FILE_TASKS.
 */
enum cgroup_filetype {
	CGROUP_FILE_PROCS,
	CGROUP_FILE_TASKS,
};
3268
3269
3270
3271
3272
3273
3274
/*
 * A cached, sorted array of pids for one cgroup, shared between the open
 * files that match the same (file type, pid namespace) key.
 */
struct cgroup_pidlist {
	/* Lookup key: listing type plus the pid namespace of the reader. */
	struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
	/* Array of pids (allocated with pidlist_allocate()). */
	pid_t *list;
	/* Number of valid entries in @list. */
	int length;
	/* Number of users; the pidlist is freed when this drops to zero. */
	int use_count;
	/* Entry in the owning cgroup's pidlists list. */
	struct list_head links;
	/* Cgroup this pidlist belongs to. */
	struct cgroup *owner;
	/* Held for write while the array is replaced, for read while walked. */
	struct rw_semaphore mutex;
};
3294
3295
3296
3297
3298
3299
3300#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
3301static void *pidlist_allocate(int count)
3302{
3303 if (PIDLIST_TOO_LARGE(count))
3304 return vmalloc(count * sizeof(pid_t));
3305 else
3306 return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
3307}
/* Release a pid array using the deallocator that matches how it was made. */
static void pidlist_free(void *p)
{
	if (!is_vmalloc_addr(p)) {
		kfree(p);
		return;
	}
	vfree(p);
}
3315static void *pidlist_resize(void *p, int newcount)
3316{
3317 void *newlist;
3318
3319 if (is_vmalloc_addr(p)) {
3320 newlist = vmalloc(newcount * sizeof(pid_t));
3321 if (!newlist)
3322 return NULL;
3323 memcpy(newlist, p, newcount * sizeof(pid_t));
3324 vfree(p);
3325 } else {
3326 newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL);
3327 }
3328 return newlist;
3329}
3330
3331
3332
3333
3334
3335
3336
3337
3338#define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new))
3339static int pidlist_uniq(pid_t **p, int length)
3340{
3341 int src, dest = 1;
3342 pid_t *list = *p;
3343 pid_t *newlist;
3344
3345
3346
3347
3348
3349 if (length == 0 || length == 1)
3350 return length;
3351
3352 for (src = 1; src < length; src++) {
3353
3354 while (list[src] == list[src-1]) {
3355 src++;
3356 if (src == length)
3357 goto after;
3358 }
3359
3360 list[dest] = list[src];
3361 dest++;
3362 }
3363after:
3364
3365
3366
3367
3368
3369 if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
3370 newlist = pidlist_resize(list, dest);
3371 if (newlist)
3372 *p = newlist;
3373 }
3374 return dest;
3375}
3376
/*
 * sort() comparison callback for pid_t arrays.
 *
 * Returns a negative value, zero or a positive value when *a is less than,
 * equal to or greater than *b.  The result is computed with comparisons
 * rather than a subtraction so it cannot overflow, and the const
 * qualification of the arguments is preserved.
 */
static int cmppid(const void *a, const void *b)
{
	const pid_t lhs = *(const pid_t *)a;
	const pid_t rhs = *(const pid_t *)b;

	return (lhs > rhs) - (lhs < rhs);
}
3381
3382
3383
3384
3385
3386
3387
/*
 * Find the pidlist of @cgrp matching @type and the caller's pid namespace,
 * creating an empty one if none exists yet.
 *
 * Returns the pidlist with its rwsem held for write, or NULL if a new
 * pidlist could not be allocated.  The caller is responsible for
 * up_write() on l->mutex.
 */
static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
						  enum cgroup_filetype type)
{
	struct cgroup_pidlist *l;
	/* The key includes the pid namespace of the current task. */
	struct pid_namespace *ns = current->nsproxy->pid_ns;

	/*
	 * pidlist_mutex protects the cgroup's list of pidlists; the entry's
	 * own rwsem is taken before pidlist_mutex is dropped so the entry
	 * cannot be released in between.
	 */
	mutex_lock(&cgrp->pidlist_mutex);
	list_for_each_entry(l, &cgrp->pidlists, links) {
		if (l->key.type == type && l->key.ns == ns) {
			/* Found a matching list: lock it and return it. */
			down_write(&l->mutex);
			mutex_unlock(&cgrp->pidlist_mutex);
			return l;
		}
	}
	/* No match: allocate and publish a fresh, empty pidlist. */
	l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
	if (!l) {
		mutex_unlock(&cgrp->pidlist_mutex);
		return l;
	}
	init_rwsem(&l->mutex);
	down_write(&l->mutex);
	l->key.type = type;
	/* Take a reference on the namespace; dropped when the list is freed. */
	l->key.ns = get_pid_ns(ns);
	l->use_count = 0;
	l->list = NULL;
	l->owner = cgrp;
	list_add(&l->links, &cgrp->pidlists);
	mutex_unlock(&cgrp->pidlist_mutex);
	return l;
}
3427
3428
3429
3430
/*
 * Build a sorted pid array for @cgrp and install it in the matching pidlist.
 * @cgrp: cgroup whose tasks are listed
 * @type: CGROUP_FILE_PROCS (thread-group ids, de-duplicated) or
 *        CGROUP_FILE_TASKS (per-task pids)
 * @lp: on success, receives the pidlist, with its use_count incremented
 *
 * Returns 0 on success or -ENOMEM.
 */
static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
			      struct cgroup_pidlist **lp)
{
	pid_t *array;
	int length;
	int pid, n = 0; /* used for populating the array */
	struct cgroup_iter it;
	struct task_struct *tsk;
	struct cgroup_pidlist *l;

	/*
	 * The array is sized from a task count taken before the iteration
	 * starts; the loop below stops once it is full, so tasks that join
	 * afterwards may be left out.
	 */
	length = cgroup_task_count(cgrp);
	array = pidlist_allocate(length);
	if (!array)
		return -ENOMEM;
	/* Now fill the array from the cgroup's task lists. */
	cgroup_iter_start(cgrp, &it);
	while ((tsk = cgroup_iter_next(cgrp, &it))) {
		if (unlikely(n == length))
			break;
		/* Pick the id as seen from the caller's pid namespace. */
		if (type == CGROUP_FILE_PROCS)
			pid = task_tgid_vnr(tsk);
		else
			pid = task_pid_vnr(tsk);
		if (pid > 0) /* skip tasks with no id in this namespace */
			array[n++] = pid;
	}
	cgroup_iter_end(cgrp, &it);
	length = n;
	/* Sort; for the procs file also collapse duplicate tgids. */
	sort(array, length, sizeof(pid_t), cmppid, NULL);
	if (type == CGROUP_FILE_PROCS)
		length = pidlist_uniq(&array, length);
	l = cgroup_pidlist_find(cgrp, type);
	if (!l) {
		pidlist_free(array);
		return -ENOMEM;
	}
	/* Replace any previous array; l->mutex is write-held by the find. */
	pidlist_free(l->list);
	l->list = array;
	l->length = length;
	l->use_count++;
	up_write(&l->mutex);
	*lp = l;
	return 0;
}
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
3495{
3496 int ret = -EINVAL;
3497 struct cgroup *cgrp;
3498 struct cgroup_iter it;
3499 struct task_struct *tsk;
3500
3501
3502
3503
3504
3505 if (dentry->d_sb->s_op != &cgroup_ops ||
3506 !S_ISDIR(dentry->d_inode->i_mode))
3507 goto err;
3508
3509 ret = 0;
3510 cgrp = dentry->d_fsdata;
3511
3512 cgroup_iter_start(cgrp, &it);
3513 while ((tsk = cgroup_iter_next(cgrp, &it))) {
3514 switch (tsk->state) {
3515 case TASK_RUNNING:
3516 stats->nr_running++;
3517 break;
3518 case TASK_INTERRUPTIBLE:
3519 stats->nr_sleeping++;
3520 break;
3521 case TASK_UNINTERRUPTIBLE:
3522 stats->nr_uninterruptible++;
3523 break;
3524 case TASK_STOPPED:
3525 stats->nr_stopped++;
3526 break;
3527 default:
3528 if (delayacct_is_task_waiting_on_io(tsk))
3529 stats->nr_io_wait++;
3530 break;
3531 }
3532 }
3533 cgroup_iter_end(cgrp, &it);
3534
3535err:
3536 return ret;
3537}
3538
3539
3540
3541
3542
3543
3544
3545
3546static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
3547{
3548
3549
3550
3551
3552
3553
3554 struct cgroup_pidlist *l = s->private;
3555 int index = 0, pid = *pos;
3556 int *iter;
3557
3558 down_read(&l->mutex);
3559 if (pid) {
3560 int end = l->length;
3561
3562 while (index < end) {
3563 int mid = (index + end) / 2;
3564 if (l->list[mid] == pid) {
3565 index = mid;
3566 break;
3567 } else if (l->list[mid] <= pid)
3568 index = mid + 1;
3569 else
3570 end = mid;
3571 }
3572 }
3573
3574 if (index >= l->length)
3575 return NULL;
3576
3577 iter = l->list + index;
3578 *pos = *iter;
3579 return iter;
3580}
3581
/* seq_file ->stop: drop the read lock taken in cgroup_pidlist_start(). */
static void cgroup_pidlist_stop(struct seq_file *s, void *v)
{
	struct cgroup_pidlist *l = s->private;
	up_read(&l->mutex);
}
3587
3588static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
3589{
3590 struct cgroup_pidlist *l = s->private;
3591 pid_t *p = v;
3592 pid_t *end = l->list + l->length;
3593
3594
3595
3596
3597 p++;
3598 if (p >= end) {
3599 return NULL;
3600 } else {
3601 *pos = *p;
3602 return p;
3603 }
3604}
3605
/* seq_file ->show: print the pid that @v points at, one per line. */
static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
	int *entry = v;

	return seq_printf(s, "%d\n", *entry);
}
3610
3611
3612
3613
3614
/*
 * seq_file callbacks used to read a pidlist; installed by
 * cgroup_pidlist_open() via seq_open().
 */
static const struct seq_operations cgroup_pidlist_seq_operations = {
	.start = cgroup_pidlist_start,
	.stop = cgroup_pidlist_stop,
	.next = cgroup_pidlist_next,
	.show = cgroup_pidlist_show,
};
3621
/*
 * Drop one user of pidlist @l and free it when the last user goes away.
 */
static void cgroup_release_pid_array(struct cgroup_pidlist *l)
{
	/*
	 * The owner's pidlist_mutex is taken before l->mutex, the same order
	 * used by cgroup_pidlist_find(), so a concurrent lookup cannot pick
	 * up a list that is about to be freed.
	 */
	mutex_lock(&l->owner->pidlist_mutex);
	down_write(&l->mutex);
	BUG_ON(!l->use_count);
	if (!--l->use_count) {
		/* Last user: unlink from the cgroup, then free everything. */
		list_del(&l->links);
		mutex_unlock(&l->owner->pidlist_mutex);
		pidlist_free(l->list);
		put_pid_ns(l->key.ns);
		up_write(&l->mutex);
		kfree(l);
		return;
	}
	mutex_unlock(&l->owner->pidlist_mutex);
	up_write(&l->mutex);
}
3646
3647static int cgroup_pidlist_release(struct inode *inode, struct file *file)
3648{
3649 struct cgroup_pidlist *l;
3650 if (!(file->f_mode & FMODE_READ))
3651 return 0;
3652
3653
3654
3655
3656 l = ((struct seq_file *)file->private_data)->private;
3657 cgroup_release_pid_array(l);
3658 return seq_release(inode, file);
3659}
3660
/*
 * File operations installed on a tasks/procs file opened for reading:
 * reads go through seq_file, writes keep using cgroup_file_write().
 */
static const struct file_operations cgroup_pidlist_operations = {
	.read = seq_read,
	.llseek = seq_lseek,
	.write = cgroup_file_write,
	.release = cgroup_pidlist_release,
};
3667
3668
3669
3670
3671
3672
3673
/*
 * Common open handler for the tasks and procs files.
 * @file: file being opened; its parent dentry identifies the cgroup
 * @type: which listing to build
 *
 * For readers, snapshots the pid array and hooks the file up to seq_file.
 * Returns 0 on success or a negative errno.
 */
static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
{
	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
	struct cgroup_pidlist *l;
	int retval;

	/* Nothing to do for write-only access. */
	if (!(file->f_mode & FMODE_READ))
		return 0;

	/* Have the array populated. */
	retval = pidlist_array_load(cgrp, type, &l);
	if (retval)
		return retval;
	/* Configure the file for reading through seq_file. */
	file->f_op = &cgroup_pidlist_operations;

	retval = seq_open(file, &cgroup_pidlist_seq_operations);
	if (retval) {
		/* Give back the use_count taken by pidlist_array_load(). */
		cgroup_release_pid_array(l);
		return retval;
	}
	((struct seq_file *)file->private_data)->private = l;
	return 0;
}
/* ->open for the "tasks" file: list individual task pids. */
static int cgroup_tasks_open(struct inode *unused, struct file *file)
{
	return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
}
/* ->open for the "cgroup.procs" file: list thread-group ids. */
static int cgroup_procs_open(struct inode *unused, struct file *file)
{
	return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
}
3707
/* read_u64 handler for "notify_on_release": report the cgroup's setting. */
static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
					    struct cftype *cft)
{
	return notify_on_release(cgrp);
}
3713
/*
 * write_u64 handler for "notify_on_release": any non-zero @val sets
 * CGRP_NOTIFY_ON_RELEASE, zero clears it.  Always returns 0.
 */
static int cgroup_write_notify_on_release(struct cgroup *cgrp,
					  struct cftype *cft,
					  u64 val)
{
	/* Writing this file also resets the cgroup's RELEASABLE state. */
	clear_bit(CGRP_RELEASABLE, &cgrp->flags);
	if (val)
		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
	else
		clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
	return 0;
}
3725
3726
3727
3728
3729
3730
/*
 * Work item that tears down a cgroup_event: unregisters it from the control
 * file, drops the eventfd context, frees the event and releases the dentry
 * reference taken in cgroup_write_event_control().
 */
static void cgroup_event_remove(struct work_struct *work)
{
	struct cgroup_event *event = container_of(work, struct cgroup_event,
			remove);
	/* Read before kfree(event): the dput() below still needs it. */
	struct cgroup *cgrp = event->cgrp;

	event->cft->unregister_event(cgrp, event->cft, event->eventfd);

	eventfd_ctx_put(event->eventfd);
	kfree(event);
	dput(cgrp->dentry);
}
3743
3744
3745
3746
3747
3748
3749static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
3750 int sync, void *key)
3751{
3752 struct cgroup_event *event = container_of(wait,
3753 struct cgroup_event, wait);
3754 struct cgroup *cgrp = event->cgrp;
3755 unsigned long flags = (unsigned long)key;
3756
3757 if (flags & POLLHUP) {
3758 __remove_wait_queue(event->wqh, &event->wait);
3759 spin_lock(&cgrp->event_list_lock);
3760 list_del(&event->list);
3761 spin_unlock(&cgrp->event_list_lock);
3762
3763
3764
3765
3766 schedule_work(&event->remove);
3767 }
3768
3769 return 0;
3770}
3771
/*
 * poll_table queueing callback: remember the wait-queue head handed over by
 * the polled file and hook the event's wait entry onto it.
 */
static void cgroup_event_ptable_queue_proc(struct file *file,
		wait_queue_head_t *wqh, poll_table *pt)
{
	struct cgroup_event *event = container_of(pt,
			struct cgroup_event, pt);

	/* Kept so the entry can later be removed from the same queue. */
	event->wqh = wqh;
	add_wait_queue(wqh, &event->wait);
}
3781
3782
3783
3784
3785
3786
3787
3788static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
3789 const char *buffer)
3790{
3791 struct cgroup_event *event = NULL;
3792 unsigned int efd, cfd;
3793 struct file *efile = NULL;
3794 struct file *cfile = NULL;
3795 char *endp;
3796 int ret;
3797
3798 efd = simple_strtoul(buffer, &endp, 10);
3799 if (*endp != ' ')
3800 return -EINVAL;
3801 buffer = endp + 1;
3802
3803 cfd = simple_strtoul(buffer, &endp, 10);
3804 if ((*endp != ' ') && (*endp != '\0'))
3805 return -EINVAL;
3806 buffer = endp + 1;
3807
3808 event = kzalloc(sizeof(*event), GFP_KERNEL);
3809 if (!event)
3810 return -ENOMEM;
3811 event->cgrp = cgrp;
3812 INIT_LIST_HEAD(&event->list);
3813 init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
3814 init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
3815 INIT_WORK(&event->remove, cgroup_event_remove);
3816
3817 efile = eventfd_fget(efd);
3818 if (IS_ERR(efile)) {
3819 ret = PTR_ERR(efile);
3820 goto fail;
3821 }
3822
3823 event->eventfd = eventfd_ctx_fileget(efile);
3824 if (IS_ERR(event->eventfd)) {
3825 ret = PTR_ERR(event->eventfd);
3826 goto fail;
3827 }
3828
3829 cfile = fget(cfd);
3830 if (!cfile) {
3831 ret = -EBADF;
3832 goto fail;
3833 }
3834
3835
3836
3837 ret = inode_permission(cfile->f_path.dentry->d_inode, MAY_READ);
3838 if (ret < 0)
3839 goto fail;
3840
3841 event->cft = __file_cft(cfile);
3842 if (IS_ERR(event->cft)) {
3843 ret = PTR_ERR(event->cft);
3844 goto fail;
3845 }
3846
3847 if (!event->cft->register_event || !event->cft->unregister_event) {
3848 ret = -EINVAL;
3849 goto fail;
3850 }
3851
3852 ret = event->cft->register_event(cgrp, event->cft,
3853 event->eventfd, buffer);
3854 if (ret)
3855 goto fail;
3856
3857 if (efile->f_op->poll(efile, &event->pt) & POLLHUP) {
3858 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
3859 ret = 0;
3860 goto fail;
3861 }
3862
3863
3864
3865
3866
3867
3868 dget(cgrp->dentry);
3869
3870 spin_lock(&cgrp->event_list_lock);
3871 list_add(&event->list, &cgrp->event_list);
3872 spin_unlock(&cgrp->event_list_lock);
3873
3874 fput(cfile);
3875 fput(efile);
3876
3877 return 0;
3878
3879fail:
3880 if (cfile)
3881 fput(cfile);
3882
3883 if (event && event->eventfd && !IS_ERR(event->eventfd))
3884 eventfd_ctx_put(event->eventfd);
3885
3886 if (!IS_ERR_OR_NULL(efile))
3887 fput(efile);
3888
3889 kfree(event);
3890
3891 return ret;
3892}
3893
/* read_u64 handler for "cgroup.clone_children": report the current setting. */
static u64 cgroup_clone_children_read(struct cgroup *cgrp,
				    struct cftype *cft)
{
	return clone_children(cgrp);
}
3899
/*
 * write_u64 handler for "cgroup.clone_children": any non-zero @val sets
 * CGRP_CLONE_CHILDREN, zero clears it.  Always returns 0.
 */
static int cgroup_clone_children_write(struct cgroup *cgrp,
				     struct cftype *cft,
				     u64 val)
{
	if (val)
		set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
	else
		clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
	return 0;
}
3910
3911
3912
3913
3914
/* Name prefix shared by the generic, non-subsystem control files. */
#define CGROUP_FILE_GENERIC_PREFIX "cgroup."
/*
 * Base control files, added to a cgroup directory by cgroup_populate_dir()
 * when base_files is true.  The table is terminated by an empty entry.
 */
static struct cftype files[] = {
	{
		/* Per-task pid listing; writes go to cgroup_tasks_write(). */
		.name = "tasks",
		.open = cgroup_tasks_open,
		.write_u64 = cgroup_tasks_write,
		.release = cgroup_pidlist_release,
		.mode = S_IRUGO | S_IWUSR,
	},
	{
		/* Thread-group listing; writes go to cgroup_procs_write(). */
		.name = CGROUP_FILE_GENERIC_PREFIX "procs",
		.open = cgroup_procs_open,
		.write_u64 = cgroup_procs_write,
		.release = cgroup_pidlist_release,
		.mode = S_IRUGO | S_IWUSR,
	},
	{
		.name = "notify_on_release",
		.read_u64 = cgroup_read_notify_on_release,
		.write_u64 = cgroup_write_notify_on_release,
	},
	{
		/* Write-only: registers eventfd notifications. */
		.name = CGROUP_FILE_GENERIC_PREFIX "event_control",
		.write_string = cgroup_write_event_control,
		.mode = S_IWUGO,
	},
	{
		.name = "cgroup.clone_children",
		.read_u64 = cgroup_clone_children_read,
		.write_u64 = cgroup_clone_children_write,
	},
	{
		/* Flagged CFTYPE_ONLY_ON_ROOT. */
		.name = "release_agent",
		.flags = CFTYPE_ONLY_ON_ROOT,
		.read_seq_string = cgroup_release_agent_show,
		.write_string = cgroup_release_agent_write,
		.max_write_len = PATH_MAX,
	},
	{ }	/* terminate */
};
3955
3956
3957
3958
3959
3960
3961
/*
 * cgroup_populate_dir - create the control files of a cgroup directory.
 * @cgrp: target cgroup
 * @base_files: true to also add the base files from files[]
 * @subsys_mask: bitmask of subsystem ids whose files should be added
 *
 * Returns 0, or the negative error from adding the base files.
 */
static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
			       unsigned long subsys_mask)
{
	int err;
	struct cgroup_subsys *ss;

	if (base_files) {
		err = cgroup_addrm_files(cgrp, NULL, files, true);
		if (err < 0)
			return err;
	}

	/*
	 * Process the cftype sets of each selected subsystem.
	 * NOTE(review): the return value of cgroup_addrm_files() is not
	 * checked here, so failures for subsystem files go unreported.
	 */
	for_each_subsys(cgrp->root, ss) {
		struct cftype_set *set;
		if (!test_bit(ss->subsys_id, &subsys_mask))
			continue;

		list_for_each_entry(set, &ss->cftsets, node)
			cgroup_addrm_files(cgrp, ss, set->cfts, true);
	}

	/* For every css that has an id, publish the css through that id. */
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		/*
		 * rcu_assign_pointer() orders the css initialisation before
		 * the pointer becomes visible to RCU readers of css->id->css.
		 */
		if (css->id)
			rcu_assign_pointer(css->id->css, css);
	}

	return 0;
}
3998
/*
 * Work function queued through css->dput_work: drops one reference on the
 * dentry of the css's cgroup.
 */
static void css_dput_fn(struct work_struct *work)
{
	struct cgroup_subsys_state *css =
		container_of(work, struct cgroup_subsys_state, dput_work);
	struct dentry *dentry = css->cgroup->dentry;
	struct super_block *sb = dentry->d_sb;

	/*
	 * Hold the superblock active across the dput(); the extra reference
	 * is dropped again right after.
	 */
	atomic_inc(&sb->s_active);
	dput(dentry);
	deactivate_super(sb);
}
4010
/*
 * Initialise a freshly created css and attach it to @cgrp in the slot of
 * subsystem @ss.  The slot must be empty.
 */
static void init_cgroup_css(struct cgroup_subsys_state *css,
			       struct cgroup_subsys *ss,
			       struct cgroup *cgrp)
{
	css->cgroup = cgrp;
	/* Starts with a single (base) reference. */
	atomic_set(&css->refcnt, 1);
	css->flags = 0;
	css->id = NULL;
	if (cgrp == dummytop)
		set_bit(CSS_ROOT, &css->flags);
	BUG_ON(cgrp->subsys[ss->subsys_id]);
	cgrp->subsys[ss->subsys_id] = css;

	/* Set up the work item that runs css_dput_fn(). */
	INIT_WORK(&css->dput_work, css_dput_fn);
	/* Mirror the subsystem's deprecated ref-clearing mode on the css. */
	if (ss->__DEPRECATED_clear_css_refs)
		set_bit(CSS_CLEAR_CSS_REFS, &css->flags);
}
4034
4035
4036
4037
4038
4039
4040
4041
4042
/*
 * cgroup_create - create a child cgroup of @parent.
 * @parent: cgroup that will own the new child
 * @dentry: dentry of the new cgroup directory
 * @mode: mode for the new directory
 *
 * Returns 0 on success or a negative errno.
 */
static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
			     umode_t mode)
{
	struct cgroup *cgrp;
	struct cgroupfs_root *root = parent->root;
	int err = 0;
	struct cgroup_subsys *ss;
	struct super_block *sb = root->sb;

	cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
	if (!cgrp)
		return -ENOMEM;

	/*
	 * Pin the superblock for the lifetime of the new cgroup; the error
	 * paths below give the reference back with deactivate_super().
	 */
	atomic_inc(&sb->s_active);

	mutex_lock(&cgroup_mutex);

	init_cgroup_housekeeping(cgrp);

	cgrp->parent = parent;
	cgrp->root = parent->root;
	cgrp->top_cgroup = parent->top_cgroup;

	/* These two settings are inherited from the parent. */
	if (notify_on_release(parent))
		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);

	if (clone_children(parent))
		set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);

	/* Create and attach the per-subsystem state. */
	for_each_subsys(root, ss) {
		struct cgroup_subsys_state *css;

		css = ss->create(cgrp);
		if (IS_ERR(css)) {
			err = PTR_ERR(css);
			goto err_destroy;
		}
		init_cgroup_css(css, ss, cgrp);
		if (ss->use_id) {
			err = alloc_css_id(ss, parent, cgrp);
			if (err)
				goto err_destroy;
		}
		/* At error, ->destroy() callback has to free assigned ID. */
		if (clone_children(parent) && ss->post_clone)
			ss->post_clone(cgrp);

		/* Warn once per subsystem about nesting below a non-root parent. */
		if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
		    parent->parent) {
			pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
				   current->comm, current->pid, ss->name);
			if (!strcmp(ss->name, "memory"))
				pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n");
			ss->warned_broken_hierarchy = true;
		}
	}

	list_add(&cgrp->sibling, &cgrp->parent->children);
	root->number_of_cgroups++;

	err = cgroup_create_dir(cgrp, dentry, mode);
	if (err < 0)
		goto err_remove;

	/* One dentry reference for each css not using the deprecated mode. */
	for_each_subsys(root, ss)
		if (!ss->__DEPRECATED_clear_css_refs)
			dget(dentry);

	/*
	 * The new directory's i_mutex is expected to be held at this point
	 * (presumably taken by cgroup_create_dir() -- confirm); it is dropped
	 * below.
	 */
	BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));

	list_add_tail(&cgrp->allcg_node, &root->allcg_list);

	err = cgroup_populate_dir(cgrp, true, root->subsys_mask);
	/*
	 * NOTE(review): err is not acted upon; 0 is returned even if
	 * populating failed.  The directory already exists at this point, so
	 * this looks deliberate (rmdir can clean up) -- confirm.
	 */

	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);

	return 0;

 err_remove:

	list_del(&cgrp->sibling);
	root->number_of_cgroups--;

 err_destroy:

	/* Destroy only the subsystem states that were actually attached. */
	for_each_subsys(root, ss) {
		if (cgrp->subsys[ss->subsys_id])
			ss->destroy(cgrp);
	}

	mutex_unlock(&cgroup_mutex);

	/* Release the reference count that we took on the superblock. */
	deactivate_super(sb);

	kfree(cgrp);
	return err;
}
4150
/*
 * mkdir handler: create a cgroup for @dentry below the cgroup stored in the
 * parent dentry's d_fsdata.  @dir is not used.
 */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct cgroup *c_parent = dentry->d_parent->d_fsdata;

	/* The S_IFDIR bit is added here before handing over to cgroup_create(). */
	return cgroup_create(c_parent, dentry, mode | S_IFDIR);
}
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168static int cgroup_has_css_refs(struct cgroup *cgrp)
4169{
4170 int i;
4171
4172
4173
4174
4175
4176
4177 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4178 struct cgroup_subsys *ss = subsys[i];
4179 struct cgroup_subsys_state *css;
4180
4181
4182 if (ss == NULL || ss->root != cgrp->root)
4183 continue;
4184
4185 css = cgrp->subsys[ss->subsys_id];
4186
4187
4188
4189
4190
4191
4192
4193
4194 if (css && css_refcnt(css) > 1)
4195 return 1;
4196 }
4197 return 0;
4198}
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
/*
 * Try to deactivate every css of @cgrp in preparation for removal.
 *
 * Runs in two passes with local interrupts disabled.  The first pass biases
 * every refcount by CSS_DEACT_BIAS and, for subsystems that use the
 * deprecated clear_css_refs mode, checks that only the base reference is
 * left.  The second pass either commits (marks each css CSS_REMOVED and
 * drops the base reference) or rolls the bias back.
 *
 * Returns non-zero on success, 0 if some css was still referenced.
 */
static int cgroup_clear_css_refs(struct cgroup *cgrp)
{
	struct cgroup_subsys *ss;
	unsigned long flags;
	bool failed = false;

	local_irq_save(flags);

	/*
	 * Pass 1: add the deactivation bias to each refcount.  The bias is
	 * applied to every css, whether or not the check below fails.
	 */
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];

		WARN_ON(atomic_read(&css->refcnt) < 0);
		atomic_add(CSS_DEACT_BIAS, &css->refcnt);

		if (ss->__DEPRECATED_clear_css_refs)
			failed |= css_refcnt(css) != 1;
	}

	/*
	 * Pass 2: on success mark each css removed and put the base
	 * reference; on failure take the bias off again.
	 */
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];

		if (!failed) {
			set_bit(CSS_REMOVED, &css->flags);
			css_put(css);
		} else {
			atomic_sub(CSS_DEACT_BIAS, &css->refcnt);
		}
	}

	local_irq_restore(flags);
	return !failed;
}
4264
/*
 * rmdir handler for a cgroup directory.
 *
 * Fails with -EBUSY while the cgroup still has tasks (cgrp->count) or child
 * cgroups.  Otherwise runs the pre-destroy callbacks and tries to
 * deactivate the subsystem states; if they are still referenced the caller
 * sleeps on cgroup_rmdir_waitq and retries, returning -EINTR if a signal is
 * pending.  On success the cgroup is marked removed, unlinked from its
 * parent and its directory is removed.
 */
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
{
	struct cgroup *cgrp = dentry->d_fsdata;
	struct dentry *d;
	struct cgroup *parent;
	DEFINE_WAIT(wait);
	struct cgroup_event *event, *tmp;
	int ret;

	/* cgroup_mutex is dropped and retaken below, hence the re-checks. */
again:
	mutex_lock(&cgroup_mutex);
	if (atomic_read(&cgrp->count) != 0) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	if (!list_empty(&cgrp->children)) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	mutex_unlock(&cgroup_mutex);

	/*
	 * Announce that an rmdir is waiting before the pre-destroy callbacks
	 * run.  The bit is tested again before sleeping, so a waker that
	 * clears it in the meantime prevents the sleep.
	 */
	set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	/* Called without cgroup_mutex held. */
	ret = cgroup_call_pre_destroy(cgrp);
	if (ret) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		return ret;
	}

	mutex_lock(&cgroup_mutex);
	parent = cgrp->parent;
	/* Re-check: tasks or children may have appeared while unlocked. */
	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	/* Queue ourselves first so a wakeup cannot be missed. */
	prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
	if (!cgroup_clear_css_refs(cgrp)) {
		mutex_unlock(&cgroup_mutex);
		/* Sleep only if nobody cleared the wait bit already. */
		if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
			schedule();
		finish_wait(&cgroup_rmdir_waitq, &wait);
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		if (signal_pending(current))
			return -EINTR;
		goto again;
	}
	/* No css references remain: from here on the removal cannot fail. */
	finish_wait(&cgroup_rmdir_waitq, &wait);
	clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	raw_spin_lock(&release_list_lock);
	set_bit(CGRP_REMOVED, &cgrp->flags);
	if (!list_empty(&cgrp->release_list))
		list_del_init(&cgrp->release_list);
	raw_spin_unlock(&release_list_lock);

	/* Unlink from the parent's list of children. */
	list_del_init(&cgrp->sibling);

	list_del_init(&cgrp->allcg_node);

	/* Hold an extra reference across the directory removal. */
	d = dget(cgrp->dentry);

	cgroup_d_remove_dir(d);
	dput(d);

	/* The parent lost a child, so it may have become releasable. */
	set_bit(CGRP_RELEASABLE, &parent->flags);
	check_for_release(parent);

	/*
	 * Detach every registered event, signal its eventfd and hand the
	 * teardown to the event's work item.
	 */
	spin_lock(&cgrp->event_list_lock);
	list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
		list_del(&event->list);
		remove_wait_queue(event->wqh, &event->wait);
		eventfd_signal(event->eventfd, 1);
		schedule_work(&event->remove);
	}
	spin_unlock(&cgrp->event_list_lock);

	mutex_unlock(&cgroup_mutex);
	return 0;
}
4370
4371static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss)
4372{
4373 INIT_LIST_HEAD(&ss->cftsets);
4374
4375
4376
4377
4378
4379 if (ss->base_cftypes) {
4380 ss->base_cftset.cfts = ss->base_cftypes;
4381 list_add_tail(&ss->base_cftset.node, &ss->cftsets);
4382 }
4383}
4384
/*
 * Boot-time initialisation of a built-in subsystem: attach it to the dummy
 * hierarchy (rootnode) and create its root css.
 */
static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);

	/* Init the base cftset. */
	cgroup_init_cftsets(ss);

	/* Create the top cgroup state for this subsystem. */
	list_add(&ss->sibling, &rootnode.subsys_list);
	ss->root = &rootnode;
	css = ss->create(dummytop);
	/* There is no way to recover from a failure at this point. */
	BUG_ON(IS_ERR(css));
	init_cgroup_css(css, ss, dummytop);

	/* Make init_css_set point at the new root css as well. */
	init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];

	need_forkexit_callback |= ss->fork || ss->exit;

	/* init_task's task list must still be empty when this runs. */
	BUG_ON(!list_empty(&init_task.tasks));

	ss->active = 1;

	/* This path is only for built-in subsystems, never for modules. */
	BUG_ON(ss->module);
}
4421
4422
4423
4424
4425
4426
4427
4428
4429
4430
4431int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
4432{
4433 int i;
4434 struct cgroup_subsys_state *css;
4435
4436
4437 if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
4438 ss->create == NULL || ss->destroy == NULL)
4439 return -EINVAL;
4440
4441
4442
4443
4444
4445
4446
4447 if (ss->fork || ss->exit)
4448 return -EINVAL;
4449
4450
4451
4452
4453
4454 if (ss->module == NULL) {
4455
4456 BUG_ON(subsys[ss->subsys_id] != ss);
4457 return 0;
4458 }
4459
4460
4461 cgroup_init_cftsets(ss);
4462
4463 mutex_lock(&cgroup_mutex);
4464 subsys[ss->subsys_id] = ss;
4465
4466
4467
4468
4469
4470 css = ss->create(dummytop);
4471 if (IS_ERR(css)) {
4472
4473 subsys[ss->subsys_id] = NULL;
4474 mutex_unlock(&cgroup_mutex);
4475 return PTR_ERR(css);
4476 }
4477
4478 list_add(&ss->sibling, &rootnode.subsys_list);
4479 ss->root = &rootnode;
4480
4481
4482 init_cgroup_css(css, ss, dummytop);
4483
4484 if (ss->use_id) {
4485 int ret = cgroup_init_idr(ss, css);
4486 if (ret) {
4487 dummytop->subsys[ss->subsys_id] = NULL;
4488 ss->destroy(dummytop);
4489 subsys[ss->subsys_id] = NULL;
4490 mutex_unlock(&cgroup_mutex);
4491 return ret;
4492 }
4493 }
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503 write_lock(&css_set_lock);
4504 for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
4505 struct css_set *cg;
4506 struct hlist_node *node, *tmp;
4507 struct hlist_head *bucket = &css_set_table[i], *new_bucket;
4508
4509 hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) {
4510
4511 if (cg->subsys[ss->subsys_id])
4512 continue;
4513
4514 hlist_del(&cg->hlist);
4515
4516 cg->subsys[ss->subsys_id] = css;
4517
4518 new_bucket = css_set_hash(cg->subsys);
4519 hlist_add_head(&cg->hlist, new_bucket);
4520 }
4521 }
4522 write_unlock(&css_set_lock);
4523
4524 ss->active = 1;
4525
4526
4527 mutex_unlock(&cgroup_mutex);
4528 return 0;
4529}
4530EXPORT_SYMBOL_GPL(cgroup_load_subsys);
4531
4532
4533
4534
4535
4536
4537
4538
4539
/*
 * cgroup_unload_subsys - unregister a modular cgroup subsystem.
 * @ss: the subsystem to unload
 *
 * The subsystem must be modular and must still be attached to the dummy
 * hierarchy (rootnode); both conditions are enforced with BUG_ON().
 */
void cgroup_unload_subsys(struct cgroup_subsys *ss)
{
	struct cg_cgroup_link *link;
	struct hlist_head *hhead;

	BUG_ON(ss->module == NULL);

	/* Only a subsystem that sits in the dummy hierarchy can be unloaded. */
	BUG_ON(ss->root != &rootnode);

	mutex_lock(&cgroup_mutex);
	/* Deassign the subsys_id. */
	subsys[ss->subsys_id] = NULL;

	/* Remove the subsystem from rootnode's list of subsystems. */
	list_del_init(&ss->sibling);

	/*
	 * Clear the subsystem's slot in every css_set linked to dummytop.
	 * That changes the hashed value, so each set is rehashed.
	 */
	write_lock(&css_set_lock);
	list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) {
		struct css_set *cg = link->cg;

		hlist_del(&cg->hlist);
		BUG_ON(!cg->subsys[ss->subsys_id]);
		cg->subsys[ss->subsys_id] = NULL;
		hhead = css_set_hash(cg->subsys);
		hlist_add_head(&cg->hlist, hhead);
	}
	write_unlock(&css_set_lock);

	/*
	 * Destroy the root css while dummytop->subsys[] still points at it,
	 * and only then clear the slot.
	 */
	ss->destroy(dummytop);
	dummytop->subsys[ss->subsys_id] = NULL;

	mutex_unlock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
4589
4590
4591
4592
4593
4594
4595
/*
 * cgroup_init_early - set up the initial css_set and the dummy hierarchy,
 * then initialise every built-in subsystem that requests early_init.
 *
 * Always returns 0.
 */
int __init cgroup_init_early(void)
{
	int i;
	atomic_set(&init_css_set.refcount, 1);
	INIT_LIST_HEAD(&init_css_set.cg_links);
	INIT_LIST_HEAD(&init_css_set.tasks);
	INIT_HLIST_NODE(&init_css_set.hlist);
	css_set_count = 1;
	init_cgroup_root(&rootnode);
	root_count = 1;
	init_task.cgroups = &init_css_set;

	/* Link init_css_set and the dummy top cgroup in both directions. */
	init_css_set_link.cg = &init_css_set;
	init_css_set_link.cgrp = dummytop;
	list_add(&init_css_set_link.cgrp_link_list,
		 &rootnode.top_cgroup.css_sets);
	list_add(&init_css_set_link.cg_link_list,
		 &init_css_set.cg_links);

	for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&css_set_table[i]);

	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];

		/* Empty slots and modular subsystems are handled elsewhere. */
		if (!ss || ss->module)
			continue;

		/* Validate the descriptor of each built-in subsystem. */
		BUG_ON(!ss->name);
		BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
		BUG_ON(!ss->create);
		BUG_ON(!ss->destroy);
		/* The id stored in the descriptor must match its array slot. */
		if (ss->subsys_id != i) {
			printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
			       ss->name, ss->subsys_id);
			BUG();
		}

		if (ss->early_init)
			cgroup_init_subsys(ss);
	}
	return 0;
}
4640
4641
4642
4643
4644
4645
4646
/*
 * cgroup_init - finish cgroup initialisation: initialise the remaining
 * built-in subsystems, hash init_css_set, and register the sysfs object,
 * the filesystem type and the "cgroups" proc entry.
 *
 * Returns 0 on success or a negative errno.
 */
int __init cgroup_init(void)
{
	int err;
	int i;
	struct hlist_head *hhead;

	err = bdi_init(&cgroup_backing_dev_info);
	if (err)
		return err;

	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];

		/* Empty slots and modular subsystems are not handled here. */
		if (!ss || ss->module)
			continue;
		/* early_init subsystems were done in cgroup_init_early(). */
		if (!ss->early_init)
			cgroup_init_subsys(ss);
		/* NOTE(review): the result of cgroup_init_idr() is ignored. */
		if (ss->use_id)
			cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
	}

	/*
	 * Add init_css_set to the hash table.  This happens after the loop
	 * above because initialising a subsystem changes
	 * init_css_set.subsys[], from which the hash is computed.
	 */
	hhead = css_set_hash(init_css_set.subsys);
	hlist_add_head(&init_css_set.hlist, hhead);
	/* Note that init_root_id() is called inside the BUG_ON() condition. */
	BUG_ON(!init_root_id(&rootnode));

	cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
	if (!cgroup_kobj) {
		err = -ENOMEM;
		goto out;
	}

	err = register_filesystem(&cgroup_fs_type);
	if (err < 0) {
		kobject_put(cgroup_kobj);
		goto out;
	}

	proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);

out:
	/* On any failure, undo the bdi_init() done at the top. */
	if (err)
		bdi_destroy(&cgroup_backing_dev_info);

	return err;
}
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
4704
4705
4706
4707
/*
 * seq_file show routine behind proc_cgroup_operations.
 *
 * For each active hierarchy, prints one line of the form
 *	<hierarchy id>:<subsystems>[,name=<root name>]:<cgroup path>
 * describing the cgroup that the task identified by m->private belongs to.
 *
 * Returns 0 on success, -ENOMEM if the path buffer cannot be allocated,
 * -ESRCH if the task no longer exists, or the error from cgroup_path().
 */
static int proc_cgroup_show(struct seq_file *m, void *v)
{
	struct pid *pid;
	struct task_struct *tsk;
	char *buf;
	int retval;
	struct cgroupfs_root *root;

	retval = -ENOMEM;
	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!buf)
		goto out;

	retval = -ESRCH;
	pid = m->private;
	/* Takes a task reference; dropped with put_task_struct() below. */
	tsk = get_pid_task(pid, PIDTYPE_PID);
	if (!tsk)
		goto out_free;

	retval = 0;

	mutex_lock(&cgroup_mutex);

	for_each_active_root(root) {
		struct cgroup_subsys *ss;
		struct cgroup *cgrp;
		int count = 0;

		seq_printf(m, "%d:", root->hierarchy_id);
		/* Comma-separated subsystem names; count tracks the separator. */
		for_each_subsys(root, ss)
			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
		if (strlen(root->name))
			seq_printf(m, "%sname=%s", count ? "," : "",
				   root->name);
		seq_putc(m, ':');
		cgrp = task_cgroup_from_root(tsk, root);
		retval = cgroup_path(cgrp, buf, PAGE_SIZE);
		if (retval < 0)
			goto out_unlock;
		seq_puts(m, buf);
		seq_putc(m, '\n');
	}

out_unlock:
	mutex_unlock(&cgroup_mutex);
	put_task_struct(tsk);
out_free:
	kfree(buf);
out:
	return retval;
}
4759
4760static int cgroup_open(struct inode *inode, struct file *file)
4761{
4762 struct pid *pid = PROC_I(inode)->pid;
4763 return single_open(file, proc_cgroup_show, pid);
4764}
4765
4766const struct file_operations proc_cgroup_operations = {
4767 .open = cgroup_open,
4768 .read = seq_read,
4769 .llseek = seq_lseek,
4770 .release = single_release,
4771};
4772
4773
4774static int proc_cgroupstats_show(struct seq_file *m, void *v)
4775{
4776 int i;
4777
4778 seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
4779
4780
4781
4782
4783
4784 mutex_lock(&cgroup_mutex);
4785 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4786 struct cgroup_subsys *ss = subsys[i];
4787 if (ss == NULL)
4788 continue;
4789 seq_printf(m, "%s\t%d\t%d\t%d\n",
4790 ss->name, ss->root->hierarchy_id,
4791 ss->root->number_of_cgroups, !ss->disabled);
4792 }
4793 mutex_unlock(&cgroup_mutex);
4794 return 0;
4795}
4796
4797static int cgroupstats_open(struct inode *inode, struct file *file)
4798{
4799 return single_open(file, proc_cgroupstats_show, NULL);
4800}
4801
4802static const struct file_operations proc_cgroupstats_operations = {
4803 .open = cgroupstats_open,
4804 .read = seq_read,
4805 .llseek = seq_lseek,
4806 .release = single_release,
4807};
4808
4809
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819
4820
4821
4822
4823
4824
4825void cgroup_fork(struct task_struct *child)
4826{
4827 task_lock(current);
4828 child->cgroups = current->cgroups;
4829 get_css_set(child->cgroups);
4830 task_unlock(current);
4831 INIT_LIST_HEAD(&child->cg_list);
4832}
4833
4834
4835
4836
4837
4838
4839
4840
4841
4842void cgroup_fork_callbacks(struct task_struct *child)
4843{
4844 if (need_forkexit_callback) {
4845 int i;
4846 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4847 struct cgroup_subsys *ss = subsys[i];
4848
4849
4850
4851
4852
4853 if (!ss || ss->module)
4854 continue;
4855
4856 if (ss->fork)
4857 ss->fork(child);
4858 }
4859 }
4860}
4861
4862
4863
4864
4865
4866
4867
4868
4869
4870
4871void cgroup_post_fork(struct task_struct *child)
4872{
4873
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884 if (use_task_css_set_links) {
4885 write_lock(&css_set_lock);
4886 task_lock(child);
4887 if (list_empty(&child->cg_list))
4888 list_add(&child->cg_list, &child->cgroups->tasks);
4889 task_unlock(child);
4890 write_unlock(&css_set_lock);
4891 }
4892}
4893
4894
4895
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907
4908
4909
4910
4911
4912
4913
4914
4915
4916
4917
4918
4919
4920
4921
4922
4923
4924
4925
4926
4927
4928void cgroup_exit(struct task_struct *tsk, int run_callbacks)
4929{
4930 struct css_set *cg;
4931 int i;
4932
4933
4934
4935
4936
4937
4938 if (!list_empty(&tsk->cg_list)) {
4939 write_lock(&css_set_lock);
4940 if (!list_empty(&tsk->cg_list))
4941 list_del_init(&tsk->cg_list);
4942 write_unlock(&css_set_lock);
4943 }
4944
4945
4946 task_lock(tsk);
4947 cg = tsk->cgroups;
4948 tsk->cgroups = &init_css_set;
4949
4950 if (run_callbacks && need_forkexit_callback) {
4951 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4952 struct cgroup_subsys *ss = subsys[i];
4953
4954
4955 if (!ss || ss->module)
4956 continue;
4957
4958 if (ss->exit) {
4959 struct cgroup *old_cgrp =
4960 rcu_dereference_raw(cg->subsys[i])->cgroup;
4961 struct cgroup *cgrp = task_cgroup(tsk, i);
4962 ss->exit(cgrp, old_cgrp, tsk);
4963 }
4964 }
4965 }
4966 task_unlock(tsk);
4967
4968 if (cg)
4969 put_css_set_taskexit(cg);
4970}
4971
4972
4973
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
4986{
4987 int ret;
4988 struct cgroup *target;
4989
4990 if (cgrp == dummytop)
4991 return 1;
4992
4993 target = task_cgroup_from_root(task, cgrp->root);
4994 while (cgrp != target && cgrp!= cgrp->top_cgroup)
4995 cgrp = cgrp->parent;
4996 ret = (cgrp == target);
4997 return ret;
4998}
4999
5000static void check_for_release(struct cgroup *cgrp)
5001{
5002
5003
5004 if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
5005 && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
5006
5007
5008
5009 int need_schedule_work = 0;
5010 raw_spin_lock(&release_list_lock);
5011 if (!cgroup_is_removed(cgrp) &&
5012 list_empty(&cgrp->release_list)) {
5013 list_add(&cgrp->release_list, &release_list);
5014 need_schedule_work = 1;
5015 }
5016 raw_spin_unlock(&release_list_lock);
5017 if (need_schedule_work)
5018 schedule_work(&release_agent_work);
5019 }
5020}
5021
5022
5023bool __css_tryget(struct cgroup_subsys_state *css)
5024{
5025 do {
5026 int v = css_refcnt(css);
5027
5028 if (atomic_cmpxchg(&css->refcnt, v, v + 1) == v)
5029 return true;
5030 cpu_relax();
5031 } while (!test_bit(CSS_REMOVED, &css->flags));
5032
5033 return false;
5034}
5035EXPORT_SYMBOL_GPL(__css_tryget);
5036
5037
5038void __css_put(struct cgroup_subsys_state *css)
5039{
5040 struct cgroup *cgrp = css->cgroup;
5041 int v;
5042
5043 rcu_read_lock();
5044 v = css_unbias_refcnt(atomic_dec_return(&css->refcnt));
5045
5046 switch (v) {
5047 case 1:
5048 if (notify_on_release(cgrp)) {
5049 set_bit(CGRP_RELEASABLE, &cgrp->flags);
5050 check_for_release(cgrp);
5051 }
5052 cgroup_wakeup_rmdir_waiter(cgrp);
5053 break;
5054 case 0:
5055 if (!test_bit(CSS_CLEAR_CSS_REFS, &css->flags))
5056 schedule_work(&css->dput_work);
5057 break;
5058 }
5059 rcu_read_unlock();
5060}
5061EXPORT_SYMBOL_GPL(__css_put);
5062
5063
5064
5065
5066
5067
5068
5069
5070
5071
5072
5073
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086static void cgroup_release_agent(struct work_struct *work)
5087{
5088 BUG_ON(work != &release_agent_work);
5089 mutex_lock(&cgroup_mutex);
5090 raw_spin_lock(&release_list_lock);
5091 while (!list_empty(&release_list)) {
5092 char *argv[3], *envp[3];
5093 int i;
5094 char *pathbuf = NULL, *agentbuf = NULL;
5095 struct cgroup *cgrp = list_entry(release_list.next,
5096 struct cgroup,
5097 release_list);
5098 list_del_init(&cgrp->release_list);
5099 raw_spin_unlock(&release_list_lock);
5100 pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
5101 if (!pathbuf)
5102 goto continue_free;
5103 if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
5104 goto continue_free;
5105 agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
5106 if (!agentbuf)
5107 goto continue_free;
5108
5109 i = 0;
5110 argv[i++] = agentbuf;
5111 argv[i++] = pathbuf;
5112 argv[i] = NULL;
5113
5114 i = 0;
5115
5116 envp[i++] = "HOME=/";
5117 envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
5118 envp[i] = NULL;
5119
5120
5121
5122
5123 mutex_unlock(&cgroup_mutex);
5124 call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
5125 mutex_lock(&cgroup_mutex);
5126 continue_free:
5127 kfree(pathbuf);
5128 kfree(agentbuf);
5129 raw_spin_lock(&release_list_lock);
5130 }
5131 raw_spin_unlock(&release_list_lock);
5132 mutex_unlock(&cgroup_mutex);
5133}
5134
5135static int __init cgroup_disable(char *str)
5136{
5137 int i;
5138 char *token;
5139
5140 while ((token = strsep(&str, ",")) != NULL) {
5141 if (!*token)
5142 continue;
5143 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
5144 struct cgroup_subsys *ss = subsys[i];
5145
5146
5147
5148
5149
5150
5151 if (!ss || ss->module)
5152 continue;
5153
5154 if (!strcmp(token, ss->name)) {
5155 ss->disabled = 1;
5156 printk(KERN_INFO "Disabling %s control group"
5157 " subsystem\n", ss->name);
5158 break;
5159 }
5160 }
5161 }
5162 return 1;
5163}
5164__setup("cgroup_disable=", cgroup_disable);
5165
5166
5167
5168
5169
5170
5171
5172
5173unsigned short css_id(struct cgroup_subsys_state *css)
5174{
5175 struct css_id *cssid;
5176
5177
5178
5179
5180
5181
5182 cssid = rcu_dereference_check(css->id, css_refcnt(css));
5183
5184 if (cssid)
5185 return cssid->id;
5186 return 0;
5187}
5188EXPORT_SYMBOL_GPL(css_id);
5189
5190unsigned short css_depth(struct cgroup_subsys_state *css)
5191{
5192 struct css_id *cssid;
5193
5194 cssid = rcu_dereference_check(css->id, css_refcnt(css));
5195
5196 if (cssid)
5197 return cssid->depth;
5198 return 0;
5199}
5200EXPORT_SYMBOL_GPL(css_depth);
5201
5202
5203
5204
5205
5206
5207
5208
5209
5210
5211
5212
5213
5214
5215bool css_is_ancestor(struct cgroup_subsys_state *child,
5216 const struct cgroup_subsys_state *root)
5217{
5218 struct css_id *child_id;
5219 struct css_id *root_id;
5220
5221 child_id = rcu_dereference(child->id);
5222 if (!child_id)
5223 return false;
5224 root_id = rcu_dereference(root->id);
5225 if (!root_id)
5226 return false;
5227 if (child_id->depth < root_id->depth)
5228 return false;
5229 if (child_id->stack[root_id->depth] != root_id->id)
5230 return false;
5231 return true;
5232}
5233
5234void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
5235{
5236 struct css_id *id = css->id;
5237
5238 if (!id)
5239 return;
5240
5241 BUG_ON(!ss->use_id);
5242
5243 rcu_assign_pointer(id->css, NULL);
5244 rcu_assign_pointer(css->id, NULL);
5245 spin_lock(&ss->id_lock);
5246 idr_remove(&ss->idr, id->id);
5247 spin_unlock(&ss->id_lock);
5248 kfree_rcu(id, rcu_head);
5249}
5250EXPORT_SYMBOL_GPL(free_css_id);
5251
5252
5253
5254
5255
5256
5257static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
5258{
5259 struct css_id *newid;
5260 int myid, error, size;
5261
5262 BUG_ON(!ss->use_id);
5263
5264 size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
5265 newid = kzalloc(size, GFP_KERNEL);
5266 if (!newid)
5267 return ERR_PTR(-ENOMEM);
5268
5269 if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) {
5270 error = -ENOMEM;
5271 goto err_out;
5272 }
5273 spin_lock(&ss->id_lock);
5274
5275 error = idr_get_new_above(&ss->idr, newid, 1, &myid);
5276 spin_unlock(&ss->id_lock);
5277
5278
5279 if (error) {
5280 error = -ENOSPC;
5281 goto err_out;
5282 }
5283 if (myid > CSS_ID_MAX)
5284 goto remove_idr;
5285
5286 newid->id = myid;
5287 newid->depth = depth;
5288 return newid;
5289remove_idr:
5290 error = -ENOSPC;
5291 spin_lock(&ss->id_lock);
5292 idr_remove(&ss->idr, myid);
5293 spin_unlock(&ss->id_lock);
5294err_out:
5295 kfree(newid);
5296 return ERR_PTR(error);
5297
5298}
5299
5300static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
5301 struct cgroup_subsys_state *rootcss)
5302{
5303 struct css_id *newid;
5304
5305 spin_lock_init(&ss->id_lock);
5306 idr_init(&ss->idr);
5307
5308 newid = get_new_cssid(ss, 0);
5309 if (IS_ERR(newid))
5310 return PTR_ERR(newid);
5311
5312 newid->stack[0] = newid->id;
5313 newid->css = rootcss;
5314 rootcss->id = newid;
5315 return 0;
5316}
5317
5318static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
5319 struct cgroup *child)
5320{
5321 int subsys_id, i, depth = 0;
5322 struct cgroup_subsys_state *parent_css, *child_css;
5323 struct css_id *child_id, *parent_id;
5324
5325 subsys_id = ss->subsys_id;
5326 parent_css = parent->subsys[subsys_id];
5327 child_css = child->subsys[subsys_id];
5328 parent_id = parent_css->id;
5329 depth = parent_id->depth + 1;
5330
5331 child_id = get_new_cssid(ss, depth);
5332 if (IS_ERR(child_id))
5333 return PTR_ERR(child_id);
5334
5335 for (i = 0; i < depth; i++)
5336 child_id->stack[i] = parent_id->stack[i];
5337 child_id->stack[depth] = child_id->id;
5338
5339
5340
5341
5342 rcu_assign_pointer(child_css->id, child_id);
5343
5344 return 0;
5345}
5346
5347
5348
5349
5350
5351
5352
5353
5354
5355struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
5356{
5357 struct css_id *cssid = NULL;
5358
5359 BUG_ON(!ss->use_id);
5360 cssid = idr_find(&ss->idr, id);
5361
5362 if (unlikely(!cssid))
5363 return NULL;
5364
5365 return rcu_dereference(cssid->css);
5366}
5367EXPORT_SYMBOL_GPL(css_lookup);
5368
5369
5370
5371
5372
5373
5374
5375
5376
5377
5378
5379struct cgroup_subsys_state *
5380css_get_next(struct cgroup_subsys *ss, int id,
5381 struct cgroup_subsys_state *root, int *foundid)
5382{
5383 struct cgroup_subsys_state *ret = NULL;
5384 struct css_id *tmp;
5385 int tmpid;
5386 int rootid = css_id(root);
5387 int depth = css_depth(root);
5388
5389 if (!rootid)
5390 return NULL;
5391
5392 BUG_ON(!ss->use_id);
5393 WARN_ON_ONCE(!rcu_read_lock_held());
5394
5395
5396 tmpid = id;
5397 while (1) {
5398
5399
5400
5401
5402 tmp = idr_get_next(&ss->idr, &tmpid);
5403 if (!tmp)
5404 break;
5405 if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
5406 ret = rcu_dereference(tmp->css);
5407 if (ret) {
5408 *foundid = tmpid;
5409 break;
5410 }
5411 }
5412
5413 tmpid = tmpid + 1;
5414 }
5415 return ret;
5416}
5417
5418
5419
5420
5421struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
5422{
5423 struct cgroup *cgrp;
5424 struct inode *inode;
5425 struct cgroup_subsys_state *css;
5426
5427 inode = f->f_dentry->d_inode;
5428
5429 if (inode->i_op != &cgroup_dir_inode_operations)
5430 return ERR_PTR(-EBADF);
5431
5432 if (id < 0 || id >= CGROUP_SUBSYS_COUNT)
5433 return ERR_PTR(-EINVAL);
5434
5435
5436 cgrp = __d_cgrp(f->f_dentry);
5437 css = cgrp->subsys[id];
5438 return css ? css : ERR_PTR(-ENOENT);
5439}
5440
5441#ifdef CONFIG_CGROUP_DEBUG
5442static struct cgroup_subsys_state *debug_create(struct cgroup *cont)
5443{
5444 struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
5445
5446 if (!css)
5447 return ERR_PTR(-ENOMEM);
5448
5449 return css;
5450}
5451
5452static void debug_destroy(struct cgroup *cont)
5453{
5454 kfree(cont->subsys[debug_subsys_id]);
5455}
5456
5457static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
5458{
5459 return atomic_read(&cont->count);
5460}
5461
5462static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft)
5463{
5464 return cgroup_task_count(cont);
5465}
5466
5467static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
5468{
5469 return (u64)(unsigned long)current->cgroups;
5470}
5471
5472static u64 current_css_set_refcount_read(struct cgroup *cont,
5473 struct cftype *cft)
5474{
5475 u64 count;
5476
5477 rcu_read_lock();
5478 count = atomic_read(¤t->cgroups->refcount);
5479 rcu_read_unlock();
5480 return count;
5481}
5482
5483static int current_css_set_cg_links_read(struct cgroup *cont,
5484 struct cftype *cft,
5485 struct seq_file *seq)
5486{
5487 struct cg_cgroup_link *link;
5488 struct css_set *cg;
5489
5490 read_lock(&css_set_lock);
5491 rcu_read_lock();
5492 cg = rcu_dereference(current->cgroups);
5493 list_for_each_entry(link, &cg->cg_links, cg_link_list) {
5494 struct cgroup *c = link->cgrp;
5495 const char *name;
5496
5497 if (c->dentry)
5498 name = c->dentry->d_name.name;
5499 else
5500 name = "?";
5501 seq_printf(seq, "Root %d group %s\n",
5502 c->root->hierarchy_id, name);
5503 }
5504 rcu_read_unlock();
5505 read_unlock(&css_set_lock);
5506 return 0;
5507}
5508
5509#define MAX_TASKS_SHOWN_PER_CSS 25
5510static int cgroup_css_links_read(struct cgroup *cont,
5511 struct cftype *cft,
5512 struct seq_file *seq)
5513{
5514 struct cg_cgroup_link *link;
5515
5516 read_lock(&css_set_lock);
5517 list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
5518 struct css_set *cg = link->cg;
5519 struct task_struct *task;
5520 int count = 0;
5521 seq_printf(seq, "css_set %p\n", cg);
5522 list_for_each_entry(task, &cg->tasks, cg_list) {
5523 if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
5524 seq_puts(seq, " ...\n");
5525 break;
5526 } else {
5527 seq_printf(seq, " task %d\n",
5528 task_pid_vnr(task));
5529 }
5530 }
5531 }
5532 read_unlock(&css_set_lock);
5533 return 0;
5534}
5535
5536static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
5537{
5538 return test_bit(CGRP_RELEASABLE, &cgrp->flags);
5539}
5540
5541static struct cftype debug_files[] = {
5542 {
5543 .name = "cgroup_refcount",
5544 .read_u64 = cgroup_refcount_read,
5545 },
5546 {
5547 .name = "taskcount",
5548 .read_u64 = debug_taskcount_read,
5549 },
5550
5551 {
5552 .name = "current_css_set",
5553 .read_u64 = current_css_set_read,
5554 },
5555
5556 {
5557 .name = "current_css_set_refcount",
5558 .read_u64 = current_css_set_refcount_read,
5559 },
5560
5561 {
5562 .name = "current_css_set_cg_links",
5563 .read_seq_string = current_css_set_cg_links_read,
5564 },
5565
5566 {
5567 .name = "cgroup_css_links",
5568 .read_seq_string = cgroup_css_links_read,
5569 },
5570
5571 {
5572 .name = "releasable",
5573 .read_u64 = releasable_read,
5574 },
5575
5576 { }
5577};
5578
5579struct cgroup_subsys debug_subsys = {
5580 .name = "debug",
5581 .create = debug_create,
5582 .destroy = debug_destroy,
5583 .subsys_id = debug_subsys_id,
5584 .base_cftypes = debug_files,
5585};
5586#endif
5587