1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include <linux/cgroup.h>
30#include <linux/cred.h>
31#include <linux/ctype.h>
32#include <linux/errno.h>
33#include <linux/fs.h>
34#include <linux/init_task.h>
35#include <linux/kernel.h>
36#include <linux/list.h>
37#include <linux/mm.h>
38#include <linux/mutex.h>
39#include <linux/mount.h>
40#include <linux/pagemap.h>
41#include <linux/proc_fs.h>
42#include <linux/rcupdate.h>
43#include <linux/sched.h>
44#include <linux/backing-dev.h>
45#include <linux/seq_file.h>
46#include <linux/slab.h>
47#include <linux/magic.h>
48#include <linux/spinlock.h>
49#include <linux/string.h>
50#include <linux/sort.h>
51#include <linux/kmod.h>
52#include <linux/module.h>
53#include <linux/delayacct.h>
54#include <linux/cgroupstats.h>
55#include <linux/hashtable.h>
56#include <linux/namei.h>
57#include <linux/pid_namespace.h>
58#include <linux/idr.h>
59#include <linux/vmalloc.h>
60#include <linux/eventfd.h>
61#include <linux/poll.h>
62#include <linux/flex_array.h>
63#include <linux/kthread.h>
64
65#include <linux/atomic.h>
66
67
/*
 * Bias carried by a negative css reference count; css_unbias_refcnt()
 * subtracts it again to recover the plain count.
 */
#define CSS_DEACT_BIAS		INT_MIN
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
/* Main cgroup lock; exported to other code via cgroup_lock()/cgroup_unlock(). */
static DEFINE_MUTEX(cgroup_mutex);

/*
 * Taken inside cgroup_mutex by the mount and remount paths, and on its own
 * by cgroup_show_options() while it reads a root's options.
 */
static DEFINE_MUTEX(cgroup_root_mutex);
88
89
90
91
92
93
94
95#define SUBSYS(_x) [_x ## _subsys_id] = &_x ## _subsys,
96#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
97static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
98#include <linux/cgroup_subsys.h>
99};
100
101#define MAX_CGROUP_ROOT_NAMELEN 64
102
103
104
105
106
107
108struct cgroupfs_root {
109 struct super_block *sb;
110
111
112
113
114
115 unsigned long subsys_mask;
116
117
118 int hierarchy_id;
119
120
121 unsigned long actual_subsys_mask;
122
123
124 struct list_head subsys_list;
125
126
127 struct cgroup top_cgroup;
128
129
130 int number_of_cgroups;
131
132
133 struct list_head root_list;
134
135
136 struct list_head allcg_list;
137
138
139 unsigned long flags;
140
141
142 struct ida cgroup_ida;
143
144
145 char release_agent_path[PATH_MAX];
146
147
148 char name[MAX_CGROUP_ROOT_NAMELEN];
149};
150
151
152
153
154
155
156static struct cgroupfs_root rootnode;
157
158
159
160
161struct cfent {
162 struct list_head node;
163 struct dentry *dentry;
164 struct cftype *type;
165
166
167 struct simple_xattrs xattrs;
168};
169
170
171
172
173
174#define CSS_ID_MAX (65535)
175struct css_id {
176
177
178
179
180
181
182
183 struct cgroup_subsys_state __rcu *css;
184
185
186
187 unsigned short id;
188
189
190
191 unsigned short depth;
192
193
194
195 struct rcu_head rcu_head;
196
197
198
199 unsigned short stack[0];
200};
201
202
203
204
205struct cgroup_event {
206
207
208
209 struct cgroup *cgrp;
210
211
212
213 struct cftype *cft;
214
215
216
217 struct eventfd_ctx *eventfd;
218
219
220
221 struct list_head list;
222
223
224
225
226 poll_table pt;
227 wait_queue_head_t *wqh;
228 wait_queue_t wait;
229 struct work_struct remove;
230};
231
232
233
234static LIST_HEAD(roots);
235static int root_count;
236
237static DEFINE_IDA(hierarchy_ida);
238static int next_hierarchy_id;
239static DEFINE_SPINLOCK(hierarchy_id_lock);
240
241
242#define dummytop (&rootnode.top_cgroup)
243
244
245
246
247
248
249static int need_forkexit_callback __read_mostly;
250
251static int cgroup_destroy_locked(struct cgroup *cgrp);
252static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
253 struct cftype cfts[], bool is_add);
254
255#ifdef CONFIG_PROVE_LOCKING
256int cgroup_lock_is_held(void)
257{
258 return lockdep_is_held(&cgroup_mutex);
259}
260#else
261int cgroup_lock_is_held(void)
262{
263 return mutex_is_locked(&cgroup_mutex);
264}
265#endif
266
267EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
268
269static int css_unbias_refcnt(int refcnt)
270{
271 return refcnt >= 0 ? refcnt : refcnt - CSS_DEACT_BIAS;
272}
273
274
275static int css_refcnt(struct cgroup_subsys_state *css)
276{
277 int v = atomic_read(&css->refcnt);
278
279 return css_unbias_refcnt(v);
280}
281
282
283inline int cgroup_is_removed(const struct cgroup *cgrp)
284{
285 return test_bit(CGRP_REMOVED, &cgrp->flags);
286}
287
288
/* Bit numbers within cgroupfs_root.flags. */
enum {
	ROOT_NOPREFIX = 0,	/* set by the "noprefix" mount option */
	ROOT_XATTR = 1,		/* set by the "xattr" mount option */
};
293
294static int cgroup_is_releasable(const struct cgroup *cgrp)
295{
296 const int bits =
297 (1 << CGRP_RELEASABLE) |
298 (1 << CGRP_NOTIFY_ON_RELEASE);
299 return (cgrp->flags & bits) == bits;
300}
301
302static int notify_on_release(const struct cgroup *cgrp)
303{
304 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
305}
306
307
308
309
310
/*
 * for_each_subsys - iterate over the subsystems bound to a hierarchy.
 * @_root: pointer expression yielding the struct cgroupfs_root
 * @_ss:   struct cgroup_subsys * cursor
 *
 * @_root is parenthesized so that any pointer expression can be passed
 * without operator-precedence surprises.
 */
#define for_each_subsys(_root, _ss) \
list_for_each_entry(_ss, &(_root)->subsys_list, sibling)

/*
 * for_each_active_root - iterate over every hierarchy on the "roots" list.
 * @_root: struct cgroupfs_root * cursor
 */
#define for_each_active_root(_root) \
list_for_each_entry(_root, &roots, root_list)
317
318static inline struct cgroup *__d_cgrp(struct dentry *dentry)
319{
320 return dentry->d_fsdata;
321}
322
323static inline struct cfent *__d_cfe(struct dentry *dentry)
324{
325 return dentry->d_fsdata;
326}
327
328static inline struct cftype *__d_cft(struct dentry *dentry)
329{
330 return __d_cfe(dentry)->type;
331}
332
333
334
/*
 * Release-agent machinery.  release_agent_work runs cgroup_release_agent();
 * NOTE(review): release_list and its lock are used outside this chunk --
 * presumably they queue cgroups awaiting a release notification.
 */
static LIST_HEAD(release_list);
static DEFINE_RAW_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
340
341
342struct cg_cgroup_link {
343
344
345
346
347 struct list_head cgrp_link_list;
348 struct cgroup *cgrp;
349
350
351
352
353 struct list_head cg_link_list;
354 struct css_set *cg;
355};
356
357
358
359
360
361
362
363
364static struct css_set init_css_set;
365static struct cg_cgroup_link init_css_set_link;
366
367static int cgroup_init_idr(struct cgroup_subsys *ss,
368 struct cgroup_subsys_state *css);
369
370
371
372
373static DEFINE_RWLOCK(css_set_lock);
374static int css_set_count;
375
376
377
378
379
380
381#define CSS_SET_HASH_BITS 7
382static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);
383
384static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
385{
386 int i;
387 unsigned long key = 0UL;
388
389 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
390 key += (unsigned long)css[i];
391 key = (key >> 16) ^ key;
392
393 return key;
394}
395
396
397
398
399
400static int use_task_css_set_links __read_mostly;
401
402static void __put_css_set(struct css_set *cg, int taskexit)
403{
404 struct cg_cgroup_link *link;
405 struct cg_cgroup_link *saved_link;
406
407
408
409
410
411 if (atomic_add_unless(&cg->refcount, -1, 1))
412 return;
413 write_lock(&css_set_lock);
414 if (!atomic_dec_and_test(&cg->refcount)) {
415 write_unlock(&css_set_lock);
416 return;
417 }
418
419
420 hash_del(&cg->hlist);
421 css_set_count--;
422
423 list_for_each_entry_safe(link, saved_link, &cg->cg_links,
424 cg_link_list) {
425 struct cgroup *cgrp = link->cgrp;
426 list_del(&link->cg_link_list);
427 list_del(&link->cgrp_link_list);
428
429
430
431
432
433
434 rcu_read_lock();
435 if (atomic_dec_and_test(&cgrp->count) &&
436 notify_on_release(cgrp)) {
437 if (taskexit)
438 set_bit(CGRP_RELEASABLE, &cgrp->flags);
439 check_for_release(cgrp);
440 }
441 rcu_read_unlock();
442
443 kfree(link);
444 }
445
446 write_unlock(&css_set_lock);
447 kfree_rcu(cg, rcu_head);
448}
449
450
451
452
453static inline void get_css_set(struct css_set *cg)
454{
455 atomic_inc(&cg->refcount);
456}
457
/* Drop a reference on @cg outside of task exit. */
static inline void put_css_set(struct css_set *cg)
{
	const int taskexit = 0;

	__put_css_set(cg, taskexit);
}
462
/* Drop a reference on @cg on behalf of an exiting task. */
static inline void put_css_set_taskexit(struct css_set *cg)
{
	const int taskexit = 1;

	__put_css_set(cg, taskexit);
}
467
468
469
470
471
472
473
474
475
476
477
478static bool compare_css_sets(struct css_set *cg,
479 struct css_set *old_cg,
480 struct cgroup *new_cgrp,
481 struct cgroup_subsys_state *template[])
482{
483 struct list_head *l1, *l2;
484
485 if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
486
487 return false;
488 }
489
490
491
492
493
494
495
496
497
498
499 l1 = &cg->cg_links;
500 l2 = &old_cg->cg_links;
501 while (1) {
502 struct cg_cgroup_link *cgl1, *cgl2;
503 struct cgroup *cg1, *cg2;
504
505 l1 = l1->next;
506 l2 = l2->next;
507
508 if (l1 == &cg->cg_links) {
509 BUG_ON(l2 != &old_cg->cg_links);
510 break;
511 } else {
512 BUG_ON(l2 == &old_cg->cg_links);
513 }
514
515 cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
516 cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
517 cg1 = cgl1->cgrp;
518 cg2 = cgl2->cgrp;
519
520 BUG_ON(cg1->root != cg2->root);
521
522
523
524
525
526
527
528
529 if (cg1->root == new_cgrp->root) {
530 if (cg1 != new_cgrp)
531 return false;
532 } else {
533 if (cg1 != cg2)
534 return false;
535 }
536 }
537 return true;
538}
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553static struct css_set *find_existing_css_set(
554 struct css_set *oldcg,
555 struct cgroup *cgrp,
556 struct cgroup_subsys_state *template[])
557{
558 int i;
559 struct cgroupfs_root *root = cgrp->root;
560 struct css_set *cg;
561 unsigned long key;
562
563
564
565
566
567
568 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
569 if (root->subsys_mask & (1UL << i)) {
570
571
572
573 template[i] = cgrp->subsys[i];
574 } else {
575
576
577 template[i] = oldcg->subsys[i];
578 }
579 }
580
581 key = css_set_hash(template);
582 hash_for_each_possible(css_set_table, cg, hlist, key) {
583 if (!compare_css_sets(cg, oldcg, cgrp, template))
584 continue;
585
586
587 return cg;
588 }
589
590
591 return NULL;
592}
593
594static void free_cg_links(struct list_head *tmp)
595{
596 struct cg_cgroup_link *link;
597 struct cg_cgroup_link *saved_link;
598
599 list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
600 list_del(&link->cgrp_link_list);
601 kfree(link);
602 }
603}
604
605
606
607
608
609
610static int allocate_cg_links(int count, struct list_head *tmp)
611{
612 struct cg_cgroup_link *link;
613 int i;
614 INIT_LIST_HEAD(tmp);
615 for (i = 0; i < count; i++) {
616 link = kmalloc(sizeof(*link), GFP_KERNEL);
617 if (!link) {
618 free_cg_links(tmp);
619 return -ENOMEM;
620 }
621 list_add(&link->cgrp_link_list, tmp);
622 }
623 return 0;
624}
625
626
627
628
629
630
631
632static void link_css_set(struct list_head *tmp_cg_links,
633 struct css_set *cg, struct cgroup *cgrp)
634{
635 struct cg_cgroup_link *link;
636
637 BUG_ON(list_empty(tmp_cg_links));
638 link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
639 cgrp_link_list);
640 link->cg = cg;
641 link->cgrp = cgrp;
642 atomic_inc(&cgrp->count);
643 list_move(&link->cgrp_link_list, &cgrp->css_sets);
644
645
646
647
648 list_add_tail(&link->cg_link_list, &cg->cg_links);
649}
650
651
652
653
654
655
656
657
658static struct css_set *find_css_set(
659 struct css_set *oldcg, struct cgroup *cgrp)
660{
661 struct css_set *res;
662 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
663
664 struct list_head tmp_cg_links;
665
666 struct cg_cgroup_link *link;
667 unsigned long key;
668
669
670
671 read_lock(&css_set_lock);
672 res = find_existing_css_set(oldcg, cgrp, template);
673 if (res)
674 get_css_set(res);
675 read_unlock(&css_set_lock);
676
677 if (res)
678 return res;
679
680 res = kmalloc(sizeof(*res), GFP_KERNEL);
681 if (!res)
682 return NULL;
683
684
685 if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
686 kfree(res);
687 return NULL;
688 }
689
690 atomic_set(&res->refcount, 1);
691 INIT_LIST_HEAD(&res->cg_links);
692 INIT_LIST_HEAD(&res->tasks);
693 INIT_HLIST_NODE(&res->hlist);
694
695
696
697 memcpy(res->subsys, template, sizeof(res->subsys));
698
699 write_lock(&css_set_lock);
700
701 list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
702 struct cgroup *c = link->cgrp;
703 if (c->root == cgrp->root)
704 c = cgrp;
705 link_css_set(&tmp_cg_links, res, c);
706 }
707
708 BUG_ON(!list_empty(&tmp_cg_links));
709
710 css_set_count++;
711
712
713 key = css_set_hash(res->subsys);
714 hash_add(css_set_table, &res->hlist, key);
715
716 write_unlock(&css_set_lock);
717
718 return res;
719}
720
721
722
723
724
725static struct cgroup *task_cgroup_from_root(struct task_struct *task,
726 struct cgroupfs_root *root)
727{
728 struct css_set *css;
729 struct cgroup *res = NULL;
730
731 BUG_ON(!mutex_is_locked(&cgroup_mutex));
732 read_lock(&css_set_lock);
733
734
735
736
737
738 css = task->cgroups;
739 if (css == &init_css_set) {
740 res = &root->top_cgroup;
741 } else {
742 struct cg_cgroup_link *link;
743 list_for_each_entry(link, &css->cg_links, cg_link_list) {
744 struct cgroup *c = link->cgrp;
745 if (c->root == root) {
746 res = c;
747 break;
748 }
749 }
750 }
751 read_unlock(&css_set_lock);
752 BUG_ON(!res);
753 return res;
754}
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810void cgroup_lock(void)
811{
812 mutex_lock(&cgroup_mutex);
813}
814EXPORT_SYMBOL_GPL(cgroup_lock);
815
816
817
818
819
820
821void cgroup_unlock(void)
822{
823 mutex_unlock(&cgroup_mutex);
824}
825EXPORT_SYMBOL_GPL(cgroup_unlock);
826
827
828
829
830
831
832
833
834static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
835static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int);
836static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
837static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
838 unsigned long subsys_mask);
839static const struct inode_operations cgroup_dir_inode_operations;
840static const struct file_operations proc_cgroupstats_operations;
841
842static struct backing_dev_info cgroup_backing_dev_info = {
843 .name = "cgroup",
844 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
845};
846
847static int alloc_css_id(struct cgroup_subsys *ss,
848 struct cgroup *parent, struct cgroup *child);
849
850static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
851{
852 struct inode *inode = new_inode(sb);
853
854 if (inode) {
855 inode->i_ino = get_next_ino();
856 inode->i_mode = mode;
857 inode->i_uid = current_fsuid();
858 inode->i_gid = current_fsgid();
859 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
860 inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
861 }
862 return inode;
863}
864
865static void cgroup_free_fn(struct work_struct *work)
866{
867 struct cgroup *cgrp = container_of(work, struct cgroup, free_work);
868 struct cgroup_subsys *ss;
869
870 mutex_lock(&cgroup_mutex);
871
872
873
874 for_each_subsys(cgrp->root, ss)
875 ss->css_free(cgrp);
876
877 cgrp->root->number_of_cgroups--;
878 mutex_unlock(&cgroup_mutex);
879
880
881
882
883
884 deactivate_super(cgrp->root->sb);
885
886
887
888
889
890 BUG_ON(!list_empty(&cgrp->pidlists));
891
892 simple_xattrs_free(&cgrp->xattrs);
893
894 ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
895 kfree(cgrp);
896}
897
898static void cgroup_free_rcu(struct rcu_head *head)
899{
900 struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
901
902 schedule_work(&cgrp->free_work);
903}
904
905static void cgroup_diput(struct dentry *dentry, struct inode *inode)
906{
907
908 if (S_ISDIR(inode->i_mode)) {
909 struct cgroup *cgrp = dentry->d_fsdata;
910
911 BUG_ON(!(cgroup_is_removed(cgrp)));
912 call_rcu(&cgrp->rcu_head, cgroup_free_rcu);
913 } else {
914 struct cfent *cfe = __d_cfe(dentry);
915 struct cgroup *cgrp = dentry->d_parent->d_fsdata;
916
917 WARN_ONCE(!list_empty(&cfe->node) &&
918 cgrp != &cgrp->root->top_cgroup,
919 "cfe still linked for %s\n", cfe->type->name);
920 simple_xattrs_free(&cfe->xattrs);
921 kfree(cfe);
922 }
923 iput(inode);
924}
925
/* d_delete handler for cgroupfs: unconditionally returns 1 for every dentry. */
static int cgroup_delete(const struct dentry *d)
{
	(void)d;

	return 1;
}
930
931static void remove_dir(struct dentry *d)
932{
933 struct dentry *parent = dget(d->d_parent);
934
935 d_delete(d);
936 simple_rmdir(parent->d_inode, d);
937 dput(parent);
938}
939
940static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
941{
942 struct cfent *cfe;
943
944 lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex);
945 lockdep_assert_held(&cgroup_mutex);
946
947
948
949
950
951 list_for_each_entry(cfe, &cgrp->files, node) {
952 struct dentry *d = cfe->dentry;
953
954 if (cft && cfe->type != cft)
955 continue;
956
957 dget(d);
958 d_delete(d);
959 simple_unlink(cgrp->dentry->d_inode, d);
960 list_del_init(&cfe->node);
961 dput(d);
962
963 break;
964 }
965}
966
967
968
969
970
971
972
973static void cgroup_clear_directory(struct dentry *dir, bool base_files,
974 unsigned long subsys_mask)
975{
976 struct cgroup *cgrp = __d_cgrp(dir);
977 struct cgroup_subsys *ss;
978
979 for_each_subsys(cgrp->root, ss) {
980 struct cftype_set *set;
981 if (!test_bit(ss->subsys_id, &subsys_mask))
982 continue;
983 list_for_each_entry(set, &ss->cftsets, node)
984 cgroup_addrm_files(cgrp, NULL, set->cfts, false);
985 }
986 if (base_files) {
987 while (!list_empty(&cgrp->files))
988 cgroup_rm_file(cgrp, NULL);
989 }
990}
991
992
993
994
995static void cgroup_d_remove_dir(struct dentry *dentry)
996{
997 struct dentry *parent;
998 struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
999
1000 cgroup_clear_directory(dentry, true, root->subsys_mask);
1001
1002 parent = dentry->d_parent;
1003 spin_lock(&parent->d_lock);
1004 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1005 list_del_init(&dentry->d_u.d_child);
1006 spin_unlock(&dentry->d_lock);
1007 spin_unlock(&parent->d_lock);
1008 remove_dir(dentry);
1009}
1010
1011
1012
1013
1014
1015
1016static int rebind_subsystems(struct cgroupfs_root *root,
1017 unsigned long final_subsys_mask)
1018{
1019 unsigned long added_mask, removed_mask;
1020 struct cgroup *cgrp = &root->top_cgroup;
1021 int i;
1022
1023 BUG_ON(!mutex_is_locked(&cgroup_mutex));
1024 BUG_ON(!mutex_is_locked(&cgroup_root_mutex));
1025
1026 removed_mask = root->actual_subsys_mask & ~final_subsys_mask;
1027 added_mask = final_subsys_mask & ~root->actual_subsys_mask;
1028
1029 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1030 unsigned long bit = 1UL << i;
1031 struct cgroup_subsys *ss = subsys[i];
1032 if (!(bit & added_mask))
1033 continue;
1034
1035
1036
1037
1038
1039 BUG_ON(ss == NULL);
1040 if (ss->root != &rootnode) {
1041
1042 return -EBUSY;
1043 }
1044 }
1045
1046
1047
1048
1049
1050 if (root->number_of_cgroups > 1)
1051 return -EBUSY;
1052
1053
1054 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1055 struct cgroup_subsys *ss = subsys[i];
1056 unsigned long bit = 1UL << i;
1057 if (bit & added_mask) {
1058
1059 BUG_ON(ss == NULL);
1060 BUG_ON(cgrp->subsys[i]);
1061 BUG_ON(!dummytop->subsys[i]);
1062 BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
1063 cgrp->subsys[i] = dummytop->subsys[i];
1064 cgrp->subsys[i]->cgroup = cgrp;
1065 list_move(&ss->sibling, &root->subsys_list);
1066 ss->root = root;
1067 if (ss->bind)
1068 ss->bind(cgrp);
1069
1070 } else if (bit & removed_mask) {
1071
1072 BUG_ON(ss == NULL);
1073 BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
1074 BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
1075 if (ss->bind)
1076 ss->bind(dummytop);
1077 dummytop->subsys[i]->cgroup = dummytop;
1078 cgrp->subsys[i] = NULL;
1079 subsys[i]->root = &rootnode;
1080 list_move(&ss->sibling, &rootnode.subsys_list);
1081
1082 module_put(ss->module);
1083 } else if (bit & final_subsys_mask) {
1084
1085 BUG_ON(ss == NULL);
1086 BUG_ON(!cgrp->subsys[i]);
1087
1088
1089
1090
1091 module_put(ss->module);
1092#ifdef CONFIG_MODULE_UNLOAD
1093 BUG_ON(ss->module && !module_refcount(ss->module));
1094#endif
1095 } else {
1096
1097 BUG_ON(cgrp->subsys[i]);
1098 }
1099 }
1100 root->subsys_mask = root->actual_subsys_mask = final_subsys_mask;
1101
1102 return 0;
1103}
1104
1105static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
1106{
1107 struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
1108 struct cgroup_subsys *ss;
1109
1110 mutex_lock(&cgroup_root_mutex);
1111 for_each_subsys(root, ss)
1112 seq_printf(seq, ",%s", ss->name);
1113 if (test_bit(ROOT_NOPREFIX, &root->flags))
1114 seq_puts(seq, ",noprefix");
1115 if (test_bit(ROOT_XATTR, &root->flags))
1116 seq_puts(seq, ",xattr");
1117 if (strlen(root->release_agent_path))
1118 seq_printf(seq, ",release_agent=%s", root->release_agent_path);
1119 if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags))
1120 seq_puts(seq, ",clone_children");
1121 if (strlen(root->name))
1122 seq_printf(seq, ",name=%s", root->name);
1123 mutex_unlock(&cgroup_root_mutex);
1124 return 0;
1125}
1126
1127struct cgroup_sb_opts {
1128 unsigned long subsys_mask;
1129 unsigned long flags;
1130 char *release_agent;
1131 bool cpuset_clone_children;
1132 char *name;
1133
1134 bool none;
1135
1136 struct cgroupfs_root *new_root;
1137
1138};
1139
1140
1141
1142
1143
1144
1145
1146static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1147{
1148 char *token, *o = data;
1149 bool all_ss = false, one_ss = false;
1150 unsigned long mask = (unsigned long)-1;
1151 int i;
1152 bool module_pin_failed = false;
1153
1154 BUG_ON(!mutex_is_locked(&cgroup_mutex));
1155
1156#ifdef CONFIG_CPUSETS
1157 mask = ~(1UL << cpuset_subsys_id);
1158#endif
1159
1160 memset(opts, 0, sizeof(*opts));
1161
1162 while ((token = strsep(&o, ",")) != NULL) {
1163 if (!*token)
1164 return -EINVAL;
1165 if (!strcmp(token, "none")) {
1166
1167 opts->none = true;
1168 continue;
1169 }
1170 if (!strcmp(token, "all")) {
1171
1172 if (one_ss)
1173 return -EINVAL;
1174 all_ss = true;
1175 continue;
1176 }
1177 if (!strcmp(token, "noprefix")) {
1178 set_bit(ROOT_NOPREFIX, &opts->flags);
1179 continue;
1180 }
1181 if (!strcmp(token, "clone_children")) {
1182 opts->cpuset_clone_children = true;
1183 continue;
1184 }
1185 if (!strcmp(token, "xattr")) {
1186 set_bit(ROOT_XATTR, &opts->flags);
1187 continue;
1188 }
1189 if (!strncmp(token, "release_agent=", 14)) {
1190
1191 if (opts->release_agent)
1192 return -EINVAL;
1193 opts->release_agent =
1194 kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
1195 if (!opts->release_agent)
1196 return -ENOMEM;
1197 continue;
1198 }
1199 if (!strncmp(token, "name=", 5)) {
1200 const char *name = token + 5;
1201
1202 if (!strlen(name))
1203 return -EINVAL;
1204
1205 for (i = 0; i < strlen(name); i++) {
1206 char c = name[i];
1207 if (isalnum(c))
1208 continue;
1209 if ((c == '.') || (c == '-') || (c == '_'))
1210 continue;
1211 return -EINVAL;
1212 }
1213
1214 if (opts->name)
1215 return -EINVAL;
1216 opts->name = kstrndup(name,
1217 MAX_CGROUP_ROOT_NAMELEN - 1,
1218 GFP_KERNEL);
1219 if (!opts->name)
1220 return -ENOMEM;
1221
1222 continue;
1223 }
1224
1225 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1226 struct cgroup_subsys *ss = subsys[i];
1227 if (ss == NULL)
1228 continue;
1229 if (strcmp(token, ss->name))
1230 continue;
1231 if (ss->disabled)
1232 continue;
1233
1234
1235 if (all_ss)
1236 return -EINVAL;
1237 set_bit(i, &opts->subsys_mask);
1238 one_ss = true;
1239
1240 break;
1241 }
1242 if (i == CGROUP_SUBSYS_COUNT)
1243 return -ENOENT;
1244 }
1245
1246
1247
1248
1249
1250
1251 if (all_ss || (!one_ss && !opts->none && !opts->name)) {
1252 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1253 struct cgroup_subsys *ss = subsys[i];
1254 if (ss == NULL)
1255 continue;
1256 if (ss->disabled)
1257 continue;
1258 set_bit(i, &opts->subsys_mask);
1259 }
1260 }
1261
1262
1263
1264
1265
1266
1267
1268
1269 if (test_bit(ROOT_NOPREFIX, &opts->flags) &&
1270 (opts->subsys_mask & mask))
1271 return -EINVAL;
1272
1273
1274
1275 if (opts->subsys_mask && opts->none)
1276 return -EINVAL;
1277
1278
1279
1280
1281
1282 if (!opts->subsys_mask && !opts->name)
1283 return -EINVAL;
1284
1285
1286
1287
1288
1289
1290
1291 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1292 unsigned long bit = 1UL << i;
1293
1294 if (!(bit & opts->subsys_mask))
1295 continue;
1296 if (!try_module_get(subsys[i]->module)) {
1297 module_pin_failed = true;
1298 break;
1299 }
1300 }
1301 if (module_pin_failed) {
1302
1303
1304
1305
1306
1307 for (i--; i >= 0; i--) {
1308
1309 unsigned long bit = 1UL << i;
1310
1311 if (!(bit & opts->subsys_mask))
1312 continue;
1313 module_put(subsys[i]->module);
1314 }
1315 return -ENOENT;
1316 }
1317
1318 return 0;
1319}
1320
1321static void drop_parsed_module_refcounts(unsigned long subsys_mask)
1322{
1323 int i;
1324 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1325 unsigned long bit = 1UL << i;
1326
1327 if (!(bit & subsys_mask))
1328 continue;
1329 module_put(subsys[i]->module);
1330 }
1331}
1332
1333static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1334{
1335 int ret = 0;
1336 struct cgroupfs_root *root = sb->s_fs_info;
1337 struct cgroup *cgrp = &root->top_cgroup;
1338 struct cgroup_sb_opts opts;
1339 unsigned long added_mask, removed_mask;
1340
1341 mutex_lock(&cgrp->dentry->d_inode->i_mutex);
1342 mutex_lock(&cgroup_mutex);
1343 mutex_lock(&cgroup_root_mutex);
1344
1345
1346 ret = parse_cgroupfs_options(data, &opts);
1347 if (ret)
1348 goto out_unlock;
1349
1350 if (opts.subsys_mask != root->actual_subsys_mask || opts.release_agent)
1351 pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n",
1352 task_tgid_nr(current), current->comm);
1353
1354 added_mask = opts.subsys_mask & ~root->subsys_mask;
1355 removed_mask = root->subsys_mask & ~opts.subsys_mask;
1356
1357
1358 if (opts.flags != root->flags ||
1359 (opts.name && strcmp(opts.name, root->name))) {
1360 ret = -EINVAL;
1361 drop_parsed_module_refcounts(opts.subsys_mask);
1362 goto out_unlock;
1363 }
1364
1365
1366
1367
1368
1369
1370 cgroup_clear_directory(cgrp->dentry, false, removed_mask);
1371
1372 ret = rebind_subsystems(root, opts.subsys_mask);
1373 if (ret) {
1374
1375 cgroup_populate_dir(cgrp, false, removed_mask);
1376 drop_parsed_module_refcounts(opts.subsys_mask);
1377 goto out_unlock;
1378 }
1379
1380
1381 cgroup_populate_dir(cgrp, false, added_mask);
1382
1383 if (opts.release_agent)
1384 strcpy(root->release_agent_path, opts.release_agent);
1385 out_unlock:
1386 kfree(opts.release_agent);
1387 kfree(opts.name);
1388 mutex_unlock(&cgroup_root_mutex);
1389 mutex_unlock(&cgroup_mutex);
1390 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
1391 return ret;
1392}
1393
1394static const struct super_operations cgroup_ops = {
1395 .statfs = simple_statfs,
1396 .drop_inode = generic_delete_inode,
1397 .show_options = cgroup_show_options,
1398 .remount_fs = cgroup_remount,
1399};
1400
1401static void init_cgroup_housekeeping(struct cgroup *cgrp)
1402{
1403 INIT_LIST_HEAD(&cgrp->sibling);
1404 INIT_LIST_HEAD(&cgrp->children);
1405 INIT_LIST_HEAD(&cgrp->files);
1406 INIT_LIST_HEAD(&cgrp->css_sets);
1407 INIT_LIST_HEAD(&cgrp->allcg_node);
1408 INIT_LIST_HEAD(&cgrp->release_list);
1409 INIT_LIST_HEAD(&cgrp->pidlists);
1410 INIT_WORK(&cgrp->free_work, cgroup_free_fn);
1411 mutex_init(&cgrp->pidlist_mutex);
1412 INIT_LIST_HEAD(&cgrp->event_list);
1413 spin_lock_init(&cgrp->event_list_lock);
1414 simple_xattrs_init(&cgrp->xattrs);
1415}
1416
1417static void init_cgroup_root(struct cgroupfs_root *root)
1418{
1419 struct cgroup *cgrp = &root->top_cgroup;
1420
1421 INIT_LIST_HEAD(&root->subsys_list);
1422 INIT_LIST_HEAD(&root->root_list);
1423 INIT_LIST_HEAD(&root->allcg_list);
1424 root->number_of_cgroups = 1;
1425 cgrp->root = root;
1426 cgrp->top_cgroup = cgrp;
1427 init_cgroup_housekeeping(cgrp);
1428 list_add_tail(&cgrp->allcg_node, &root->allcg_list);
1429}
1430
1431static bool init_root_id(struct cgroupfs_root *root)
1432{
1433 int ret = 0;
1434
1435 do {
1436 if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL))
1437 return false;
1438 spin_lock(&hierarchy_id_lock);
1439
1440 ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
1441 &root->hierarchy_id);
1442 if (ret == -ENOSPC)
1443
1444 ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id);
1445 if (!ret) {
1446 next_hierarchy_id = root->hierarchy_id + 1;
1447 } else if (ret != -EAGAIN) {
1448
1449 BUG_ON(ret);
1450 }
1451 spin_unlock(&hierarchy_id_lock);
1452 } while (ret);
1453 return true;
1454}
1455
1456static int cgroup_test_super(struct super_block *sb, void *data)
1457{
1458 struct cgroup_sb_opts *opts = data;
1459 struct cgroupfs_root *root = sb->s_fs_info;
1460
1461
1462 if (opts->name && strcmp(opts->name, root->name))
1463 return 0;
1464
1465
1466
1467
1468
1469 if ((opts->subsys_mask || opts->none)
1470 && (opts->subsys_mask != root->subsys_mask))
1471 return 0;
1472
1473 return 1;
1474}
1475
1476static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
1477{
1478 struct cgroupfs_root *root;
1479
1480 if (!opts->subsys_mask && !opts->none)
1481 return NULL;
1482
1483 root = kzalloc(sizeof(*root), GFP_KERNEL);
1484 if (!root)
1485 return ERR_PTR(-ENOMEM);
1486
1487 if (!init_root_id(root)) {
1488 kfree(root);
1489 return ERR_PTR(-ENOMEM);
1490 }
1491 init_cgroup_root(root);
1492
1493 root->subsys_mask = opts->subsys_mask;
1494 root->flags = opts->flags;
1495 ida_init(&root->cgroup_ida);
1496 if (opts->release_agent)
1497 strcpy(root->release_agent_path, opts->release_agent);
1498 if (opts->name)
1499 strcpy(root->name, opts->name);
1500 if (opts->cpuset_clone_children)
1501 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags);
1502 return root;
1503}
1504
1505static void cgroup_drop_root(struct cgroupfs_root *root)
1506{
1507 if (!root)
1508 return;
1509
1510 BUG_ON(!root->hierarchy_id);
1511 spin_lock(&hierarchy_id_lock);
1512 ida_remove(&hierarchy_ida, root->hierarchy_id);
1513 spin_unlock(&hierarchy_id_lock);
1514 ida_destroy(&root->cgroup_ida);
1515 kfree(root);
1516}
1517
1518static int cgroup_set_super(struct super_block *sb, void *data)
1519{
1520 int ret;
1521 struct cgroup_sb_opts *opts = data;
1522
1523
1524 if (!opts->new_root)
1525 return -EINVAL;
1526
1527 BUG_ON(!opts->subsys_mask && !opts->none);
1528
1529 ret = set_anon_super(sb, NULL);
1530 if (ret)
1531 return ret;
1532
1533 sb->s_fs_info = opts->new_root;
1534 opts->new_root->sb = sb;
1535
1536 sb->s_blocksize = PAGE_CACHE_SIZE;
1537 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1538 sb->s_magic = CGROUP_SUPER_MAGIC;
1539 sb->s_op = &cgroup_ops;
1540
1541 return 0;
1542}
1543
1544static int cgroup_get_rootdir(struct super_block *sb)
1545{
1546 static const struct dentry_operations cgroup_dops = {
1547 .d_iput = cgroup_diput,
1548 .d_delete = cgroup_delete,
1549 };
1550
1551 struct inode *inode =
1552 cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
1553
1554 if (!inode)
1555 return -ENOMEM;
1556
1557 inode->i_fop = &simple_dir_operations;
1558 inode->i_op = &cgroup_dir_inode_operations;
1559
1560 inc_nlink(inode);
1561 sb->s_root = d_make_root(inode);
1562 if (!sb->s_root)
1563 return -ENOMEM;
1564
1565 sb->s_d_op = &cgroup_dops;
1566 return 0;
1567}
1568
/*
 * cgroup_mount - ->mount handler of the "cgroup" filesystem type
 * @fs_type: filesystem type, passed through to sget()
 * @flags: mount flags (not referenced in this function)
 * @unused_dev_name: device name (not referenced in this function)
 * @data: raw mount options, handed to parse_cgroupfs_options()
 *
 * Parses the mount options, builds a candidate cgroupfs_root from them and
 * asks sget() for a superblock.  If the superblock ends up carrying the
 * candidate root, a brand new hierarchy is initialised here; otherwise the
 * candidate is dropped and the superblock's existing root is reused.
 *
 * Returns a referenced root dentry on success or an ERR_PTR() on failure.
 */
static struct dentry *cgroup_mount(struct file_system_type *fs_type,
			 int flags, const char *unused_dev_name,
			 void *data)
{
	struct cgroup_sb_opts opts;
	struct cgroupfs_root *root;
	int ret = 0;
	struct super_block *sb;
	struct cgroupfs_root *new_root;
	struct inode *inode;

	/* option parsing is done with cgroup_mutex held */
	mutex_lock(&cgroup_mutex);
	ret = parse_cgroupfs_options(data, &opts);
	mutex_unlock(&cgroup_mutex);
	if (ret)
		goto out_err;

	/*
	 * Build the candidate root before looking for a superblock; from
	 * here on failures must also go through drop_modules.
	 */
	new_root = cgroup_root_from_opts(&opts);
	if (IS_ERR(new_root)) {
		ret = PTR_ERR(new_root);
		goto drop_modules;
	}
	opts.new_root = new_root;

	/* find a matching superblock or set up a new one around opts */
	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, 0, &opts);
	if (IS_ERR(sb)) {
		ret = PTR_ERR(sb);
		cgroup_drop_root(opts.new_root);
		goto drop_modules;
	}

	root = sb->s_fs_info;
	BUG_ON(!root);
	if (root == opts.new_root) {
		/* the superblock uses our candidate root: new hierarchy */
		struct list_head tmp_cg_links;
		struct cgroup *root_cgrp = &root->top_cgroup;
		struct cgroupfs_root *existing_root;
		const struct cred *cred;
		int i;
		struct css_set *cg;

		BUG_ON(sb->s_root != NULL);

		ret = cgroup_get_rootdir(sb);
		if (ret)
			goto drop_new_super;
		inode = sb->s_root->d_inode;

		/* lock order: i_mutex, then cgroup_mutex, then cgroup_root_mutex */
		mutex_lock(&inode->i_mutex);
		mutex_lock(&cgroup_mutex);
		mutex_lock(&cgroup_root_mutex);

		/* a non-empty hierarchy name must not clash with an active root */
		ret = -EBUSY;
		if (strlen(root->name))
			for_each_active_root(existing_root)
				if (!strcmp(existing_root->name, root->name))
					goto unlock_drop;

		/*
		 * Preallocate one link per existing css_set (css_set_count)
		 * up front; they are consumed by link_css_set() further down
		 * while css_set_lock is write-held.
		 */
		ret = allocate_cg_links(css_set_count, &tmp_cg_links);
		if (ret)
			goto unlock_drop;

		ret = rebind_subsystems(root, root->subsys_mask);
		if (ret == -EBUSY) {
			free_cg_links(&tmp_cg_links);
			goto unlock_drop;
		}

		/* -EBUSY is the only failure that is handled; anything else is fatal */
		BUG_ON(ret);

		list_add(&root->root_list, &roots);
		root_count++;

		sb->s_root->d_fsdata = root_cgrp;
		root->top_cgroup.dentry = sb->s_root;

		/* link every css_set in the hash table to the new top cgroup */
		write_lock(&css_set_lock);
		hash_for_each(css_set_table, i, cg, hlist)
			link_css_set(&tmp_cg_links, cg, root_cgrp);
		write_unlock(&css_set_lock);

		/* release whatever links were not consumed above */
		free_cg_links(&tmp_cg_links);

		BUG_ON(!list_empty(&root_cgrp->children));
		BUG_ON(root->number_of_cgroups != 1);

		/* populate the root directory using init_cred rather than the caller's creds */
		cred = override_creds(&init_cred);
		cgroup_populate_dir(root_cgrp, true, root->subsys_mask);
		revert_creds(cred);
		mutex_unlock(&cgroup_root_mutex);
		mutex_unlock(&cgroup_mutex);
		mutex_unlock(&inode->i_mutex);
	} else {
		/*
		 * The superblock already had a different root: the candidate
		 * is not needed.
		 */
		cgroup_drop_root(opts.new_root);
		/* drop the module references that belong to the parsed options */
		drop_parsed_module_refcounts(opts.subsys_mask);
	}

	kfree(opts.release_agent);
	kfree(opts.name);
	return dget(sb->s_root);

 unlock_drop:
	mutex_unlock(&cgroup_root_mutex);
	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&inode->i_mutex);
 drop_new_super:
	deactivate_locked_super(sb);
 drop_modules:
	drop_parsed_module_refcounts(opts.subsys_mask);
 out_err:
	kfree(opts.release_agent);
	kfree(opts.name);
	return ERR_PTR(ret);
}
1711
/*
 * cgroup_kill_sb - ->kill_sb handler of the "cgroup" filesystem type
 * @sb: superblock being torn down; sb->s_fs_info holds its cgroupfs_root
 *
 * Unbinds all subsystems from the root, frees every cg_cgroup_link attached
 * to the top cgroup, takes the root off the root list if it is on one,
 * frees the top cgroup's xattrs and finally kills the superblock and drops
 * the root.  The hierarchy must consist of the top cgroup only.
 */
static void cgroup_kill_sb(struct super_block *sb) {
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	int ret;
	struct cg_cgroup_link *link;
	struct cg_cgroup_link *saved_link;

	BUG_ON(!root);

	/* only the top cgroup may be left at this point */
	BUG_ON(root->number_of_cgroups != 1);
	BUG_ON(!list_empty(&cgrp->children));

	mutex_lock(&cgroup_mutex);
	mutex_lock(&cgroup_root_mutex);

	/* rebind to an empty subsystem mask, i.e. detach everything */
	ret = rebind_subsystems(root, 0);
	/* failure to unbind is treated as fatal */
	BUG_ON(ret);

	/*
	 * Tear down all links between css_sets and this root's top cgroup;
	 * each link sits on two lists and is removed from both.
	 */
	write_lock(&css_set_lock);

	list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
				 cgrp_link_list) {
		list_del(&link->cg_link_list);
		list_del(&link->cgrp_link_list);
		kfree(link);
	}
	write_unlock(&css_set_lock);

	/* the root may never have been added to the root list */
	if (!list_empty(&root->root_list)) {
		list_del(&root->root_list);
		root_count--;
	}

	mutex_unlock(&cgroup_root_mutex);
	mutex_unlock(&cgroup_mutex);

	simple_xattrs_free(&cgrp->xattrs);

	kill_litter_super(sb);
	cgroup_drop_root(root);
}
1759
/*
 * Filesystem type descriptor for "cgroup": mounting goes through
 * cgroup_mount() and superblock teardown through cgroup_kill_sb().
 */
static struct file_system_type cgroup_fs_type = {
	.name = "cgroup",
	.mount = cgroup_mount,
	.kill_sb = cgroup_kill_sb,
};
1765
/* kobject pointer for cgroup; it is set up and used outside this section */
static struct kobject *cgroup_kobj;
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1779{
1780 struct dentry *dentry = cgrp->dentry;
1781 char *start;
1782
1783 rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(),
1784 "cgroup_path() called without proper locking");
1785
1786 if (cgrp == dummytop) {
1787
1788
1789
1790
1791 strcpy(buf, "/");
1792 return 0;
1793 }
1794
1795 start = buf + buflen - 1;
1796
1797 *start = '\0';
1798 for (;;) {
1799 int len = dentry->d_name.len;
1800
1801 if ((start -= len) < buf)
1802 return -ENAMETOOLONG;
1803 memcpy(start, dentry->d_name.name, len);
1804 cgrp = cgrp->parent;
1805 if (!cgrp)
1806 break;
1807
1808 dentry = cgrp->dentry;
1809 if (!cgrp->parent)
1810 continue;
1811 if (--start < buf)
1812 return -ENAMETOOLONG;
1813 *start = '/';
1814 }
1815 memmove(buf, start, buf + buflen - start);
1816 return 0;
1817}
1818EXPORT_SYMBOL_GPL(cgroup_path);
1819
1820
1821
1822
/*
 * One task that is being attached, together with the cgroup it is leaving
 * and the css_set it is going to use.
 */
struct task_and_cgroup {
	struct task_struct *task;	/* task being migrated */
	struct cgroup *cgrp;		/* cgroup the task is in before the move */
	struct css_set *cg;		/* css_set looked up for the destination */
};
1828
/*
 * Set of tasks handed to the can_attach/attach/cancel_attach callbacks.
 * Either @single describes the only task (tc_array is NULL) or @tc_array
 * holds tc_array_len task_and_cgroup entries.
 */
struct cgroup_taskset {
	struct task_and_cgroup single;	/* used when tc_array is NULL */
	struct flex_array *tc_array;	/* array of task_and_cgroup, or NULL */
	int tc_array_len;		/* number of valid entries in tc_array */
	int idx;			/* iteration cursor into tc_array */
	struct cgroup *cur_cgrp;	/* ->cgrp of the entry returned last */
};
1836
1837
1838
1839
1840
1841
1842
1843struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
1844{
1845 if (tset->tc_array) {
1846 tset->idx = 0;
1847 return cgroup_taskset_next(tset);
1848 } else {
1849 tset->cur_cgrp = tset->single.cgrp;
1850 return tset->single.task;
1851 }
1852}
1853EXPORT_SYMBOL_GPL(cgroup_taskset_first);
1854
1855
1856
1857
1858
1859
1860
1861
1862struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
1863{
1864 struct task_and_cgroup *tc;
1865
1866 if (!tset->tc_array || tset->idx >= tset->tc_array_len)
1867 return NULL;
1868
1869 tc = flex_array_get(tset->tc_array, tset->idx++);
1870 tset->cur_cgrp = tc->cgrp;
1871 return tc->task;
1872}
1873EXPORT_SYMBOL_GPL(cgroup_taskset_next);
1874
1875
1876
1877
1878
1879
1880
1881
1882
/**
 * cgroup_taskset_cur_cgroup - cgroup recorded for the current task
 * @tset: taskset of interest
 *
 * Returns the cgroup stored by the most recent cgroup_taskset_first() or
 * cgroup_taskset_next() call on @tset.
 */
struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset)
{
	return tset->cur_cgrp;
}
EXPORT_SYMBOL_GPL(cgroup_taskset_cur_cgroup);
1888
1889
1890
1891
1892
1893int cgroup_taskset_size(struct cgroup_taskset *tset)
1894{
1895 return tset->tc_array ? tset->tc_array_len : 1;
1896}
1897EXPORT_SYMBOL_GPL(cgroup_taskset_size);
1898
1899
1900
1901
1902
1903
1904
/*
 * cgroup_task_migrate - switch a task over to a new css_set
 * @cgrp: destination cgroup (not referenced in the body)
 * @oldcgrp: cgroup the task is leaving; gets CGRP_RELEASABLE set
 * @tsk: task being moved
 * @newcg: css_set the task will point to afterwards
 *
 * Publishes @newcg in tsk->cgroups, moves the task onto @newcg's task list
 * if it is on a css_set task list at all, and drops the reference on the
 * previous css_set.
 */
static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
				struct task_struct *tsk, struct css_set *newcg)
{
	struct css_set *oldcg;

	/* callers are expected not to pass in an exiting task */
	WARN_ON_ONCE(tsk->flags & PF_EXITING);
	oldcg = tsk->cgroups;

	/* the pointer is switched under task_lock() with RCU publish semantics */
	task_lock(tsk);
	rcu_assign_pointer(tsk->cgroups, newcg);
	task_unlock(tsk);

	/* only tasks that are already linked on a css_set list are moved */
	write_lock(&css_set_lock);
	if (!list_empty(&tsk->cg_list))
		list_move(&tsk->cg_list, &newcg->tasks);
	write_unlock(&css_set_lock);

	/*
	 * Mark the old cgroup releasable before dropping the reference on
	 * the old css_set.
	 */
	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
	put_css_set(oldcg);
}
1936
1937
1938
1939
1940
1941
1942
1943
1944
/**
 * cgroup_attach_task - attach a single task to a cgroup
 * @cgrp: destination cgroup
 * @tsk: task to move
 *
 * Asks every subsystem of @cgrp's hierarchy for permission via ->can_attach,
 * migrates the task and then notifies the subsystems via ->attach.  On
 * failure, ->cancel_attach is called for the subsystems that had already
 * accepted.
 *
 * Returns 0 on success (including when @tsk is already in @cgrp), -ESRCH if
 * the task is exiting, -ENOMEM if no css_set could be obtained, or the error
 * returned by a ->can_attach callback.
 */
int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
	int retval = 0;
	struct cgroup_subsys *ss, *failed_ss = NULL;
	struct cgroup *oldcgrp;
	struct cgroupfs_root *root = cgrp->root;
	struct cgroup_taskset tset = { };
	struct css_set *newcg;

	/* exiting tasks are not moved */
	if (tsk->flags & PF_EXITING)
		return -ESRCH;

	/* nothing to do if the task is already in the destination cgroup */
	oldcgrp = task_cgroup_from_root(tsk, root);
	if (cgrp == oldcgrp)
		return 0;

	/* single-task taskset: tc_array stays NULL */
	tset.single.task = tsk;
	tset.single.cgrp = oldcgrp;

	for_each_subsys(root, ss) {
		if (ss->can_attach) {
			retval = ss->can_attach(cgrp, &tset);
			if (retval) {
				/*
				 * Remember which subsystem refused so that
				 * the rollback below stops in front of it.
				 */
				failed_ss = ss;
				goto out;
			}
		}
	}

	newcg = find_css_set(tsk->cgroups, cgrp);
	if (!newcg) {
		/* failed_ss stays NULL: every subsystem gets cancelled */
		retval = -ENOMEM;
		goto out;
	}

	cgroup_task_migrate(cgrp, oldcgrp, tsk, newcg);

	for_each_subsys(root, ss) {
		if (ss->attach)
			ss->attach(cgrp, &tset);
	}

out:
	if (retval) {
		for_each_subsys(root, ss) {
			if (ss == failed_ss)
				/*
				 * The subsystem that failed, and all after
				 * it, never accepted the attach, so they are
				 * not cancelled.
				 */
				break;
			if (ss->cancel_attach)
				ss->cancel_attach(cgrp, &tset);
		}
	}
	return retval;
}
2012
2013
2014
2015
2016
2017
2018int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
2019{
2020 struct cgroupfs_root *root;
2021 int retval = 0;
2022
2023 cgroup_lock();
2024 for_each_active_root(root) {
2025 struct cgroup *from_cg = task_cgroup_from_root(from, root);
2026
2027 retval = cgroup_attach_task(from_cg, tsk);
2028 if (retval)
2029 break;
2030 }
2031 cgroup_unlock();
2032
2033 return retval;
2034}
2035EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
2036
2037
2038
2039
2040
2041
2042
2043
2044
/*
 * cgroup_attach_proc - attach all threads of a thread group to a cgroup
 * @cgrp: destination cgroup
 * @leader: thread group leader whose threads are moved
 *
 * Runs in five steps: snapshot the threads into a flex_array, ask the
 * subsystems via ->can_attach, look up a css_set for every thread, migrate
 * all threads, and finally call ->attach.  All allocations happen before the
 * first thread is migrated.
 *
 * Returns 0 on success (also when there is nothing to move), -ENOMEM on
 * allocation failure, or the error returned by a ->can_attach callback.
 */
static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
{
	int retval, i, group_size;
	struct cgroup_subsys *ss, *failed_ss = NULL;
	/* hierarchy the destination cgroup belongs to */
	struct cgroupfs_root *root = cgrp->root;
	/* thread snapshot and per-entry cursor */
	struct task_struct *tsk;
	struct task_and_cgroup *tc;
	struct flex_array *group;
	struct cgroup_taskset tset = { };

	/*
	 * Step 0: size the array from the current thread count and
	 * preallocate all of it, so that storing entries below cannot fail.
	 */
	group_size = get_nr_threads(leader);
	/* flex_array supports very large thread groups better than kmalloc */
	group = flex_array_alloc(sizeof(*tc), group_size, GFP_KERNEL);
	if (!group)
		return -ENOMEM;
	/* pre-allocate to guarantee space while iterating in rcu read-side */
	retval = flex_array_prealloc(group, 0, group_size, GFP_KERNEL);
	if (retval)
		goto out_free_group_list;

	tsk = leader;
	i = 0;

	/* walk the thread list under RCU and record the threads to move */
	rcu_read_lock();
	do {
		struct task_and_cgroup ent;

		/* exiting threads are left out */
		if (tsk->flags & PF_EXITING)
			continue;

		/* more threads than were counted above would overflow the array */
		BUG_ON(i >= group_size);
		ent.task = tsk;
		ent.cgrp = task_cgroup_from_root(tsk, root);
		/* nothing to do if this thread is already in the cgroup */
		if (ent.cgrp == cgrp)
			continue;

		/*
		 * ent.cg is not set here; it is filled in by step 2 before
		 * anything reads it.  The array was preallocated, hence the
		 * BUG_ON on failure.
		 */
		retval = flex_array_put(group, i, &ent, GFP_ATOMIC);
		BUG_ON(retval != 0);
		i++;
	} while_each_thread(leader, tsk);
	rcu_read_unlock();
	/* remember the number of threads that were actually recorded */
	group_size = i;
	tset.tc_array = group;
	tset.tc_array_len = group_size;

	/* no thread needs to move: report success */
	retval = 0;
	if (!group_size)
		goto out_free_group_list;

	/*
	 * Step 1: check that every subsystem allows the whole set to attach.
	 */
	for_each_subsys(root, ss) {
		if (ss->can_attach) {
			retval = ss->can_attach(cgrp, &tset);
			if (retval) {
				failed_ss = ss;
				goto out_cancel_attach;
			}
		}
	}

	/*
	 * Step 2: obtain a css_set for every thread before migrating any of
	 * them.  On failure the failing entry's ->cg is NULL, which is what
	 * terminates the cleanup loop below.
	 */
	for (i = 0; i < group_size; i++) {
		tc = flex_array_get(group, i);
		tc->cg = find_css_set(tc->task->cgroups, cgrp);
		if (!tc->cg) {
			retval = -ENOMEM;
			goto out_put_css_set_refs;
		}
	}

	/*
	 * Step 3: migrate every thread.  Nothing in this loop reports an
	 * error.
	 */
	for (i = 0; i < group_size; i++) {
		tc = flex_array_get(group, i);
		cgroup_task_migrate(cgrp, tc->cgrp, tc->task, tc->cg);
	}
	/* nothing is sensitive to fork() after this point. */

	/*
	 * Step 4: tell the subsystems that the threads have been moved.
	 */
	for_each_subsys(root, ss) {
		if (ss->attach)
			ss->attach(cgrp, &tset);
	}

	/*
	 * Step 5: success; the cleanup labels below are all conditional on
	 * retval and only the array itself is freed.
	 */
	retval = 0;
out_put_css_set_refs:
	if (retval) {
		/* drop the css_set references taken before the failing entry */
		for (i = 0; i < group_size; i++) {
			tc = flex_array_get(group, i);
			if (!tc->cg)
				break;
			put_css_set(tc->cg);
		}
	}
out_cancel_attach:
	if (retval) {
		/* cancel only for subsystems in front of the one that refused */
		for_each_subsys(root, ss) {
			if (ss == failed_ss)
				break;
			if (ss->cancel_attach)
				ss->cancel_attach(cgrp, &tset);
		}
	}
out_free_group_list:
	flex_array_free(group);
	return retval;
}
2186
2187
2188
2189
2190
2191
/*
 * attach_task_by_pid - find a task by pid and move it (or its whole thread
 * group) into a cgroup
 * @cgrp: destination cgroup
 * @pid: pid to look up with find_task_by_vpid(); 0 means "current"
 * @threadgroup: if true, move the entire thread group of the task
 *
 * Takes the cgroup lock via cgroup_lock_live_group() and holds it until the
 * function returns.
 *
 * Returns 0 on success, -ENODEV if @cgrp has been removed, -ESRCH if no task
 * has that pid, -EACCES if the caller's credentials do not allow the move,
 * -EINVAL for kthreadd or PF_THREAD_BOUND tasks, or the error of the attach
 * function.
 */
static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
{
	struct task_struct *tsk;
	const struct cred *cred = current_cred(), *tcred;
	int ret;

	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;

retry_find_task:
	rcu_read_lock();
	if (pid) {
		tsk = find_task_by_vpid(pid);
		if (!tsk) {
			rcu_read_unlock();
			ret= -ESRCH;
			goto out_unlock_cgroup;
		}
		/*
		 * The caller's euid must be root or match the target's uid
		 * or suid.
		 */
		tcred = __task_cred(tsk);
		if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
		    !uid_eq(cred->euid, tcred->uid) &&
		    !uid_eq(cred->euid, tcred->suid)) {
			rcu_read_unlock();
			ret = -EACCES;
			goto out_unlock_cgroup;
		}
	} else
		tsk = current;

	if (threadgroup)
		tsk = tsk->group_leader;

	/*
	 * kthreadd itself and tasks flagged PF_THREAD_BOUND are refused.
	 */
	if (tsk == kthreadd_task || (tsk->flags & PF_THREAD_BOUND)) {
		ret = -EINVAL;
		rcu_read_unlock();
		goto out_unlock_cgroup;
	}

	/* pin the task before leaving the RCU read-side section */
	get_task_struct(tsk);
	rcu_read_unlock();

	threadgroup_lock(tsk);
	if (threadgroup) {
		if (!thread_group_leader(tsk)) {
			/*
			 * The task stopped being the group leader between
			 * the lookup and threadgroup_lock(); drop everything
			 * and look the task up again.
			 */
			threadgroup_unlock(tsk);
			put_task_struct(tsk);
			goto retry_find_task;
		}
		ret = cgroup_attach_proc(cgrp, tsk);
	} else
		ret = cgroup_attach_task(cgrp, tsk);
	threadgroup_unlock(tsk);

	put_task_struct(tsk);
out_unlock_cgroup:
	cgroup_unlock();
	return ret;
}
2266
/* u64 write handler: move the single task with id @pid into @cgrp. */
static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
{
	return attach_task_by_pid(cgrp, pid, false);
}
2271
/* u64 write handler: move the whole thread group of @tgid into @cgrp. */
static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
{
	return attach_task_by_pid(cgrp, tgid, true);
}
2276
2277
2278
2279
2280
2281
2282
2283
2284bool cgroup_lock_live_group(struct cgroup *cgrp)
2285{
2286 mutex_lock(&cgroup_mutex);
2287 if (cgroup_is_removed(cgrp)) {
2288 mutex_unlock(&cgroup_mutex);
2289 return false;
2290 }
2291 return true;
2292}
2293EXPORT_SYMBOL_GPL(cgroup_lock_live_group);
2294
2295static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
2296 const char *buffer)
2297{
2298 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
2299 if (strlen(buffer) >= PATH_MAX)
2300 return -EINVAL;
2301 if (!cgroup_lock_live_group(cgrp))
2302 return -ENODEV;
2303 mutex_lock(&cgroup_root_mutex);
2304 strcpy(cgrp->root->release_agent_path, buffer);
2305 mutex_unlock(&cgroup_root_mutex);
2306 cgroup_unlock();
2307 return 0;
2308}
2309
2310static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
2311 struct seq_file *seq)
2312{
2313 if (!cgroup_lock_live_group(cgrp))
2314 return -ENODEV;
2315 seq_puts(seq, cgrp->root->release_agent_path);
2316 seq_putc(seq, '\n');
2317 cgroup_unlock();
2318 return 0;
2319}
2320
2321
/* size of the on-stack buffers used by the cgroup file read/write helpers */
#define CGROUP_LOCAL_BUFFER_SIZE 64
2323
2324static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
2325 struct file *file,
2326 const char __user *userbuf,
2327 size_t nbytes, loff_t *unused_ppos)
2328{
2329 char buffer[CGROUP_LOCAL_BUFFER_SIZE];
2330 int retval = 0;
2331 char *end;
2332
2333 if (!nbytes)
2334 return -EINVAL;
2335 if (nbytes >= sizeof(buffer))
2336 return -E2BIG;
2337 if (copy_from_user(buffer, userbuf, nbytes))
2338 return -EFAULT;
2339
2340 buffer[nbytes] = 0;
2341 if (cft->write_u64) {
2342 u64 val = simple_strtoull(strstrip(buffer), &end, 0);
2343 if (*end)
2344 return -EINVAL;
2345 retval = cft->write_u64(cgrp, cft, val);
2346 } else {
2347 s64 val = simple_strtoll(strstrip(buffer), &end, 0);
2348 if (*end)
2349 return -EINVAL;
2350 retval = cft->write_s64(cgrp, cft, val);
2351 }
2352 if (!retval)
2353 retval = nbytes;
2354 return retval;
2355}
2356
2357static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
2358 struct file *file,
2359 const char __user *userbuf,
2360 size_t nbytes, loff_t *unused_ppos)
2361{
2362 char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
2363 int retval = 0;
2364 size_t max_bytes = cft->max_write_len;
2365 char *buffer = local_buffer;
2366
2367 if (!max_bytes)
2368 max_bytes = sizeof(local_buffer) - 1;
2369 if (nbytes >= max_bytes)
2370 return -E2BIG;
2371
2372 if (nbytes >= sizeof(local_buffer)) {
2373 buffer = kmalloc(nbytes + 1, GFP_KERNEL);
2374 if (buffer == NULL)
2375 return -ENOMEM;
2376 }
2377 if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
2378 retval = -EFAULT;
2379 goto out;
2380 }
2381
2382 buffer[nbytes] = 0;
2383 retval = cft->write_string(cgrp, cft, strstrip(buffer));
2384 if (!retval)
2385 retval = nbytes;
2386out:
2387 if (buffer != local_buffer)
2388 kfree(buffer);
2389 return retval;
2390}
2391
2392static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
2393 size_t nbytes, loff_t *ppos)
2394{
2395 struct cftype *cft = __d_cft(file->f_dentry);
2396 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2397
2398 if (cgroup_is_removed(cgrp))
2399 return -ENODEV;
2400 if (cft->write)
2401 return cft->write(cgrp, cft, file, buf, nbytes, ppos);
2402 if (cft->write_u64 || cft->write_s64)
2403 return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
2404 if (cft->write_string)
2405 return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
2406 if (cft->trigger) {
2407 int ret = cft->trigger(cgrp, (unsigned int)cft->private);
2408 return ret ? ret : nbytes;
2409 }
2410 return -EINVAL;
2411}
2412
2413static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
2414 struct file *file,
2415 char __user *buf, size_t nbytes,
2416 loff_t *ppos)
2417{
2418 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2419 u64 val = cft->read_u64(cgrp, cft);
2420 int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
2421
2422 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2423}
2424
2425static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
2426 struct file *file,
2427 char __user *buf, size_t nbytes,
2428 loff_t *ppos)
2429{
2430 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2431 s64 val = cft->read_s64(cgrp, cft);
2432 int len = sprintf(tmp, "%lld\n", (long long) val);
2433
2434 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2435}
2436
2437static ssize_t cgroup_file_read(struct file *file, char __user *buf,
2438 size_t nbytes, loff_t *ppos)
2439{
2440 struct cftype *cft = __d_cft(file->f_dentry);
2441 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2442
2443 if (cgroup_is_removed(cgrp))
2444 return -ENODEV;
2445
2446 if (cft->read)
2447 return cft->read(cgrp, cft, file, buf, nbytes, ppos);
2448 if (cft->read_u64)
2449 return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
2450 if (cft->read_s64)
2451 return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
2452 return -EINVAL;
2453}
2454
2455
2456
2457
2458
2459
/*
 * Per-open state of a seq_file based control file.  It is allocated in
 * cgroup_file_open(), stored as the seq_file's private data and freed in
 * cgroup_seqfile_release().
 */
struct cgroup_seqfile_state {
	struct cftype *cft;	/* control file type that was opened */
	struct cgroup *cgroup;	/* cgroup the file belongs to */
};
2464
/*
 * ->fill callback of struct cgroup_map_cb: prints one "key value" line into
 * the seq_file stored in cb->state and returns seq_printf()'s result.
 */
static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
{
	struct seq_file *sf = cb->state;
	return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
}
2470
2471static int cgroup_seqfile_show(struct seq_file *m, void *arg)
2472{
2473 struct cgroup_seqfile_state *state = m->private;
2474 struct cftype *cft = state->cft;
2475 if (cft->read_map) {
2476 struct cgroup_map_cb cb = {
2477 .fill = cgroup_map_add,
2478 .state = m,
2479 };
2480 return cft->read_map(state->cgroup, cft, &cb);
2481 }
2482 return cft->read_seq_string(state->cgroup, cft, m);
2483}
2484
2485static int cgroup_seqfile_release(struct inode *inode, struct file *file)
2486{
2487 struct seq_file *seq = file->private_data;
2488 kfree(seq->private);
2489 return single_release(inode, file);
2490}
2491
/*
 * File operations installed by cgroup_file_open() for control files whose
 * cftype has ->read_map or ->read_seq_string; reads go through seq_file,
 * writes still go through cgroup_file_write().
 */
static const struct file_operations cgroup_seqfile_operations = {
	.read = seq_read,
	.write = cgroup_file_write,
	.llseek = seq_lseek,
	.release = cgroup_seqfile_release,
};
2498
2499static int cgroup_file_open(struct inode *inode, struct file *file)
2500{
2501 int err;
2502 struct cftype *cft;
2503
2504 err = generic_file_open(inode, file);
2505 if (err)
2506 return err;
2507 cft = __d_cft(file->f_dentry);
2508
2509 if (cft->read_map || cft->read_seq_string) {
2510 struct cgroup_seqfile_state *state =
2511 kzalloc(sizeof(*state), GFP_USER);
2512 if (!state)
2513 return -ENOMEM;
2514 state->cft = cft;
2515 state->cgroup = __d_cgrp(file->f_dentry->d_parent);
2516 file->f_op = &cgroup_seqfile_operations;
2517 err = single_open(file, cgroup_seqfile_show, state);
2518 if (err < 0)
2519 kfree(state);
2520 } else if (cft->open)
2521 err = cft->open(inode, file);
2522 else
2523 err = 0;
2524
2525 return err;
2526}
2527
2528static int cgroup_file_release(struct inode *inode, struct file *file)
2529{
2530 struct cftype *cft = __d_cft(file->f_dentry);
2531 if (cft->release)
2532 return cft->release(inode, file);
2533 return 0;
2534}
2535
2536
2537
2538
2539static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
2540 struct inode *new_dir, struct dentry *new_dentry)
2541{
2542 if (!S_ISDIR(old_dentry->d_inode->i_mode))
2543 return -ENOTDIR;
2544 if (new_dentry->d_inode)
2545 return -EEXIST;
2546 if (old_dir != new_dir)
2547 return -EIO;
2548 return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
2549}
2550
2551static struct simple_xattrs *__d_xattrs(struct dentry *dentry)
2552{
2553 if (S_ISDIR(dentry->d_inode->i_mode))
2554 return &__d_cgrp(dentry)->xattrs;
2555 else
2556 return &__d_cfe(dentry)->xattrs;
2557}
2558
/*
 * Test whether ROOT_XATTR is set in the flags of the cgroupfs_root that
 * @dentry's superblock belongs to.  Returns non-zero if it is.
 */
static inline int xattr_enabled(struct dentry *dentry)
{
	struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
	return test_bit(ROOT_XATTR, &root->flags);
}
2564
2565static bool is_valid_xattr(const char *name)
2566{
2567 if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
2568 !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
2569 return true;
2570 return false;
2571}
2572
2573static int cgroup_setxattr(struct dentry *dentry, const char *name,
2574 const void *val, size_t size, int flags)
2575{
2576 if (!xattr_enabled(dentry))
2577 return -EOPNOTSUPP;
2578 if (!is_valid_xattr(name))
2579 return -EINVAL;
2580 return simple_xattr_set(__d_xattrs(dentry), name, val, size, flags);
2581}
2582
2583static int cgroup_removexattr(struct dentry *dentry, const char *name)
2584{
2585 if (!xattr_enabled(dentry))
2586 return -EOPNOTSUPP;
2587 if (!is_valid_xattr(name))
2588 return -EINVAL;
2589 return simple_xattr_remove(__d_xattrs(dentry), name);
2590}
2591
2592static ssize_t cgroup_getxattr(struct dentry *dentry, const char *name,
2593 void *buf, size_t size)
2594{
2595 if (!xattr_enabled(dentry))
2596 return -EOPNOTSUPP;
2597 if (!is_valid_xattr(name))
2598 return -EINVAL;
2599 return simple_xattr_get(__d_xattrs(dentry), name, buf, size);
2600}
2601
2602static ssize_t cgroup_listxattr(struct dentry *dentry, char *buf, size_t size)
2603{
2604 if (!xattr_enabled(dentry))
2605 return -EOPNOTSUPP;
2606 return simple_xattr_list(__d_xattrs(dentry), buf, size);
2607}
2608
/*
 * Default file operations of regular cgroup control files; installed as
 * i_fop by cgroup_create_file() and used by __file_cft() to recognise
 * cgroup control files.
 */
static const struct file_operations cgroup_file_operations = {
	.read = cgroup_file_read,
	.write = cgroup_file_write,
	.llseek = generic_file_llseek,
	.open = cgroup_file_open,
	.release = cgroup_file_release,
};
2616
/* inode operations of regular cgroup control files: xattr handling only */
static const struct inode_operations cgroup_file_inode_operations = {
	.setxattr = cgroup_setxattr,
	.getxattr = cgroup_getxattr,
	.listxattr = cgroup_listxattr,
	.removexattr = cgroup_removexattr,
};
2623
/*
 * inode operations of cgroup directories: lookup, mkdir/rmdir/rename and
 * the same xattr handlers that control files use.
 */
static const struct inode_operations cgroup_dir_inode_operations = {
	.lookup = cgroup_lookup,
	.mkdir = cgroup_mkdir,
	.rmdir = cgroup_rmdir,
	.rename = cgroup_rename,
	.setxattr = cgroup_setxattr,
	.getxattr = cgroup_getxattr,
	.listxattr = cgroup_listxattr,
	.removexattr = cgroup_removexattr,
};
2634
2635static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
2636{
2637 if (dentry->d_name.len > NAME_MAX)
2638 return ERR_PTR(-ENAMETOOLONG);
2639 d_add(dentry, NULL);
2640 return NULL;
2641}
2642
2643
2644
2645
2646static inline struct cftype *__file_cft(struct file *file)
2647{
2648 if (file_inode(file)->i_fop != &cgroup_file_operations)
2649 return ERR_PTR(-EINVAL);
2650 return __d_cft(file->f_dentry);
2651}
2652
/*
 * cgroup_create_file - create the inode for a new directory or control file
 * @dentry: negative dentry that will be instantiated
 * @mode: file mode; S_ISDIR/S_ISREG select the operations to install
 * @sb: superblock to allocate the inode from
 *
 * Returns 0 on success, -ENOENT if @dentry is NULL, -EEXIST if it already
 * has an inode, -ENOMEM if no inode can be allocated.
 */
static int cgroup_create_file(struct dentry *dentry, umode_t mode,
				struct super_block *sb)
{
	struct inode *inode;

	if (!dentry)
		return -ENOENT;
	if (dentry->d_inode)
		return -EEXIST;

	inode = cgroup_new_inode(mode, sb);
	if (!inode)
		return -ENOMEM;

	if (S_ISDIR(mode)) {
		inode->i_op = &cgroup_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;

		/* bump the link counts of the new directory and of its parent */
		inc_nlink(inode);
		inc_nlink(dentry->d_parent->d_inode);

		/*
		 * The new directory's i_mutex is taken here and left held on
		 * return.  The inode was only just created, so failing to
		 * get the lock is unexpected and merely warned about.
		 */
		WARN_ON_ONCE(!mutex_trylock(&inode->i_mutex));
	} else if (S_ISREG(mode)) {
		inode->i_size = 0;
		inode->i_fop = &cgroup_file_operations;
		inode->i_op = &cgroup_file_inode_operations;
	}
	d_instantiate(dentry, inode);
	/* extra reference on the dentry; it is not dropped in this function */
	dget(dentry);
	return 0;
}
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703static umode_t cgroup_file_mode(const struct cftype *cft)
2704{
2705 umode_t mode = 0;
2706
2707 if (cft->mode)
2708 return cft->mode;
2709
2710 if (cft->read || cft->read_u64 || cft->read_s64 ||
2711 cft->read_map || cft->read_seq_string)
2712 mode |= S_IRUGO;
2713
2714 if (cft->write || cft->write_u64 || cft->write_s64 ||
2715 cft->write_string || cft->trigger)
2716 mode |= S_IWUSR;
2717
2718 return mode;
2719}
2720
/*
 * cgroup_add_file - create one control file in a cgroup directory
 * @cgrp: cgroup that gets the file
 * @subsys: owning subsystem, or NULL; its name is used as file name prefix
 *	    unless the hierarchy has ROOT_NOPREFIX set
 * @cft: description of the file
 *
 * The caller must hold the i_mutex of @cgrp's directory inode.
 *
 * Returns 0 on success, -ENOMEM if the cfent cannot be allocated, or the
 * error of lookup_one_len() / cgroup_create_file().
 */
static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
			   struct cftype *cft)
{
	struct dentry *dir = cgrp->dentry;
	struct cgroup *parent = __d_cgrp(dir);
	struct dentry *dentry;
	struct cfent *cfe;
	int error;
	umode_t mode;
	/* sized for "<subsys name>.<cft name>" plus the terminating NUL */
	char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };

	if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
		strcpy(name, subsys->name);
		strcat(name, ".");
	}
	strcat(name, cft->name);

	BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));

	cfe = kzalloc(sizeof(*cfe), GFP_KERNEL);
	if (!cfe)
		return -ENOMEM;

	dentry = lookup_one_len(name, dir, strlen(name));
	if (IS_ERR(dentry)) {
		error = PTR_ERR(dentry);
		goto out;
	}

	mode = cgroup_file_mode(cft);
	error = cgroup_create_file(dentry, mode | S_IFREG, cgrp->root->sb);
	if (!error) {
		cfe->type = (void *)cft;
		cfe->dentry = dentry;
		dentry->d_fsdata = cfe;
		simple_xattrs_init(&cfe->xattrs);
		list_add_tail(&cfe->node, &parent->files);
		/* ownership moved to the list; keep the kfree() below from freeing it */
		cfe = NULL;
	}
	/* drop the reference obtained from lookup_one_len() */
	dput(dentry);
out:
	kfree(cfe);
	return error;
}
2765
2766static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
2767 struct cftype cfts[], bool is_add)
2768{
2769 struct cftype *cft;
2770 int err, ret = 0;
2771
2772 for (cft = cfts; cft->name[0] != '\0'; cft++) {
2773
2774 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
2775 continue;
2776 if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
2777 continue;
2778
2779 if (is_add) {
2780 err = cgroup_add_file(cgrp, subsys, cft);
2781 if (err)
2782 pr_warn("cgroup_addrm_files: failed to add %s, err=%d\n",
2783 cft->name, err);
2784 ret = err;
2785 } else {
2786 cgroup_rm_file(cgrp, cft);
2787 }
2788 }
2789 return ret;
2790}
2791
/* taken by cgroup_cfts_prepare() and released by cgroup_cfts_commit() */
static DEFINE_MUTEX(cgroup_cft_mutex);
2793
/*
 * Start a cftype set update: takes cgroup_cft_mutex, then cgroup_mutex.
 * Both stay held on return and are released by cgroup_cfts_commit().
 */
static void cgroup_cfts_prepare(void)
	__acquires(&cgroup_cft_mutex) __acquires(&cgroup_mutex)
{
	/*
	 * cgroup_cft_mutex is the outer lock; cgroup_cfts_commit() keeps
	 * holding it while it temporarily drops cgroup_mutex.
	 */
	mutex_lock(&cgroup_cft_mutex);
	mutex_lock(&cgroup_mutex);
}
2807
/*
 * cgroup_cfts_commit - apply a cftype set update to all existing cgroups
 * @ss: subsystem the cftypes belong to
 * @cfts: cftypes to add or remove, or NULL to only drop the locks
 * @is_add: true to add the files, false to remove them
 *
 * Must be called with the locks taken by cgroup_cfts_prepare() held; both
 * are released before returning.
 */
static void cgroup_cfts_commit(struct cgroup_subsys *ss,
			       struct cftype *cfts, bool is_add)
	__releases(&cgroup_mutex) __releases(&cgroup_cft_mutex)
{
	LIST_HEAD(pending);
	struct cgroup *cgrp, *n;

	/*
	 * Nothing to update if @ss sits on rootnode.  Otherwise queue every
	 * cgroup of the hierarchy, pinning its dentry so that it can be used
	 * after cgroup_mutex is dropped.
	 */
	if (cfts && ss->root != &rootnode) {
		list_for_each_entry(cgrp, &ss->root->allcg_list, allcg_node) {
			dget(cgrp->dentry);
			list_add_tail(&cgrp->cft_q_node, &pending);
		}
	}

	mutex_unlock(&cgroup_mutex);

	/*
	 * Each directory is updated with its i_mutex taken before
	 * cgroup_mutex, which is why cgroup_mutex was dropped above.
	 * Cgroups removed in the meantime are skipped.
	 */
	list_for_each_entry_safe(cgrp, n, &pending, cft_q_node) {
		struct inode *inode = cgrp->dentry->d_inode;

		mutex_lock(&inode->i_mutex);
		mutex_lock(&cgroup_mutex);
		if (!cgroup_is_removed(cgrp))
			cgroup_addrm_files(cgrp, ss, cfts, is_add);
		mutex_unlock(&cgroup_mutex);
		mutex_unlock(&inode->i_mutex);

		list_del_init(&cgrp->cft_q_node);
		/* drop the reference taken while queueing */
		dput(cgrp->dentry);
	}

	mutex_unlock(&cgroup_cft_mutex);
}
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
2861{
2862 struct cftype_set *set;
2863
2864 set = kzalloc(sizeof(*set), GFP_KERNEL);
2865 if (!set)
2866 return -ENOMEM;
2867
2868 cgroup_cfts_prepare();
2869 set->cfts = cfts;
2870 list_add_tail(&set->node, &ss->cftsets);
2871 cgroup_cfts_commit(ss, cfts, true);
2872
2873 return 0;
2874}
2875EXPORT_SYMBOL_GPL(cgroup_add_cftypes);
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
2891{
2892 struct cftype_set *set;
2893
2894 cgroup_cfts_prepare();
2895
2896 list_for_each_entry(set, &ss->cftsets, node) {
2897 if (set->cfts == cfts) {
2898 list_del_init(&set->node);
2899 cgroup_cfts_commit(ss, cfts, false);
2900 return 0;
2901 }
2902 }
2903
2904 cgroup_cfts_commit(ss, NULL, false);
2905 return -ENOENT;
2906}
2907
2908
2909
2910
2911
2912
2913
2914int cgroup_task_count(const struct cgroup *cgrp)
2915{
2916 int count = 0;
2917 struct cg_cgroup_link *link;
2918
2919 read_lock(&css_set_lock);
2920 list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
2921 count += atomic_read(&link->cg->refcount);
2922 }
2923 read_unlock(&css_set_lock);
2924 return count;
2925}
2926
2927
2928
2929
2930
2931static void cgroup_advance_iter(struct cgroup *cgrp,
2932 struct cgroup_iter *it)
2933{
2934 struct list_head *l = it->cg_link;
2935 struct cg_cgroup_link *link;
2936 struct css_set *cg;
2937
2938
2939 do {
2940 l = l->next;
2941 if (l == &cgrp->css_sets) {
2942 it->cg_link = NULL;
2943 return;
2944 }
2945 link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
2946 cg = link->cg;
2947 } while (list_empty(&cg->tasks));
2948 it->cg_link = l;
2949 it->task = cg->tasks.next;
2950}
2951
2952
2953
2954
2955
2956
2957
/*
 * Turn on the linking of tasks into their css_set's task list
 * (use_task_css_set_links) and link every thread that is currently in the
 * system.  Runs with css_set_lock write-held throughout.
 */
static void cgroup_enable_task_cg_lists(void)
{
	struct task_struct *p, *g;
	write_lock(&css_set_lock);
	use_task_css_set_links = 1;
	/*
	 * Walk all threads with tasklist_lock read-held.  The flag is set
	 * before the walk starts.
	 */
	read_lock(&tasklist_lock);
	do_each_thread(g, p) {
		task_lock(p);
		/*
		 * Exiting tasks are not linked, and tasks that are already
		 * on a list are left where they are.  Both checks are made
		 * under task_lock().
		 */
		if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
			list_add(&p->cg_list, &p->cgroups->tasks);
		task_unlock(p);
	} while_each_thread(g, p);
	read_unlock(&tasklist_lock);
	write_unlock(&css_set_lock);
}
2985
2986
2987
2988
2989
2990
2991
2992
2993
/**
 * cgroup_next_descendant_pre - find the next descendant for pre-order walk
 * @pos: the current position (%NULL to initiate traversal)
 * @cgroup: cgroup whose descendants to walk
 *
 * Returns the cgroup that follows @pos in a pre-order walk of @cgroup's
 * descendants, or %NULL when the walk is complete.  @cgroup itself is never
 * returned.  Warns once if called outside an RCU read-side section.
 */
struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
					  struct cgroup *cgroup)
{
	struct cgroup *next;

	WARN_ON_ONCE(!rcu_read_lock_held());

	/* if first iteration, pretend we just visited @cgroup */
	if (!pos) {
		if (list_empty(&cgroup->children))
			return NULL;
		pos = cgroup;
	}

	/* visit the first child if exists */
	next = list_first_or_null_rcu(&pos->children, struct cgroup, sibling);
	if (next)
		return next;

	/* no child, visit my or the closest ancestor's next sibling */
	do {
		next = list_entry_rcu(pos->sibling.next, struct cgroup,
				      sibling);
		/* reaching the parent's list head means there is no next sibling */
		if (&next->sibling != &pos->parent->children)
			return next;

		pos = pos->parent;
	} while (pos != cgroup);

	return NULL;
}
EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre);
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos)
3036{
3037 struct cgroup *last, *tmp;
3038
3039 WARN_ON_ONCE(!rcu_read_lock_held());
3040
3041 do {
3042 last = pos;
3043
3044 pos = NULL;
3045 list_for_each_entry_rcu(tmp, &last->children, sibling)
3046 pos = tmp;
3047 } while (pos);
3048
3049 return last;
3050}
3051EXPORT_SYMBOL_GPL(cgroup_rightmost_descendant);
3052
3053static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos)
3054{
3055 struct cgroup *last;
3056
3057 do {
3058 last = pos;
3059 pos = list_first_or_null_rcu(&pos->children, struct cgroup,
3060 sibling);
3061 } while (pos);
3062
3063 return last;
3064}
3065
3066
3067
3068
3069
3070
3071
3072
3073
/**
 * cgroup_next_descendant_post - find the next descendant for post-order walk
 * @pos: the current position (%NULL to initiate traversal)
 * @cgroup: cgroup whose descendants to walk
 *
 * Returns the cgroup that follows @pos in a post-order walk of @cgroup's
 * descendants, or %NULL when the walk is complete.  @cgroup itself is never
 * returned.  Warns once if called outside an RCU read-side section.
 */
struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
					   struct cgroup *cgroup)
{
	struct cgroup *next;

	WARN_ON_ONCE(!rcu_read_lock_held());

	/* if first iteration, visit the leftmost descendant */
	if (!pos) {
		next = cgroup_leftmost_descendant(cgroup);
		/* the leftmost descendant is @cgroup itself if it has no children */
		return next != cgroup ? next : NULL;
	}

	/* if there's an unvisited sibling, visit its leftmost descendant */
	next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling);
	if (&next->sibling != &pos->parent->children)
		return cgroup_leftmost_descendant(next);

	/* no sibling left, visit parent */
	next = pos->parent;
	return next != cgroup ? next : NULL;
}
EXPORT_SYMBOL_GPL(cgroup_next_descendant_post);
3097
/*
 * cgroup_iter_start - begin iterating over the tasks of a cgroup
 * @cgrp: cgroup whose tasks are walked
 * @it: iterator to initialise
 *
 * Takes css_set_lock for reading and returns with it held; it is released
 * by cgroup_iter_end().
 */
void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
	__acquires(css_set_lock)
{
	/*
	 * The iteration relies on tasks being linked into their css_set's
	 * task list; enable that linking on first use.
	 */
	if (!use_task_css_set_links)
		cgroup_enable_task_cg_lists();

	read_lock(&css_set_lock);
	/* start at the list head and advance to the first css_set with tasks */
	it->cg_link = &cgrp->css_sets;
	cgroup_advance_iter(cgrp, it);
}
3113
3114struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
3115 struct cgroup_iter *it)
3116{
3117 struct task_struct *res;
3118 struct list_head *l = it->task;
3119 struct cg_cgroup_link *link;
3120
3121
3122 if (!it->cg_link)
3123 return NULL;
3124 res = list_entry(l, struct task_struct, cg_list);
3125
3126 l = l->next;
3127 link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
3128 if (l == &link->cg->tasks) {
3129
3130
3131 cgroup_advance_iter(cgrp, it);
3132 } else {
3133 it->task = l;
3134 }
3135 return res;
3136}
3137
3138void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
3139 __releases(css_set_lock)
3140{
3141 read_unlock(&css_set_lock);
3142}
3143
3144static inline int started_after_time(struct task_struct *t1,
3145 struct timespec *time,
3146 struct task_struct *t2)
3147{
3148 int start_diff = timespec_compare(&t1->start_time, time);
3149 if (start_diff > 0) {
3150 return 1;
3151 } else if (start_diff < 0) {
3152 return 0;
3153 } else {
3154
3155
3156
3157
3158
3159
3160
3161
3162 return t1 > t2;
3163 }
3164}
3165
3166
3167
3168
3169
3170
3171static inline int started_after(void *p1, void *p2)
3172{
3173 struct task_struct *t1 = p1;
3174 struct task_struct *t2 = p2;
3175 return started_after_time(t1, &t2->start_time, t2);
3176}
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205int cgroup_scan_tasks(struct cgroup_scanner *scan)
3206{
3207 int retval, i;
3208 struct cgroup_iter it;
3209 struct task_struct *p, *dropped;
3210
3211 struct task_struct *latest_task = NULL;
3212 struct ptr_heap tmp_heap;
3213 struct ptr_heap *heap;
3214 struct timespec latest_time = { 0, 0 };
3215
3216 if (scan->heap) {
3217
3218 heap = scan->heap;
3219 heap->gt = &started_after;
3220 } else {
3221
3222 heap = &tmp_heap;
3223 retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
3224 if (retval)
3225
3226 return retval;
3227 }
3228
3229 again:
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242 heap->size = 0;
3243 cgroup_iter_start(scan->cg, &it);
3244 while ((p = cgroup_iter_next(scan->cg, &it))) {
3245
3246
3247
3248
3249 if (scan->test_task && !scan->test_task(p, scan))
3250 continue;
3251
3252
3253
3254
3255 if (!started_after_time(p, &latest_time, latest_task))
3256 continue;
3257 dropped = heap_insert(heap, p);
3258 if (dropped == NULL) {
3259
3260
3261
3262
3263 get_task_struct(p);
3264 } else if (dropped != p) {
3265
3266
3267
3268
3269 get_task_struct(p);
3270 put_task_struct(dropped);
3271 }
3272
3273
3274
3275
3276 }
3277 cgroup_iter_end(scan->cg, &it);
3278
3279 if (heap->size) {
3280 for (i = 0; i < heap->size; i++) {
3281 struct task_struct *q = heap->ptrs[i];
3282 if (i == 0) {
3283 latest_time = q->start_time;
3284 latest_task = q;
3285 }
3286
3287 scan->process_task(q, scan);
3288 put_task_struct(q);
3289 }
3290
3291
3292
3293
3294
3295
3296
3297 goto again;
3298 }
3299 if (heap == &tmp_heap)
3300 heap_free(&tmp_heap);
3301 return 0;
3302}
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
/*
 * Which flavour of pid listing a pidlist holds.  pidlist_array_load()
 * fills PROCS lists with task_tgid_vnr() values (and de-duplicates them)
 * and TASKS lists with task_pid_vnr() values.
 */
enum cgroup_filetype {
	CGROUP_FILE_PROCS,	/* thread-group ids */
	CGROUP_FILE_TASKS,	/* per-task pids */
};
3319
3320
3321
3322
3323
3324
3325
3326struct cgroup_pidlist {
3327
3328
3329
3330
3331 struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
3332
3333 pid_t *list;
3334
3335 int length;
3336
3337 int use_count;
3338
3339 struct list_head links;
3340
3341 struct cgroup *owner;
3342
3343 struct rw_semaphore mutex;
3344};
3345
3346
3347
3348
3349
3350
3351#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
3352static void *pidlist_allocate(int count)
3353{
3354 if (PIDLIST_TOO_LARGE(count))
3355 return vmalloc(count * sizeof(pid_t));
3356 else
3357 return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
3358}
/*
 * Release an array obtained from pidlist_allocate()/pidlist_resize(),
 * picking vfree() or kfree() according to where the memory lives.
 */
static void pidlist_free(void *p)
{
	if (!is_vmalloc_addr(p)) {
		kfree(p);
		return;
	}
	vfree(p);
}
3366static void *pidlist_resize(void *p, int newcount)
3367{
3368 void *newlist;
3369
3370 if (is_vmalloc_addr(p)) {
3371 newlist = vmalloc(newcount * sizeof(pid_t));
3372 if (!newlist)
3373 return NULL;
3374 memcpy(newlist, p, newcount * sizeof(pid_t));
3375 vfree(p);
3376 } else {
3377 newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL);
3378 }
3379 return newlist;
3380}
3381
3382
3383
3384
3385
3386
3387
3388
3389#define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new))
3390static int pidlist_uniq(pid_t **p, int length)
3391{
3392 int src, dest = 1;
3393 pid_t *list = *p;
3394 pid_t *newlist;
3395
3396
3397
3398
3399
3400 if (length == 0 || length == 1)
3401 return length;
3402
3403 for (src = 1; src < length; src++) {
3404
3405 while (list[src] == list[src-1]) {
3406 src++;
3407 if (src == length)
3408 goto after;
3409 }
3410
3411 list[dest] = list[src];
3412 dest++;
3413 }
3414after:
3415
3416
3417
3418
3419
3420 if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
3421 newlist = pidlist_resize(list, dest);
3422 if (newlist)
3423 *p = newlist;
3424 }
3425 return dest;
3426}
3427
/*
 * sort() comparison callback for pid_t arrays.
 *
 * Returns a negative, zero or positive value when *@a is less than,
 * equal to or greater than *@b.  The comparison form is used instead of
 * a subtraction so that the result cannot overflow, and the arguments
 * are read through const pointers rather than casting const away.
 */
static int cmppid(const void *a, const void *b)
{
	const pid_t lhs = *(const pid_t *)a;
	const pid_t rhs = *(const pid_t *)b;

	return (lhs > rhs) - (lhs < rhs);
}
3432
3433
3434
3435
3436
3437
3438
3439static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
3440 enum cgroup_filetype type)
3441{
3442 struct cgroup_pidlist *l;
3443
3444 struct pid_namespace *ns = task_active_pid_ns(current);
3445
3446
3447
3448
3449
3450
3451
3452 mutex_lock(&cgrp->pidlist_mutex);
3453 list_for_each_entry(l, &cgrp->pidlists, links) {
3454 if (l->key.type == type && l->key.ns == ns) {
3455
3456 down_write(&l->mutex);
3457 mutex_unlock(&cgrp->pidlist_mutex);
3458 return l;
3459 }
3460 }
3461
3462 l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
3463 if (!l) {
3464 mutex_unlock(&cgrp->pidlist_mutex);
3465 return l;
3466 }
3467 init_rwsem(&l->mutex);
3468 down_write(&l->mutex);
3469 l->key.type = type;
3470 l->key.ns = get_pid_ns(ns);
3471 l->use_count = 0;
3472 l->list = NULL;
3473 l->owner = cgrp;
3474 list_add(&l->links, &cgrp->pidlists);
3475 mutex_unlock(&cgrp->pidlist_mutex);
3476 return l;
3477}
3478
3479
3480
3481
3482static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
3483 struct cgroup_pidlist **lp)
3484{
3485 pid_t *array;
3486 int length;
3487 int pid, n = 0;
3488 struct cgroup_iter it;
3489 struct task_struct *tsk;
3490 struct cgroup_pidlist *l;
3491
3492
3493
3494
3495
3496
3497
3498 length = cgroup_task_count(cgrp);
3499 array = pidlist_allocate(length);
3500 if (!array)
3501 return -ENOMEM;
3502
3503 cgroup_iter_start(cgrp, &it);
3504 while ((tsk = cgroup_iter_next(cgrp, &it))) {
3505 if (unlikely(n == length))
3506 break;
3507
3508 if (type == CGROUP_FILE_PROCS)
3509 pid = task_tgid_vnr(tsk);
3510 else
3511 pid = task_pid_vnr(tsk);
3512 if (pid > 0)
3513 array[n++] = pid;
3514 }
3515 cgroup_iter_end(cgrp, &it);
3516 length = n;
3517
3518 sort(array, length, sizeof(pid_t), cmppid, NULL);
3519 if (type == CGROUP_FILE_PROCS)
3520 length = pidlist_uniq(&array, length);
3521 l = cgroup_pidlist_find(cgrp, type);
3522 if (!l) {
3523 pidlist_free(array);
3524 return -ENOMEM;
3525 }
3526
3527 pidlist_free(l->list);
3528 l->list = array;
3529 l->length = length;
3530 l->use_count++;
3531 up_write(&l->mutex);
3532 *lp = l;
3533 return 0;
3534}
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
3546{
3547 int ret = -EINVAL;
3548 struct cgroup *cgrp;
3549 struct cgroup_iter it;
3550 struct task_struct *tsk;
3551
3552
3553
3554
3555
3556 if (dentry->d_sb->s_op != &cgroup_ops ||
3557 !S_ISDIR(dentry->d_inode->i_mode))
3558 goto err;
3559
3560 ret = 0;
3561 cgrp = dentry->d_fsdata;
3562
3563 cgroup_iter_start(cgrp, &it);
3564 while ((tsk = cgroup_iter_next(cgrp, &it))) {
3565 switch (tsk->state) {
3566 case TASK_RUNNING:
3567 stats->nr_running++;
3568 break;
3569 case TASK_INTERRUPTIBLE:
3570 stats->nr_sleeping++;
3571 break;
3572 case TASK_UNINTERRUPTIBLE:
3573 stats->nr_uninterruptible++;
3574 break;
3575 case TASK_STOPPED:
3576 stats->nr_stopped++;
3577 break;
3578 default:
3579 if (delayacct_is_task_waiting_on_io(tsk))
3580 stats->nr_io_wait++;
3581 break;
3582 }
3583 }
3584 cgroup_iter_end(cgrp, &it);
3585
3586err:
3587 return ret;
3588}
3589
3590
3591
3592
3593
3594
3595
3596
3597static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
3598{
3599
3600
3601
3602
3603
3604
3605 struct cgroup_pidlist *l = s->private;
3606 int index = 0, pid = *pos;
3607 int *iter;
3608
3609 down_read(&l->mutex);
3610 if (pid) {
3611 int end = l->length;
3612
3613 while (index < end) {
3614 int mid = (index + end) / 2;
3615 if (l->list[mid] == pid) {
3616 index = mid;
3617 break;
3618 } else if (l->list[mid] <= pid)
3619 index = mid + 1;
3620 else
3621 end = mid;
3622 }
3623 }
3624
3625 if (index >= l->length)
3626 return NULL;
3627
3628 iter = l->list + index;
3629 *pos = *iter;
3630 return iter;
3631}
3632
3633static void cgroup_pidlist_stop(struct seq_file *s, void *v)
3634{
3635 struct cgroup_pidlist *l = s->private;
3636 up_read(&l->mutex);
3637}
3638
3639static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
3640{
3641 struct cgroup_pidlist *l = s->private;
3642 pid_t *p = v;
3643 pid_t *end = l->list + l->length;
3644
3645
3646
3647
3648 p++;
3649 if (p >= end) {
3650 return NULL;
3651 } else {
3652 *pos = *p;
3653 return p;
3654 }
3655}
3656
/* seq_file ->show: print the pid that @v points at, one per line. */
static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
	int *pidp = v;

	return seq_printf(s, "%d\n", *pidp);
}
3661
3662
3663
3664
3665
3666static const struct seq_operations cgroup_pidlist_seq_operations = {
3667 .start = cgroup_pidlist_start,
3668 .stop = cgroup_pidlist_stop,
3669 .next = cgroup_pidlist_next,
3670 .show = cgroup_pidlist_show,
3671};
3672
3673static void cgroup_release_pid_array(struct cgroup_pidlist *l)
3674{
3675
3676
3677
3678
3679
3680
3681 mutex_lock(&l->owner->pidlist_mutex);
3682 down_write(&l->mutex);
3683 BUG_ON(!l->use_count);
3684 if (!--l->use_count) {
3685
3686 list_del(&l->links);
3687 mutex_unlock(&l->owner->pidlist_mutex);
3688 pidlist_free(l->list);
3689 put_pid_ns(l->key.ns);
3690 up_write(&l->mutex);
3691 kfree(l);
3692 return;
3693 }
3694 mutex_unlock(&l->owner->pidlist_mutex);
3695 up_write(&l->mutex);
3696}
3697
3698static int cgroup_pidlist_release(struct inode *inode, struct file *file)
3699{
3700 struct cgroup_pidlist *l;
3701 if (!(file->f_mode & FMODE_READ))
3702 return 0;
3703
3704
3705
3706
3707 l = ((struct seq_file *)file->private_data)->private;
3708 cgroup_release_pid_array(l);
3709 return seq_release(inode, file);
3710}
3711
3712static const struct file_operations cgroup_pidlist_operations = {
3713 .read = seq_read,
3714 .llseek = seq_lseek,
3715 .write = cgroup_file_write,
3716 .release = cgroup_pidlist_release,
3717};
3718
3719
3720
3721
3722
3723
3724
3725static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
3726{
3727 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
3728 struct cgroup_pidlist *l;
3729 int retval;
3730
3731
3732 if (!(file->f_mode & FMODE_READ))
3733 return 0;
3734
3735
3736 retval = pidlist_array_load(cgrp, type, &l);
3737 if (retval)
3738 return retval;
3739
3740 file->f_op = &cgroup_pidlist_operations;
3741
3742 retval = seq_open(file, &cgroup_pidlist_seq_operations);
3743 if (retval) {
3744 cgroup_release_pid_array(l);
3745 return retval;
3746 }
3747 ((struct seq_file *)file->private_data)->private = l;
3748 return 0;
3749}
3750static int cgroup_tasks_open(struct inode *unused, struct file *file)
3751{
3752 return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
3753}
3754static int cgroup_procs_open(struct inode *unused, struct file *file)
3755{
3756 return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
3757}
3758
3759static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
3760 struct cftype *cft)
3761{
3762 return notify_on_release(cgrp);
3763}
3764
3765static int cgroup_write_notify_on_release(struct cgroup *cgrp,
3766 struct cftype *cft,
3767 u64 val)
3768{
3769 clear_bit(CGRP_RELEASABLE, &cgrp->flags);
3770 if (val)
3771 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3772 else
3773 clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3774 return 0;
3775}
3776
3777
3778
3779
3780
3781
3782static void cgroup_event_remove(struct work_struct *work)
3783{
3784 struct cgroup_event *event = container_of(work, struct cgroup_event,
3785 remove);
3786 struct cgroup *cgrp = event->cgrp;
3787
3788 remove_wait_queue(event->wqh, &event->wait);
3789
3790 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
3791
3792
3793 eventfd_signal(event->eventfd, 1);
3794
3795 eventfd_ctx_put(event->eventfd);
3796 kfree(event);
3797 dput(cgrp->dentry);
3798}
3799
3800
3801
3802
3803
3804
3805static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
3806 int sync, void *key)
3807{
3808 struct cgroup_event *event = container_of(wait,
3809 struct cgroup_event, wait);
3810 struct cgroup *cgrp = event->cgrp;
3811 unsigned long flags = (unsigned long)key;
3812
3813 if (flags & POLLHUP) {
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823 spin_lock(&cgrp->event_list_lock);
3824 if (!list_empty(&event->list)) {
3825 list_del_init(&event->list);
3826
3827
3828
3829
3830 schedule_work(&event->remove);
3831 }
3832 spin_unlock(&cgrp->event_list_lock);
3833 }
3834
3835 return 0;
3836}
3837
3838static void cgroup_event_ptable_queue_proc(struct file *file,
3839 wait_queue_head_t *wqh, poll_table *pt)
3840{
3841 struct cgroup_event *event = container_of(pt,
3842 struct cgroup_event, pt);
3843
3844 event->wqh = wqh;
3845 add_wait_queue(wqh, &event->wait);
3846}
3847
3848
3849
3850
3851
3852
3853
3854static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
3855 const char *buffer)
3856{
3857 struct cgroup_event *event = NULL;
3858 struct cgroup *cgrp_cfile;
3859 unsigned int efd, cfd;
3860 struct file *efile = NULL;
3861 struct file *cfile = NULL;
3862 char *endp;
3863 int ret;
3864
3865 efd = simple_strtoul(buffer, &endp, 10);
3866 if (*endp != ' ')
3867 return -EINVAL;
3868 buffer = endp + 1;
3869
3870 cfd = simple_strtoul(buffer, &endp, 10);
3871 if ((*endp != ' ') && (*endp != '\0'))
3872 return -EINVAL;
3873 buffer = endp + 1;
3874
3875 event = kzalloc(sizeof(*event), GFP_KERNEL);
3876 if (!event)
3877 return -ENOMEM;
3878 event->cgrp = cgrp;
3879 INIT_LIST_HEAD(&event->list);
3880 init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
3881 init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
3882 INIT_WORK(&event->remove, cgroup_event_remove);
3883
3884 efile = eventfd_fget(efd);
3885 if (IS_ERR(efile)) {
3886 ret = PTR_ERR(efile);
3887 goto fail;
3888 }
3889
3890 event->eventfd = eventfd_ctx_fileget(efile);
3891 if (IS_ERR(event->eventfd)) {
3892 ret = PTR_ERR(event->eventfd);
3893 goto fail;
3894 }
3895
3896 cfile = fget(cfd);
3897 if (!cfile) {
3898 ret = -EBADF;
3899 goto fail;
3900 }
3901
3902
3903
3904 ret = inode_permission(file_inode(cfile), MAY_READ);
3905 if (ret < 0)
3906 goto fail;
3907
3908 event->cft = __file_cft(cfile);
3909 if (IS_ERR(event->cft)) {
3910 ret = PTR_ERR(event->cft);
3911 goto fail;
3912 }
3913
3914
3915
3916
3917
3918 cgrp_cfile = __d_cgrp(cfile->f_dentry->d_parent);
3919 if (cgrp_cfile != cgrp) {
3920 ret = -EINVAL;
3921 goto fail;
3922 }
3923
3924 if (!event->cft->register_event || !event->cft->unregister_event) {
3925 ret = -EINVAL;
3926 goto fail;
3927 }
3928
3929 ret = event->cft->register_event(cgrp, event->cft,
3930 event->eventfd, buffer);
3931 if (ret)
3932 goto fail;
3933
3934 if (efile->f_op->poll(efile, &event->pt) & POLLHUP) {
3935 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
3936 ret = 0;
3937 goto fail;
3938 }
3939
3940
3941
3942
3943
3944
3945 dget(cgrp->dentry);
3946
3947 spin_lock(&cgrp->event_list_lock);
3948 list_add(&event->list, &cgrp->event_list);
3949 spin_unlock(&cgrp->event_list_lock);
3950
3951 fput(cfile);
3952 fput(efile);
3953
3954 return 0;
3955
3956fail:
3957 if (cfile)
3958 fput(cfile);
3959
3960 if (event && event->eventfd && !IS_ERR(event->eventfd))
3961 eventfd_ctx_put(event->eventfd);
3962
3963 if (!IS_ERR_OR_NULL(efile))
3964 fput(efile);
3965
3966 kfree(event);
3967
3968 return ret;
3969}
3970
3971static u64 cgroup_clone_children_read(struct cgroup *cgrp,
3972 struct cftype *cft)
3973{
3974 return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
3975}
3976
3977static int cgroup_clone_children_write(struct cgroup *cgrp,
3978 struct cftype *cft,
3979 u64 val)
3980{
3981 if (val)
3982 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
3983 else
3984 clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
3985 return 0;
3986}
3987
3988
3989
3990
3991
3992#define CGROUP_FILE_GENERIC_PREFIX "cgroup."
3993static struct cftype files[] = {
3994 {
3995 .name = "tasks",
3996 .open = cgroup_tasks_open,
3997 .write_u64 = cgroup_tasks_write,
3998 .release = cgroup_pidlist_release,
3999 .mode = S_IRUGO | S_IWUSR,
4000 },
4001 {
4002 .name = CGROUP_FILE_GENERIC_PREFIX "procs",
4003 .open = cgroup_procs_open,
4004 .write_u64 = cgroup_procs_write,
4005 .release = cgroup_pidlist_release,
4006 .mode = S_IRUGO | S_IWUSR,
4007 },
4008 {
4009 .name = "notify_on_release",
4010 .read_u64 = cgroup_read_notify_on_release,
4011 .write_u64 = cgroup_write_notify_on_release,
4012 },
4013 {
4014 .name = CGROUP_FILE_GENERIC_PREFIX "event_control",
4015 .write_string = cgroup_write_event_control,
4016 .mode = S_IWUGO,
4017 },
4018 {
4019 .name = "cgroup.clone_children",
4020 .read_u64 = cgroup_clone_children_read,
4021 .write_u64 = cgroup_clone_children_write,
4022 },
4023 {
4024 .name = "release_agent",
4025 .flags = CFTYPE_ONLY_ON_ROOT,
4026 .read_seq_string = cgroup_release_agent_show,
4027 .write_string = cgroup_release_agent_write,
4028 .max_write_len = PATH_MAX,
4029 },
4030 { }
4031};
4032
4033
4034
4035
4036
4037
4038
4039static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
4040 unsigned long subsys_mask)
4041{
4042 int err;
4043 struct cgroup_subsys *ss;
4044
4045 if (base_files) {
4046 err = cgroup_addrm_files(cgrp, NULL, files, true);
4047 if (err < 0)
4048 return err;
4049 }
4050
4051
4052 for_each_subsys(cgrp->root, ss) {
4053 struct cftype_set *set;
4054 if (!test_bit(ss->subsys_id, &subsys_mask))
4055 continue;
4056
4057 list_for_each_entry(set, &ss->cftsets, node)
4058 cgroup_addrm_files(cgrp, ss, set->cfts, true);
4059 }
4060
4061
4062 for_each_subsys(cgrp->root, ss) {
4063 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
4064
4065
4066
4067
4068
4069 if (css->id)
4070 rcu_assign_pointer(css->id->css, css);
4071 }
4072
4073 return 0;
4074}
4075
4076static void css_dput_fn(struct work_struct *work)
4077{
4078 struct cgroup_subsys_state *css =
4079 container_of(work, struct cgroup_subsys_state, dput_work);
4080 struct dentry *dentry = css->cgroup->dentry;
4081 struct super_block *sb = dentry->d_sb;
4082
4083 atomic_inc(&sb->s_active);
4084 dput(dentry);
4085 deactivate_super(sb);
4086}
4087
4088static void init_cgroup_css(struct cgroup_subsys_state *css,
4089 struct cgroup_subsys *ss,
4090 struct cgroup *cgrp)
4091{
4092 css->cgroup = cgrp;
4093 atomic_set(&css->refcnt, 1);
4094 css->flags = 0;
4095 css->id = NULL;
4096 if (cgrp == dummytop)
4097 css->flags |= CSS_ROOT;
4098 BUG_ON(cgrp->subsys[ss->subsys_id]);
4099 cgrp->subsys[ss->subsys_id] = css;
4100
4101
4102
4103
4104
4105
4106
4107 INIT_WORK(&css->dput_work, css_dput_fn);
4108}
4109
4110
4111static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
4112{
4113 int ret = 0;
4114
4115 lockdep_assert_held(&cgroup_mutex);
4116
4117 if (ss->css_online)
4118 ret = ss->css_online(cgrp);
4119 if (!ret)
4120 cgrp->subsys[ss->subsys_id]->flags |= CSS_ONLINE;
4121 return ret;
4122}
4123
4124
4125static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
4126 __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
4127{
4128 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
4129
4130 lockdep_assert_held(&cgroup_mutex);
4131
4132 if (!(css->flags & CSS_ONLINE))
4133 return;
4134
4135
4136
4137
4138
4139
4140
4141 if (ss->css_offline) {
4142 mutex_unlock(&cgroup_mutex);
4143 ss->css_offline(cgrp);
4144 mutex_lock(&cgroup_mutex);
4145 }
4146
4147 cgrp->subsys[ss->subsys_id]->flags &= ~CSS_ONLINE;
4148}
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
4159 umode_t mode)
4160{
4161 struct cgroup *cgrp;
4162 struct cgroupfs_root *root = parent->root;
4163 int err = 0;
4164 struct cgroup_subsys *ss;
4165 struct super_block *sb = root->sb;
4166
4167
4168 cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
4169 if (!cgrp)
4170 return -ENOMEM;
4171
4172 cgrp->id = ida_simple_get(&root->cgroup_ida, 1, 0, GFP_KERNEL);
4173 if (cgrp->id < 0)
4174 goto err_free_cgrp;
4175
4176
4177
4178
4179
4180
4181
4182
4183 if (!cgroup_lock_live_group(parent)) {
4184 err = -ENODEV;
4185 goto err_free_id;
4186 }
4187
4188
4189
4190
4191
4192
4193 atomic_inc(&sb->s_active);
4194
4195 init_cgroup_housekeeping(cgrp);
4196
4197 dentry->d_fsdata = cgrp;
4198 cgrp->dentry = dentry;
4199
4200 cgrp->parent = parent;
4201 cgrp->root = parent->root;
4202 cgrp->top_cgroup = parent->top_cgroup;
4203
4204 if (notify_on_release(parent))
4205 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
4206
4207 if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
4208 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
4209
4210 for_each_subsys(root, ss) {
4211 struct cgroup_subsys_state *css;
4212
4213 css = ss->css_alloc(cgrp);
4214 if (IS_ERR(css)) {
4215 err = PTR_ERR(css);
4216 goto err_free_all;
4217 }
4218 init_cgroup_css(css, ss, cgrp);
4219 if (ss->use_id) {
4220 err = alloc_css_id(ss, parent, cgrp);
4221 if (err)
4222 goto err_free_all;
4223 }
4224 }
4225
4226
4227
4228
4229
4230
4231 err = cgroup_create_file(dentry, S_IFDIR | mode, sb);
4232 if (err < 0)
4233 goto err_free_all;
4234 lockdep_assert_held(&dentry->d_inode->i_mutex);
4235
4236
4237 list_add_tail(&cgrp->allcg_node, &root->allcg_list);
4238 list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
4239 root->number_of_cgroups++;
4240
4241
4242 for_each_subsys(root, ss)
4243 dget(dentry);
4244
4245
4246 for_each_subsys(root, ss) {
4247 err = online_css(ss, cgrp);
4248 if (err)
4249 goto err_destroy;
4250
4251 if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
4252 parent->parent) {
4253 pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
4254 current->comm, current->pid, ss->name);
4255 if (!strcmp(ss->name, "memory"))
4256 pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n");
4257 ss->warned_broken_hierarchy = true;
4258 }
4259 }
4260
4261 err = cgroup_populate_dir(cgrp, true, root->subsys_mask);
4262 if (err)
4263 goto err_destroy;
4264
4265 mutex_unlock(&cgroup_mutex);
4266 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
4267
4268 return 0;
4269
4270err_free_all:
4271 for_each_subsys(root, ss) {
4272 if (cgrp->subsys[ss->subsys_id])
4273 ss->css_free(cgrp);
4274 }
4275 mutex_unlock(&cgroup_mutex);
4276
4277 deactivate_super(sb);
4278err_free_id:
4279 ida_simple_remove(&root->cgroup_ida, cgrp->id);
4280err_free_cgrp:
4281 kfree(cgrp);
4282 return err;
4283
4284err_destroy:
4285 cgroup_destroy_locked(cgrp);
4286 mutex_unlock(&cgroup_mutex);
4287 mutex_unlock(&dentry->d_inode->i_mutex);
4288 return err;
4289}
4290
4291static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
4292{
4293 struct cgroup *c_parent = dentry->d_parent->d_fsdata;
4294
4295
4296 return cgroup_create(c_parent, dentry, mode | S_IFDIR);
4297}
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308static int cgroup_has_css_refs(struct cgroup *cgrp)
4309{
4310 int i;
4311
4312
4313
4314
4315
4316
4317 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4318 struct cgroup_subsys *ss = subsys[i];
4319 struct cgroup_subsys_state *css;
4320
4321
4322 if (ss == NULL || ss->root != cgrp->root)
4323 continue;
4324
4325 css = cgrp->subsys[ss->subsys_id];
4326
4327
4328
4329
4330
4331
4332
4333
4334 if (css && css_refcnt(css) > 1)
4335 return 1;
4336 }
4337 return 0;
4338}
4339
4340static int cgroup_destroy_locked(struct cgroup *cgrp)
4341 __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
4342{
4343 struct dentry *d = cgrp->dentry;
4344 struct cgroup *parent = cgrp->parent;
4345 DEFINE_WAIT(wait);
4346 struct cgroup_event *event, *tmp;
4347 struct cgroup_subsys *ss;
4348 LIST_HEAD(tmp_list);
4349
4350 lockdep_assert_held(&d->d_inode->i_mutex);
4351 lockdep_assert_held(&cgroup_mutex);
4352
4353 if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children))
4354 return -EBUSY;
4355
4356
4357
4358
4359
4360
4361
4362 for_each_subsys(cgrp->root, ss) {
4363 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
4364
4365 WARN_ON(atomic_read(&css->refcnt) < 0);
4366 atomic_add(CSS_DEACT_BIAS, &css->refcnt);
4367 }
4368 set_bit(CGRP_REMOVED, &cgrp->flags);
4369
4370
4371 for_each_subsys(cgrp->root, ss)
4372 offline_css(ss, cgrp);
4373
4374
4375
4376
4377
4378
4379
4380
4381 for_each_subsys(cgrp->root, ss)
4382 css_put(cgrp->subsys[ss->subsys_id]);
4383
4384 raw_spin_lock(&release_list_lock);
4385 if (!list_empty(&cgrp->release_list))
4386 list_del_init(&cgrp->release_list);
4387 raw_spin_unlock(&release_list_lock);
4388
4389
4390 list_del_rcu(&cgrp->sibling);
4391 list_del_init(&cgrp->allcg_node);
4392
4393 dget(d);
4394 cgroup_d_remove_dir(d);
4395 dput(d);
4396
4397 set_bit(CGRP_RELEASABLE, &parent->flags);
4398 check_for_release(parent);
4399
4400
4401
4402
4403
4404
4405 spin_lock(&cgrp->event_list_lock);
4406 list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
4407 list_del_init(&event->list);
4408 schedule_work(&event->remove);
4409 }
4410 spin_unlock(&cgrp->event_list_lock);
4411
4412 return 0;
4413}
4414
4415static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
4416{
4417 int ret;
4418
4419 mutex_lock(&cgroup_mutex);
4420 ret = cgroup_destroy_locked(dentry->d_fsdata);
4421 mutex_unlock(&cgroup_mutex);
4422
4423 return ret;
4424}
4425
4426static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss)
4427{
4428 INIT_LIST_HEAD(&ss->cftsets);
4429
4430
4431
4432
4433
4434 if (ss->base_cftypes) {
4435 ss->base_cftset.cfts = ss->base_cftypes;
4436 list_add_tail(&ss->base_cftset.node, &ss->cftsets);
4437 }
4438}
4439
4440static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
4441{
4442 struct cgroup_subsys_state *css;
4443
4444 printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
4445
4446 mutex_lock(&cgroup_mutex);
4447
4448
4449 cgroup_init_cftsets(ss);
4450
4451
4452 list_add(&ss->sibling, &rootnode.subsys_list);
4453 ss->root = &rootnode;
4454 css = ss->css_alloc(dummytop);
4455
4456 BUG_ON(IS_ERR(css));
4457 init_cgroup_css(css, ss, dummytop);
4458
4459
4460
4461
4462
4463 init_css_set.subsys[ss->subsys_id] = css;
4464
4465 need_forkexit_callback |= ss->fork || ss->exit;
4466
4467
4468
4469
4470 BUG_ON(!list_empty(&init_task.tasks));
4471
4472 ss->active = 1;
4473 BUG_ON(online_css(ss, dummytop));
4474
4475 mutex_unlock(&cgroup_mutex);
4476
4477
4478
4479 BUG_ON(ss->module);
4480}
4481
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
4492{
4493 struct cgroup_subsys_state *css;
4494 int i, ret;
4495 struct hlist_node *tmp;
4496 struct css_set *cg;
4497 unsigned long key;
4498
4499
4500 if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
4501 ss->css_alloc == NULL || ss->css_free == NULL)
4502 return -EINVAL;
4503
4504
4505
4506
4507
4508
4509
4510 if (ss->fork || ss->exit)
4511 return -EINVAL;
4512
4513
4514
4515
4516
4517 if (ss->module == NULL) {
4518
4519 BUG_ON(subsys[ss->subsys_id] != ss);
4520 return 0;
4521 }
4522
4523
4524 cgroup_init_cftsets(ss);
4525
4526 mutex_lock(&cgroup_mutex);
4527 subsys[ss->subsys_id] = ss;
4528
4529
4530
4531
4532
4533
4534 css = ss->css_alloc(dummytop);
4535 if (IS_ERR(css)) {
4536
4537 subsys[ss->subsys_id] = NULL;
4538 mutex_unlock(&cgroup_mutex);
4539 return PTR_ERR(css);
4540 }
4541
4542 list_add(&ss->sibling, &rootnode.subsys_list);
4543 ss->root = &rootnode;
4544
4545
4546 init_cgroup_css(css, ss, dummytop);
4547
4548 if (ss->use_id) {
4549 ret = cgroup_init_idr(ss, css);
4550 if (ret)
4551 goto err_unload;
4552 }
4553
4554
4555
4556
4557
4558
4559
4560
4561
4562 write_lock(&css_set_lock);
4563 hash_for_each_safe(css_set_table, i, tmp, cg, hlist) {
4564
4565 if (cg->subsys[ss->subsys_id])
4566 continue;
4567
4568 hash_del(&cg->hlist);
4569
4570 cg->subsys[ss->subsys_id] = css;
4571
4572 key = css_set_hash(cg->subsys);
4573 hash_add(css_set_table, &cg->hlist, key);
4574 }
4575 write_unlock(&css_set_lock);
4576
4577 ss->active = 1;
4578 ret = online_css(ss, dummytop);
4579 if (ret)
4580 goto err_unload;
4581
4582
4583 mutex_unlock(&cgroup_mutex);
4584 return 0;
4585
4586err_unload:
4587 mutex_unlock(&cgroup_mutex);
4588
4589 cgroup_unload_subsys(ss);
4590 return ret;
4591}
4592EXPORT_SYMBOL_GPL(cgroup_load_subsys);
4593
4594
4595
4596
4597
4598
4599
4600
4601
4602void cgroup_unload_subsys(struct cgroup_subsys *ss)
4603{
4604 struct cg_cgroup_link *link;
4605
4606 BUG_ON(ss->module == NULL);
4607
4608
4609
4610
4611
4612
4613 BUG_ON(ss->root != &rootnode);
4614
4615 mutex_lock(&cgroup_mutex);
4616
4617 offline_css(ss, dummytop);
4618 ss->active = 0;
4619
4620 if (ss->use_id)
4621 idr_destroy(&ss->idr);
4622
4623
4624 subsys[ss->subsys_id] = NULL;
4625
4626
4627 list_del_init(&ss->sibling);
4628
4629
4630
4631
4632
4633 write_lock(&css_set_lock);
4634 list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) {
4635 struct css_set *cg = link->cg;
4636 unsigned long key;
4637
4638 hash_del(&cg->hlist);
4639 cg->subsys[ss->subsys_id] = NULL;
4640 key = css_set_hash(cg->subsys);
4641 hash_add(css_set_table, &cg->hlist, key);
4642 }
4643 write_unlock(&css_set_lock);
4644
4645
4646
4647
4648
4649
4650
4651 ss->css_free(dummytop);
4652 dummytop->subsys[ss->subsys_id] = NULL;
4653
4654 mutex_unlock(&cgroup_mutex);
4655}
4656EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
4657
4658
4659
4660
4661
4662
4663
4664int __init cgroup_init_early(void)
4665{
4666 int i;
4667 atomic_set(&init_css_set.refcount, 1);
4668 INIT_LIST_HEAD(&init_css_set.cg_links);
4669 INIT_LIST_HEAD(&init_css_set.tasks);
4670 INIT_HLIST_NODE(&init_css_set.hlist);
4671 css_set_count = 1;
4672 init_cgroup_root(&rootnode);
4673 root_count = 1;
4674 init_task.cgroups = &init_css_set;
4675
4676 init_css_set_link.cg = &init_css_set;
4677 init_css_set_link.cgrp = dummytop;
4678 list_add(&init_css_set_link.cgrp_link_list,
4679 &rootnode.top_cgroup.css_sets);
4680 list_add(&init_css_set_link.cg_link_list,
4681 &init_css_set.cg_links);
4682
4683 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4684 struct cgroup_subsys *ss = subsys[i];
4685
4686
4687 if (!ss || ss->module)
4688 continue;
4689
4690 BUG_ON(!ss->name);
4691 BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
4692 BUG_ON(!ss->css_alloc);
4693 BUG_ON(!ss->css_free);
4694 if (ss->subsys_id != i) {
4695 printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
4696 ss->name, ss->subsys_id);
4697 BUG();
4698 }
4699
4700 if (ss->early_init)
4701 cgroup_init_subsys(ss);
4702 }
4703 return 0;
4704}
4705
4706
4707
4708
4709
4710
4711
4712int __init cgroup_init(void)
4713{
4714 int err;
4715 int i;
4716 unsigned long key;
4717
4718 err = bdi_init(&cgroup_backing_dev_info);
4719 if (err)
4720 return err;
4721
4722 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4723 struct cgroup_subsys *ss = subsys[i];
4724
4725
4726 if (!ss || ss->module)
4727 continue;
4728 if (!ss->early_init)
4729 cgroup_init_subsys(ss);
4730 if (ss->use_id)
4731 cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
4732 }
4733
4734
4735 key = css_set_hash(init_css_set.subsys);
4736 hash_add(css_set_table, &init_css_set.hlist, key);
4737 BUG_ON(!init_root_id(&rootnode));
4738
4739 cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
4740 if (!cgroup_kobj) {
4741 err = -ENOMEM;
4742 goto out;
4743 }
4744
4745 err = register_filesystem(&cgroup_fs_type);
4746 if (err < 0) {
4747 kobject_put(cgroup_kobj);
4748 goto out;
4749 }
4750
4751 proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);
4752
4753out:
4754 if (err)
4755 bdi_destroy(&cgroup_backing_dev_info);
4756
4757 return err;
4758}
4759
4760
4761
4762
4763
4764
4765
4766
4767
4768
4769
4770
4771
4772
4773static int proc_cgroup_show(struct seq_file *m, void *v)
4774{
4775 struct pid *pid;
4776 struct task_struct *tsk;
4777 char *buf;
4778 int retval;
4779 struct cgroupfs_root *root;
4780
4781 retval = -ENOMEM;
4782 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
4783 if (!buf)
4784 goto out;
4785
4786 retval = -ESRCH;
4787 pid = m->private;
4788 tsk = get_pid_task(pid, PIDTYPE_PID);
4789 if (!tsk)
4790 goto out_free;
4791
4792 retval = 0;
4793
4794 mutex_lock(&cgroup_mutex);
4795
4796 for_each_active_root(root) {
4797 struct cgroup_subsys *ss;
4798 struct cgroup *cgrp;
4799 int count = 0;
4800
4801 seq_printf(m, "%d:", root->hierarchy_id);
4802 for_each_subsys(root, ss)
4803 seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
4804 if (strlen(root->name))
4805 seq_printf(m, "%sname=%s", count ? "," : "",
4806 root->name);
4807 seq_putc(m, ':');
4808 cgrp = task_cgroup_from_root(tsk, root);
4809 retval = cgroup_path(cgrp, buf, PAGE_SIZE);
4810 if (retval < 0)
4811 goto out_unlock;
4812 seq_puts(m, buf);
4813 seq_putc(m, '\n');
4814 }
4815
4816out_unlock:
4817 mutex_unlock(&cgroup_mutex);
4818 put_task_struct(tsk);
4819out_free:
4820 kfree(buf);
4821out:
4822 return retval;
4823}
4824
4825static int cgroup_open(struct inode *inode, struct file *file)
4826{
4827 struct pid *pid = PROC_I(inode)->pid;
4828 return single_open(file, proc_cgroup_show, pid);
4829}
4830
4831const struct file_operations proc_cgroup_operations = {
4832 .open = cgroup_open,
4833 .read = seq_read,
4834 .llseek = seq_lseek,
4835 .release = single_release,
4836};
4837
4838
4839static int proc_cgroupstats_show(struct seq_file *m, void *v)
4840{
4841 int i;
4842
4843 seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
4844
4845
4846
4847
4848
4849 mutex_lock(&cgroup_mutex);
4850 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4851 struct cgroup_subsys *ss = subsys[i];
4852 if (ss == NULL)
4853 continue;
4854 seq_printf(m, "%s\t%d\t%d\t%d\n",
4855 ss->name, ss->root->hierarchy_id,
4856 ss->root->number_of_cgroups, !ss->disabled);
4857 }
4858 mutex_unlock(&cgroup_mutex);
4859 return 0;
4860}
4861
4862static int cgroupstats_open(struct inode *inode, struct file *file)
4863{
4864 return single_open(file, proc_cgroupstats_show, NULL);
4865}
4866
4867static const struct file_operations proc_cgroupstats_operations = {
4868 .open = cgroupstats_open,
4869 .read = seq_read,
4870 .llseek = seq_lseek,
4871 .release = single_release,
4872};
4873
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890void cgroup_fork(struct task_struct *child)
4891{
4892 task_lock(current);
4893 child->cgroups = current->cgroups;
4894 get_css_set(child->cgroups);
4895 task_unlock(current);
4896 INIT_LIST_HEAD(&child->cg_list);
4897}
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907
4908
4909void cgroup_post_fork(struct task_struct *child)
4910{
4911 int i;
4912
4913
4914
4915
4916
4917
4918
4919
4920
4921
4922
4923
4924 if (use_task_css_set_links) {
4925 write_lock(&css_set_lock);
4926 task_lock(child);
4927 if (list_empty(&child->cg_list))
4928 list_add(&child->cg_list, &child->cgroups->tasks);
4929 task_unlock(child);
4930 write_unlock(&css_set_lock);
4931 }
4932
4933
4934
4935
4936
4937
4938 if (need_forkexit_callback) {
4939 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4940 struct cgroup_subsys *ss = subsys[i];
4941
4942
4943
4944
4945
4946
4947 if (!ss || ss->module)
4948 continue;
4949
4950 if (ss->fork)
4951 ss->fork(child);
4952 }
4953 }
4954}
4955
4956
4957
4958
4959
4960
4961
4962
4963
4964
4965
4966
4967
4968
4969
4970
4971
4972
4973
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991void cgroup_exit(struct task_struct *tsk, int run_callbacks)
4992{
4993 struct css_set *cg;
4994 int i;
4995
4996
4997
4998
4999
5000
5001 if (!list_empty(&tsk->cg_list)) {
5002 write_lock(&css_set_lock);
5003 if (!list_empty(&tsk->cg_list))
5004 list_del_init(&tsk->cg_list);
5005 write_unlock(&css_set_lock);
5006 }
5007
5008
5009 task_lock(tsk);
5010 cg = tsk->cgroups;
5011 tsk->cgroups = &init_css_set;
5012
5013 if (run_callbacks && need_forkexit_callback) {
5014 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
5015 struct cgroup_subsys *ss = subsys[i];
5016
5017
5018 if (!ss || ss->module)
5019 continue;
5020
5021 if (ss->exit) {
5022 struct cgroup *old_cgrp =
5023 rcu_dereference_raw(cg->subsys[i])->cgroup;
5024 struct cgroup *cgrp = task_cgroup(tsk, i);
5025 ss->exit(cgrp, old_cgrp, tsk);
5026 }
5027 }
5028 }
5029 task_unlock(tsk);
5030
5031 put_css_set_taskexit(cg);
5032}
5033
5034
5035
5036
5037
5038
5039
5040
5041
5042
5043
5044
5045
5046
5047int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
5048{
5049 int ret;
5050 struct cgroup *target;
5051
5052 if (cgrp == dummytop)
5053 return 1;
5054
5055 target = task_cgroup_from_root(task, cgrp->root);
5056 while (cgrp != target && cgrp!= cgrp->top_cgroup)
5057 cgrp = cgrp->parent;
5058 ret = (cgrp == target);
5059 return ret;
5060}
5061
5062static void check_for_release(struct cgroup *cgrp)
5063{
5064
5065
5066 if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
5067 && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
5068
5069
5070
5071 int need_schedule_work = 0;
5072 raw_spin_lock(&release_list_lock);
5073 if (!cgroup_is_removed(cgrp) &&
5074 list_empty(&cgrp->release_list)) {
5075 list_add(&cgrp->release_list, &release_list);
5076 need_schedule_work = 1;
5077 }
5078 raw_spin_unlock(&release_list_lock);
5079 if (need_schedule_work)
5080 schedule_work(&release_agent_work);
5081 }
5082}
5083
5084
5085bool __css_tryget(struct cgroup_subsys_state *css)
5086{
5087 while (true) {
5088 int t, v;
5089
5090 v = css_refcnt(css);
5091 t = atomic_cmpxchg(&css->refcnt, v, v + 1);
5092 if (likely(t == v))
5093 return true;
5094 else if (t < 0)
5095 return false;
5096 cpu_relax();
5097 }
5098}
5099EXPORT_SYMBOL_GPL(__css_tryget);
5100
5101
5102void __css_put(struct cgroup_subsys_state *css)
5103{
5104 struct cgroup *cgrp = css->cgroup;
5105 int v;
5106
5107 rcu_read_lock();
5108 v = css_unbias_refcnt(atomic_dec_return(&css->refcnt));
5109
5110 switch (v) {
5111 case 1:
5112 if (notify_on_release(cgrp)) {
5113 set_bit(CGRP_RELEASABLE, &cgrp->flags);
5114 check_for_release(cgrp);
5115 }
5116 break;
5117 case 0:
5118 schedule_work(&css->dput_work);
5119 break;
5120 }
5121 rcu_read_unlock();
5122}
5123EXPORT_SYMBOL_GPL(__css_put);
5124
5125
5126
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136
5137
5138
5139
5140
5141
5142
5143
5144
5145
5146
5147
5148static void cgroup_release_agent(struct work_struct *work)
5149{
5150 BUG_ON(work != &release_agent_work);
5151 mutex_lock(&cgroup_mutex);
5152 raw_spin_lock(&release_list_lock);
5153 while (!list_empty(&release_list)) {
5154 char *argv[3], *envp[3];
5155 int i;
5156 char *pathbuf = NULL, *agentbuf = NULL;
5157 struct cgroup *cgrp = list_entry(release_list.next,
5158 struct cgroup,
5159 release_list);
5160 list_del_init(&cgrp->release_list);
5161 raw_spin_unlock(&release_list_lock);
5162 pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
5163 if (!pathbuf)
5164 goto continue_free;
5165 if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
5166 goto continue_free;
5167 agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
5168 if (!agentbuf)
5169 goto continue_free;
5170
5171 i = 0;
5172 argv[i++] = agentbuf;
5173 argv[i++] = pathbuf;
5174 argv[i] = NULL;
5175
5176 i = 0;
5177
5178 envp[i++] = "HOME=/";
5179 envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
5180 envp[i] = NULL;
5181
5182
5183
5184
5185 mutex_unlock(&cgroup_mutex);
5186 call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
5187 mutex_lock(&cgroup_mutex);
5188 continue_free:
5189 kfree(pathbuf);
5190 kfree(agentbuf);
5191 raw_spin_lock(&release_list_lock);
5192 }
5193 raw_spin_unlock(&release_list_lock);
5194 mutex_unlock(&cgroup_mutex);
5195}
5196
5197static int __init cgroup_disable(char *str)
5198{
5199 int i;
5200 char *token;
5201
5202 while ((token = strsep(&str, ",")) != NULL) {
5203 if (!*token)
5204 continue;
5205 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
5206 struct cgroup_subsys *ss = subsys[i];
5207
5208
5209
5210
5211
5212
5213 if (!ss || ss->module)
5214 continue;
5215
5216 if (!strcmp(token, ss->name)) {
5217 ss->disabled = 1;
5218 printk(KERN_INFO "Disabling %s control group"
5219 " subsystem\n", ss->name);
5220 break;
5221 }
5222 }
5223 }
5224 return 1;
5225}
5226__setup("cgroup_disable=", cgroup_disable);
5227
5228
5229
5230
5231
5232
5233
5234
5235unsigned short css_id(struct cgroup_subsys_state *css)
5236{
5237 struct css_id *cssid;
5238
5239
5240
5241
5242
5243
5244 cssid = rcu_dereference_check(css->id, css_refcnt(css));
5245
5246 if (cssid)
5247 return cssid->id;
5248 return 0;
5249}
5250EXPORT_SYMBOL_GPL(css_id);
5251
5252unsigned short css_depth(struct cgroup_subsys_state *css)
5253{
5254 struct css_id *cssid;
5255
5256 cssid = rcu_dereference_check(css->id, css_refcnt(css));
5257
5258 if (cssid)
5259 return cssid->depth;
5260 return 0;
5261}
5262EXPORT_SYMBOL_GPL(css_depth);
5263
5264
5265
5266
5267
5268
5269
5270
5271
5272
5273
5274
5275
5276
5277bool css_is_ancestor(struct cgroup_subsys_state *child,
5278 const struct cgroup_subsys_state *root)
5279{
5280 struct css_id *child_id;
5281 struct css_id *root_id;
5282
5283 child_id = rcu_dereference(child->id);
5284 if (!child_id)
5285 return false;
5286 root_id = rcu_dereference(root->id);
5287 if (!root_id)
5288 return false;
5289 if (child_id->depth < root_id->depth)
5290 return false;
5291 if (child_id->stack[root_id->depth] != root_id->id)
5292 return false;
5293 return true;
5294}
5295
5296void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
5297{
5298 struct css_id *id = css->id;
5299
5300 if (!id)
5301 return;
5302
5303 BUG_ON(!ss->use_id);
5304
5305 rcu_assign_pointer(id->css, NULL);
5306 rcu_assign_pointer(css->id, NULL);
5307 spin_lock(&ss->id_lock);
5308 idr_remove(&ss->idr, id->id);
5309 spin_unlock(&ss->id_lock);
5310 kfree_rcu(id, rcu_head);
5311}
5312EXPORT_SYMBOL_GPL(free_css_id);
5313
5314
5315
5316
5317
5318
5319static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
5320{
5321 struct css_id *newid;
5322 int ret, size;
5323
5324 BUG_ON(!ss->use_id);
5325
5326 size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
5327 newid = kzalloc(size, GFP_KERNEL);
5328 if (!newid)
5329 return ERR_PTR(-ENOMEM);
5330
5331 idr_preload(GFP_KERNEL);
5332 spin_lock(&ss->id_lock);
5333
5334 ret = idr_alloc(&ss->idr, newid, 1, CSS_ID_MAX + 1, GFP_NOWAIT);
5335 spin_unlock(&ss->id_lock);
5336 idr_preload_end();
5337
5338
5339 if (ret < 0)
5340 goto err_out;
5341
5342 newid->id = ret;
5343 newid->depth = depth;
5344 return newid;
5345err_out:
5346 kfree(newid);
5347 return ERR_PTR(ret);
5348
5349}
5350
5351static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
5352 struct cgroup_subsys_state *rootcss)
5353{
5354 struct css_id *newid;
5355
5356 spin_lock_init(&ss->id_lock);
5357 idr_init(&ss->idr);
5358
5359 newid = get_new_cssid(ss, 0);
5360 if (IS_ERR(newid))
5361 return PTR_ERR(newid);
5362
5363 newid->stack[0] = newid->id;
5364 newid->css = rootcss;
5365 rootcss->id = newid;
5366 return 0;
5367}
5368
5369static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
5370 struct cgroup *child)
5371{
5372 int subsys_id, i, depth = 0;
5373 struct cgroup_subsys_state *parent_css, *child_css;
5374 struct css_id *child_id, *parent_id;
5375
5376 subsys_id = ss->subsys_id;
5377 parent_css = parent->subsys[subsys_id];
5378 child_css = child->subsys[subsys_id];
5379 parent_id = parent_css->id;
5380 depth = parent_id->depth + 1;
5381
5382 child_id = get_new_cssid(ss, depth);
5383 if (IS_ERR(child_id))
5384 return PTR_ERR(child_id);
5385
5386 for (i = 0; i < depth; i++)
5387 child_id->stack[i] = parent_id->stack[i];
5388 child_id->stack[depth] = child_id->id;
5389
5390
5391
5392
5393 rcu_assign_pointer(child_css->id, child_id);
5394
5395 return 0;
5396}
5397
5398
5399
5400
5401
5402
5403
5404
5405
5406struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
5407{
5408 struct css_id *cssid = NULL;
5409
5410 BUG_ON(!ss->use_id);
5411 cssid = idr_find(&ss->idr, id);
5412
5413 if (unlikely(!cssid))
5414 return NULL;
5415
5416 return rcu_dereference(cssid->css);
5417}
5418EXPORT_SYMBOL_GPL(css_lookup);
5419
5420
5421
5422
5423
5424
5425
5426
5427
5428
5429
5430struct cgroup_subsys_state *
5431css_get_next(struct cgroup_subsys *ss, int id,
5432 struct cgroup_subsys_state *root, int *foundid)
5433{
5434 struct cgroup_subsys_state *ret = NULL;
5435 struct css_id *tmp;
5436 int tmpid;
5437 int rootid = css_id(root);
5438 int depth = css_depth(root);
5439
5440 if (!rootid)
5441 return NULL;
5442
5443 BUG_ON(!ss->use_id);
5444 WARN_ON_ONCE(!rcu_read_lock_held());
5445
5446
5447 tmpid = id;
5448 while (1) {
5449
5450
5451
5452
5453 tmp = idr_get_next(&ss->idr, &tmpid);
5454 if (!tmp)
5455 break;
5456 if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
5457 ret = rcu_dereference(tmp->css);
5458 if (ret) {
5459 *foundid = tmpid;
5460 break;
5461 }
5462 }
5463
5464 tmpid = tmpid + 1;
5465 }
5466 return ret;
5467}
5468
5469
5470
5471
5472struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
5473{
5474 struct cgroup *cgrp;
5475 struct inode *inode;
5476 struct cgroup_subsys_state *css;
5477
5478 inode = file_inode(f);
5479
5480 if (inode->i_op != &cgroup_dir_inode_operations)
5481 return ERR_PTR(-EBADF);
5482
5483 if (id < 0 || id >= CGROUP_SUBSYS_COUNT)
5484 return ERR_PTR(-EINVAL);
5485
5486
5487 cgrp = __d_cgrp(f->f_dentry);
5488 css = cgrp->subsys[id];
5489 return css ? css : ERR_PTR(-ENOENT);
5490}
5491
5492#ifdef CONFIG_CGROUP_DEBUG
5493static struct cgroup_subsys_state *debug_css_alloc(struct cgroup *cont)
5494{
5495 struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
5496
5497 if (!css)
5498 return ERR_PTR(-ENOMEM);
5499
5500 return css;
5501}
5502
5503static void debug_css_free(struct cgroup *cont)
5504{
5505 kfree(cont->subsys[debug_subsys_id]);
5506}
5507
5508static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
5509{
5510 return atomic_read(&cont->count);
5511}
5512
5513static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft)
5514{
5515 return cgroup_task_count(cont);
5516}
5517
5518static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
5519{
5520 return (u64)(unsigned long)current->cgroups;
5521}
5522
5523static u64 current_css_set_refcount_read(struct cgroup *cont,
5524 struct cftype *cft)
5525{
5526 u64 count;
5527
5528 rcu_read_lock();
5529 count = atomic_read(¤t->cgroups->refcount);
5530 rcu_read_unlock();
5531 return count;
5532}
5533
5534static int current_css_set_cg_links_read(struct cgroup *cont,
5535 struct cftype *cft,
5536 struct seq_file *seq)
5537{
5538 struct cg_cgroup_link *link;
5539 struct css_set *cg;
5540
5541 read_lock(&css_set_lock);
5542 rcu_read_lock();
5543 cg = rcu_dereference(current->cgroups);
5544 list_for_each_entry(link, &cg->cg_links, cg_link_list) {
5545 struct cgroup *c = link->cgrp;
5546 const char *name;
5547
5548 if (c->dentry)
5549 name = c->dentry->d_name.name;
5550 else
5551 name = "?";
5552 seq_printf(seq, "Root %d group %s\n",
5553 c->root->hierarchy_id, name);
5554 }
5555 rcu_read_unlock();
5556 read_unlock(&css_set_lock);
5557 return 0;
5558}
5559
5560#define MAX_TASKS_SHOWN_PER_CSS 25
5561static int cgroup_css_links_read(struct cgroup *cont,
5562 struct cftype *cft,
5563 struct seq_file *seq)
5564{
5565 struct cg_cgroup_link *link;
5566
5567 read_lock(&css_set_lock);
5568 list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
5569 struct css_set *cg = link->cg;
5570 struct task_struct *task;
5571 int count = 0;
5572 seq_printf(seq, "css_set %p\n", cg);
5573 list_for_each_entry(task, &cg->tasks, cg_list) {
5574 if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
5575 seq_puts(seq, " ...\n");
5576 break;
5577 } else {
5578 seq_printf(seq, " task %d\n",
5579 task_pid_vnr(task));
5580 }
5581 }
5582 }
5583 read_unlock(&css_set_lock);
5584 return 0;
5585}
5586
5587static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
5588{
5589 return test_bit(CGRP_RELEASABLE, &cgrp->flags);
5590}
5591
5592static struct cftype debug_files[] = {
5593 {
5594 .name = "cgroup_refcount",
5595 .read_u64 = cgroup_refcount_read,
5596 },
5597 {
5598 .name = "taskcount",
5599 .read_u64 = debug_taskcount_read,
5600 },
5601
5602 {
5603 .name = "current_css_set",
5604 .read_u64 = current_css_set_read,
5605 },
5606
5607 {
5608 .name = "current_css_set_refcount",
5609 .read_u64 = current_css_set_refcount_read,
5610 },
5611
5612 {
5613 .name = "current_css_set_cg_links",
5614 .read_seq_string = current_css_set_cg_links_read,
5615 },
5616
5617 {
5618 .name = "cgroup_css_links",
5619 .read_seq_string = cgroup_css_links_read,
5620 },
5621
5622 {
5623 .name = "releasable",
5624 .read_u64 = releasable_read,
5625 },
5626
5627 { }
5628};
5629
5630struct cgroup_subsys debug_subsys = {
5631 .name = "debug",
5632 .css_alloc = debug_css_alloc,
5633 .css_free = debug_css_free,
5634 .subsys_id = debug_subsys_id,
5635 .base_cftypes = debug_files,
5636};
5637#endif
5638