1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include <linux/cgroup.h>
30#include <linux/cred.h>
31#include <linux/ctype.h>
32#include <linux/errno.h>
33#include <linux/fs.h>
34#include <linux/init_task.h>
35#include <linux/kernel.h>
36#include <linux/list.h>
37#include <linux/mm.h>
38#include <linux/mutex.h>
39#include <linux/mount.h>
40#include <linux/pagemap.h>
41#include <linux/proc_fs.h>
42#include <linux/rcupdate.h>
43#include <linux/sched.h>
44#include <linux/backing-dev.h>
45#include <linux/seq_file.h>
46#include <linux/slab.h>
47#include <linux/magic.h>
48#include <linux/spinlock.h>
49#include <linux/string.h>
50#include <linux/sort.h>
51#include <linux/kmod.h>
52#include <linux/module.h>
53#include <linux/delayacct.h>
54#include <linux/cgroupstats.h>
55#include <linux/hash.h>
56#include <linux/namei.h>
57#include <linux/pid_namespace.h>
58#include <linux/idr.h>
59#include <linux/vmalloc.h>
60#include <linux/eventfd.h>
61#include <linux/poll.h>
62#include <linux/flex_array.h>
63
64#include <linux/atomic.h>
65
/*
 * Global mutex for this file.  It is taken by cgroup_lock() and released by
 * cgroup_unlock(); several helpers here BUG() if called without it held.
 */
static DEFINE_MUTEX(cgroup_mutex);
67
68
69
70
71
72
73
74#define SUBSYS(_x) &_x ## _subsys,
75static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
76#include <linux/cgroup_subsys.h>
77};
78
/* Size, including the trailing NUL, of the name buffer in cgroupfs_root */
#define MAX_CGROUP_ROOT_NAMELEN 64
80
81
82
83
84
85
86struct cgroupfs_root {
87 struct super_block *sb;
88
89
90
91
92
93 unsigned long subsys_bits;
94
95
96 int hierarchy_id;
97
98
99 unsigned long actual_subsys_bits;
100
101
102 struct list_head subsys_list;
103
104
105 struct cgroup top_cgroup;
106
107
108 int number_of_cgroups;
109
110
111 struct list_head root_list;
112
113
114 unsigned long flags;
115
116
117 char release_agent_path[PATH_MAX];
118
119
120 char name[MAX_CGROUP_ROOT_NAMELEN];
121};
122
123
124
125
126
127
128static struct cgroupfs_root rootnode;
129
130
131
132
133
134#define CSS_ID_MAX (65535)
135struct css_id {
136
137
138
139
140
141
142
143 struct cgroup_subsys_state __rcu *css;
144
145
146
147 unsigned short id;
148
149
150
151 unsigned short depth;
152
153
154
155 struct rcu_head rcu_head;
156
157
158
159 unsigned short stack[0];
160};
161
162
163
164
165struct cgroup_event {
166
167
168
169 struct cgroup *cgrp;
170
171
172
173 struct cftype *cft;
174
175
176
177 struct eventfd_ctx *eventfd;
178
179
180
181 struct list_head list;
182
183
184
185
186 poll_table pt;
187 wait_queue_head_t *wqh;
188 wait_queue_t wait;
189 struct work_struct remove;
190};
191
192
193
/* Active hierarchies (cgroupfs_root.root_list) and how many there are */
static LIST_HEAD(roots);
static int root_count;

/*
 * Hierarchy id allocation state.  init_root_id() allocates from
 * hierarchy_ida starting at next_hierarchy_id; hierarchy_id_lock is held
 * around the ida calls.
 */
static DEFINE_IDA(hierarchy_ida);
static int next_hierarchy_id;
static DEFINE_SPINLOCK(hierarchy_id_lock);
200
201
/* Shorthand for the top cgroup of the static rootnode hierarchy */
#define dummytop (&rootnode.top_cgroup)
203
204
205
206
207
208
209static int need_forkexit_callback __read_mostly;
210
211#ifdef CONFIG_PROVE_LOCKING
212int cgroup_lock_is_held(void)
213{
214 return lockdep_is_held(&cgroup_mutex);
215}
216#else
217int cgroup_lock_is_held(void)
218{
219 return mutex_is_locked(&cgroup_mutex);
220}
221#endif
222
223EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
224
225
226inline int cgroup_is_removed(const struct cgroup *cgrp)
227{
228 return test_bit(CGRP_REMOVED, &cgrp->flags);
229}
230
231
/* Bit numbers for cgroupfs_root.flags */
enum {
	ROOT_NOPREFIX,	/* set by the "noprefix" mount option */
};
235
236static int cgroup_is_releasable(const struct cgroup *cgrp)
237{
238 const int bits =
239 (1 << CGRP_RELEASABLE) |
240 (1 << CGRP_NOTIFY_ON_RELEASE);
241 return (cgrp->flags & bits) == bits;
242}
243
244static int notify_on_release(const struct cgroup *cgrp)
245{
246 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
247}
248
249static int clone_children(const struct cgroup *cgrp)
250{
251 return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
252}
253
254
255
256
257
/*
 * Iterate @_ss over every subsystem on @_root's subsys_list.  @_root is
 * parenthesized so that any pointer-valued expression can be passed.
 */
#define for_each_subsys(_root, _ss) \
	list_for_each_entry(_ss, &(_root)->subsys_list, sibling)
260
261
/* Iterate @_root over every hierarchy on the global "roots" list */
#define for_each_active_root(_root) \
	list_for_each_entry(_root, &roots, root_list)
264
265
266
/*
 * Release-agent machinery: a list head with its raw spinlock, and a work
 * item whose handler is cgroup_release_agent().  The handler and
 * check_for_release() are defined elsewhere in the file; only their
 * prototypes are needed here.
 */
static LIST_HEAD(release_list);
static DEFINE_RAW_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
272
273
274struct cg_cgroup_link {
275
276
277
278
279 struct list_head cgrp_link_list;
280 struct cgroup *cgrp;
281
282
283
284
285 struct list_head cg_link_list;
286 struct css_set *cg;
287};
288
289
290
291
292
293
294
295
296static struct css_set init_css_set;
297static struct cg_cgroup_link init_css_set_link;
298
299static int cgroup_init_idr(struct cgroup_subsys *ss,
300 struct cgroup_subsys_state *css);
301
302
303
304
/*
 * css_set_lock is held (read or write) around accesses to the css_set hash
 * table and the css_set <-> cgroup link lists in the code below.
 * css_set_count is the number of hashed css_sets: it is incremented when
 * find_css_set() hashes a new set and decremented when __put_css_set()
 * unhashes one.
 */
static DEFINE_RWLOCK(css_set_lock);
static int css_set_count;
307
308
309
310
311
312
313#define CSS_SET_HASH_BITS 7
314#define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS)
315static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
316
317static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
318{
319 int i;
320 int index;
321 unsigned long tmp = 0UL;
322
323 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
324 tmp += (unsigned long)css[i];
325 tmp = (tmp >> 16) ^ tmp;
326
327 index = hash_long(tmp, CSS_SET_HASH_BITS);
328
329 return &css_set_table[index];
330}
331
332
333
334
335
336static int use_task_css_set_links __read_mostly;
337
338static void __put_css_set(struct css_set *cg, int taskexit)
339{
340 struct cg_cgroup_link *link;
341 struct cg_cgroup_link *saved_link;
342
343
344
345
346
347 if (atomic_add_unless(&cg->refcount, -1, 1))
348 return;
349 write_lock(&css_set_lock);
350 if (!atomic_dec_and_test(&cg->refcount)) {
351 write_unlock(&css_set_lock);
352 return;
353 }
354
355
356 hlist_del(&cg->hlist);
357 css_set_count--;
358
359 list_for_each_entry_safe(link, saved_link, &cg->cg_links,
360 cg_link_list) {
361 struct cgroup *cgrp = link->cgrp;
362 list_del(&link->cg_link_list);
363 list_del(&link->cgrp_link_list);
364 if (atomic_dec_and_test(&cgrp->count) &&
365 notify_on_release(cgrp)) {
366 if (taskexit)
367 set_bit(CGRP_RELEASABLE, &cgrp->flags);
368 check_for_release(cgrp);
369 }
370
371 kfree(link);
372 }
373
374 write_unlock(&css_set_lock);
375 kfree_rcu(cg, rcu_head);
376}
377
378
379
380
381static inline void get_css_set(struct css_set *cg)
382{
383 atomic_inc(&cg->refcount);
384}
385
/* Drop a reference on @cg for a reason other than task exit. */
static inline void put_css_set(struct css_set *cg)
{
	__put_css_set(cg, 0);
}
390
/* Drop a reference on @cg on behalf of an exiting task. */
static inline void put_css_set_taskexit(struct css_set *cg)
{
	__put_css_set(cg, 1);
}
395
396
397
398
399
400
401
402
403
404
405
406static bool compare_css_sets(struct css_set *cg,
407 struct css_set *old_cg,
408 struct cgroup *new_cgrp,
409 struct cgroup_subsys_state *template[])
410{
411 struct list_head *l1, *l2;
412
413 if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
414
415 return false;
416 }
417
418
419
420
421
422
423
424
425
426
427 l1 = &cg->cg_links;
428 l2 = &old_cg->cg_links;
429 while (1) {
430 struct cg_cgroup_link *cgl1, *cgl2;
431 struct cgroup *cg1, *cg2;
432
433 l1 = l1->next;
434 l2 = l2->next;
435
436 if (l1 == &cg->cg_links) {
437 BUG_ON(l2 != &old_cg->cg_links);
438 break;
439 } else {
440 BUG_ON(l2 == &old_cg->cg_links);
441 }
442
443 cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
444 cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
445 cg1 = cgl1->cgrp;
446 cg2 = cgl2->cgrp;
447
448 BUG_ON(cg1->root != cg2->root);
449
450
451
452
453
454
455
456
457 if (cg1->root == new_cgrp->root) {
458 if (cg1 != new_cgrp)
459 return false;
460 } else {
461 if (cg1 != cg2)
462 return false;
463 }
464 }
465 return true;
466}
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481static struct css_set *find_existing_css_set(
482 struct css_set *oldcg,
483 struct cgroup *cgrp,
484 struct cgroup_subsys_state *template[])
485{
486 int i;
487 struct cgroupfs_root *root = cgrp->root;
488 struct hlist_head *hhead;
489 struct hlist_node *node;
490 struct css_set *cg;
491
492
493
494
495
496
497 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
498 if (root->subsys_bits & (1UL << i)) {
499
500
501
502 template[i] = cgrp->subsys[i];
503 } else {
504
505
506 template[i] = oldcg->subsys[i];
507 }
508 }
509
510 hhead = css_set_hash(template);
511 hlist_for_each_entry(cg, node, hhead, hlist) {
512 if (!compare_css_sets(cg, oldcg, cgrp, template))
513 continue;
514
515
516 return cg;
517 }
518
519
520 return NULL;
521}
522
523static void free_cg_links(struct list_head *tmp)
524{
525 struct cg_cgroup_link *link;
526 struct cg_cgroup_link *saved_link;
527
528 list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
529 list_del(&link->cgrp_link_list);
530 kfree(link);
531 }
532}
533
534
535
536
537
538
539static int allocate_cg_links(int count, struct list_head *tmp)
540{
541 struct cg_cgroup_link *link;
542 int i;
543 INIT_LIST_HEAD(tmp);
544 for (i = 0; i < count; i++) {
545 link = kmalloc(sizeof(*link), GFP_KERNEL);
546 if (!link) {
547 free_cg_links(tmp);
548 return -ENOMEM;
549 }
550 list_add(&link->cgrp_link_list, tmp);
551 }
552 return 0;
553}
554
555
556
557
558
559
560
561static void link_css_set(struct list_head *tmp_cg_links,
562 struct css_set *cg, struct cgroup *cgrp)
563{
564 struct cg_cgroup_link *link;
565
566 BUG_ON(list_empty(tmp_cg_links));
567 link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
568 cgrp_link_list);
569 link->cg = cg;
570 link->cgrp = cgrp;
571 atomic_inc(&cgrp->count);
572 list_move(&link->cgrp_link_list, &cgrp->css_sets);
573
574
575
576
577 list_add_tail(&link->cg_link_list, &cg->cg_links);
578}
579
580
581
582
583
584
585
586
587static struct css_set *find_css_set(
588 struct css_set *oldcg, struct cgroup *cgrp)
589{
590 struct css_set *res;
591 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
592
593 struct list_head tmp_cg_links;
594
595 struct hlist_head *hhead;
596 struct cg_cgroup_link *link;
597
598
599
600 read_lock(&css_set_lock);
601 res = find_existing_css_set(oldcg, cgrp, template);
602 if (res)
603 get_css_set(res);
604 read_unlock(&css_set_lock);
605
606 if (res)
607 return res;
608
609 res = kmalloc(sizeof(*res), GFP_KERNEL);
610 if (!res)
611 return NULL;
612
613
614 if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
615 kfree(res);
616 return NULL;
617 }
618
619 atomic_set(&res->refcount, 1);
620 INIT_LIST_HEAD(&res->cg_links);
621 INIT_LIST_HEAD(&res->tasks);
622 INIT_HLIST_NODE(&res->hlist);
623
624
625
626 memcpy(res->subsys, template, sizeof(res->subsys));
627
628 write_lock(&css_set_lock);
629
630 list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
631 struct cgroup *c = link->cgrp;
632 if (c->root == cgrp->root)
633 c = cgrp;
634 link_css_set(&tmp_cg_links, res, c);
635 }
636
637 BUG_ON(!list_empty(&tmp_cg_links));
638
639 css_set_count++;
640
641
642 hhead = css_set_hash(res->subsys);
643 hlist_add_head(&res->hlist, hhead);
644
645 write_unlock(&css_set_lock);
646
647 return res;
648}
649
650
651
652
653
654static struct cgroup *task_cgroup_from_root(struct task_struct *task,
655 struct cgroupfs_root *root)
656{
657 struct css_set *css;
658 struct cgroup *res = NULL;
659
660 BUG_ON(!mutex_is_locked(&cgroup_mutex));
661 read_lock(&css_set_lock);
662
663
664
665
666
667 css = task->cgroups;
668 if (css == &init_css_set) {
669 res = &root->top_cgroup;
670 } else {
671 struct cg_cgroup_link *link;
672 list_for_each_entry(link, &css->cg_links, cg_link_list) {
673 struct cgroup *c = link->cgrp;
674 if (c->root == root) {
675 res = c;
676 break;
677 }
678 }
679 }
680 read_unlock(&css_set_lock);
681 BUG_ON(!res);
682 return res;
683}
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739void cgroup_lock(void)
740{
741 mutex_lock(&cgroup_mutex);
742}
743EXPORT_SYMBOL_GPL(cgroup_lock);
744
745
746
747
748
749
750void cgroup_unlock(void)
751{
752 mutex_unlock(&cgroup_mutex);
753}
754EXPORT_SYMBOL_GPL(cgroup_unlock);
755
756
757
758
759
760
761
762
763static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
764static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *);
765static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
766static int cgroup_populate_dir(struct cgroup *cgrp);
767static const struct inode_operations cgroup_dir_inode_operations;
768static const struct file_operations proc_cgroupstats_operations;
769
770static struct backing_dev_info cgroup_backing_dev_info = {
771 .name = "cgroup",
772 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
773};
774
/* Defined elsewhere in the file */
static int alloc_css_id(struct cgroup_subsys *ss,
			struct cgroup *parent, struct cgroup *child);
777
778static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
779{
780 struct inode *inode = new_inode(sb);
781
782 if (inode) {
783 inode->i_ino = get_next_ino();
784 inode->i_mode = mode;
785 inode->i_uid = current_fsuid();
786 inode->i_gid = current_fsgid();
787 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
788 inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
789 }
790 return inode;
791}
792
793
794
795
796
797static int cgroup_call_pre_destroy(struct cgroup *cgrp)
798{
799 struct cgroup_subsys *ss;
800 int ret = 0;
801
802 for_each_subsys(cgrp->root, ss)
803 if (ss->pre_destroy) {
804 ret = ss->pre_destroy(ss, cgrp);
805 if (ret)
806 break;
807 }
808
809 return ret;
810}
811
812static void cgroup_diput(struct dentry *dentry, struct inode *inode)
813{
814
815 if (S_ISDIR(inode->i_mode)) {
816 struct cgroup *cgrp = dentry->d_fsdata;
817 struct cgroup_subsys *ss;
818 BUG_ON(!(cgroup_is_removed(cgrp)));
819
820
821
822
823
824
825 synchronize_rcu();
826
827 mutex_lock(&cgroup_mutex);
828
829
830
831 for_each_subsys(cgrp->root, ss)
832 ss->destroy(ss, cgrp);
833
834 cgrp->root->number_of_cgroups--;
835 mutex_unlock(&cgroup_mutex);
836
837
838
839
840
841 deactivate_super(cgrp->root->sb);
842
843
844
845
846
847 BUG_ON(!list_empty(&cgrp->pidlists));
848
849 kfree_rcu(cgrp, rcu_head);
850 }
851 iput(inode);
852}
853
/*
 * cgroup_delete - d_delete callback for cgroup dentries
 *
 * Always returns 1, whatever @d is.
 */
static int cgroup_delete(const struct dentry *d)
{
	return 1;
}
858
859static void remove_dir(struct dentry *d)
860{
861 struct dentry *parent = dget(d->d_parent);
862
863 d_delete(d);
864 simple_rmdir(parent->d_inode, d);
865 dput(parent);
866}
867
868static void cgroup_clear_directory(struct dentry *dentry)
869{
870 struct list_head *node;
871
872 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
873 spin_lock(&dentry->d_lock);
874 node = dentry->d_subdirs.next;
875 while (node != &dentry->d_subdirs) {
876 struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
877
878 spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
879 list_del_init(node);
880 if (d->d_inode) {
881
882
883 BUG_ON(d->d_inode->i_mode & S_IFDIR);
884 dget_dlock(d);
885 spin_unlock(&d->d_lock);
886 spin_unlock(&dentry->d_lock);
887 d_delete(d);
888 simple_unlink(dentry->d_inode, d);
889 dput(d);
890 spin_lock(&dentry->d_lock);
891 } else
892 spin_unlock(&d->d_lock);
893 node = dentry->d_subdirs.next;
894 }
895 spin_unlock(&dentry->d_lock);
896}
897
898
899
900
901static void cgroup_d_remove_dir(struct dentry *dentry)
902{
903 struct dentry *parent;
904
905 cgroup_clear_directory(dentry);
906
907 parent = dentry->d_parent;
908 spin_lock(&parent->d_lock);
909 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
910 list_del_init(&dentry->d_u.d_child);
911 spin_unlock(&dentry->d_lock);
912 spin_unlock(&parent->d_lock);
913 remove_dir(dentry);
914}
915
916
917
918
919
920
921
922
923
/* Wait queue woken by cgroup_wakeup_rmdir_waiter() */
DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
925
926static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
927{
928 if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
929 wake_up_all(&cgroup_rmdir_waitq);
930}
931
/*
 * cgroup_exclude_rmdir - take a reference on @css via css_get()
 *
 * Pair with cgroup_release_and_wakeup_rmdir(), which drops the reference.
 */
void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
{
	css_get(css);
}
936
937void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
938{
939 cgroup_wakeup_rmdir_waiter(css->cgroup);
940 css_put(css);
941}
942
943
944
945
946
947
948static int rebind_subsystems(struct cgroupfs_root *root,
949 unsigned long final_bits)
950{
951 unsigned long added_bits, removed_bits;
952 struct cgroup *cgrp = &root->top_cgroup;
953 int i;
954
955 BUG_ON(!mutex_is_locked(&cgroup_mutex));
956
957 removed_bits = root->actual_subsys_bits & ~final_bits;
958 added_bits = final_bits & ~root->actual_subsys_bits;
959
960 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
961 unsigned long bit = 1UL << i;
962 struct cgroup_subsys *ss = subsys[i];
963 if (!(bit & added_bits))
964 continue;
965
966
967
968
969
970 BUG_ON(ss == NULL);
971 if (ss->root != &rootnode) {
972
973 return -EBUSY;
974 }
975 }
976
977
978
979
980
981 if (root->number_of_cgroups > 1)
982 return -EBUSY;
983
984
985 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
986 struct cgroup_subsys *ss = subsys[i];
987 unsigned long bit = 1UL << i;
988 if (bit & added_bits) {
989
990 BUG_ON(ss == NULL);
991 BUG_ON(cgrp->subsys[i]);
992 BUG_ON(!dummytop->subsys[i]);
993 BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
994 mutex_lock(&ss->hierarchy_mutex);
995 cgrp->subsys[i] = dummytop->subsys[i];
996 cgrp->subsys[i]->cgroup = cgrp;
997 list_move(&ss->sibling, &root->subsys_list);
998 ss->root = root;
999 if (ss->bind)
1000 ss->bind(ss, cgrp);
1001 mutex_unlock(&ss->hierarchy_mutex);
1002
1003 } else if (bit & removed_bits) {
1004
1005 BUG_ON(ss == NULL);
1006 BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
1007 BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
1008 mutex_lock(&ss->hierarchy_mutex);
1009 if (ss->bind)
1010 ss->bind(ss, dummytop);
1011 dummytop->subsys[i]->cgroup = dummytop;
1012 cgrp->subsys[i] = NULL;
1013 subsys[i]->root = &rootnode;
1014 list_move(&ss->sibling, &rootnode.subsys_list);
1015 mutex_unlock(&ss->hierarchy_mutex);
1016
1017 module_put(ss->module);
1018 } else if (bit & final_bits) {
1019
1020 BUG_ON(ss == NULL);
1021 BUG_ON(!cgrp->subsys[i]);
1022
1023
1024
1025
1026 module_put(ss->module);
1027#ifdef CONFIG_MODULE_UNLOAD
1028 BUG_ON(ss->module && !module_refcount(ss->module));
1029#endif
1030 } else {
1031
1032 BUG_ON(cgrp->subsys[i]);
1033 }
1034 }
1035 root->subsys_bits = root->actual_subsys_bits = final_bits;
1036 synchronize_rcu();
1037
1038 return 0;
1039}
1040
1041static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
1042{
1043 struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;
1044 struct cgroup_subsys *ss;
1045
1046 mutex_lock(&cgroup_mutex);
1047 for_each_subsys(root, ss)
1048 seq_printf(seq, ",%s", ss->name);
1049 if (test_bit(ROOT_NOPREFIX, &root->flags))
1050 seq_puts(seq, ",noprefix");
1051 if (strlen(root->release_agent_path))
1052 seq_printf(seq, ",release_agent=%s", root->release_agent_path);
1053 if (clone_children(&root->top_cgroup))
1054 seq_puts(seq, ",clone_children");
1055 if (strlen(root->name))
1056 seq_printf(seq, ",name=%s", root->name);
1057 mutex_unlock(&cgroup_mutex);
1058 return 0;
1059}
1060
1061struct cgroup_sb_opts {
1062 unsigned long subsys_bits;
1063 unsigned long flags;
1064 char *release_agent;
1065 bool clone_children;
1066 char *name;
1067
1068 bool none;
1069
1070 struct cgroupfs_root *new_root;
1071
1072};
1073
1074
1075
1076
1077
1078
1079
1080static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1081{
1082 char *token, *o = data;
1083 bool all_ss = false, one_ss = false;
1084 unsigned long mask = (unsigned long)-1;
1085 int i;
1086 bool module_pin_failed = false;
1087
1088 BUG_ON(!mutex_is_locked(&cgroup_mutex));
1089
1090#ifdef CONFIG_CPUSETS
1091 mask = ~(1UL << cpuset_subsys_id);
1092#endif
1093
1094 memset(opts, 0, sizeof(*opts));
1095
1096 while ((token = strsep(&o, ",")) != NULL) {
1097 if (!*token)
1098 return -EINVAL;
1099 if (!strcmp(token, "none")) {
1100
1101 opts->none = true;
1102 continue;
1103 }
1104 if (!strcmp(token, "all")) {
1105
1106 if (one_ss)
1107 return -EINVAL;
1108 all_ss = true;
1109 continue;
1110 }
1111 if (!strcmp(token, "noprefix")) {
1112 set_bit(ROOT_NOPREFIX, &opts->flags);
1113 continue;
1114 }
1115 if (!strcmp(token, "clone_children")) {
1116 opts->clone_children = true;
1117 continue;
1118 }
1119 if (!strncmp(token, "release_agent=", 14)) {
1120
1121 if (opts->release_agent)
1122 return -EINVAL;
1123 opts->release_agent =
1124 kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
1125 if (!opts->release_agent)
1126 return -ENOMEM;
1127 continue;
1128 }
1129 if (!strncmp(token, "name=", 5)) {
1130 const char *name = token + 5;
1131
1132 if (!strlen(name))
1133 return -EINVAL;
1134
1135 for (i = 0; i < strlen(name); i++) {
1136 char c = name[i];
1137 if (isalnum(c))
1138 continue;
1139 if ((c == '.') || (c == '-') || (c == '_'))
1140 continue;
1141 return -EINVAL;
1142 }
1143
1144 if (opts->name)
1145 return -EINVAL;
1146 opts->name = kstrndup(name,
1147 MAX_CGROUP_ROOT_NAMELEN - 1,
1148 GFP_KERNEL);
1149 if (!opts->name)
1150 return -ENOMEM;
1151
1152 continue;
1153 }
1154
1155 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1156 struct cgroup_subsys *ss = subsys[i];
1157 if (ss == NULL)
1158 continue;
1159 if (strcmp(token, ss->name))
1160 continue;
1161 if (ss->disabled)
1162 continue;
1163
1164
1165 if (all_ss)
1166 return -EINVAL;
1167 set_bit(i, &opts->subsys_bits);
1168 one_ss = true;
1169
1170 break;
1171 }
1172 if (i == CGROUP_SUBSYS_COUNT)
1173 return -ENOENT;
1174 }
1175
1176
1177
1178
1179
1180
1181 if (all_ss || (!one_ss && !opts->none && !opts->name)) {
1182 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1183 struct cgroup_subsys *ss = subsys[i];
1184 if (ss == NULL)
1185 continue;
1186 if (ss->disabled)
1187 continue;
1188 set_bit(i, &opts->subsys_bits);
1189 }
1190 }
1191
1192
1193
1194
1195
1196
1197
1198
1199 if (test_bit(ROOT_NOPREFIX, &opts->flags) &&
1200 (opts->subsys_bits & mask))
1201 return -EINVAL;
1202
1203
1204
1205 if (opts->subsys_bits && opts->none)
1206 return -EINVAL;
1207
1208
1209
1210
1211
1212 if (!opts->subsys_bits && !opts->name)
1213 return -EINVAL;
1214
1215
1216
1217
1218
1219
1220
1221 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
1222 unsigned long bit = 1UL << i;
1223
1224 if (!(bit & opts->subsys_bits))
1225 continue;
1226 if (!try_module_get(subsys[i]->module)) {
1227 module_pin_failed = true;
1228 break;
1229 }
1230 }
1231 if (module_pin_failed) {
1232
1233
1234
1235
1236
1237 for (i--; i >= CGROUP_BUILTIN_SUBSYS_COUNT; i--) {
1238
1239 unsigned long bit = 1UL << i;
1240
1241 if (!(bit & opts->subsys_bits))
1242 continue;
1243 module_put(subsys[i]->module);
1244 }
1245 return -ENOENT;
1246 }
1247
1248 return 0;
1249}
1250
1251static void drop_parsed_module_refcounts(unsigned long subsys_bits)
1252{
1253 int i;
1254 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
1255 unsigned long bit = 1UL << i;
1256
1257 if (!(bit & subsys_bits))
1258 continue;
1259 module_put(subsys[i]->module);
1260 }
1261}
1262
1263static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1264{
1265 int ret = 0;
1266 struct cgroupfs_root *root = sb->s_fs_info;
1267 struct cgroup *cgrp = &root->top_cgroup;
1268 struct cgroup_sb_opts opts;
1269
1270 mutex_lock(&cgrp->dentry->d_inode->i_mutex);
1271 mutex_lock(&cgroup_mutex);
1272
1273
1274 ret = parse_cgroupfs_options(data, &opts);
1275 if (ret)
1276 goto out_unlock;
1277
1278
1279 if (opts.flags != root->flags ||
1280 (opts.name && strcmp(opts.name, root->name))) {
1281 ret = -EINVAL;
1282 drop_parsed_module_refcounts(opts.subsys_bits);
1283 goto out_unlock;
1284 }
1285
1286 ret = rebind_subsystems(root, opts.subsys_bits);
1287 if (ret) {
1288 drop_parsed_module_refcounts(opts.subsys_bits);
1289 goto out_unlock;
1290 }
1291
1292
1293 cgroup_populate_dir(cgrp);
1294
1295 if (opts.release_agent)
1296 strcpy(root->release_agent_path, opts.release_agent);
1297 out_unlock:
1298 kfree(opts.release_agent);
1299 kfree(opts.name);
1300 mutex_unlock(&cgroup_mutex);
1301 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
1302 return ret;
1303}
1304
1305static const struct super_operations cgroup_ops = {
1306 .statfs = simple_statfs,
1307 .drop_inode = generic_delete_inode,
1308 .show_options = cgroup_show_options,
1309 .remount_fs = cgroup_remount,
1310};
1311
1312static void init_cgroup_housekeeping(struct cgroup *cgrp)
1313{
1314 INIT_LIST_HEAD(&cgrp->sibling);
1315 INIT_LIST_HEAD(&cgrp->children);
1316 INIT_LIST_HEAD(&cgrp->css_sets);
1317 INIT_LIST_HEAD(&cgrp->release_list);
1318 INIT_LIST_HEAD(&cgrp->pidlists);
1319 mutex_init(&cgrp->pidlist_mutex);
1320 INIT_LIST_HEAD(&cgrp->event_list);
1321 spin_lock_init(&cgrp->event_list_lock);
1322}
1323
1324static void init_cgroup_root(struct cgroupfs_root *root)
1325{
1326 struct cgroup *cgrp = &root->top_cgroup;
1327 INIT_LIST_HEAD(&root->subsys_list);
1328 INIT_LIST_HEAD(&root->root_list);
1329 root->number_of_cgroups = 1;
1330 cgrp->root = root;
1331 cgrp->top_cgroup = cgrp;
1332 init_cgroup_housekeeping(cgrp);
1333}
1334
1335static bool init_root_id(struct cgroupfs_root *root)
1336{
1337 int ret = 0;
1338
1339 do {
1340 if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL))
1341 return false;
1342 spin_lock(&hierarchy_id_lock);
1343
1344 ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
1345 &root->hierarchy_id);
1346 if (ret == -ENOSPC)
1347
1348 ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id);
1349 if (!ret) {
1350 next_hierarchy_id = root->hierarchy_id + 1;
1351 } else if (ret != -EAGAIN) {
1352
1353 BUG_ON(ret);
1354 }
1355 spin_unlock(&hierarchy_id_lock);
1356 } while (ret);
1357 return true;
1358}
1359
1360static int cgroup_test_super(struct super_block *sb, void *data)
1361{
1362 struct cgroup_sb_opts *opts = data;
1363 struct cgroupfs_root *root = sb->s_fs_info;
1364
1365
1366 if (opts->name && strcmp(opts->name, root->name))
1367 return 0;
1368
1369
1370
1371
1372
1373 if ((opts->subsys_bits || opts->none)
1374 && (opts->subsys_bits != root->subsys_bits))
1375 return 0;
1376
1377 return 1;
1378}
1379
1380static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
1381{
1382 struct cgroupfs_root *root;
1383
1384 if (!opts->subsys_bits && !opts->none)
1385 return NULL;
1386
1387 root = kzalloc(sizeof(*root), GFP_KERNEL);
1388 if (!root)
1389 return ERR_PTR(-ENOMEM);
1390
1391 if (!init_root_id(root)) {
1392 kfree(root);
1393 return ERR_PTR(-ENOMEM);
1394 }
1395 init_cgroup_root(root);
1396
1397 root->subsys_bits = opts->subsys_bits;
1398 root->flags = opts->flags;
1399 if (opts->release_agent)
1400 strcpy(root->release_agent_path, opts->release_agent);
1401 if (opts->name)
1402 strcpy(root->name, opts->name);
1403 if (opts->clone_children)
1404 set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags);
1405 return root;
1406}
1407
1408static void cgroup_drop_root(struct cgroupfs_root *root)
1409{
1410 if (!root)
1411 return;
1412
1413 BUG_ON(!root->hierarchy_id);
1414 spin_lock(&hierarchy_id_lock);
1415 ida_remove(&hierarchy_ida, root->hierarchy_id);
1416 spin_unlock(&hierarchy_id_lock);
1417 kfree(root);
1418}
1419
1420static int cgroup_set_super(struct super_block *sb, void *data)
1421{
1422 int ret;
1423 struct cgroup_sb_opts *opts = data;
1424
1425
1426 if (!opts->new_root)
1427 return -EINVAL;
1428
1429 BUG_ON(!opts->subsys_bits && !opts->none);
1430
1431 ret = set_anon_super(sb, NULL);
1432 if (ret)
1433 return ret;
1434
1435 sb->s_fs_info = opts->new_root;
1436 opts->new_root->sb = sb;
1437
1438 sb->s_blocksize = PAGE_CACHE_SIZE;
1439 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1440 sb->s_magic = CGROUP_SUPER_MAGIC;
1441 sb->s_op = &cgroup_ops;
1442
1443 return 0;
1444}
1445
1446static int cgroup_get_rootdir(struct super_block *sb)
1447{
1448 static const struct dentry_operations cgroup_dops = {
1449 .d_iput = cgroup_diput,
1450 .d_delete = cgroup_delete,
1451 };
1452
1453 struct inode *inode =
1454 cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
1455 struct dentry *dentry;
1456
1457 if (!inode)
1458 return -ENOMEM;
1459
1460 inode->i_fop = &simple_dir_operations;
1461 inode->i_op = &cgroup_dir_inode_operations;
1462
1463 inc_nlink(inode);
1464 dentry = d_alloc_root(inode);
1465 if (!dentry) {
1466 iput(inode);
1467 return -ENOMEM;
1468 }
1469 sb->s_root = dentry;
1470
1471 sb->s_d_op = &cgroup_dops;
1472 return 0;
1473}
1474
1475static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1476 int flags, const char *unused_dev_name,
1477 void *data)
1478{
1479 struct cgroup_sb_opts opts;
1480 struct cgroupfs_root *root;
1481 int ret = 0;
1482 struct super_block *sb;
1483 struct cgroupfs_root *new_root;
1484
1485
1486 mutex_lock(&cgroup_mutex);
1487 ret = parse_cgroupfs_options(data, &opts);
1488 mutex_unlock(&cgroup_mutex);
1489 if (ret)
1490 goto out_err;
1491
1492
1493
1494
1495
1496 new_root = cgroup_root_from_opts(&opts);
1497 if (IS_ERR(new_root)) {
1498 ret = PTR_ERR(new_root);
1499 goto drop_modules;
1500 }
1501 opts.new_root = new_root;
1502
1503
1504 sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
1505 if (IS_ERR(sb)) {
1506 ret = PTR_ERR(sb);
1507 cgroup_drop_root(opts.new_root);
1508 goto drop_modules;
1509 }
1510
1511 root = sb->s_fs_info;
1512 BUG_ON(!root);
1513 if (root == opts.new_root) {
1514
1515 struct list_head tmp_cg_links;
1516 struct cgroup *root_cgrp = &root->top_cgroup;
1517 struct inode *inode;
1518 struct cgroupfs_root *existing_root;
1519 const struct cred *cred;
1520 int i;
1521
1522 BUG_ON(sb->s_root != NULL);
1523
1524 ret = cgroup_get_rootdir(sb);
1525 if (ret)
1526 goto drop_new_super;
1527 inode = sb->s_root->d_inode;
1528
1529 mutex_lock(&inode->i_mutex);
1530 mutex_lock(&cgroup_mutex);
1531
1532 if (strlen(root->name)) {
1533
1534 for_each_active_root(existing_root) {
1535 if (!strcmp(existing_root->name, root->name)) {
1536 ret = -EBUSY;
1537 mutex_unlock(&cgroup_mutex);
1538 mutex_unlock(&inode->i_mutex);
1539 goto drop_new_super;
1540 }
1541 }
1542 }
1543
1544
1545
1546
1547
1548
1549
1550
1551 ret = allocate_cg_links(css_set_count, &tmp_cg_links);
1552 if (ret) {
1553 mutex_unlock(&cgroup_mutex);
1554 mutex_unlock(&inode->i_mutex);
1555 goto drop_new_super;
1556 }
1557
1558 ret = rebind_subsystems(root, root->subsys_bits);
1559 if (ret == -EBUSY) {
1560 mutex_unlock(&cgroup_mutex);
1561 mutex_unlock(&inode->i_mutex);
1562 free_cg_links(&tmp_cg_links);
1563 goto drop_new_super;
1564 }
1565
1566
1567
1568
1569
1570
1571
1572 BUG_ON(ret);
1573
1574 list_add(&root->root_list, &roots);
1575 root_count++;
1576
1577 sb->s_root->d_fsdata = root_cgrp;
1578 root->top_cgroup.dentry = sb->s_root;
1579
1580
1581
1582 write_lock(&css_set_lock);
1583 for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
1584 struct hlist_head *hhead = &css_set_table[i];
1585 struct hlist_node *node;
1586 struct css_set *cg;
1587
1588 hlist_for_each_entry(cg, node, hhead, hlist)
1589 link_css_set(&tmp_cg_links, cg, root_cgrp);
1590 }
1591 write_unlock(&css_set_lock);
1592
1593 free_cg_links(&tmp_cg_links);
1594
1595 BUG_ON(!list_empty(&root_cgrp->sibling));
1596 BUG_ON(!list_empty(&root_cgrp->children));
1597 BUG_ON(root->number_of_cgroups != 1);
1598
1599 cred = override_creds(&init_cred);
1600 cgroup_populate_dir(root_cgrp);
1601 revert_creds(cred);
1602 mutex_unlock(&cgroup_mutex);
1603 mutex_unlock(&inode->i_mutex);
1604 } else {
1605
1606
1607
1608
1609 cgroup_drop_root(opts.new_root);
1610
1611 drop_parsed_module_refcounts(opts.subsys_bits);
1612 }
1613
1614 kfree(opts.release_agent);
1615 kfree(opts.name);
1616 return dget(sb->s_root);
1617
1618 drop_new_super:
1619 deactivate_locked_super(sb);
1620 drop_modules:
1621 drop_parsed_module_refcounts(opts.subsys_bits);
1622 out_err:
1623 kfree(opts.release_agent);
1624 kfree(opts.name);
1625 return ERR_PTR(ret);
1626}
1627
1628static void cgroup_kill_sb(struct super_block *sb) {
1629 struct cgroupfs_root *root = sb->s_fs_info;
1630 struct cgroup *cgrp = &root->top_cgroup;
1631 int ret;
1632 struct cg_cgroup_link *link;
1633 struct cg_cgroup_link *saved_link;
1634
1635 BUG_ON(!root);
1636
1637 BUG_ON(root->number_of_cgroups != 1);
1638 BUG_ON(!list_empty(&cgrp->children));
1639 BUG_ON(!list_empty(&cgrp->sibling));
1640
1641 mutex_lock(&cgroup_mutex);
1642
1643
1644 ret = rebind_subsystems(root, 0);
1645
1646 BUG_ON(ret);
1647
1648
1649
1650
1651
1652 write_lock(&css_set_lock);
1653
1654 list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
1655 cgrp_link_list) {
1656 list_del(&link->cg_link_list);
1657 list_del(&link->cgrp_link_list);
1658 kfree(link);
1659 }
1660 write_unlock(&css_set_lock);
1661
1662 if (!list_empty(&root->root_list)) {
1663 list_del(&root->root_list);
1664 root_count--;
1665 }
1666
1667 mutex_unlock(&cgroup_mutex);
1668
1669 kill_litter_super(sb);
1670 cgroup_drop_root(root);
1671}
1672
1673static struct file_system_type cgroup_fs_type = {
1674 .name = "cgroup",
1675 .mount = cgroup_mount,
1676 .kill_sb = cgroup_kill_sb,
1677};
1678
/*
 * kobject associated with the cgroup core.
 * NOTE(review): not assigned anywhere in this part of the file; presumably
 * created during cgroup initialisation -- confirm against the init code.
 */
static struct kobject *cgroup_kobj;
1680
1681static inline struct cgroup *__d_cgrp(struct dentry *dentry)
1682{
1683 return dentry->d_fsdata;
1684}
1685
1686static inline struct cftype *__d_cft(struct dentry *dentry)
1687{
1688 return dentry->d_fsdata;
1689}
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1702{
1703 char *start;
1704 struct dentry *dentry = rcu_dereference_check(cgrp->dentry,
1705 cgroup_lock_is_held());
1706
1707 if (!dentry || cgrp == dummytop) {
1708
1709
1710
1711
1712 strcpy(buf, "/");
1713 return 0;
1714 }
1715
1716 start = buf + buflen;
1717
1718 *--start = '\0';
1719 for (;;) {
1720 int len = dentry->d_name.len;
1721
1722 if ((start -= len) < buf)
1723 return -ENAMETOOLONG;
1724 memcpy(start, dentry->d_name.name, len);
1725 cgrp = cgrp->parent;
1726 if (!cgrp)
1727 break;
1728
1729 dentry = rcu_dereference_check(cgrp->dentry,
1730 cgroup_lock_is_held());
1731 if (!cgrp->parent)
1732 continue;
1733 if (--start < buf)
1734 return -ENAMETOOLONG;
1735 *start = '/';
1736 }
1737 memmove(buf, start, buf + buflen - start);
1738 return 0;
1739}
1740EXPORT_SYMBOL_GPL(cgroup_path);
1741
1742
1743
1744
1745
1746
1747
1748
1749static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
1750 struct task_struct *tsk, bool guarantee)
1751{
1752 struct css_set *oldcg;
1753 struct css_set *newcg;
1754
1755
1756
1757
1758
1759
1760 task_lock(tsk);
1761 oldcg = tsk->cgroups;
1762 get_css_set(oldcg);
1763 task_unlock(tsk);
1764
1765
1766 if (guarantee) {
1767
1768 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
1769 read_lock(&css_set_lock);
1770 newcg = find_existing_css_set(oldcg, cgrp, template);
1771 BUG_ON(!newcg);
1772 get_css_set(newcg);
1773 read_unlock(&css_set_lock);
1774 } else {
1775 might_sleep();
1776
1777 newcg = find_css_set(oldcg, cgrp);
1778 if (!newcg) {
1779 put_css_set(oldcg);
1780 return -ENOMEM;
1781 }
1782 }
1783 put_css_set(oldcg);
1784
1785
1786 task_lock(tsk);
1787 if (tsk->flags & PF_EXITING) {
1788 task_unlock(tsk);
1789 put_css_set(newcg);
1790 return -ESRCH;
1791 }
1792 rcu_assign_pointer(tsk->cgroups, newcg);
1793 task_unlock(tsk);
1794
1795
1796 write_lock(&css_set_lock);
1797 if (!list_empty(&tsk->cg_list))
1798 list_move(&tsk->cg_list, &newcg->tasks);
1799 write_unlock(&css_set_lock);
1800
1801
1802
1803
1804
1805
1806 put_css_set(oldcg);
1807
1808 set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
1809 return 0;
1810}
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1821{
1822 int retval;
1823 struct cgroup_subsys *ss, *failed_ss = NULL;
1824 struct cgroup *oldcgrp;
1825 struct cgroupfs_root *root = cgrp->root;
1826
1827
1828 oldcgrp = task_cgroup_from_root(tsk, root);
1829 if (cgrp == oldcgrp)
1830 return 0;
1831
1832 for_each_subsys(root, ss) {
1833 if (ss->can_attach) {
1834 retval = ss->can_attach(ss, cgrp, tsk);
1835 if (retval) {
1836
1837
1838
1839
1840
1841
1842 failed_ss = ss;
1843 goto out;
1844 }
1845 }
1846 if (ss->can_attach_task) {
1847 retval = ss->can_attach_task(cgrp, tsk);
1848 if (retval) {
1849 failed_ss = ss;
1850 goto out;
1851 }
1852 }
1853 }
1854
1855 retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false);
1856 if (retval)
1857 goto out;
1858
1859 for_each_subsys(root, ss) {
1860 if (ss->pre_attach)
1861 ss->pre_attach(cgrp);
1862 if (ss->attach_task)
1863 ss->attach_task(cgrp, tsk);
1864 if (ss->attach)
1865 ss->attach(ss, cgrp, oldcgrp, tsk);
1866 }
1867
1868 synchronize_rcu();
1869
1870
1871
1872
1873
1874 cgroup_wakeup_rmdir_waiter(cgrp);
1875out:
1876 if (retval) {
1877 for_each_subsys(root, ss) {
1878 if (ss == failed_ss)
1879
1880
1881
1882
1883
1884
1885 break;
1886 if (ss->cancel_attach)
1887 ss->cancel_attach(ss, cgrp, tsk);
1888 }
1889 }
1890 return retval;
1891}
1892
1893
1894
1895
1896
1897
1898int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
1899{
1900 struct cgroupfs_root *root;
1901 int retval = 0;
1902
1903 cgroup_lock();
1904 for_each_active_root(root) {
1905 struct cgroup *from_cg = task_cgroup_from_root(from, root);
1906
1907 retval = cgroup_attach_task(from_cg, tsk);
1908 if (retval)
1909 break;
1910 }
1911 cgroup_unlock();
1912
1913 return retval;
1914}
1915EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
1916
1917
1918
1919
1920
1921
1922
/*
 * One node of the list of prefetched css_sets built by cgroup_attach_proc():
 * css_set_prefetch() allocates an entry per css_set it obtains, and the
 * list teardown drops the css_set reference and frees the entry.
 */
struct cg_list_entry {
	struct css_set *cg;		/* css_set this entry holds a reference on */
	struct list_head links;		/* linkage in the newcg_list */
};
1927
1928static bool css_set_check_fetched(struct cgroup *cgrp,
1929 struct task_struct *tsk, struct css_set *cg,
1930 struct list_head *newcg_list)
1931{
1932 struct css_set *newcg;
1933 struct cg_list_entry *cg_entry;
1934 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
1935
1936 read_lock(&css_set_lock);
1937 newcg = find_existing_css_set(cg, cgrp, template);
1938 if (newcg)
1939 get_css_set(newcg);
1940 read_unlock(&css_set_lock);
1941
1942
1943 if (!newcg)
1944 return false;
1945
1946 list_for_each_entry(cg_entry, newcg_list, links) {
1947 if (cg_entry->cg == newcg) {
1948 put_css_set(newcg);
1949 return true;
1950 }
1951 }
1952
1953
1954 put_css_set(newcg);
1955 return false;
1956}
1957
1958
1959
1960
1961
1962static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg,
1963 struct list_head *newcg_list)
1964{
1965 struct css_set *newcg;
1966 struct cg_list_entry *cg_entry;
1967
1968
1969 newcg = find_css_set(cg, cgrp);
1970 if (!newcg)
1971 return -ENOMEM;
1972
1973 cg_entry = kmalloc(sizeof(struct cg_list_entry), GFP_KERNEL);
1974 if (!cg_entry) {
1975 put_css_set(newcg);
1976 return -ENOMEM;
1977 }
1978 cg_entry->cg = newcg;
1979 list_add(&cg_entry->links, newcg_list);
1980 return 0;
1981}
1982
1983
1984
1985
1986
1987
1988
1989
1990
/*
 * cgroup_attach_proc - move every thread of a thread group into @cgrp.
 * @cgrp: destination cgroup
 * @leader: thread-group leader of the process to move
 *
 * Outline, as implemented below:
 *   0. snapshot the thread group into a flex_array, pinning each task;
 *   1. ask every subsystem whether the move is allowed;
 *   2. make sure a destination css_set exists for every thread, so that
 *      the migration itself is only a lookup;
 *   3. migrate each thread and run the per-task and per-group callbacks;
 *   4. tear down temporary state, cancelling on failure.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EAGAIN if @leader
 * is no longer the thread-group leader, or the error returned by a
 * subsystem's can_attach / can_attach_task callback.
 */
int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
{
	int retval, i, group_size;
	struct cgroup_subsys *ss, *failed_ss = NULL;
	/* set when failed_ss itself must also receive cancel_attach */
	bool cancel_failed_ss = false;

	struct cgroup *oldcgrp = NULL;
	struct css_set *oldcg;
	struct cgroupfs_root *root = cgrp->root;

	struct task_struct *tsk;
	struct flex_array *group;

	/* css_sets obtained in step 2; each entry holds one reference */
	struct list_head newcg_list;
	struct cg_list_entry *cg_entry, *temp_nobe;

	/*
	 * step 0: size and preallocate the snapshot array before taking
	 * tasklist_lock, so storing into it under the lock need not allocate.
	 */
	group_size = get_nr_threads(leader);

	group = flex_array_alloc(sizeof(struct task_struct *), group_size,
				 GFP_KERNEL);
	if (!group)
		return -ENOMEM;

	retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL);
	if (retval)
		goto out_free_group_list;

	/* collect the threads while the thread list cannot change */
	read_lock(&tasklist_lock);
	if (!thread_group_leader(leader)) {
		/*
		 * @leader stopped being the group leader; report -EAGAIN so
		 * the caller can look the process up again and retry.
		 */
		read_unlock(&tasklist_lock);
		retval = -EAGAIN;
		goto out_free_group_list;
	}

	tsk = leader;
	i = 0;
	do {
		/* the group must not have outgrown the preallocated array */
		BUG_ON(i >= group_size);
		get_task_struct(tsk);
		/*
		 * GFP_ATOMIC because tasklist_lock is held; the slot was
		 * preallocated above, hence the BUG_ON on failure.
		 */
		retval = flex_array_put_ptr(group, i, tsk, GFP_ATOMIC);
		BUG_ON(retval != 0);
		i++;
	} while_each_thread(leader, tsk);

	/* from here on, group_size is the number of tasks actually stored */
	group_size = i;
	read_unlock(&tasklist_lock);

	/*
	 * step 1: permission checks, per subsystem for the leader and then
	 * per thread.
	 */
	for_each_subsys(root, ss) {
		if (ss->can_attach) {
			retval = ss->can_attach(ss, cgrp, leader);
			if (retval) {
				failed_ss = ss;
				goto out_cancel_attach;
			}
		}

		if (ss->can_attach_task) {
			/* check every thread individually */
			for (i = 0; i < group_size; i++) {
				tsk = flex_array_get_ptr(group, i);
				retval = ss->can_attach_task(cgrp, tsk);
				if (retval) {
					failed_ss = ss;
					/*
					 * this subsystem's can_attach (if any)
					 * already succeeded, so it must be
					 * cancelled as well
					 */
					cancel_failed_ss = true;
					goto out_cancel_attach;
				}
			}
		}
	}

	/*
	 * step 2: prefetch the destination css_set for every thread that is
	 * not already in @cgrp, skipping css_sets already on the list.
	 */
	INIT_LIST_HEAD(&newcg_list);
	for (i = 0; i < group_size; i++) {
		tsk = flex_array_get_ptr(group, i);

		oldcgrp = task_cgroup_from_root(tsk, root);
		if (cgrp == oldcgrp)
			continue;

		/* pin the task's current css_set for the lookup */
		task_lock(tsk);
		oldcg = tsk->cgroups;
		get_css_set(oldcg);
		task_unlock(tsk);

		if (css_set_check_fetched(cgrp, tsk, oldcg, &newcg_list)) {
			/* already prefetched: just drop the pin */
			put_css_set(oldcg);
		} else {
			/* not yet on the list: obtain and record it */
			retval = css_set_prefetch(cgrp, oldcg, &newcg_list);
			put_css_set(oldcg);
			if (retval)
				goto out_list_teardown;
		}
	}

	/*
	 * step 3: the actual move.  cgroup_task_migrate() is called with
	 * guarantee=true, so the only tolerated failure is -ESRCH (task
	 * exiting), which is skipped silently.
	 */
	for_each_subsys(root, ss) {
		if (ss->pre_attach)
			ss->pre_attach(cgrp);
	}
	for (i = 0; i < group_size; i++) {
		tsk = flex_array_get_ptr(group, i);

		oldcgrp = task_cgroup_from_root(tsk, root);
		if (cgrp == oldcgrp)
			continue;

		retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true);
		if (retval == 0) {
			/* per-task callback only for tasks that really moved */
			for_each_subsys(root, ss) {
				if (ss->attach_task)
					ss->attach_task(cgrp, tsk);
			}
		} else {
			BUG_ON(retval != -ESRCH);
		}
	}

	/*
	 * Per-group callback.  NOTE(review): oldcgrp here is whatever the
	 * last loop iteration left behind (or NULL if group_size was 0), not
	 * necessarily the leader's previous cgroup.
	 */
	for_each_subsys(root, ss) {
		if (ss->attach)
			ss->attach(ss, cgrp, oldcgrp, leader);
	}

	/* step 4: success path */
	synchronize_rcu();
	cgroup_wakeup_rmdir_waiter(cgrp);
	retval = 0;
out_list_teardown:
	/* release the prefetched css_set references and their list entries */
	list_for_each_entry_safe(cg_entry, temp_nobe, &newcg_list, links) {
		list_del(&cg_entry->links);
		put_css_set(cg_entry->cg);
		kfree(cg_entry);
	}
out_cancel_attach:
	/* on failure, undo can_attach for the subsystems that accepted */
	if (retval) {
		for_each_subsys(root, ss) {
			if (ss == failed_ss) {
				if (cancel_failed_ss && ss->cancel_attach)
					ss->cancel_attach(ss, cgrp, leader);
				break;
			}
			if (ss->cancel_attach)
				ss->cancel_attach(ss, cgrp, leader);
		}
	}
	/* drop the task references taken while building the snapshot */
	for (i = 0; i < group_size; i++) {
		tsk = flex_array_get_ptr(group, i);
		put_task_struct(tsk);
	}
out_free_group_list:
	flex_array_free(group);
	return retval;
}
2192
2193
2194
2195
2196
2197
2198static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
2199{
2200 struct task_struct *tsk;
2201 const struct cred *cred = current_cred(), *tcred;
2202 int ret;
2203
2204 if (!cgroup_lock_live_group(cgrp))
2205 return -ENODEV;
2206
2207 if (pid) {
2208 rcu_read_lock();
2209 tsk = find_task_by_vpid(pid);
2210 if (!tsk) {
2211 rcu_read_unlock();
2212 cgroup_unlock();
2213 return -ESRCH;
2214 }
2215 if (threadgroup) {
2216
2217
2218
2219
2220
2221
2222 tsk = tsk->group_leader;
2223 } else if (tsk->flags & PF_EXITING) {
2224
2225 rcu_read_unlock();
2226 cgroup_unlock();
2227 return -ESRCH;
2228 }
2229
2230
2231
2232
2233
2234 tcred = __task_cred(tsk);
2235 if (cred->euid &&
2236 cred->euid != tcred->uid &&
2237 cred->euid != tcred->suid) {
2238 rcu_read_unlock();
2239 cgroup_unlock();
2240 return -EACCES;
2241 }
2242 get_task_struct(tsk);
2243 rcu_read_unlock();
2244 } else {
2245 if (threadgroup)
2246 tsk = current->group_leader;
2247 else
2248 tsk = current;
2249 get_task_struct(tsk);
2250 }
2251
2252 if (threadgroup) {
2253 threadgroup_fork_write_lock(tsk);
2254 ret = cgroup_attach_proc(cgrp, tsk);
2255 threadgroup_fork_write_unlock(tsk);
2256 } else {
2257 ret = cgroup_attach_task(cgrp, tsk);
2258 }
2259 put_task_struct(tsk);
2260 cgroup_unlock();
2261 return ret;
2262}
2263
2264static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
2265{
2266 return attach_task_by_pid(cgrp, pid, false);
2267}
2268
2269static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
2270{
2271 int ret;
2272 do {
2273
2274
2275
2276
2277
2278 ret = attach_task_by_pid(cgrp, tgid, true);
2279 } while (ret == -EAGAIN);
2280 return ret;
2281}
2282
2283
2284
2285
2286
2287
2288
2289
2290bool cgroup_lock_live_group(struct cgroup *cgrp)
2291{
2292 mutex_lock(&cgroup_mutex);
2293 if (cgroup_is_removed(cgrp)) {
2294 mutex_unlock(&cgroup_mutex);
2295 return false;
2296 }
2297 return true;
2298}
2299EXPORT_SYMBOL_GPL(cgroup_lock_live_group);
2300
2301static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
2302 const char *buffer)
2303{
2304 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
2305 if (strlen(buffer) >= PATH_MAX)
2306 return -EINVAL;
2307 if (!cgroup_lock_live_group(cgrp))
2308 return -ENODEV;
2309 strcpy(cgrp->root->release_agent_path, buffer);
2310 cgroup_unlock();
2311 return 0;
2312}
2313
2314static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
2315 struct seq_file *seq)
2316{
2317 if (!cgroup_lock_live_group(cgrp))
2318 return -ENODEV;
2319 seq_puts(seq, cgrp->root->release_agent_path);
2320 seq_putc(seq, '\n');
2321 cgroup_unlock();
2322 return 0;
2323}
2324
2325
/*
 * Size in bytes of the on-stack scratch buffers used by the generic
 * control-file read/write helpers below.
 */
#define CGROUP_LOCAL_BUFFER_SIZE 64
2327
2328static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
2329 struct file *file,
2330 const char __user *userbuf,
2331 size_t nbytes, loff_t *unused_ppos)
2332{
2333 char buffer[CGROUP_LOCAL_BUFFER_SIZE];
2334 int retval = 0;
2335 char *end;
2336
2337 if (!nbytes)
2338 return -EINVAL;
2339 if (nbytes >= sizeof(buffer))
2340 return -E2BIG;
2341 if (copy_from_user(buffer, userbuf, nbytes))
2342 return -EFAULT;
2343
2344 buffer[nbytes] = 0;
2345 if (cft->write_u64) {
2346 u64 val = simple_strtoull(strstrip(buffer), &end, 0);
2347 if (*end)
2348 return -EINVAL;
2349 retval = cft->write_u64(cgrp, cft, val);
2350 } else {
2351 s64 val = simple_strtoll(strstrip(buffer), &end, 0);
2352 if (*end)
2353 return -EINVAL;
2354 retval = cft->write_s64(cgrp, cft, val);
2355 }
2356 if (!retval)
2357 retval = nbytes;
2358 return retval;
2359}
2360
2361static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
2362 struct file *file,
2363 const char __user *userbuf,
2364 size_t nbytes, loff_t *unused_ppos)
2365{
2366 char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
2367 int retval = 0;
2368 size_t max_bytes = cft->max_write_len;
2369 char *buffer = local_buffer;
2370
2371 if (!max_bytes)
2372 max_bytes = sizeof(local_buffer) - 1;
2373 if (nbytes >= max_bytes)
2374 return -E2BIG;
2375
2376 if (nbytes >= sizeof(local_buffer)) {
2377 buffer = kmalloc(nbytes + 1, GFP_KERNEL);
2378 if (buffer == NULL)
2379 return -ENOMEM;
2380 }
2381 if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
2382 retval = -EFAULT;
2383 goto out;
2384 }
2385
2386 buffer[nbytes] = 0;
2387 retval = cft->write_string(cgrp, cft, strstrip(buffer));
2388 if (!retval)
2389 retval = nbytes;
2390out:
2391 if (buffer != local_buffer)
2392 kfree(buffer);
2393 return retval;
2394}
2395
2396static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
2397 size_t nbytes, loff_t *ppos)
2398{
2399 struct cftype *cft = __d_cft(file->f_dentry);
2400 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2401
2402 if (cgroup_is_removed(cgrp))
2403 return -ENODEV;
2404 if (cft->write)
2405 return cft->write(cgrp, cft, file, buf, nbytes, ppos);
2406 if (cft->write_u64 || cft->write_s64)
2407 return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
2408 if (cft->write_string)
2409 return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
2410 if (cft->trigger) {
2411 int ret = cft->trigger(cgrp, (unsigned int)cft->private);
2412 return ret ? ret : nbytes;
2413 }
2414 return -EINVAL;
2415}
2416
2417static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
2418 struct file *file,
2419 char __user *buf, size_t nbytes,
2420 loff_t *ppos)
2421{
2422 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2423 u64 val = cft->read_u64(cgrp, cft);
2424 int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
2425
2426 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2427}
2428
2429static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
2430 struct file *file,
2431 char __user *buf, size_t nbytes,
2432 loff_t *ppos)
2433{
2434 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2435 s64 val = cft->read_s64(cgrp, cft);
2436 int len = sprintf(tmp, "%lld\n", (long long) val);
2437
2438 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2439}
2440
2441static ssize_t cgroup_file_read(struct file *file, char __user *buf,
2442 size_t nbytes, loff_t *ppos)
2443{
2444 struct cftype *cft = __d_cft(file->f_dentry);
2445 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2446
2447 if (cgroup_is_removed(cgrp))
2448 return -ENODEV;
2449
2450 if (cft->read)
2451 return cft->read(cgrp, cft, file, buf, nbytes, ppos);
2452 if (cft->read_u64)
2453 return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
2454 if (cft->read_s64)
2455 return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
2456 return -EINVAL;
2457}
2458
2459
2460
2461
2462
2463
/*
 * Per-open state for control files served through seq_file: allocated in
 * cgroup_file_open(), read by cgroup_seqfile_show(), and freed in
 * cgroup_seqfile_release().
 */
struct cgroup_seqfile_state {
	struct cftype *cft;		/* descriptor of the opened file */
	struct cgroup *cgroup;		/* cgroup owning the file */
};
2468
2469static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
2470{
2471 struct seq_file *sf = cb->state;
2472 return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
2473}
2474
2475static int cgroup_seqfile_show(struct seq_file *m, void *arg)
2476{
2477 struct cgroup_seqfile_state *state = m->private;
2478 struct cftype *cft = state->cft;
2479 if (cft->read_map) {
2480 struct cgroup_map_cb cb = {
2481 .fill = cgroup_map_add,
2482 .state = m,
2483 };
2484 return cft->read_map(state->cgroup, cft, &cb);
2485 }
2486 return cft->read_seq_string(state->cgroup, cft, m);
2487}
2488
2489static int cgroup_seqfile_release(struct inode *inode, struct file *file)
2490{
2491 struct seq_file *seq = file->private_data;
2492 kfree(seq->private);
2493 return single_release(inode, file);
2494}
2495
2496static const struct file_operations cgroup_seqfile_operations = {
2497 .read = seq_read,
2498 .write = cgroup_file_write,
2499 .llseek = seq_lseek,
2500 .release = cgroup_seqfile_release,
2501};
2502
2503static int cgroup_file_open(struct inode *inode, struct file *file)
2504{
2505 int err;
2506 struct cftype *cft;
2507
2508 err = generic_file_open(inode, file);
2509 if (err)
2510 return err;
2511 cft = __d_cft(file->f_dentry);
2512
2513 if (cft->read_map || cft->read_seq_string) {
2514 struct cgroup_seqfile_state *state =
2515 kzalloc(sizeof(*state), GFP_USER);
2516 if (!state)
2517 return -ENOMEM;
2518 state->cft = cft;
2519 state->cgroup = __d_cgrp(file->f_dentry->d_parent);
2520 file->f_op = &cgroup_seqfile_operations;
2521 err = single_open(file, cgroup_seqfile_show, state);
2522 if (err < 0)
2523 kfree(state);
2524 } else if (cft->open)
2525 err = cft->open(inode, file);
2526 else
2527 err = 0;
2528
2529 return err;
2530}
2531
2532static int cgroup_file_release(struct inode *inode, struct file *file)
2533{
2534 struct cftype *cft = __d_cft(file->f_dentry);
2535 if (cft->release)
2536 return cft->release(inode, file);
2537 return 0;
2538}
2539
2540
2541
2542
2543static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
2544 struct inode *new_dir, struct dentry *new_dentry)
2545{
2546 if (!S_ISDIR(old_dentry->d_inode->i_mode))
2547 return -ENOTDIR;
2548 if (new_dentry->d_inode)
2549 return -EEXIST;
2550 if (old_dir != new_dir)
2551 return -EIO;
2552 return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
2553}
2554
2555static const struct file_operations cgroup_file_operations = {
2556 .read = cgroup_file_read,
2557 .write = cgroup_file_write,
2558 .llseek = generic_file_llseek,
2559 .open = cgroup_file_open,
2560 .release = cgroup_file_release,
2561};
2562
2563static const struct inode_operations cgroup_dir_inode_operations = {
2564 .lookup = cgroup_lookup,
2565 .mkdir = cgroup_mkdir,
2566 .rmdir = cgroup_rmdir,
2567 .rename = cgroup_rename,
2568};
2569
2570static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
2571{
2572 if (dentry->d_name.len > NAME_MAX)
2573 return ERR_PTR(-ENAMETOOLONG);
2574 d_add(dentry, NULL);
2575 return NULL;
2576}
2577
2578
2579
2580
2581static inline struct cftype *__file_cft(struct file *file)
2582{
2583 if (file->f_dentry->d_inode->i_fop != &cgroup_file_operations)
2584 return ERR_PTR(-EINVAL);
2585 return __d_cft(file->f_dentry);
2586}
2587
2588static int cgroup_create_file(struct dentry *dentry, mode_t mode,
2589 struct super_block *sb)
2590{
2591 struct inode *inode;
2592
2593 if (!dentry)
2594 return -ENOENT;
2595 if (dentry->d_inode)
2596 return -EEXIST;
2597
2598 inode = cgroup_new_inode(mode, sb);
2599 if (!inode)
2600 return -ENOMEM;
2601
2602 if (S_ISDIR(mode)) {
2603 inode->i_op = &cgroup_dir_inode_operations;
2604 inode->i_fop = &simple_dir_operations;
2605
2606
2607 inc_nlink(inode);
2608
2609
2610
2611 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
2612 } else if (S_ISREG(mode)) {
2613 inode->i_size = 0;
2614 inode->i_fop = &cgroup_file_operations;
2615 }
2616 d_instantiate(dentry, inode);
2617 dget(dentry);
2618 return 0;
2619}
2620
2621
2622
2623
2624
2625
2626
2627
2628static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
2629 mode_t mode)
2630{
2631 struct dentry *parent;
2632 int error = 0;
2633
2634 parent = cgrp->parent->dentry;
2635 error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
2636 if (!error) {
2637 dentry->d_fsdata = cgrp;
2638 inc_nlink(parent->d_inode);
2639 rcu_assign_pointer(cgrp->dentry, dentry);
2640 dget(dentry);
2641 }
2642 dput(dentry);
2643
2644 return error;
2645}
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656static mode_t cgroup_file_mode(const struct cftype *cft)
2657{
2658 mode_t mode = 0;
2659
2660 if (cft->mode)
2661 return cft->mode;
2662
2663 if (cft->read || cft->read_u64 || cft->read_s64 ||
2664 cft->read_map || cft->read_seq_string)
2665 mode |= S_IRUGO;
2666
2667 if (cft->write || cft->write_u64 || cft->write_s64 ||
2668 cft->write_string || cft->trigger)
2669 mode |= S_IWUSR;
2670
2671 return mode;
2672}
2673
2674int cgroup_add_file(struct cgroup *cgrp,
2675 struct cgroup_subsys *subsys,
2676 const struct cftype *cft)
2677{
2678 struct dentry *dir = cgrp->dentry;
2679 struct dentry *dentry;
2680 int error;
2681 mode_t mode;
2682
2683 char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
2684 if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
2685 strcpy(name, subsys->name);
2686 strcat(name, ".");
2687 }
2688 strcat(name, cft->name);
2689 BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
2690 dentry = lookup_one_len(name, dir, strlen(name));
2691 if (!IS_ERR(dentry)) {
2692 mode = cgroup_file_mode(cft);
2693 error = cgroup_create_file(dentry, mode | S_IFREG,
2694 cgrp->root->sb);
2695 if (!error)
2696 dentry->d_fsdata = (void *)cft;
2697 dput(dentry);
2698 } else
2699 error = PTR_ERR(dentry);
2700 return error;
2701}
2702EXPORT_SYMBOL_GPL(cgroup_add_file);
2703
2704int cgroup_add_files(struct cgroup *cgrp,
2705 struct cgroup_subsys *subsys,
2706 const struct cftype cft[],
2707 int count)
2708{
2709 int i, err;
2710 for (i = 0; i < count; i++) {
2711 err = cgroup_add_file(cgrp, subsys, &cft[i]);
2712 if (err)
2713 return err;
2714 }
2715 return 0;
2716}
2717EXPORT_SYMBOL_GPL(cgroup_add_files);
2718
2719
2720
2721
2722
2723
2724
2725int cgroup_task_count(const struct cgroup *cgrp)
2726{
2727 int count = 0;
2728 struct cg_cgroup_link *link;
2729
2730 read_lock(&css_set_lock);
2731 list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
2732 count += atomic_read(&link->cg->refcount);
2733 }
2734 read_unlock(&css_set_lock);
2735 return count;
2736}
2737
2738
2739
2740
2741
2742static void cgroup_advance_iter(struct cgroup *cgrp,
2743 struct cgroup_iter *it)
2744{
2745 struct list_head *l = it->cg_link;
2746 struct cg_cgroup_link *link;
2747 struct css_set *cg;
2748
2749
2750 do {
2751 l = l->next;
2752 if (l == &cgrp->css_sets) {
2753 it->cg_link = NULL;
2754 return;
2755 }
2756 link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
2757 cg = link->cg;
2758 } while (list_empty(&cg->tasks));
2759 it->cg_link = l;
2760 it->task = cg->tasks.next;
2761}
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772static void cgroup_enable_task_cg_lists(void)
2773{
2774 struct task_struct *p, *g;
2775 write_lock(&css_set_lock);
2776 use_task_css_set_links = 1;
2777 do_each_thread(g, p) {
2778 task_lock(p);
2779
2780
2781
2782
2783
2784 if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
2785 list_add(&p->cg_list, &p->cgroups->tasks);
2786 task_unlock(p);
2787 } while_each_thread(g, p);
2788 write_unlock(&css_set_lock);
2789}
2790
2791void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
2792{
2793
2794
2795
2796
2797
2798 if (!use_task_css_set_links)
2799 cgroup_enable_task_cg_lists();
2800
2801 read_lock(&css_set_lock);
2802 it->cg_link = &cgrp->css_sets;
2803 cgroup_advance_iter(cgrp, it);
2804}
2805
2806struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
2807 struct cgroup_iter *it)
2808{
2809 struct task_struct *res;
2810 struct list_head *l = it->task;
2811 struct cg_cgroup_link *link;
2812
2813
2814 if (!it->cg_link)
2815 return NULL;
2816 res = list_entry(l, struct task_struct, cg_list);
2817
2818 l = l->next;
2819 link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
2820 if (l == &link->cg->tasks) {
2821
2822
2823 cgroup_advance_iter(cgrp, it);
2824 } else {
2825 it->task = l;
2826 }
2827 return res;
2828}
2829
2830void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
2831{
2832 read_unlock(&css_set_lock);
2833}
2834
2835static inline int started_after_time(struct task_struct *t1,
2836 struct timespec *time,
2837 struct task_struct *t2)
2838{
2839 int start_diff = timespec_compare(&t1->start_time, time);
2840 if (start_diff > 0) {
2841 return 1;
2842 } else if (start_diff < 0) {
2843 return 0;
2844 } else {
2845
2846
2847
2848
2849
2850
2851
2852
2853 return t1 > t2;
2854 }
2855}
2856
2857
2858
2859
2860
2861
2862static inline int started_after(void *p1, void *p2)
2863{
2864 struct task_struct *t1 = p1;
2865 struct task_struct *t2 = p2;
2866 return started_after_time(t1, &t2->start_time, t2);
2867}
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
/*
 * cgroup_scan_tasks - run a callback on the tasks of a cgroup.
 * @scan: describes the cgroup (scan->cg), an optional filter
 *        (scan->test_task), the callback (scan->process_task) and an
 *        optional caller-provided heap (scan->heap).
 *
 * Tasks are gathered into a pointer heap while the cgroup iterator is
 * active and processed only after cgroup_iter_end(), so the callback runs
 * without the iterator's lock.  The gather/process cycle repeats, each
 * round only accepting tasks that sort after the marker recorded from the
 * previous round, until a round gathers nothing.
 *
 * Returns 0 on success, or the heap_init() error if a temporary heap was
 * needed and could not be set up.
 */
int cgroup_scan_tasks(struct cgroup_scanner *scan)
{
	int retval, i;
	struct cgroup_iter it;
	struct task_struct *p, *dropped;
	/* marker task/time from the previous round; compared, never dereferenced */
	struct task_struct *latest_task = NULL;
	struct ptr_heap tmp_heap;
	struct ptr_heap *heap;
	struct timespec latest_time = { 0, 0 };

	if (scan->heap) {
		/* caller supplied a heap: use it with our comparator */
		heap = scan->heap;
		heap->gt = &started_after;
	} else {
		/* otherwise set up a temporary heap sized at PAGE_SIZE */
		heap = &tmp_heap;
		retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
		if (retval)
			/* heap could not be allocated */
			return retval;
	}

 again:
	/*
	 * One gather round: empty the heap, walk every task of the cgroup
	 * and insert those that pass the filter and sort after the marker.
	 */
	heap->size = 0;
	cgroup_iter_start(scan->cg, &it);
	while ((p = cgroup_iter_next(scan->cg, &it))) {
		/* skip tasks the caller's filter rejects */
		if (scan->test_task && !scan->test_task(p, scan))
			continue;
		/* skip tasks at or before the previous round's marker */
		if (!started_after_time(p, &latest_time, latest_task))
			continue;
		dropped = heap_insert(heap, p);
		if (dropped == NULL) {
			/* p went in and nothing was displaced: pin p */
			get_task_struct(p);
		} else if (dropped != p) {
			/* p went in and displaced another task: swap the pins */
			get_task_struct(p);
			put_task_struct(dropped);
		}
		/* dropped == p: p itself was not kept, so no reference is taken */
	}
	cgroup_iter_end(scan->cg, &it);

	if (heap->size) {
		for (i = 0; i < heap->size; i++) {
			struct task_struct *q = heap->ptrs[i];
			if (i == 0) {
				/* slot 0 becomes the marker for the next round */
				latest_time = q->start_time;
				latest_task = q;
			}
			/* run the caller's callback, then drop our pin */
			scan->process_task(q, scan);
			put_task_struct(q);
		}
		/* something was processed, so there may be more: go again */
		goto again;
	}
	if (heap == &tmp_heap)
		heap_free(&tmp_heap);
	return 0;
}
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
3011static void *pidlist_allocate(int count)
3012{
3013 if (PIDLIST_TOO_LARGE(count))
3014 return vmalloc(count * sizeof(pid_t));
3015 else
3016 return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
3017}
/* Free a pid array with the deallocator matching how it was allocated. */
static void pidlist_free(void *p)
{
	if (!is_vmalloc_addr(p)) {
		kfree(p);
		return;
	}
	vfree(p);
}
3025static void *pidlist_resize(void *p, int newcount)
3026{
3027 void *newlist;
3028
3029 if (is_vmalloc_addr(p)) {
3030 newlist = vmalloc(newcount * sizeof(pid_t));
3031 if (!newlist)
3032 return NULL;
3033 memcpy(newlist, p, newcount * sizeof(pid_t));
3034 vfree(p);
3035 } else {
3036 newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL);
3037 }
3038 return newlist;
3039}
3040
3041
3042
3043
3044
3045
3046
3047
3048#define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new))
3049static int pidlist_uniq(pid_t **p, int length)
3050{
3051 int src, dest = 1;
3052 pid_t *list = *p;
3053 pid_t *newlist;
3054
3055
3056
3057
3058
3059 if (length == 0 || length == 1)
3060 return length;
3061
3062 for (src = 1; src < length; src++) {
3063
3064 while (list[src] == list[src-1]) {
3065 src++;
3066 if (src == length)
3067 goto after;
3068 }
3069
3070 list[dest] = list[src];
3071 dest++;
3072 }
3073after:
3074
3075
3076
3077
3078
3079 if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
3080 newlist = pidlist_resize(list, dest);
3081 if (newlist)
3082 *p = newlist;
3083 }
3084 return dest;
3085}
3086
/*
 * cmppid - three-way comparison of two pid_t values for sort().
 * @a: pointer to the first pid
 * @b: pointer to the second pid
 *
 * Returns a negative value, zero or a positive value if *@a is less than,
 * equal to or greater than *@b.  Comparisons are used instead of a
 * subtraction so the result cannot overflow, and the const qualifier of
 * the arguments is preserved.
 */
static int cmppid(const void *a, const void *b)
{
	const pid_t lhs = *(const pid_t *)a;
	const pid_t rhs = *(const pid_t *)b;

	return (lhs > rhs) - (lhs < rhs);
}
3091
3092
3093
3094
3095
3096
3097
3098static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
3099 enum cgroup_filetype type)
3100{
3101 struct cgroup_pidlist *l;
3102
3103 struct pid_namespace *ns = current->nsproxy->pid_ns;
3104
3105
3106
3107
3108
3109
3110
3111 mutex_lock(&cgrp->pidlist_mutex);
3112 list_for_each_entry(l, &cgrp->pidlists, links) {
3113 if (l->key.type == type && l->key.ns == ns) {
3114
3115 down_write(&l->mutex);
3116 mutex_unlock(&cgrp->pidlist_mutex);
3117 return l;
3118 }
3119 }
3120
3121 l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
3122 if (!l) {
3123 mutex_unlock(&cgrp->pidlist_mutex);
3124 return l;
3125 }
3126 init_rwsem(&l->mutex);
3127 down_write(&l->mutex);
3128 l->key.type = type;
3129 l->key.ns = get_pid_ns(ns);
3130 l->use_count = 0;
3131 l->list = NULL;
3132 l->owner = cgrp;
3133 list_add(&l->links, &cgrp->pidlists);
3134 mutex_unlock(&cgrp->pidlist_mutex);
3135 return l;
3136}
3137
3138
3139
3140
3141static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
3142 struct cgroup_pidlist **lp)
3143{
3144 pid_t *array;
3145 int length;
3146 int pid, n = 0;
3147 struct cgroup_iter it;
3148 struct task_struct *tsk;
3149 struct cgroup_pidlist *l;
3150
3151
3152
3153
3154
3155
3156
3157 length = cgroup_task_count(cgrp);
3158 array = pidlist_allocate(length);
3159 if (!array)
3160 return -ENOMEM;
3161
3162 cgroup_iter_start(cgrp, &it);
3163 while ((tsk = cgroup_iter_next(cgrp, &it))) {
3164 if (unlikely(n == length))
3165 break;
3166
3167 if (type == CGROUP_FILE_PROCS)
3168 pid = task_tgid_vnr(tsk);
3169 else
3170 pid = task_pid_vnr(tsk);
3171 if (pid > 0)
3172 array[n++] = pid;
3173 }
3174 cgroup_iter_end(cgrp, &it);
3175 length = n;
3176
3177 sort(array, length, sizeof(pid_t), cmppid, NULL);
3178 if (type == CGROUP_FILE_PROCS)
3179 length = pidlist_uniq(&array, length);
3180 l = cgroup_pidlist_find(cgrp, type);
3181 if (!l) {
3182 pidlist_free(array);
3183 return -ENOMEM;
3184 }
3185
3186 pidlist_free(l->list);
3187 l->list = array;
3188 l->length = length;
3189 l->use_count++;
3190 up_write(&l->mutex);
3191 *lp = l;
3192 return 0;
3193}
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
3205{
3206 int ret = -EINVAL;
3207 struct cgroup *cgrp;
3208 struct cgroup_iter it;
3209 struct task_struct *tsk;
3210
3211
3212
3213
3214
3215 if (dentry->d_sb->s_op != &cgroup_ops ||
3216 !S_ISDIR(dentry->d_inode->i_mode))
3217 goto err;
3218
3219 ret = 0;
3220 cgrp = dentry->d_fsdata;
3221
3222 cgroup_iter_start(cgrp, &it);
3223 while ((tsk = cgroup_iter_next(cgrp, &it))) {
3224 switch (tsk->state) {
3225 case TASK_RUNNING:
3226 stats->nr_running++;
3227 break;
3228 case TASK_INTERRUPTIBLE:
3229 stats->nr_sleeping++;
3230 break;
3231 case TASK_UNINTERRUPTIBLE:
3232 stats->nr_uninterruptible++;
3233 break;
3234 case TASK_STOPPED:
3235 stats->nr_stopped++;
3236 break;
3237 default:
3238 if (delayacct_is_task_waiting_on_io(tsk))
3239 stats->nr_io_wait++;
3240 break;
3241 }
3242 }
3243 cgroup_iter_end(cgrp, &it);
3244
3245err:
3246 return ret;
3247}
3248
3249
3250
3251
3252
3253
3254
3255
3256static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
3257{
3258
3259
3260
3261
3262
3263
3264 struct cgroup_pidlist *l = s->private;
3265 int index = 0, pid = *pos;
3266 int *iter;
3267
3268 down_read(&l->mutex);
3269 if (pid) {
3270 int end = l->length;
3271
3272 while (index < end) {
3273 int mid = (index + end) / 2;
3274 if (l->list[mid] == pid) {
3275 index = mid;
3276 break;
3277 } else if (l->list[mid] <= pid)
3278 index = mid + 1;
3279 else
3280 end = mid;
3281 }
3282 }
3283
3284 if (index >= l->length)
3285 return NULL;
3286
3287 iter = l->list + index;
3288 *pos = *iter;
3289 return iter;
3290}
3291
3292static void cgroup_pidlist_stop(struct seq_file *s, void *v)
3293{
3294 struct cgroup_pidlist *l = s->private;
3295 up_read(&l->mutex);
3296}
3297
3298static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
3299{
3300 struct cgroup_pidlist *l = s->private;
3301 pid_t *p = v;
3302 pid_t *end = l->list + l->length;
3303
3304
3305
3306
3307 p++;
3308 if (p >= end) {
3309 return NULL;
3310 } else {
3311 *pos = *p;
3312 return p;
3313 }
3314}
3315
/* seq_file ->show: print the pid that @v points at, one per line. */
static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
	int *entry = v;

	return seq_printf(s, "%d\n", *entry);
}
3320
3321
3322
3323
3324
3325static const struct seq_operations cgroup_pidlist_seq_operations = {
3326 .start = cgroup_pidlist_start,
3327 .stop = cgroup_pidlist_stop,
3328 .next = cgroup_pidlist_next,
3329 .show = cgroup_pidlist_show,
3330};
3331
3332static void cgroup_release_pid_array(struct cgroup_pidlist *l)
3333{
3334
3335
3336
3337
3338
3339
3340 mutex_lock(&l->owner->pidlist_mutex);
3341 down_write(&l->mutex);
3342 BUG_ON(!l->use_count);
3343 if (!--l->use_count) {
3344
3345 list_del(&l->links);
3346 mutex_unlock(&l->owner->pidlist_mutex);
3347 pidlist_free(l->list);
3348 put_pid_ns(l->key.ns);
3349 up_write(&l->mutex);
3350 kfree(l);
3351 return;
3352 }
3353 mutex_unlock(&l->owner->pidlist_mutex);
3354 up_write(&l->mutex);
3355}
3356
3357static int cgroup_pidlist_release(struct inode *inode, struct file *file)
3358{
3359 struct cgroup_pidlist *l;
3360 if (!(file->f_mode & FMODE_READ))
3361 return 0;
3362
3363
3364
3365
3366 l = ((struct seq_file *)file->private_data)->private;
3367 cgroup_release_pid_array(l);
3368 return seq_release(inode, file);
3369}
3370
3371static const struct file_operations cgroup_pidlist_operations = {
3372 .read = seq_read,
3373 .llseek = seq_lseek,
3374 .write = cgroup_file_write,
3375 .release = cgroup_pidlist_release,
3376};
3377
3378
3379
3380
3381
3382
3383
3384static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
3385{
3386 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
3387 struct cgroup_pidlist *l;
3388 int retval;
3389
3390
3391 if (!(file->f_mode & FMODE_READ))
3392 return 0;
3393
3394
3395 retval = pidlist_array_load(cgrp, type, &l);
3396 if (retval)
3397 return retval;
3398
3399 file->f_op = &cgroup_pidlist_operations;
3400
3401 retval = seq_open(file, &cgroup_pidlist_seq_operations);
3402 if (retval) {
3403 cgroup_release_pid_array(l);
3404 return retval;
3405 }
3406 ((struct seq_file *)file->private_data)->private = l;
3407 return 0;
3408}
3409static int cgroup_tasks_open(struct inode *unused, struct file *file)
3410{
3411 return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
3412}
3413static int cgroup_procs_open(struct inode *unused, struct file *file)
3414{
3415 return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
3416}
3417
3418static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
3419 struct cftype *cft)
3420{
3421 return notify_on_release(cgrp);
3422}
3423
3424static int cgroup_write_notify_on_release(struct cgroup *cgrp,
3425 struct cftype *cft,
3426 u64 val)
3427{
3428 clear_bit(CGRP_RELEASABLE, &cgrp->flags);
3429 if (val)
3430 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3431 else
3432 clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3433 return 0;
3434}
3435
3436
3437
3438
3439
3440
3441static void cgroup_event_remove(struct work_struct *work)
3442{
3443 struct cgroup_event *event = container_of(work, struct cgroup_event,
3444 remove);
3445 struct cgroup *cgrp = event->cgrp;
3446
3447 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
3448
3449 eventfd_ctx_put(event->eventfd);
3450 kfree(event);
3451 dput(cgrp->dentry);
3452}
3453
3454
3455
3456
3457
3458
3459static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
3460 int sync, void *key)
3461{
3462 struct cgroup_event *event = container_of(wait,
3463 struct cgroup_event, wait);
3464 struct cgroup *cgrp = event->cgrp;
3465 unsigned long flags = (unsigned long)key;
3466
3467 if (flags & POLLHUP) {
3468 __remove_wait_queue(event->wqh, &event->wait);
3469 spin_lock(&cgrp->event_list_lock);
3470 list_del(&event->list);
3471 spin_unlock(&cgrp->event_list_lock);
3472
3473
3474
3475
3476 schedule_work(&event->remove);
3477 }
3478
3479 return 0;
3480}
3481
3482static void cgroup_event_ptable_queue_proc(struct file *file,
3483 wait_queue_head_t *wqh, poll_table *pt)
3484{
3485 struct cgroup_event *event = container_of(pt,
3486 struct cgroup_event, pt);
3487
3488 event->wqh = wqh;
3489 add_wait_queue(wqh, &event->wait);
3490}
3491
3492
3493
3494
3495
3496
3497
3498static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
3499 const char *buffer)
3500{
3501 struct cgroup_event *event = NULL;
3502 unsigned int efd, cfd;
3503 struct file *efile = NULL;
3504 struct file *cfile = NULL;
3505 char *endp;
3506 int ret;
3507
3508 efd = simple_strtoul(buffer, &endp, 10);
3509 if (*endp != ' ')
3510 return -EINVAL;
3511 buffer = endp + 1;
3512
3513 cfd = simple_strtoul(buffer, &endp, 10);
3514 if ((*endp != ' ') && (*endp != '\0'))
3515 return -EINVAL;
3516 buffer = endp + 1;
3517
3518 event = kzalloc(sizeof(*event), GFP_KERNEL);
3519 if (!event)
3520 return -ENOMEM;
3521 event->cgrp = cgrp;
3522 INIT_LIST_HEAD(&event->list);
3523 init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
3524 init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
3525 INIT_WORK(&event->remove, cgroup_event_remove);
3526
3527 efile = eventfd_fget(efd);
3528 if (IS_ERR(efile)) {
3529 ret = PTR_ERR(efile);
3530 goto fail;
3531 }
3532
3533 event->eventfd = eventfd_ctx_fileget(efile);
3534 if (IS_ERR(event->eventfd)) {
3535 ret = PTR_ERR(event->eventfd);
3536 goto fail;
3537 }
3538
3539 cfile = fget(cfd);
3540 if (!cfile) {
3541 ret = -EBADF;
3542 goto fail;
3543 }
3544
3545
3546
3547 ret = inode_permission(cfile->f_path.dentry->d_inode, MAY_READ);
3548 if (ret < 0)
3549 goto fail;
3550
3551 event->cft = __file_cft(cfile);
3552 if (IS_ERR(event->cft)) {
3553 ret = PTR_ERR(event->cft);
3554 goto fail;
3555 }
3556
3557 if (!event->cft->register_event || !event->cft->unregister_event) {
3558 ret = -EINVAL;
3559 goto fail;
3560 }
3561
3562 ret = event->cft->register_event(cgrp, event->cft,
3563 event->eventfd, buffer);
3564 if (ret)
3565 goto fail;
3566
3567 if (efile->f_op->poll(efile, &event->pt) & POLLHUP) {
3568 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
3569 ret = 0;
3570 goto fail;
3571 }
3572
3573
3574
3575
3576
3577
3578 dget(cgrp->dentry);
3579
3580 spin_lock(&cgrp->event_list_lock);
3581 list_add(&event->list, &cgrp->event_list);
3582 spin_unlock(&cgrp->event_list_lock);
3583
3584 fput(cfile);
3585 fput(efile);
3586
3587 return 0;
3588
3589fail:
3590 if (cfile)
3591 fput(cfile);
3592
3593 if (event && event->eventfd && !IS_ERR(event->eventfd))
3594 eventfd_ctx_put(event->eventfd);
3595
3596 if (!IS_ERR_OR_NULL(efile))
3597 fput(efile);
3598
3599 kfree(event);
3600
3601 return ret;
3602}
3603
3604static u64 cgroup_clone_children_read(struct cgroup *cgrp,
3605 struct cftype *cft)
3606{
3607 return clone_children(cgrp);
3608}
3609
3610static int cgroup_clone_children_write(struct cgroup *cgrp,
3611 struct cftype *cft,
3612 u64 val)
3613{
3614 if (val)
3615 set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3616 else
3617 clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3618 return 0;
3619}
3620
3621
3622
3623
3624
3625#define CGROUP_FILE_GENERIC_PREFIX "cgroup."
3626static struct cftype files[] = {
3627 {
3628 .name = "tasks",
3629 .open = cgroup_tasks_open,
3630 .write_u64 = cgroup_tasks_write,
3631 .release = cgroup_pidlist_release,
3632 .mode = S_IRUGO | S_IWUSR,
3633 },
3634 {
3635 .name = CGROUP_FILE_GENERIC_PREFIX "procs",
3636 .open = cgroup_procs_open,
3637 .write_u64 = cgroup_procs_write,
3638 .release = cgroup_pidlist_release,
3639 .mode = S_IRUGO | S_IWUSR,
3640 },
3641 {
3642 .name = "notify_on_release",
3643 .read_u64 = cgroup_read_notify_on_release,
3644 .write_u64 = cgroup_write_notify_on_release,
3645 },
3646 {
3647 .name = CGROUP_FILE_GENERIC_PREFIX "event_control",
3648 .write_string = cgroup_write_event_control,
3649 .mode = S_IWUGO,
3650 },
3651 {
3652 .name = "cgroup.clone_children",
3653 .read_u64 = cgroup_clone_children_read,
3654 .write_u64 = cgroup_clone_children_write,
3655 },
3656};
3657
3658static struct cftype cft_release_agent = {
3659 .name = "release_agent",
3660 .read_seq_string = cgroup_release_agent_show,
3661 .write_string = cgroup_release_agent_write,
3662 .max_write_len = PATH_MAX,
3663};
3664
3665static int cgroup_populate_dir(struct cgroup *cgrp)
3666{
3667 int err;
3668 struct cgroup_subsys *ss;
3669
3670
3671 cgroup_clear_directory(cgrp->dentry);
3672
3673 err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
3674 if (err < 0)
3675 return err;
3676
3677 if (cgrp == cgrp->top_cgroup) {
3678 if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0)
3679 return err;
3680 }
3681
3682 for_each_subsys(cgrp->root, ss) {
3683 if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
3684 return err;
3685 }
3686
3687 for_each_subsys(cgrp->root, ss) {
3688 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
3689
3690
3691
3692
3693
3694 if (css->id)
3695 rcu_assign_pointer(css->id->css, css);
3696 }
3697
3698 return 0;
3699}
3700
3701static void init_cgroup_css(struct cgroup_subsys_state *css,
3702 struct cgroup_subsys *ss,
3703 struct cgroup *cgrp)
3704{
3705 css->cgroup = cgrp;
3706 atomic_set(&css->refcnt, 1);
3707 css->flags = 0;
3708 css->id = NULL;
3709 if (cgrp == dummytop)
3710 set_bit(CSS_ROOT, &css->flags);
3711 BUG_ON(cgrp->subsys[ss->subsys_id]);
3712 cgrp->subsys[ss->subsys_id] = css;
3713}
3714
3715static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
3716{
3717
3718 int i;
3719
3720
3721
3722
3723
3724 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3725 struct cgroup_subsys *ss = subsys[i];
3726 if (ss == NULL)
3727 continue;
3728 if (ss->root == root)
3729 mutex_lock(&ss->hierarchy_mutex);
3730 }
3731}
3732
3733static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
3734{
3735 int i;
3736
3737 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3738 struct cgroup_subsys *ss = subsys[i];
3739 if (ss == NULL)
3740 continue;
3741 if (ss->root == root)
3742 mutex_unlock(&ss->hierarchy_mutex);
3743 }
3744}
3745
3746
3747
3748
3749
3750
3751
3752
3753
/*
 * cgroup_create - create a child cgroup of @parent
 * @parent: cgroup that will own the new child
 * @dentry: dentry for the new cgroup directory
 * @mode: mode passed on to cgroup_create_dir()
 *
 * Allocates the cgroup, lets every subsystem attached to the hierarchy
 * create its state, links the cgroup under @parent and creates and
 * populates its directory.
 *
 * Returns 0 on success, -ENOMEM if the cgroup cannot be allocated, or the
 * error from a subsystem's ->create, alloc_css_id() or cgroup_create_dir().
 */
static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
			     mode_t mode)
{
	struct cgroup *cgrp;
	struct cgroupfs_root *root = parent->root;
	int err = 0;
	struct cgroup_subsys *ss;
	struct super_block *sb = root->sb;

	cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
	if (!cgrp)
		return -ENOMEM;

	/*
	 * Take an active reference on the superblock; the error path gives
	 * it back with deactivate_super().  The success path keeps it.
	 */
	atomic_inc(&sb->s_active);

	mutex_lock(&cgroup_mutex);

	init_cgroup_housekeeping(cgrp);

	cgrp->parent = parent;
	cgrp->root = parent->root;
	cgrp->top_cgroup = parent->top_cgroup;

	/* the child inherits these two flags from its parent */
	if (notify_on_release(parent))
		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);

	if (clone_children(parent))
		set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);

	for_each_subsys(root, ss) {
		struct cgroup_subsys_state *css = ss->create(ss, cgrp);

		if (IS_ERR(css)) {
			err = PTR_ERR(css);
			goto err_destroy;
		}
		init_cgroup_css(css, ss, cgrp);
		if (ss->use_id) {
			err = alloc_css_id(ss, parent, cgrp);
			if (err)
				goto err_destroy;
		}
		/* post_clone runs only when the parent has clone_children set */
		if (clone_children(parent) && ss->post_clone)
			ss->post_clone(ss, cgrp);
	}

	cgroup_lock_hierarchy(root);
	list_add(&cgrp->sibling, &cgrp->parent->children);
	cgroup_unlock_hierarchy(root);
	root->number_of_cgroups++;

	err = cgroup_create_dir(cgrp, dentry, mode);
	if (err < 0)
		goto err_remove;

	/*
	 * The new directory's i_mutex must be held at this point; it is
	 * released below.  NOTE(review): presumably acquired inside
	 * cgroup_create_dir() - confirm.
	 */
	BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));

	/*
	 * The result is stored in err but not acted upon: the function
	 * returns 0 below even if populating the directory failed.
	 */
	err = cgroup_populate_dir(cgrp);

	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);

	return 0;

 err_remove:

	/* undo the linking into the parent's children list */
	cgroup_lock_hierarchy(root);
	list_del(&cgrp->sibling);
	cgroup_unlock_hierarchy(root);
	root->number_of_cgroups--;

 err_destroy:

	/* destroy only the subsystem states that were actually installed */
	for_each_subsys(root, ss) {
		if (cgrp->subsys[ss->subsys_id])
			ss->destroy(ss, cgrp);
	}

	mutex_unlock(&cgroup_mutex);

	/* drop the superblock reference taken above */
	deactivate_super(sb);

	kfree(cgrp);
	return err;
}
3848
3849static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
3850{
3851 struct cgroup *c_parent = dentry->d_parent->d_fsdata;
3852
3853
3854 return cgroup_create(c_parent, dentry, mode | S_IFDIR);
3855}
3856
3857static int cgroup_has_css_refs(struct cgroup *cgrp)
3858{
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868 int i;
3869
3870
3871
3872
3873
3874 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3875 struct cgroup_subsys *ss = subsys[i];
3876 struct cgroup_subsys_state *css;
3877
3878 if (ss == NULL || ss->root != cgrp->root)
3879 continue;
3880 css = cgrp->subsys[ss->subsys_id];
3881
3882
3883
3884
3885
3886
3887 if (css && (atomic_read(&css->refcnt) > 1))
3888 return 1;
3889 }
3890 return 0;
3891}
3892
3893
3894
3895
3896
3897
3898
/*
 * Try to drop the refcount of every subsystem state of @cgrp from 1 to 0
 * in a single pass, with local interrupts disabled.
 *
 * If any css has a refcount above 1 the attempt is abandoned and every
 * css already moved to 0 is put back to 1.  If all succeed, each css is
 * marked CSS_REMOVED.
 *
 * Returns non-zero (true) on success, 0 if some css was still referenced.
 */
static int cgroup_clear_css_refs(struct cgroup *cgrp)
{
	struct cgroup_subsys *ss;
	unsigned long flags;
	bool failed = false;
	local_irq_save(flags);
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		int refcnt;
		while (1) {
			/* bail out if someone still holds a reference */
			refcnt = atomic_read(&css->refcnt);
			if (refcnt > 1) {
				failed = true;
				goto done;
			}
			BUG_ON(!refcnt);
			/*
			 * refcnt is exactly 1 here.  Swap it to 0; if the
			 * value changed underneath us, re-read and retry.
			 */
			if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt)
				break;
			cpu_relax();
		}
	}
 done:
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		if (failed) {
			/*
			 * Roll back: restore the base count on every css
			 * that was zeroed before the failure was detected.
			 */
			if (!atomic_read(&css->refcnt))
				atomic_set(&css->refcnt, 1);
		} else {
			/* commit: all counts are zero, flag the css removed */
			set_bit(CSS_REMOVED, &css->flags);
		}
	}
	local_irq_restore(flags);
	return !failed;
}
3945
/*
 * rmdir handler for a cgroup directory.
 *
 * Returns 0 on success, -EBUSY if the cgroup still has a non-zero count
 * or child cgroups, -EINTR if a signal is pending after waiting for css
 * references to drain, or the error from cgroup_call_pre_destroy().
 */
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
{
	struct cgroup *cgrp = dentry->d_fsdata;
	struct dentry *d;
	struct cgroup *parent;
	DEFINE_WAIT(wait);
	struct cgroup_event *event, *tmp;
	int ret;

	/* the whole sequence restarts from here after a wait */
again:
	mutex_lock(&cgroup_mutex);
	if (atomic_read(&cgrp->count) != 0) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	if (!list_empty(&cgrp->children)) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	mutex_unlock(&cgroup_mutex);

	/*
	 * Announce that an rmdir is in progress before calling the
	 * pre_destroy hooks, which run without cgroup_mutex held.  The bit
	 * is tested again below before sleeping.
	 */
	set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	ret = cgroup_call_pre_destroy(cgrp);
	if (ret) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		return ret;
	}

	/* re-check under the mutex: things may have changed meanwhile */
	mutex_lock(&cgroup_mutex);
	parent = cgrp->parent;
	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
	if (!cgroup_clear_css_refs(cgrp)) {
		mutex_unlock(&cgroup_mutex);
		/*
		 * Some css is still referenced.  Sleep only if the wait bit
		 * is still set; if it was cleared in the meantime, retry
		 * right away.
		 */
		if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
			schedule();
		finish_wait(&cgroup_rmdir_waitq, &wait);
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		if (signal_pending(current))
			return -EINTR;
		goto again;
	}
	/* all css refcounts were dropped; no need to wait */
	finish_wait(&cgroup_rmdir_waitq, &wait);
	clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	/* mark the cgroup removed and take it off the release list */
	raw_spin_lock(&release_list_lock);
	set_bit(CGRP_REMOVED, &cgrp->flags);
	if (!list_empty(&cgrp->release_list))
		list_del_init(&cgrp->release_list);
	raw_spin_unlock(&release_list_lock);

	cgroup_lock_hierarchy(cgrp->root);
	/* unlink from the parent's list of children */
	list_del_init(&cgrp->sibling);
	cgroup_unlock_hierarchy(cgrp->root);

	/* hold the dentry across the directory removal */
	d = dget(cgrp->dentry);

	cgroup_d_remove_dir(d);
	dput(d);

	/* the parent may now have become releasable */
	set_bit(CGRP_RELEASABLE, &parent->flags);
	check_for_release(parent);

	/*
	 * Detach every registered event: unlink it, remove it from its
	 * wait queue, signal its eventfd once and defer the remaining
	 * cleanup to the event's work item.
	 */
	spin_lock(&cgrp->event_list_lock);
	list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
		list_del(&event->list);
		remove_wait_queue(event->wqh, &event->wait);
		eventfd_signal(event->eventfd, 1);
		schedule_work(&event->remove);
	}
	spin_unlock(&cgrp->event_list_lock);

	mutex_unlock(&cgroup_mutex);
	return 0;
}
4051
4052static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
4053{
4054 struct cgroup_subsys_state *css;
4055
4056 printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
4057
4058
4059 list_add(&ss->sibling, &rootnode.subsys_list);
4060 ss->root = &rootnode;
4061 css = ss->create(ss, dummytop);
4062
4063 BUG_ON(IS_ERR(css));
4064 init_cgroup_css(css, ss, dummytop);
4065
4066
4067
4068
4069
4070 init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
4071
4072 need_forkexit_callback |= ss->fork || ss->exit;
4073
4074
4075
4076
4077 BUG_ON(!list_empty(&init_task.tasks));
4078
4079 mutex_init(&ss->hierarchy_mutex);
4080 lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
4081 ss->active = 1;
4082
4083
4084
4085 BUG_ON(ss->module);
4086}
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
4098{
4099 int i;
4100 struct cgroup_subsys_state *css;
4101
4102
4103 if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
4104 ss->create == NULL || ss->destroy == NULL)
4105 return -EINVAL;
4106
4107
4108
4109
4110
4111
4112
4113 if (ss->fork || ss->exit)
4114 return -EINVAL;
4115
4116
4117
4118
4119
4120 if (ss->module == NULL) {
4121
4122 BUG_ON(ss->subsys_id >= CGROUP_BUILTIN_SUBSYS_COUNT);
4123 BUG_ON(subsys[ss->subsys_id] != ss);
4124 return 0;
4125 }
4126
4127
4128
4129
4130
4131 mutex_lock(&cgroup_mutex);
4132
4133 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
4134 if (subsys[i] == NULL)
4135 break;
4136 }
4137 if (i == CGROUP_SUBSYS_COUNT) {
4138
4139 mutex_unlock(&cgroup_mutex);
4140 return -EBUSY;
4141 }
4142
4143 ss->subsys_id = i;
4144 subsys[i] = ss;
4145
4146
4147
4148
4149
4150 css = ss->create(ss, dummytop);
4151 if (IS_ERR(css)) {
4152
4153 subsys[i] = NULL;
4154 mutex_unlock(&cgroup_mutex);
4155 return PTR_ERR(css);
4156 }
4157
4158 list_add(&ss->sibling, &rootnode.subsys_list);
4159 ss->root = &rootnode;
4160
4161
4162 init_cgroup_css(css, ss, dummytop);
4163
4164 if (ss->use_id) {
4165 int ret = cgroup_init_idr(ss, css);
4166 if (ret) {
4167 dummytop->subsys[ss->subsys_id] = NULL;
4168 ss->destroy(ss, dummytop);
4169 subsys[i] = NULL;
4170 mutex_unlock(&cgroup_mutex);
4171 return ret;
4172 }
4173 }
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183 write_lock(&css_set_lock);
4184 for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
4185 struct css_set *cg;
4186 struct hlist_node *node, *tmp;
4187 struct hlist_head *bucket = &css_set_table[i], *new_bucket;
4188
4189 hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) {
4190
4191 if (cg->subsys[ss->subsys_id])
4192 continue;
4193
4194 hlist_del(&cg->hlist);
4195
4196 cg->subsys[ss->subsys_id] = css;
4197
4198 new_bucket = css_set_hash(cg->subsys);
4199 hlist_add_head(&cg->hlist, new_bucket);
4200 }
4201 }
4202 write_unlock(&css_set_lock);
4203
4204 mutex_init(&ss->hierarchy_mutex);
4205 lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
4206 ss->active = 1;
4207
4208
4209 mutex_unlock(&cgroup_mutex);
4210 return 0;
4211}
4212EXPORT_SYMBOL_GPL(cgroup_load_subsys);
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222void cgroup_unload_subsys(struct cgroup_subsys *ss)
4223{
4224 struct cg_cgroup_link *link;
4225 struct hlist_head *hhead;
4226
4227 BUG_ON(ss->module == NULL);
4228
4229
4230
4231
4232
4233
4234 BUG_ON(ss->root != &rootnode);
4235
4236 mutex_lock(&cgroup_mutex);
4237
4238 BUG_ON(ss->subsys_id < CGROUP_BUILTIN_SUBSYS_COUNT);
4239 subsys[ss->subsys_id] = NULL;
4240
4241
4242 list_del_init(&ss->sibling);
4243
4244
4245
4246
4247
4248 write_lock(&css_set_lock);
4249 list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) {
4250 struct css_set *cg = link->cg;
4251
4252 hlist_del(&cg->hlist);
4253 BUG_ON(!cg->subsys[ss->subsys_id]);
4254 cg->subsys[ss->subsys_id] = NULL;
4255 hhead = css_set_hash(cg->subsys);
4256 hlist_add_head(&cg->hlist, hhead);
4257 }
4258 write_unlock(&css_set_lock);
4259
4260
4261
4262
4263
4264
4265
4266 ss->destroy(ss, dummytop);
4267 dummytop->subsys[ss->subsys_id] = NULL;
4268
4269 mutex_unlock(&cgroup_mutex);
4270}
4271EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
4272
4273
4274
4275
4276
4277
4278
4279int __init cgroup_init_early(void)
4280{
4281 int i;
4282 atomic_set(&init_css_set.refcount, 1);
4283 INIT_LIST_HEAD(&init_css_set.cg_links);
4284 INIT_LIST_HEAD(&init_css_set.tasks);
4285 INIT_HLIST_NODE(&init_css_set.hlist);
4286 css_set_count = 1;
4287 init_cgroup_root(&rootnode);
4288 root_count = 1;
4289 init_task.cgroups = &init_css_set;
4290
4291 init_css_set_link.cg = &init_css_set;
4292 init_css_set_link.cgrp = dummytop;
4293 list_add(&init_css_set_link.cgrp_link_list,
4294 &rootnode.top_cgroup.css_sets);
4295 list_add(&init_css_set_link.cg_link_list,
4296 &init_css_set.cg_links);
4297
4298 for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
4299 INIT_HLIST_HEAD(&css_set_table[i]);
4300
4301
4302 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4303 struct cgroup_subsys *ss = subsys[i];
4304
4305 BUG_ON(!ss->name);
4306 BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
4307 BUG_ON(!ss->create);
4308 BUG_ON(!ss->destroy);
4309 if (ss->subsys_id != i) {
4310 printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
4311 ss->name, ss->subsys_id);
4312 BUG();
4313 }
4314
4315 if (ss->early_init)
4316 cgroup_init_subsys(ss);
4317 }
4318 return 0;
4319}
4320
4321
4322
4323
4324
4325
4326
4327int __init cgroup_init(void)
4328{
4329 int err;
4330 int i;
4331 struct hlist_head *hhead;
4332
4333 err = bdi_init(&cgroup_backing_dev_info);
4334 if (err)
4335 return err;
4336
4337
4338 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4339 struct cgroup_subsys *ss = subsys[i];
4340 if (!ss->early_init)
4341 cgroup_init_subsys(ss);
4342 if (ss->use_id)
4343 cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
4344 }
4345
4346
4347 hhead = css_set_hash(init_css_set.subsys);
4348 hlist_add_head(&init_css_set.hlist, hhead);
4349 BUG_ON(!init_root_id(&rootnode));
4350
4351 cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
4352 if (!cgroup_kobj) {
4353 err = -ENOMEM;
4354 goto out;
4355 }
4356
4357 err = register_filesystem(&cgroup_fs_type);
4358 if (err < 0) {
4359 kobject_put(cgroup_kobj);
4360 goto out;
4361 }
4362
4363 proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);
4364
4365out:
4366 if (err)
4367 bdi_destroy(&cgroup_backing_dev_info);
4368
4369 return err;
4370}
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385static int proc_cgroup_show(struct seq_file *m, void *v)
4386{
4387 struct pid *pid;
4388 struct task_struct *tsk;
4389 char *buf;
4390 int retval;
4391 struct cgroupfs_root *root;
4392
4393 retval = -ENOMEM;
4394 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
4395 if (!buf)
4396 goto out;
4397
4398 retval = -ESRCH;
4399 pid = m->private;
4400 tsk = get_pid_task(pid, PIDTYPE_PID);
4401 if (!tsk)
4402 goto out_free;
4403
4404 retval = 0;
4405
4406 mutex_lock(&cgroup_mutex);
4407
4408 for_each_active_root(root) {
4409 struct cgroup_subsys *ss;
4410 struct cgroup *cgrp;
4411 int count = 0;
4412
4413 seq_printf(m, "%d:", root->hierarchy_id);
4414 for_each_subsys(root, ss)
4415 seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
4416 if (strlen(root->name))
4417 seq_printf(m, "%sname=%s", count ? "," : "",
4418 root->name);
4419 seq_putc(m, ':');
4420 cgrp = task_cgroup_from_root(tsk, root);
4421 retval = cgroup_path(cgrp, buf, PAGE_SIZE);
4422 if (retval < 0)
4423 goto out_unlock;
4424 seq_puts(m, buf);
4425 seq_putc(m, '\n');
4426 }
4427
4428out_unlock:
4429 mutex_unlock(&cgroup_mutex);
4430 put_task_struct(tsk);
4431out_free:
4432 kfree(buf);
4433out:
4434 return retval;
4435}
4436
4437static int cgroup_open(struct inode *inode, struct file *file)
4438{
4439 struct pid *pid = PROC_I(inode)->pid;
4440 return single_open(file, proc_cgroup_show, pid);
4441}
4442
4443const struct file_operations proc_cgroup_operations = {
4444 .open = cgroup_open,
4445 .read = seq_read,
4446 .llseek = seq_lseek,
4447 .release = single_release,
4448};
4449
4450
4451static int proc_cgroupstats_show(struct seq_file *m, void *v)
4452{
4453 int i;
4454
4455 seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
4456
4457
4458
4459
4460
4461 mutex_lock(&cgroup_mutex);
4462 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4463 struct cgroup_subsys *ss = subsys[i];
4464 if (ss == NULL)
4465 continue;
4466 seq_printf(m, "%s\t%d\t%d\t%d\n",
4467 ss->name, ss->root->hierarchy_id,
4468 ss->root->number_of_cgroups, !ss->disabled);
4469 }
4470 mutex_unlock(&cgroup_mutex);
4471 return 0;
4472}
4473
4474static int cgroupstats_open(struct inode *inode, struct file *file)
4475{
4476 return single_open(file, proc_cgroupstats_show, NULL);
4477}
4478
4479static const struct file_operations proc_cgroupstats_operations = {
4480 .open = cgroupstats_open,
4481 .read = seq_read,
4482 .llseek = seq_lseek,
4483 .release = single_release,
4484};
4485
4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502void cgroup_fork(struct task_struct *child)
4503{
4504 task_lock(current);
4505 child->cgroups = current->cgroups;
4506 get_css_set(child->cgroups);
4507 task_unlock(current);
4508 INIT_LIST_HEAD(&child->cg_list);
4509}
4510
4511
4512
4513
4514
4515
4516
4517
4518
4519void cgroup_fork_callbacks(struct task_struct *child)
4520{
4521 if (need_forkexit_callback) {
4522 int i;
4523
4524
4525
4526
4527
4528 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4529 struct cgroup_subsys *ss = subsys[i];
4530 if (ss->fork)
4531 ss->fork(ss, child);
4532 }
4533 }
4534}
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
4545void cgroup_post_fork(struct task_struct *child)
4546{
4547 if (use_task_css_set_links) {
4548 write_lock(&css_set_lock);
4549 task_lock(child);
4550 if (list_empty(&child->cg_list))
4551 list_add(&child->cg_list, &child->cgroups->tasks);
4552 task_unlock(child);
4553 write_unlock(&css_set_lock);
4554 }
4555}
4556
4557
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591void cgroup_exit(struct task_struct *tsk, int run_callbacks)
4592{
4593 struct css_set *cg;
4594 int i;
4595
4596
4597
4598
4599
4600
4601 if (!list_empty(&tsk->cg_list)) {
4602 write_lock(&css_set_lock);
4603 if (!list_empty(&tsk->cg_list))
4604 list_del_init(&tsk->cg_list);
4605 write_unlock(&css_set_lock);
4606 }
4607
4608
4609 task_lock(tsk);
4610 cg = tsk->cgroups;
4611 tsk->cgroups = &init_css_set;
4612
4613 if (run_callbacks && need_forkexit_callback) {
4614
4615
4616
4617
4618 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4619 struct cgroup_subsys *ss = subsys[i];
4620 if (ss->exit) {
4621 struct cgroup *old_cgrp =
4622 rcu_dereference_raw(cg->subsys[i])->cgroup;
4623 struct cgroup *cgrp = task_cgroup(tsk, i);
4624 ss->exit(ss, cgrp, old_cgrp, tsk);
4625 }
4626 }
4627 }
4628 task_unlock(tsk);
4629
4630 if (cg)
4631 put_css_set_taskexit(cg);
4632}
4633
4634
4635
4636
4637
4638
4639
4640
4641
4642
4643
4644
4645
4646
4647int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
4648{
4649 int ret;
4650 struct cgroup *target;
4651
4652 if (cgrp == dummytop)
4653 return 1;
4654
4655 target = task_cgroup_from_root(task, cgrp->root);
4656 while (cgrp != target && cgrp!= cgrp->top_cgroup)
4657 cgrp = cgrp->parent;
4658 ret = (cgrp == target);
4659 return ret;
4660}
4661
4662static void check_for_release(struct cgroup *cgrp)
4663{
4664
4665
4666 if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
4667 && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
4668
4669
4670
4671 int need_schedule_work = 0;
4672 raw_spin_lock(&release_list_lock);
4673 if (!cgroup_is_removed(cgrp) &&
4674 list_empty(&cgrp->release_list)) {
4675 list_add(&cgrp->release_list, &release_list);
4676 need_schedule_work = 1;
4677 }
4678 raw_spin_unlock(&release_list_lock);
4679 if (need_schedule_work)
4680 schedule_work(&release_agent_work);
4681 }
4682}
4683
4684
4685void __css_put(struct cgroup_subsys_state *css, int count)
4686{
4687 struct cgroup *cgrp = css->cgroup;
4688 int val;
4689 rcu_read_lock();
4690 val = atomic_sub_return(count, &css->refcnt);
4691 if (val == 1) {
4692 if (notify_on_release(cgrp)) {
4693 set_bit(CGRP_RELEASABLE, &cgrp->flags);
4694 check_for_release(cgrp);
4695 }
4696 cgroup_wakeup_rmdir_waiter(cgrp);
4697 }
4698 rcu_read_unlock();
4699 WARN_ON_ONCE(val < 1);
4700}
4701EXPORT_SYMBOL_GPL(__css_put);
4702
4703
4704
4705
4706
4707
4708
4709
4710
4711
4712
4713
4714
4715
4716
4717
4718
4719
4720
4721
4722
4723
4724
4725
/*
 * Work function that runs the release agent for each cgroup queued on
 * release_list.
 *
 * For every queued cgroup the hierarchy's release_agent_path is executed
 * as a user-mode helper with the cgroup's path as its only argument and a
 * fixed minimal environment.  Entries for which a buffer cannot be
 * allocated or the path cannot be built are dropped without running the
 * agent.
 */
static void cgroup_release_agent(struct work_struct *work)
{
	BUG_ON(work != &release_agent_work);
	mutex_lock(&cgroup_mutex);
	raw_spin_lock(&release_list_lock);
	while (!list_empty(&release_list)) {
		char *argv[3], *envp[3];
		int i;
		char *pathbuf = NULL, *agentbuf = NULL;
		struct cgroup *cgrp = list_entry(release_list.next,
						    struct cgroup,
						    release_list);
		list_del_init(&cgrp->release_list);
		/* drop the spinlock before the GFP_KERNEL allocations below */
		raw_spin_unlock(&release_list_lock);
		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!pathbuf)
			goto continue_free;
		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
			goto continue_free;
		/* private copy of the agent path, made while cgroup_mutex is held */
		agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
		if (!agentbuf)
			goto continue_free;

		i = 0;
		argv[i++] = agentbuf;
		argv[i++] = pathbuf;
		argv[i] = NULL;

		i = 0;
		/* minimal command environment */
		envp[i++] = "HOME=/";
		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
		envp[i] = NULL;

		/* cgroup_mutex is not held while the helper is started */
		mutex_unlock(&cgroup_mutex);
		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
		mutex_lock(&cgroup_mutex);
 continue_free:
		/* kfree() is called on both buffers; either may still be NULL */
		kfree(pathbuf);
		kfree(agentbuf);
		/* retake the spinlock for the next list_empty() check */
		raw_spin_lock(&release_list_lock);
	}
	raw_spin_unlock(&release_list_lock);
	mutex_unlock(&cgroup_mutex);
}
4774
4775static int __init cgroup_disable(char *str)
4776{
4777 int i;
4778 char *token;
4779
4780 while ((token = strsep(&str, ",")) != NULL) {
4781 if (!*token)
4782 continue;
4783
4784
4785
4786
4787 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4788 struct cgroup_subsys *ss = subsys[i];
4789
4790 if (!strcmp(token, ss->name)) {
4791 ss->disabled = 1;
4792 printk(KERN_INFO "Disabling %s control group"
4793 " subsystem\n", ss->name);
4794 break;
4795 }
4796 }
4797 }
4798 return 1;
4799}
4800__setup("cgroup_disable=", cgroup_disable);
4801
4802
4803
4804
4805
4806
4807
4808
4809unsigned short css_id(struct cgroup_subsys_state *css)
4810{
4811 struct css_id *cssid;
4812
4813
4814
4815
4816
4817
4818 cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt));
4819
4820 if (cssid)
4821 return cssid->id;
4822 return 0;
4823}
4824EXPORT_SYMBOL_GPL(css_id);
4825
4826unsigned short css_depth(struct cgroup_subsys_state *css)
4827{
4828 struct css_id *cssid;
4829
4830 cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt));
4831
4832 if (cssid)
4833 return cssid->depth;
4834 return 0;
4835}
4836EXPORT_SYMBOL_GPL(css_depth);
4837
4838
4839
4840
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851bool css_is_ancestor(struct cgroup_subsys_state *child,
4852 const struct cgroup_subsys_state *root)
4853{
4854 struct css_id *child_id;
4855 struct css_id *root_id;
4856 bool ret = true;
4857
4858 rcu_read_lock();
4859 child_id = rcu_dereference(child->id);
4860 root_id = rcu_dereference(root->id);
4861 if (!child_id
4862 || !root_id
4863 || (child_id->depth < root_id->depth)
4864 || (child_id->stack[root_id->depth] != root_id->id))
4865 ret = false;
4866 rcu_read_unlock();
4867 return ret;
4868}
4869
4870void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
4871{
4872 struct css_id *id = css->id;
4873
4874 if (!id)
4875 return;
4876
4877 BUG_ON(!ss->use_id);
4878
4879 rcu_assign_pointer(id->css, NULL);
4880 rcu_assign_pointer(css->id, NULL);
4881 write_lock(&ss->id_lock);
4882 idr_remove(&ss->idr, id->id);
4883 write_unlock(&ss->id_lock);
4884 kfree_rcu(id, rcu_head);
4885}
4886EXPORT_SYMBOL_GPL(free_css_id);
4887
4888
4889
4890
4891
4892
4893static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
4894{
4895 struct css_id *newid;
4896 int myid, error, size;
4897
4898 BUG_ON(!ss->use_id);
4899
4900 size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
4901 newid = kzalloc(size, GFP_KERNEL);
4902 if (!newid)
4903 return ERR_PTR(-ENOMEM);
4904
4905 if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) {
4906 error = -ENOMEM;
4907 goto err_out;
4908 }
4909 write_lock(&ss->id_lock);
4910
4911 error = idr_get_new_above(&ss->idr, newid, 1, &myid);
4912 write_unlock(&ss->id_lock);
4913
4914
4915 if (error) {
4916 error = -ENOSPC;
4917 goto err_out;
4918 }
4919 if (myid > CSS_ID_MAX)
4920 goto remove_idr;
4921
4922 newid->id = myid;
4923 newid->depth = depth;
4924 return newid;
4925remove_idr:
4926 error = -ENOSPC;
4927 write_lock(&ss->id_lock);
4928 idr_remove(&ss->idr, myid);
4929 write_unlock(&ss->id_lock);
4930err_out:
4931 kfree(newid);
4932 return ERR_PTR(error);
4933
4934}
4935
4936static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
4937 struct cgroup_subsys_state *rootcss)
4938{
4939 struct css_id *newid;
4940
4941 rwlock_init(&ss->id_lock);
4942 idr_init(&ss->idr);
4943
4944 newid = get_new_cssid(ss, 0);
4945 if (IS_ERR(newid))
4946 return PTR_ERR(newid);
4947
4948 newid->stack[0] = newid->id;
4949 newid->css = rootcss;
4950 rootcss->id = newid;
4951 return 0;
4952}
4953
4954static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
4955 struct cgroup *child)
4956{
4957 int subsys_id, i, depth = 0;
4958 struct cgroup_subsys_state *parent_css, *child_css;
4959 struct css_id *child_id, *parent_id;
4960
4961 subsys_id = ss->subsys_id;
4962 parent_css = parent->subsys[subsys_id];
4963 child_css = child->subsys[subsys_id];
4964 parent_id = parent_css->id;
4965 depth = parent_id->depth + 1;
4966
4967 child_id = get_new_cssid(ss, depth);
4968 if (IS_ERR(child_id))
4969 return PTR_ERR(child_id);
4970
4971 for (i = 0; i < depth; i++)
4972 child_id->stack[i] = parent_id->stack[i];
4973 child_id->stack[depth] = child_id->id;
4974
4975
4976
4977
4978 rcu_assign_pointer(child_css->id, child_id);
4979
4980 return 0;
4981}
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
4992{
4993 struct css_id *cssid = NULL;
4994
4995 BUG_ON(!ss->use_id);
4996 cssid = idr_find(&ss->idr, id);
4997
4998 if (unlikely(!cssid))
4999 return NULL;
5000
5001 return rcu_dereference(cssid->css);
5002}
5003EXPORT_SYMBOL_GPL(css_lookup);
5004
5005
5006
5007
5008
5009
5010
5011
5012
5013
5014
5015struct cgroup_subsys_state *
5016css_get_next(struct cgroup_subsys *ss, int id,
5017 struct cgroup_subsys_state *root, int *foundid)
5018{
5019 struct cgroup_subsys_state *ret = NULL;
5020 struct css_id *tmp;
5021 int tmpid;
5022 int rootid = css_id(root);
5023 int depth = css_depth(root);
5024
5025 if (!rootid)
5026 return NULL;
5027
5028 BUG_ON(!ss->use_id);
5029
5030 tmpid = id;
5031 while (1) {
5032
5033
5034
5035
5036 read_lock(&ss->id_lock);
5037 tmp = idr_get_next(&ss->idr, &tmpid);
5038 read_unlock(&ss->id_lock);
5039
5040 if (!tmp)
5041 break;
5042 if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
5043 ret = rcu_dereference(tmp->css);
5044 if (ret) {
5045 *foundid = tmpid;
5046 break;
5047 }
5048 }
5049
5050 tmpid = tmpid + 1;
5051 }
5052 return ret;
5053}
5054
5055
5056
5057
5058struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
5059{
5060 struct cgroup *cgrp;
5061 struct inode *inode;
5062 struct cgroup_subsys_state *css;
5063
5064 inode = f->f_dentry->d_inode;
5065
5066 if (inode->i_op != &cgroup_dir_inode_operations)
5067 return ERR_PTR(-EBADF);
5068
5069 if (id < 0 || id >= CGROUP_SUBSYS_COUNT)
5070 return ERR_PTR(-EINVAL);
5071
5072
5073 cgrp = __d_cgrp(f->f_dentry);
5074 css = cgrp->subsys[id];
5075 return css ? css : ERR_PTR(-ENOENT);
5076}
5077
5078#ifdef CONFIG_CGROUP_DEBUG
5079static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
5080 struct cgroup *cont)
5081{
5082 struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
5083
5084 if (!css)
5085 return ERR_PTR(-ENOMEM);
5086
5087 return css;
5088}
5089
5090static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
5091{
5092 kfree(cont->subsys[debug_subsys_id]);
5093}
5094
5095static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
5096{
5097 return atomic_read(&cont->count);
5098}
5099
5100static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft)
5101{
5102 return cgroup_task_count(cont);
5103}
5104
5105static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
5106{
5107 return (u64)(unsigned long)current->cgroups;
5108}
5109
5110static u64 current_css_set_refcount_read(struct cgroup *cont,
5111 struct cftype *cft)
5112{
5113 u64 count;
5114
5115 rcu_read_lock();
5116 count = atomic_read(¤t->cgroups->refcount);
5117 rcu_read_unlock();
5118 return count;
5119}
5120
5121static int current_css_set_cg_links_read(struct cgroup *cont,
5122 struct cftype *cft,
5123 struct seq_file *seq)
5124{
5125 struct cg_cgroup_link *link;
5126 struct css_set *cg;
5127
5128 read_lock(&css_set_lock);
5129 rcu_read_lock();
5130 cg = rcu_dereference(current->cgroups);
5131 list_for_each_entry(link, &cg->cg_links, cg_link_list) {
5132 struct cgroup *c = link->cgrp;
5133 const char *name;
5134
5135 if (c->dentry)
5136 name = c->dentry->d_name.name;
5137 else
5138 name = "?";
5139 seq_printf(seq, "Root %d group %s\n",
5140 c->root->hierarchy_id, name);
5141 }
5142 rcu_read_unlock();
5143 read_unlock(&css_set_lock);
5144 return 0;
5145}
5146
5147#define MAX_TASKS_SHOWN_PER_CSS 25
5148static int cgroup_css_links_read(struct cgroup *cont,
5149 struct cftype *cft,
5150 struct seq_file *seq)
5151{
5152 struct cg_cgroup_link *link;
5153
5154 read_lock(&css_set_lock);
5155 list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
5156 struct css_set *cg = link->cg;
5157 struct task_struct *task;
5158 int count = 0;
5159 seq_printf(seq, "css_set %p\n", cg);
5160 list_for_each_entry(task, &cg->tasks, cg_list) {
5161 if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
5162 seq_puts(seq, " ...\n");
5163 break;
5164 } else {
5165 seq_printf(seq, " task %d\n",
5166 task_pid_vnr(task));
5167 }
5168 }
5169 }
5170 read_unlock(&css_set_lock);
5171 return 0;
5172}
5173
5174static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
5175{
5176 return test_bit(CGRP_RELEASABLE, &cgrp->flags);
5177}
5178
5179static struct cftype debug_files[] = {
5180 {
5181 .name = "cgroup_refcount",
5182 .read_u64 = cgroup_refcount_read,
5183 },
5184 {
5185 .name = "taskcount",
5186 .read_u64 = debug_taskcount_read,
5187 },
5188
5189 {
5190 .name = "current_css_set",
5191 .read_u64 = current_css_set_read,
5192 },
5193
5194 {
5195 .name = "current_css_set_refcount",
5196 .read_u64 = current_css_set_refcount_read,
5197 },
5198
5199 {
5200 .name = "current_css_set_cg_links",
5201 .read_seq_string = current_css_set_cg_links_read,
5202 },
5203
5204 {
5205 .name = "cgroup_css_links",
5206 .read_seq_string = cgroup_css_links_read,
5207 },
5208
5209 {
5210 .name = "releasable",
5211 .read_u64 = releasable_read,
5212 },
5213};
5214
5215static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
5216{
5217 return cgroup_add_files(cont, ss, debug_files,
5218 ARRAY_SIZE(debug_files));
5219}
5220
5221struct cgroup_subsys debug_subsys = {
5222 .name = "debug",
5223 .create = debug_create,
5224 .destroy = debug_destroy,
5225 .populate = debug_populate,
5226 .subsys_id = debug_subsys_id,
5227};
5228#endif
5229