1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include <linux/cgroup.h>
30#include <linux/cred.h>
31#include <linux/ctype.h>
32#include <linux/errno.h>
33#include <linux/fs.h>
34#include <linux/init_task.h>
35#include <linux/kernel.h>
36#include <linux/list.h>
37#include <linux/mm.h>
38#include <linux/mutex.h>
39#include <linux/mount.h>
40#include <linux/pagemap.h>
41#include <linux/proc_fs.h>
42#include <linux/rcupdate.h>
43#include <linux/sched.h>
44#include <linux/backing-dev.h>
45#include <linux/seq_file.h>
46#include <linux/slab.h>
47#include <linux/magic.h>
48#include <linux/spinlock.h>
49#include <linux/string.h>
50#include <linux/sort.h>
51#include <linux/kmod.h>
52#include <linux/module.h>
53#include <linux/delayacct.h>
54#include <linux/cgroupstats.h>
55#include <linux/hash.h>
56#include <linux/namei.h>
57#include <linux/pid_namespace.h>
58#include <linux/idr.h>
59#include <linux/vmalloc.h>
60#include <linux/eventfd.h>
61#include <linux/poll.h>
62#include <linux/flex_array.h>
63
64#include <linux/atomic.h>
65
/*
 * Global cgroup lock.  cgroup_lock()/cgroup_unlock() in this file take and
 * release it, and several functions here assert that it is held with
 * BUG_ON(!mutex_is_locked(&cgroup_mutex)).
 */
static DEFINE_MUTEX(cgroup_mutex);
67
68
69
70
71
72
73
/*
 * Table of subsystem pointers, indexed by subsystem number (the same index is
 * used as a bit position in the subsys_bits masks in this file).
 * SUBSYS(x) expands to "&x_subsys,"; <linux/cgroup_subsys.h> is presumably a
 * list of SUBSYS() invocations -- its contents are not visible here.
 * Code in this file checks entries for NULL before use, so slots may be empty.
 */
#define SUBSYS(_x) &_x ## _subsys,
static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
#include <linux/cgroup_subsys.h>
};
78
/*
 * Size in bytes of cgroupfs_root.name.  The "name=" mount option is copied
 * with a limit of MAX_CGROUP_ROOT_NAMELEN - 1 characters so that it always
 * fits together with its terminating NUL.
 */
#define MAX_CGROUP_ROOT_NAMELEN 64
80
81
82
83
84
85
/*
 * Per-hierarchy state.  A root is allocated by cgroup_root_from_opts() at
 * mount time, attached to its superblock by cgroup_set_super() and freed by
 * cgroup_drop_root().  The statically allocated "rootnode" is also of this
 * type.
 */
struct cgroupfs_root {
	/* Superblock of the mounted hierarchy; set by cgroup_set_super() */
	struct super_block *sb;

	/* Bitmask (by subsystem index) of the subsystems requested for this root */
	unsigned long subsys_bits;

	/* Id allocated from hierarchy_ida by init_root_id() */
	int hierarchy_id;

	/* Bitmask of subsystems actually bound; written by rebind_subsystems() */
	unsigned long actual_subsys_bits;

	/* Subsystems attached to this root, linked through cgroup_subsys.sibling */
	struct list_head subsys_list;

	/* The top-level cgroup of this hierarchy */
	struct cgroup top_cgroup;

	/* Number of cgroups in the hierarchy, counting top_cgroup (starts at 1) */
	int number_of_cgroups;

	/* Entry in the global "roots" list */
	struct list_head root_list;

	/* ROOT_* flag bits (e.g. ROOT_NOPREFIX) */
	unsigned long flags;

	/* Release agent path; settable through the "release_agent=" mount option */
	char release_agent_path[PATH_MAX];

	/* Name from the "name=" mount option; empty string when unnamed */
	char name[MAX_CGROUP_ROOT_NAMELEN];
};
122
123
124
125
126
127
/*
 * Default root.  A subsystem whose ->root points here is not bound to any
 * mounted hierarchy: rebind_subsystems() only binds subsystems that currently
 * belong to rootnode and hands them back to it on removal.
 */
static struct cgroupfs_root rootnode;
129
130
131
132
133
134#define CSS_ID_MAX (65535)
135struct css_id {
136
137
138
139
140
141
142
143 struct cgroup_subsys_state __rcu *css;
144
145
146
147 unsigned short id;
148
149
150
151 unsigned short depth;
152
153
154
155 struct rcu_head rcu_head;
156
157
158
159 unsigned short stack[0];
160};
161
162
163
164
/*
 * Bookkeeping for one event registration on a cgroup.
 * NOTE(review): no user of this structure is visible in this part of the
 * file; the field descriptions below are derived from the field types and
 * names only and should be confirmed against the code that uses them.
 */
struct cgroup_event {
	/* cgroup this event belongs to */
	struct cgroup *cgrp;

	/* control file the event is associated with */
	struct cftype *cft;

	/* eventfd context -- presumably the one that gets signalled */
	struct eventfd_ctx *eventfd;

	/* list linkage -- presumably into cgroup.event_list; TODO confirm */
	struct list_head list;

	/*
	 * Poll/wait-queue plumbing and a work item named "remove" --
	 * presumably used to notice the eventfd going away and tear the
	 * event down asynchronously; TODO confirm.
	 */
	poll_table pt;
	wait_queue_head_t *wqh;
	wait_queue_t wait;
	struct work_struct remove;
};
191
192
193
/*
 * List of mounted hierarchies (linked through cgroupfs_root.root_list) and
 * the number of entries on it.  Both are updated in cgroup_mount() and
 * cgroup_kill_sb() with cgroup_mutex held.
 */
static LIST_HEAD(roots);
static int root_count;

/*
 * Allocator state for cgroupfs_root.hierarchy_id, see init_root_id():
 * the IDA itself, the id to try first on the next allocation, and the
 * spinlock taken around IDA operations.
 */
static DEFINE_IDA(hierarchy_ida);
static int next_hierarchy_id;
static DEFINE_SPINLOCK(hierarchy_id_lock);

/* Shorthand for the top cgroup of the default root (rootnode) */
#define dummytop (&rootnode.top_cgroup)

/*
 * NOTE(review): not referenced in the visible part of this file; by its name
 * it records whether fork/exit callbacks are needed -- confirm at its users.
 */
static int need_forkexit_callback __read_mostly;
210
211#ifdef CONFIG_PROVE_LOCKING
212int cgroup_lock_is_held(void)
213{
214 return lockdep_is_held(&cgroup_mutex);
215}
216#else
217int cgroup_lock_is_held(void)
218{
219 return mutex_is_locked(&cgroup_mutex);
220}
221#endif
222
223EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
224
225
/*
 * cgroup_is_removed - test the CGRP_REMOVED flag of @cgrp.
 *
 * Returns non-zero when the flag is set, zero otherwise.
 */
inline int cgroup_is_removed(const struct cgroup *cgrp)
{
	return test_bit(CGRP_REMOVED, &cgrp->flags);
}
230
231
/*
 * Bit numbers for cgroupfs_root.flags.
 * ROOT_NOPREFIX is set by the "noprefix" mount option and reported back by
 * cgroup_show_options().
 */
enum {
	ROOT_NOPREFIX,
};
235
236static int cgroup_is_releasable(const struct cgroup *cgrp)
237{
238 const int bits =
239 (1 << CGRP_RELEASABLE) |
240 (1 << CGRP_NOTIFY_ON_RELEASE);
241 return (cgrp->flags & bits) == bits;
242}
243
/*
 * notify_on_release - test the CGRP_NOTIFY_ON_RELEASE flag of @cgrp.
 *
 * Returns non-zero when the flag is set.
 */
static int notify_on_release(const struct cgroup *cgrp)
{
	return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
}
248
/*
 * clone_children - test the CGRP_CLONE_CHILDREN flag of @cgrp.
 *
 * Returns non-zero when the flag is set.
 */
static int clone_children(const struct cgroup *cgrp)
{
	return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
}
253
254
255
256
257
/*
 * for_each_subsys() walks every subsystem attached to hierarchy @_root
 * (a struct cgroupfs_root *), with @_ss as the struct cgroup_subsys * cursor.
 * The macro argument is parenthesized so that any pointer-valued expression
 * can be passed safely.
 */
#define for_each_subsys(_root, _ss) \
list_for_each_entry(_ss, &(_root)->subsys_list, sibling)

/*
 * for_each_active_root() walks every hierarchy on the global "roots" list,
 * with @_root as the struct cgroupfs_root * cursor.
 */
#define for_each_active_root(_root) \
list_for_each_entry(_root, &roots, root_list)
264
265
266
/*
 * Release-agent machinery.  Only the declarations are visible here:
 * a list with its protecting spinlock, a work item whose handler is
 * cgroup_release_agent(), and check_for_release(), which __put_css_set()
 * calls when a cgroup's reference count drops to zero.
 * NOTE(review): release_list presumably holds cgroups awaiting a
 * release-agent run -- confirm at the definitions further down the file.
 */
static LIST_HEAD(release_list);
static DEFINE_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
272
273
/*
 * Link object joining one css_set to one cgroup.  Each link sits on two
 * lists at once (see link_css_set()), giving a many-to-many relation
 * between css_sets and cgroups.
 */
struct cg_cgroup_link {
	/*
	 * Entry in cgrp->css_sets: all links that reference the same cgroup.
	 * Also used to chain not-yet-used links on a temporary list
	 * (allocate_cg_links()/free_cg_links()).
	 */
	struct list_head cgrp_link_list;
	struct cgroup *cgrp;

	/* Entry in cg->cg_links: all links that belong to the same css_set */
	struct list_head cg_link_list;
	struct css_set *cg;
};
288
289
290
291
292
293
294
295
/*
 * The initial css_set.  task_cgroup_from_root() treats a task that still
 * uses it as belonging to the top cgroup of whichever root is asked about.
 * NOTE(review): init_css_set_link is not used in the visible part of the
 * file; presumably it links init_css_set to the default root's top cgroup.
 */
static struct css_set init_css_set;
static struct cg_cgroup_link init_css_set_link;

static int cgroup_init_idr(struct cgroup_subsys *ss,
			   struct cgroup_subsys_state *css);

/*
 * css_set_lock guards the css_set hash table and the cg_cgroup_link lists
 * (it is taken for reading around lookups and for writing around
 * insert/remove in this file).  css_set_count is the number of css_sets
 * currently hashed.
 */
static DEFINE_RWLOCK(css_set_lock);
static int css_set_count;

/*
 * Hash table of css_sets: 2^CSS_SET_HASH_BITS buckets, keyed by
 * css_set_hash() over the css_set's subsystem-state pointers.
 */
#define CSS_SET_HASH_BITS	7
#define CSS_SET_TABLE_SIZE	(1 << CSS_SET_HASH_BITS)
static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
316
317static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
318{
319 int i;
320 int index;
321 unsigned long tmp = 0UL;
322
323 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
324 tmp += (unsigned long)css[i];
325 tmp = (tmp >> 16) ^ tmp;
326
327 index = hash_long(tmp, CSS_SET_HASH_BITS);
328
329 return &css_set_table[index];
330}
331
332
333
334
335
/*
 * NOTE(review): not referenced in the visible part of this file.  By its
 * name it is a flag controlling whether tasks are linked onto their
 * css_set's task list -- confirm at the code that reads and sets it.
 */
static int use_task_css_set_links __read_mostly;
337
/*
 * __put_css_set - drop one reference on a css_set.
 * @cg:       css_set to release
 * @taskexit: non-zero when called through put_css_set_taskexit(); in that
 *            case a cgroup whose count drops to zero (and which has
 *            notify_on_release set) is also marked CGRP_RELEASABLE.
 *
 * When the last reference goes away the css_set is removed from the hash
 * table, every cg_cgroup_link hanging off it is unlinked and freed, and the
 * css_set itself is freed through kfree_rcu().
 */
static void __put_css_set(struct css_set *cg, int taskexit)
{
	struct cg_cgroup_link *link;
	struct cg_cgroup_link *saved_link;

	/*
	 * Fast path: decrement the refcount unless it is currently 1.  If the
	 * decrement happened this was not the last reference, and there is
	 * nothing more to do -- no lock needed.
	 */
	if (atomic_add_unless(&cg->refcount, -1, 1))
		return;
	/*
	 * Possibly the last reference: do the final decrement under the write
	 * lock.  If the count does not reach zero after all, back out.
	 */
	write_lock(&css_set_lock);
	if (!atomic_dec_and_test(&cg->refcount)) {
		write_unlock(&css_set_lock);
		return;
	}

	/* The css_set is dead: unhash it and release its cgroup references */
	hlist_del(&cg->hlist);
	css_set_count--;

	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
				 cg_link_list) {
		struct cgroup *cgrp = link->cgrp;
		/* take the link off both the css_set's and the cgroup's list */
		list_del(&link->cg_link_list);
		list_del(&link->cgrp_link_list);
		/* last css_set reference on this cgroup: maybe notify */
		if (atomic_dec_and_test(&cgrp->count) &&
		    notify_on_release(cgrp)) {
			if (taskexit)
				set_bit(CGRP_RELEASABLE, &cgrp->flags);
			check_for_release(cgrp);
		}

		kfree(link);
	}

	write_unlock(&css_set_lock);
	/* free after an RCU grace period */
	kfree_rcu(cg, rcu_head);
}
377
378
379
380
/*
 * get_css_set - take an additional reference on @cg.
 * Paired with put_css_set()/put_css_set_taskexit().
 */
static inline void get_css_set(struct css_set *cg)
{
	atomic_inc(&cg->refcount);
}
385
/*
 * put_css_set - drop a reference on @cg outside of task exit
 * (calls __put_css_set() with taskexit == 0).
 */
static inline void put_css_set(struct css_set *cg)
{
	__put_css_set(cg, 0);
}
390
/*
 * put_css_set_taskexit - drop a reference on @cg on behalf of an exiting
 * task (calls __put_css_set() with taskexit == 1, which additionally marks
 * emptied notify_on_release cgroups as CGRP_RELEASABLE).
 */
static inline void put_css_set_taskexit(struct css_set *cg)
{
	__put_css_set(cg, 1);
}
395
396
397
398
399
400
401
402
403
404
405
406static bool compare_css_sets(struct css_set *cg,
407 struct css_set *old_cg,
408 struct cgroup *new_cgrp,
409 struct cgroup_subsys_state *template[])
410{
411 struct list_head *l1, *l2;
412
413 if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
414
415 return false;
416 }
417
418
419
420
421
422
423
424
425
426
427 l1 = &cg->cg_links;
428 l2 = &old_cg->cg_links;
429 while (1) {
430 struct cg_cgroup_link *cgl1, *cgl2;
431 struct cgroup *cg1, *cg2;
432
433 l1 = l1->next;
434 l2 = l2->next;
435
436 if (l1 == &cg->cg_links) {
437 BUG_ON(l2 != &old_cg->cg_links);
438 break;
439 } else {
440 BUG_ON(l2 == &old_cg->cg_links);
441 }
442
443 cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
444 cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
445 cg1 = cgl1->cgrp;
446 cg2 = cgl2->cgrp;
447
448 BUG_ON(cg1->root != cg2->root);
449
450
451
452
453
454
455
456
457 if (cg1->root == new_cgrp->root) {
458 if (cg1 != new_cgrp)
459 return false;
460 } else {
461 if (cg1 != cg2)
462 return false;
463 }
464 }
465 return true;
466}
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481static struct css_set *find_existing_css_set(
482 struct css_set *oldcg,
483 struct cgroup *cgrp,
484 struct cgroup_subsys_state *template[])
485{
486 int i;
487 struct cgroupfs_root *root = cgrp->root;
488 struct hlist_head *hhead;
489 struct hlist_node *node;
490 struct css_set *cg;
491
492
493
494
495
496
497 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
498 if (root->subsys_bits & (1UL << i)) {
499
500
501
502 template[i] = cgrp->subsys[i];
503 } else {
504
505
506 template[i] = oldcg->subsys[i];
507 }
508 }
509
510 hhead = css_set_hash(template);
511 hlist_for_each_entry(cg, node, hhead, hlist) {
512 if (!compare_css_sets(cg, oldcg, cgrp, template))
513 continue;
514
515
516 return cg;
517 }
518
519
520 return NULL;
521}
522
523static void free_cg_links(struct list_head *tmp)
524{
525 struct cg_cgroup_link *link;
526 struct cg_cgroup_link *saved_link;
527
528 list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
529 list_del(&link->cgrp_link_list);
530 kfree(link);
531 }
532}
533
534
535
536
537
538
539static int allocate_cg_links(int count, struct list_head *tmp)
540{
541 struct cg_cgroup_link *link;
542 int i;
543 INIT_LIST_HEAD(tmp);
544 for (i = 0; i < count; i++) {
545 link = kmalloc(sizeof(*link), GFP_KERNEL);
546 if (!link) {
547 free_cg_links(tmp);
548 return -ENOMEM;
549 }
550 list_add(&link->cgrp_link_list, tmp);
551 }
552 return 0;
553}
554
555
556
557
558
559
560
561static void link_css_set(struct list_head *tmp_cg_links,
562 struct css_set *cg, struct cgroup *cgrp)
563{
564 struct cg_cgroup_link *link;
565
566 BUG_ON(list_empty(tmp_cg_links));
567 link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
568 cgrp_link_list);
569 link->cg = cg;
570 link->cgrp = cgrp;
571 atomic_inc(&cgrp->count);
572 list_move(&link->cgrp_link_list, &cgrp->css_sets);
573
574
575
576
577 list_add_tail(&link->cg_link_list, &cg->cg_links);
578}
579
580
581
582
583
584
585
586
587static struct css_set *find_css_set(
588 struct css_set *oldcg, struct cgroup *cgrp)
589{
590 struct css_set *res;
591 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
592
593 struct list_head tmp_cg_links;
594
595 struct hlist_head *hhead;
596 struct cg_cgroup_link *link;
597
598
599
600 read_lock(&css_set_lock);
601 res = find_existing_css_set(oldcg, cgrp, template);
602 if (res)
603 get_css_set(res);
604 read_unlock(&css_set_lock);
605
606 if (res)
607 return res;
608
609 res = kmalloc(sizeof(*res), GFP_KERNEL);
610 if (!res)
611 return NULL;
612
613
614 if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
615 kfree(res);
616 return NULL;
617 }
618
619 atomic_set(&res->refcount, 1);
620 INIT_LIST_HEAD(&res->cg_links);
621 INIT_LIST_HEAD(&res->tasks);
622 INIT_HLIST_NODE(&res->hlist);
623
624
625
626 memcpy(res->subsys, template, sizeof(res->subsys));
627
628 write_lock(&css_set_lock);
629
630 list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
631 struct cgroup *c = link->cgrp;
632 if (c->root == cgrp->root)
633 c = cgrp;
634 link_css_set(&tmp_cg_links, res, c);
635 }
636
637 BUG_ON(!list_empty(&tmp_cg_links));
638
639 css_set_count++;
640
641
642 hhead = css_set_hash(res->subsys);
643 hlist_add_head(&res->hlist, hhead);
644
645 write_unlock(&css_set_lock);
646
647 return res;
648}
649
650
651
652
653
654static struct cgroup *task_cgroup_from_root(struct task_struct *task,
655 struct cgroupfs_root *root)
656{
657 struct css_set *css;
658 struct cgroup *res = NULL;
659
660 BUG_ON(!mutex_is_locked(&cgroup_mutex));
661 read_lock(&css_set_lock);
662
663
664
665
666
667 css = task->cgroups;
668 if (css == &init_css_set) {
669 res = &root->top_cgroup;
670 } else {
671 struct cg_cgroup_link *link;
672 list_for_each_entry(link, &css->cg_links, cg_link_list) {
673 struct cgroup *c = link->cgrp;
674 if (c->root == root) {
675 res = c;
676 break;
677 }
678 }
679 }
680 read_unlock(&css_set_lock);
681 BUG_ON(!res);
682 return res;
683}
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
/*
 * cgroup_lock - acquire the global cgroup_mutex.
 * May sleep (it is a mutex).  Pair with cgroup_unlock().
 */
void cgroup_lock(void)
{
	mutex_lock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_lock);
744
745
746
747
748
749
/*
 * cgroup_unlock - release the global cgroup_mutex taken by cgroup_lock().
 */
void cgroup_unlock(void)
{
	mutex_unlock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_unlock);
755
756
757
758
759
760
761
762
/* Forward declarations for definitions that appear later in the file */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp);
static const struct inode_operations cgroup_dir_inode_operations;
static const struct file_operations proc_cgroupstats_operations;

/*
 * Backing device info installed on every cgroup inode's mapping by
 * cgroup_new_inode(); the capability flag requests no accounting and no
 * writeback.
 */
static struct backing_dev_info cgroup_backing_dev_info = {
	.name		= "cgroup",
	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
};

static int alloc_css_id(struct cgroup_subsys *ss,
			struct cgroup *parent, struct cgroup *child);
777
778static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
779{
780 struct inode *inode = new_inode(sb);
781
782 if (inode) {
783 inode->i_ino = get_next_ino();
784 inode->i_mode = mode;
785 inode->i_uid = current_fsuid();
786 inode->i_gid = current_fsgid();
787 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
788 inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
789 }
790 return inode;
791}
792
793
794
795
796
797static int cgroup_call_pre_destroy(struct cgroup *cgrp)
798{
799 struct cgroup_subsys *ss;
800 int ret = 0;
801
802 for_each_subsys(cgrp->root, ss)
803 if (ss->pre_destroy) {
804 ret = ss->pre_destroy(ss, cgrp);
805 if (ret)
806 break;
807 }
808
809 return ret;
810}
811
812static void cgroup_diput(struct dentry *dentry, struct inode *inode)
813{
814
815 if (S_ISDIR(inode->i_mode)) {
816 struct cgroup *cgrp = dentry->d_fsdata;
817 struct cgroup_subsys *ss;
818 BUG_ON(!(cgroup_is_removed(cgrp)));
819
820
821
822
823
824
825 synchronize_rcu();
826
827 mutex_lock(&cgroup_mutex);
828
829
830
831 for_each_subsys(cgrp->root, ss)
832 ss->destroy(ss, cgrp);
833
834 cgrp->root->number_of_cgroups--;
835 mutex_unlock(&cgroup_mutex);
836
837
838
839
840
841 deactivate_super(cgrp->root->sb);
842
843
844
845
846
847 BUG_ON(!list_empty(&cgrp->pidlists));
848
849 kfree_rcu(cgrp, rcu_head);
850 }
851 iput(inode);
852}
853
/*
 * cgroup_delete - d_delete callback for cgroupfs dentries.
 * Always returns 1, regardless of the dentry (presumably so that unused
 * dentries are dropped instead of being cached -- confirm against the
 * dentry_operations documentation).
 */
static int cgroup_delete(const struct dentry *d)
{
	return 1;
}
858
/*
 * remove_dir - remove directory dentry @d from its parent.
 * A reference on the parent is held across the operation so that it stays
 * valid while @d is deleted and removed with simple_rmdir().
 */
static void remove_dir(struct dentry *d)
{
	/* pin the parent for the duration of the removal */
	struct dentry *parent = dget(d->d_parent);

	d_delete(d);
	simple_rmdir(parent->d_inode, d);
	dput(parent);
}
867
/*
 * cgroup_clear_directory - remove every file from a cgroup directory.
 * @dentry: the directory; its inode's i_mutex must be held (asserted)
 *
 * Each child is taken off d_subdirs.  Children that have an inode are
 * deleted and unlinked; they must not be directories (BUG otherwise).
 * The parent's d_lock is dropped around the unlink and the scan restarts
 * from the head of the list each time.
 */
static void cgroup_clear_directory(struct dentry *dentry)
{
	struct list_head *node;

	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
	spin_lock(&dentry->d_lock);
	node = dentry->d_subdirs.next;
	while (node != &dentry->d_subdirs) {
		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);

		/* child lock nests inside the parent's d_lock */
		spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
		list_del_init(node);
		if (d->d_inode) {
			/* This should never be called on a cgroup
			 * directory with child cgroups */
			BUG_ON(d->d_inode->i_mode & S_IFDIR);
			/* hold a reference, then drop both locks to unlink */
			dget_dlock(d);
			spin_unlock(&d->d_lock);
			spin_unlock(&dentry->d_lock);
			d_delete(d);
			simple_unlink(dentry->d_inode, d);
			dput(d);
			spin_lock(&dentry->d_lock);
		} else
			spin_unlock(&d->d_lock);
		/* the list may have changed while unlocked: restart at head */
		node = dentry->d_subdirs.next;
	}
	spin_unlock(&dentry->d_lock);
}
897
898
899
900
/*
 * cgroup_d_remove_dir - remove a cgroup directory and the files in it.
 * @dentry: directory to remove
 *
 * The directory's files are cleared first, the dentry is then taken off its
 * parent's child list (parent d_lock outside, child d_lock nested inside),
 * and finally the directory itself is removed.
 * NOTE: cgroup_clear_directory() asserts that the directory inode's i_mutex
 * is held, so callers must hold it.
 */
static void cgroup_d_remove_dir(struct dentry *dentry)
{
	struct dentry *parent;

	cgroup_clear_directory(dentry);

	parent = dentry->d_parent;
	spin_lock(&parent->d_lock);
	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
	list_del_init(&dentry->d_u.d_child);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&parent->d_lock);
	remove_dir(dentry);
}
915
916
917
918
919
920
921
922
923
/*
 * Wait queue woken by cgroup_wakeup_rmdir_waiter() when a cgroup's
 * CGRP_WAIT_ON_RMDIR flag is found set.  NOTE(review): the waiting side
 * (presumably in the rmdir path) is not visible in this part of the file.
 */
DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
925
926static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
927{
928 if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
929 wake_up_all(&cgroup_rmdir_waitq);
930}
931
/*
 * cgroup_exclude_rmdir - take a reference on @css.
 * Paired with cgroup_release_and_wakeup_rmdir(), which drops the reference
 * again.  NOTE(review): by its name the held reference is meant to keep
 * rmdir of the owning cgroup from completing -- confirm in the rmdir path.
 */
void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
{
	css_get(css);
}
936
/*
 * cgroup_release_and_wakeup_rmdir - undo cgroup_exclude_rmdir().
 * Wakes any rmdir waiter flagged on @css's cgroup, then drops the reference
 * on @css.
 */
void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
{
	cgroup_wakeup_rmdir_waiter(css->cgroup);
	css_put(css);
}
942
943
944
945
946
947
/*
 * rebind_subsystems - change the set of subsystems bound to a hierarchy.
 * @root:       hierarchy to modify
 * @final_bits: bitmask of subsystems that should be bound afterwards
 *
 * Must be called with cgroup_mutex held (asserted).
 *
 * Returns 0 on success, or -EBUSY when a subsystem to be added is already
 * bound to another hierarchy, or when @root contains cgroups besides its top
 * cgroup.  Nothing is modified in the -EBUSY cases.
 *
 * Module references: parse_cgroupfs_options() takes one module reference for
 * every modular subsystem in the requested mask.  For a newly added
 * subsystem that reference is kept; for a subsystem that was already bound
 * and stays bound the extra reference is dropped here; for a removed
 * subsystem the reference held while it was bound is dropped here.
 */
static int rebind_subsystems(struct cgroupfs_root *root,
			      unsigned long final_bits)
{
	unsigned long added_bits, removed_bits;
	struct cgroup *cgrp = &root->top_cgroup;
	int i;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));

	removed_bits = root->actual_subsys_bits & ~final_bits;
	added_bits = final_bits & ~root->actual_subsys_bits;
	/* Check that any added subsystems are currently free */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		unsigned long bit = 1UL << i;
		struct cgroup_subsys *ss = subsys[i];
		if (!(bit & added_bits))
			continue;
		/*
		 * A requested subsystem must exist in the table; a NULL
		 * entry here is a bug.
		 */
		BUG_ON(ss == NULL);
		if (ss->root != &rootnode) {
			/* Subsystem isn't free */
			return -EBUSY;
		}
	}

	/*
	 * Rebinding is only permitted while the hierarchy consists of its
	 * top cgroup alone.
	 */
	if (root->number_of_cgroups > 1)
		return -EBUSY;

	/* Process each subsystem */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		unsigned long bit = 1UL << i;
		if (bit & added_bits) {
			/* We're binding this subsystem to this hierarchy */
			BUG_ON(ss == NULL);
			BUG_ON(cgrp->subsys[i]);
			BUG_ON(!dummytop->subsys[i]);
			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
			mutex_lock(&ss->hierarchy_mutex);
			/* move the top-level state from dummytop to this root */
			cgrp->subsys[i] = dummytop->subsys[i];
			cgrp->subsys[i]->cgroup = cgrp;
			list_move(&ss->sibling, &root->subsys_list);
			ss->root = root;
			if (ss->bind)
				ss->bind(ss, cgrp);
			mutex_unlock(&ss->hierarchy_mutex);
			/* the module reference taken while parsing is kept */
		} else if (bit & removed_bits) {
			/* We're removing this subsystem */
			BUG_ON(ss == NULL);
			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
			mutex_lock(&ss->hierarchy_mutex);
			if (ss->bind)
				ss->bind(ss, dummytop);
			/* hand the state back to the default root */
			dummytop->subsys[i]->cgroup = dummytop;
			cgrp->subsys[i] = NULL;
			subsys[i]->root = &rootnode;
			list_move(&ss->sibling, &rootnode.subsys_list);
			mutex_unlock(&ss->hierarchy_mutex);
			/* subsystem is now free - drop reference on module */
			module_put(ss->module);
		} else if (bit & final_bits) {
			/* Subsystem state should already exist */
			BUG_ON(ss == NULL);
			BUG_ON(!cgrp->subsys[i]);
			/*
			 * Already bound and staying bound: drop the extra
			 * reference taken while parsing; the original one
			 * must still be held.
			 */
			module_put(ss->module);
#ifdef CONFIG_MODULE_UNLOAD
			BUG_ON(ss->module && !module_refcount(ss->module));
#endif
		} else {
			/* Subsystem state shouldn't exist */
			BUG_ON(cgrp->subsys[i]);
		}
	}
	root->subsys_bits = root->actual_subsys_bits = final_bits;
	synchronize_rcu();

	return 0;
}
1040
1041static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
1042{
1043 struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;
1044 struct cgroup_subsys *ss;
1045
1046 mutex_lock(&cgroup_mutex);
1047 for_each_subsys(root, ss)
1048 seq_printf(seq, ",%s", ss->name);
1049 if (test_bit(ROOT_NOPREFIX, &root->flags))
1050 seq_puts(seq, ",noprefix");
1051 if (strlen(root->release_agent_path))
1052 seq_printf(seq, ",release_agent=%s", root->release_agent_path);
1053 if (clone_children(&root->top_cgroup))
1054 seq_puts(seq, ",clone_children");
1055 if (strlen(root->name))
1056 seq_printf(seq, ",name=%s", root->name);
1057 mutex_unlock(&cgroup_mutex);
1058 return 0;
1059}
1060
/*
 * Parsed cgroupfs mount options, filled in by parse_cgroupfs_options() and
 * passed as the data argument to sget() in cgroup_mount().
 */
struct cgroup_sb_opts {
	/* Bitmask (by subsystem index) of the selected subsystems */
	unsigned long subsys_bits;
	/* ROOT_* flag bits, e.g. ROOT_NOPREFIX for "noprefix" */
	unsigned long flags;
	/* kmalloc'ed copy of the "release_agent=" value, or NULL; freed by the caller */
	char *release_agent;
	/* "clone_children" was given */
	bool clone_children;
	/* kmalloc'ed copy of the "name=" value, or NULL; freed by the caller */
	char *name;
	/* User explicitly requested empty subsystem */
	bool none;

	/* Candidate root allocated by cgroup_mount(); consumed by cgroup_set_super() */
	struct cgroupfs_root *new_root;

};
1073
1074
1075
1076
1077
1078
1079
1080static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1081{
1082 char *token, *o = data;
1083 bool all_ss = false, one_ss = false;
1084 unsigned long mask = (unsigned long)-1;
1085 int i;
1086 bool module_pin_failed = false;
1087
1088 BUG_ON(!mutex_is_locked(&cgroup_mutex));
1089
1090#ifdef CONFIG_CPUSETS
1091 mask = ~(1UL << cpuset_subsys_id);
1092#endif
1093
1094 memset(opts, 0, sizeof(*opts));
1095
1096 while ((token = strsep(&o, ",")) != NULL) {
1097 if (!*token)
1098 return -EINVAL;
1099 if (!strcmp(token, "none")) {
1100
1101 opts->none = true;
1102 continue;
1103 }
1104 if (!strcmp(token, "all")) {
1105
1106 if (one_ss)
1107 return -EINVAL;
1108 all_ss = true;
1109 continue;
1110 }
1111 if (!strcmp(token, "noprefix")) {
1112 set_bit(ROOT_NOPREFIX, &opts->flags);
1113 continue;
1114 }
1115 if (!strcmp(token, "clone_children")) {
1116 opts->clone_children = true;
1117 continue;
1118 }
1119 if (!strncmp(token, "release_agent=", 14)) {
1120
1121 if (opts->release_agent)
1122 return -EINVAL;
1123 opts->release_agent =
1124 kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
1125 if (!opts->release_agent)
1126 return -ENOMEM;
1127 continue;
1128 }
1129 if (!strncmp(token, "name=", 5)) {
1130 const char *name = token + 5;
1131
1132 if (!strlen(name))
1133 return -EINVAL;
1134
1135 for (i = 0; i < strlen(name); i++) {
1136 char c = name[i];
1137 if (isalnum(c))
1138 continue;
1139 if ((c == '.') || (c == '-') || (c == '_'))
1140 continue;
1141 return -EINVAL;
1142 }
1143
1144 if (opts->name)
1145 return -EINVAL;
1146 opts->name = kstrndup(name,
1147 MAX_CGROUP_ROOT_NAMELEN - 1,
1148 GFP_KERNEL);
1149 if (!opts->name)
1150 return -ENOMEM;
1151
1152 continue;
1153 }
1154
1155 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1156 struct cgroup_subsys *ss = subsys[i];
1157 if (ss == NULL)
1158 continue;
1159 if (strcmp(token, ss->name))
1160 continue;
1161 if (ss->disabled)
1162 continue;
1163
1164
1165 if (all_ss)
1166 return -EINVAL;
1167 set_bit(i, &opts->subsys_bits);
1168 one_ss = true;
1169
1170 break;
1171 }
1172 if (i == CGROUP_SUBSYS_COUNT)
1173 return -ENOENT;
1174 }
1175
1176
1177
1178
1179
1180
1181 if (all_ss || (!all_ss && !one_ss && !opts->none)) {
1182 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1183 struct cgroup_subsys *ss = subsys[i];
1184 if (ss == NULL)
1185 continue;
1186 if (ss->disabled)
1187 continue;
1188 set_bit(i, &opts->subsys_bits);
1189 }
1190 }
1191
1192
1193
1194
1195
1196
1197
1198
1199 if (test_bit(ROOT_NOPREFIX, &opts->flags) &&
1200 (opts->subsys_bits & mask))
1201 return -EINVAL;
1202
1203
1204
1205 if (opts->subsys_bits && opts->none)
1206 return -EINVAL;
1207
1208
1209
1210
1211
1212 if (!opts->subsys_bits && !opts->name)
1213 return -EINVAL;
1214
1215
1216
1217
1218
1219
1220
1221 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
1222 unsigned long bit = 1UL << i;
1223
1224 if (!(bit & opts->subsys_bits))
1225 continue;
1226 if (!try_module_get(subsys[i]->module)) {
1227 module_pin_failed = true;
1228 break;
1229 }
1230 }
1231 if (module_pin_failed) {
1232
1233
1234
1235
1236
1237 for (i--; i >= CGROUP_BUILTIN_SUBSYS_COUNT; i--) {
1238
1239 unsigned long bit = 1UL << i;
1240
1241 if (!(bit & opts->subsys_bits))
1242 continue;
1243 module_put(subsys[i]->module);
1244 }
1245 return -ENOENT;
1246 }
1247
1248 return 0;
1249}
1250
1251static void drop_parsed_module_refcounts(unsigned long subsys_bits)
1252{
1253 int i;
1254 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
1255 unsigned long bit = 1UL << i;
1256
1257 if (!(bit & subsys_bits))
1258 continue;
1259 module_put(subsys[i]->module);
1260 }
1261}
1262
1263static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1264{
1265 int ret = 0;
1266 struct cgroupfs_root *root = sb->s_fs_info;
1267 struct cgroup *cgrp = &root->top_cgroup;
1268 struct cgroup_sb_opts opts;
1269
1270 mutex_lock(&cgrp->dentry->d_inode->i_mutex);
1271 mutex_lock(&cgroup_mutex);
1272
1273
1274 ret = parse_cgroupfs_options(data, &opts);
1275 if (ret)
1276 goto out_unlock;
1277
1278
1279 if (opts.flags != root->flags ||
1280 (opts.name && strcmp(opts.name, root->name))) {
1281 ret = -EINVAL;
1282 drop_parsed_module_refcounts(opts.subsys_bits);
1283 goto out_unlock;
1284 }
1285
1286 ret = rebind_subsystems(root, opts.subsys_bits);
1287 if (ret) {
1288 drop_parsed_module_refcounts(opts.subsys_bits);
1289 goto out_unlock;
1290 }
1291
1292
1293 cgroup_populate_dir(cgrp);
1294
1295 if (opts.release_agent)
1296 strcpy(root->release_agent_path, opts.release_agent);
1297 out_unlock:
1298 kfree(opts.release_agent);
1299 kfree(opts.name);
1300 mutex_unlock(&cgroup_mutex);
1301 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
1302 return ret;
1303}
1304
/*
 * Superblock operations for cgroupfs; installed on new superblocks by
 * cgroup_set_super().  statfs and drop_inode use generic VFS helpers,
 * show_options and remount_fs are implemented in this file.
 */
static const struct super_operations cgroup_ops = {
	.statfs		= simple_statfs,
	.drop_inode	= generic_delete_inode,
	.show_options	= cgroup_show_options,
	.remount_fs	= cgroup_remount,
};
1311
1312static void init_cgroup_housekeeping(struct cgroup *cgrp)
1313{
1314 INIT_LIST_HEAD(&cgrp->sibling);
1315 INIT_LIST_HEAD(&cgrp->children);
1316 INIT_LIST_HEAD(&cgrp->css_sets);
1317 INIT_LIST_HEAD(&cgrp->release_list);
1318 INIT_LIST_HEAD(&cgrp->pidlists);
1319 mutex_init(&cgrp->pidlist_mutex);
1320 INIT_LIST_HEAD(&cgrp->event_list);
1321 spin_lock_init(&cgrp->event_list_lock);
1322}
1323
1324static void init_cgroup_root(struct cgroupfs_root *root)
1325{
1326 struct cgroup *cgrp = &root->top_cgroup;
1327 INIT_LIST_HEAD(&root->subsys_list);
1328 INIT_LIST_HEAD(&root->root_list);
1329 root->number_of_cgroups = 1;
1330 cgrp->root = root;
1331 cgrp->top_cgroup = cgrp;
1332 init_cgroup_housekeeping(cgrp);
1333}
1334
1335static bool init_root_id(struct cgroupfs_root *root)
1336{
1337 int ret = 0;
1338
1339 do {
1340 if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL))
1341 return false;
1342 spin_lock(&hierarchy_id_lock);
1343
1344 ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
1345 &root->hierarchy_id);
1346 if (ret == -ENOSPC)
1347
1348 ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id);
1349 if (!ret) {
1350 next_hierarchy_id = root->hierarchy_id + 1;
1351 } else if (ret != -EAGAIN) {
1352
1353 BUG_ON(ret);
1354 }
1355 spin_unlock(&hierarchy_id_lock);
1356 } while (ret);
1357 return true;
1358}
1359
1360static int cgroup_test_super(struct super_block *sb, void *data)
1361{
1362 struct cgroup_sb_opts *opts = data;
1363 struct cgroupfs_root *root = sb->s_fs_info;
1364
1365
1366 if (opts->name && strcmp(opts->name, root->name))
1367 return 0;
1368
1369
1370
1371
1372
1373 if ((opts->subsys_bits || opts->none)
1374 && (opts->subsys_bits != root->subsys_bits))
1375 return 0;
1376
1377 return 1;
1378}
1379
1380static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
1381{
1382 struct cgroupfs_root *root;
1383
1384 if (!opts->subsys_bits && !opts->none)
1385 return NULL;
1386
1387 root = kzalloc(sizeof(*root), GFP_KERNEL);
1388 if (!root)
1389 return ERR_PTR(-ENOMEM);
1390
1391 if (!init_root_id(root)) {
1392 kfree(root);
1393 return ERR_PTR(-ENOMEM);
1394 }
1395 init_cgroup_root(root);
1396
1397 root->subsys_bits = opts->subsys_bits;
1398 root->flags = opts->flags;
1399 if (opts->release_agent)
1400 strcpy(root->release_agent_path, opts->release_agent);
1401 if (opts->name)
1402 strcpy(root->name, opts->name);
1403 if (opts->clone_children)
1404 set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags);
1405 return root;
1406}
1407
1408static void cgroup_drop_root(struct cgroupfs_root *root)
1409{
1410 if (!root)
1411 return;
1412
1413 BUG_ON(!root->hierarchy_id);
1414 spin_lock(&hierarchy_id_lock);
1415 ida_remove(&hierarchy_ida, root->hierarchy_id);
1416 spin_unlock(&hierarchy_id_lock);
1417 kfree(root);
1418}
1419
1420static int cgroup_set_super(struct super_block *sb, void *data)
1421{
1422 int ret;
1423 struct cgroup_sb_opts *opts = data;
1424
1425
1426 if (!opts->new_root)
1427 return -EINVAL;
1428
1429 BUG_ON(!opts->subsys_bits && !opts->none);
1430
1431 ret = set_anon_super(sb, NULL);
1432 if (ret)
1433 return ret;
1434
1435 sb->s_fs_info = opts->new_root;
1436 opts->new_root->sb = sb;
1437
1438 sb->s_blocksize = PAGE_CACHE_SIZE;
1439 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1440 sb->s_magic = CGROUP_SUPER_MAGIC;
1441 sb->s_op = &cgroup_ops;
1442
1443 return 0;
1444}
1445
1446static int cgroup_get_rootdir(struct super_block *sb)
1447{
1448 static const struct dentry_operations cgroup_dops = {
1449 .d_iput = cgroup_diput,
1450 .d_delete = cgroup_delete,
1451 };
1452
1453 struct inode *inode =
1454 cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
1455 struct dentry *dentry;
1456
1457 if (!inode)
1458 return -ENOMEM;
1459
1460 inode->i_fop = &simple_dir_operations;
1461 inode->i_op = &cgroup_dir_inode_operations;
1462
1463 inc_nlink(inode);
1464 dentry = d_alloc_root(inode);
1465 if (!dentry) {
1466 iput(inode);
1467 return -ENOMEM;
1468 }
1469 sb->s_root = dentry;
1470
1471 sb->s_d_op = &cgroup_dops;
1472 return 0;
1473}
1474
/*
 * cgroup_mount - file_system_type.mount for cgroupfs.
 * @fs_type:         the cgroup filesystem type
 * @flags:           mount flags (unused here)
 * @unused_dev_name: device name (unused)
 * @data:            option string, parsed by parse_cgroupfs_options()
 *
 * Either attaches to an existing hierarchy that matches the options (see
 * cgroup_test_super()) or sets up a new one: creates the root directory,
 * binds the requested subsystems, links every existing css_set to the new
 * top cgroup and populates the directory.
 *
 * Returns a referenced root dentry on success or an ERR_PTR() on failure.
 */
static struct dentry *cgroup_mount(struct file_system_type *fs_type,
			 int flags, const char *unused_dev_name,
			 void *data)
{
	struct cgroup_sb_opts opts;
	struct cgroupfs_root *root;
	int ret = 0;
	struct super_block *sb;
	struct cgroupfs_root *new_root;

	/* First find the desired set of subsystems */
	mutex_lock(&cgroup_mutex);
	ret = parse_cgroupfs_options(data, &opts);
	mutex_unlock(&cgroup_mutex);
	if (ret)
		goto out_err;

	/*
	 * Allocate a new cgroup root. We may not need it if we're
	 * reusing an existing hierarchy.  new_root is NULL when the options
	 * carry only a name.
	 */
	new_root = cgroup_root_from_opts(&opts);
	if (IS_ERR(new_root)) {
		ret = PTR_ERR(new_root);
		goto drop_modules;
	}
	opts.new_root = new_root;

	/* Locate an existing or new sb for this hierarchy */
	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
	if (IS_ERR(sb)) {
		ret = PTR_ERR(sb);
		cgroup_drop_root(opts.new_root);
		goto drop_modules;
	}

	root = sb->s_fs_info;
	BUG_ON(!root);
	if (root == opts.new_root) {
		/* We used the new root structure, so this is a new hierarchy */
		struct list_head tmp_cg_links;
		struct cgroup *root_cgrp = &root->top_cgroup;
		struct inode *inode;
		struct cgroupfs_root *existing_root;
		const struct cred *cred;
		int i;

		BUG_ON(sb->s_root != NULL);

		ret = cgroup_get_rootdir(sb);
		if (ret)
			goto drop_new_super;
		inode = sb->s_root->d_inode;

		/* lock order: directory i_mutex first, then cgroup_mutex */
		mutex_lock(&inode->i_mutex);
		mutex_lock(&cgroup_mutex);

		if (strlen(root->name)) {
			/* Check for name clashes with existing mounts */
			for_each_active_root(existing_root) {
				if (!strcmp(existing_root->name, root->name)) {
					ret = -EBUSY;
					mutex_unlock(&cgroup_mutex);
					mutex_unlock(&inode->i_mutex);
					goto drop_new_super;
				}
			}
		}

		/*
		 * We're accessing css_set_count without locking
		 * css_set_lock here, but that's OK - it can only be
		 * increased by someone holding cgroup_lock, and
		 * that's us. The worst that can happen is that we
		 * have some link structures left over
		 */
		ret = allocate_cg_links(css_set_count, &tmp_cg_links);
		if (ret) {
			mutex_unlock(&cgroup_mutex);
			mutex_unlock(&inode->i_mutex);
			goto drop_new_super;
		}

		ret = rebind_subsystems(root, root->subsys_bits);
		if (ret == -EBUSY) {
			mutex_unlock(&cgroup_mutex);
			mutex_unlock(&inode->i_mutex);
			free_cg_links(&tmp_cg_links);
			goto drop_new_super;
		}
		/*
		 * There must be no failure case after here, since rebinding
		 * takes care of subsystems' refcounts, which are explicitly
		 * dropped in the failure exit path.
		 */

		/* EBUSY should be the only error here */
		BUG_ON(ret);

		list_add(&root->root_list, &roots);
		root_count++;

		sb->s_root->d_fsdata = root_cgrp;
		root->top_cgroup.dentry = sb->s_root;

		/* Link the top cgroup in this hierarchy into all
		 * the css_set objects */
		write_lock(&css_set_lock);
		for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
			struct hlist_head *hhead = &css_set_table[i];
			struct hlist_node *node;
			struct css_set *cg;

			hlist_for_each_entry(cg, node, hhead, hlist)
				link_css_set(&tmp_cg_links, cg, root_cgrp);
		}
		write_unlock(&css_set_lock);

		/* discard any preallocated links that were not needed */
		free_cg_links(&tmp_cg_links);

		BUG_ON(!list_empty(&root_cgrp->sibling));
		BUG_ON(!list_empty(&root_cgrp->children));
		BUG_ON(root->number_of_cgroups != 1);

		/* populate the directory with init_cred credentials in effect */
		cred = override_creds(&init_cred);
		cgroup_populate_dir(root_cgrp);
		revert_creds(cred);
		mutex_unlock(&cgroup_mutex);
		mutex_unlock(&inode->i_mutex);
	} else {
		/*
		 * We re-used an existing hierarchy - the new root (if
		 * any) is not needed
		 */
		cgroup_drop_root(opts.new_root);
		/* no subsys rebinding, so refcounts don't change */
		drop_parsed_module_refcounts(opts.subsys_bits);
	}

	kfree(opts.release_agent);
	kfree(opts.name);
	return dget(sb->s_root);

	/* error unwinding: each label also falls through to the next one */
 drop_new_super:
	deactivate_locked_super(sb);
 drop_modules:
	drop_parsed_module_refcounts(opts.subsys_bits);
 out_err:
	kfree(opts.release_agent);
	kfree(opts.name);
	return ERR_PTR(ret);
}
1627
/*
 * cgroup_kill_sb - file_system_type.kill_sb for cgroupfs.
 * @sb: superblock being destroyed
 *
 * Expects the hierarchy to contain nothing but its top cgroup (asserted).
 * Unbinds all subsystems, removes and frees every css_set link to the top
 * cgroup, takes the root off the "roots" list if it is on it, tears down
 * the superblock and finally frees the root.
 */
static void cgroup_kill_sb(struct super_block *sb) {
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	int ret;
	struct cg_cgroup_link *link;
	struct cg_cgroup_link *saved_link;

	BUG_ON(!root);

	BUG_ON(root->number_of_cgroups != 1);
	BUG_ON(!list_empty(&cgrp->children));
	BUG_ON(!list_empty(&cgrp->sibling));

	mutex_lock(&cgroup_mutex);

	/* Rebind all subsystems back to the default hierarchy */
	ret = rebind_subsystems(root, 0);
	/* Shouldn't be able to fail ... */
	BUG_ON(ret);

	/*
	 * Release all the links from css_sets to this hierarchy's
	 * root cgroup
	 */
	write_lock(&css_set_lock);

	list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
				 cgrp_link_list) {
		list_del(&link->cg_link_list);
		list_del(&link->cgrp_link_list);
		kfree(link);
	}
	write_unlock(&css_set_lock);

	/* root_list is empty when the root was never added to "roots" */
	if (!list_empty(&root->root_list)) {
		list_del(&root->root_list);
		root_count--;
	}

	mutex_unlock(&cgroup_mutex);

	kill_litter_super(sb);
	cgroup_drop_root(root);
}
1672
1673static struct file_system_type cgroup_fs_type = {
1674 .name = "cgroup",
1675 .mount = cgroup_mount,
1676 .kill_sb = cgroup_kill_sb,
1677};
1678
/* kobject pointer for cgroups; it is not assigned in this part of the file. */
static struct kobject *cgroup_kobj;
1680
1681static inline struct cgroup *__d_cgrp(struct dentry *dentry)
1682{
1683 return dentry->d_fsdata;
1684}
1685
1686static inline struct cftype *__d_cft(struct dentry *dentry)
1687{
1688 return dentry->d_fsdata;
1689}
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1702{
1703 char *start;
1704 struct dentry *dentry = rcu_dereference_check(cgrp->dentry,
1705 cgroup_lock_is_held());
1706
1707 if (!dentry || cgrp == dummytop) {
1708
1709
1710
1711
1712 strcpy(buf, "/");
1713 return 0;
1714 }
1715
1716 start = buf + buflen;
1717
1718 *--start = '\0';
1719 for (;;) {
1720 int len = dentry->d_name.len;
1721
1722 if ((start -= len) < buf)
1723 return -ENAMETOOLONG;
1724 memcpy(start, dentry->d_name.name, len);
1725 cgrp = cgrp->parent;
1726 if (!cgrp)
1727 break;
1728
1729 dentry = rcu_dereference_check(cgrp->dentry,
1730 cgroup_lock_is_held());
1731 if (!cgrp->parent)
1732 continue;
1733 if (--start < buf)
1734 return -ENAMETOOLONG;
1735 *start = '/';
1736 }
1737 memmove(buf, start, buf + buflen - start);
1738 return 0;
1739}
1740EXPORT_SYMBOL_GPL(cgroup_path);
1741
1742
1743
1744
1745
1746
1747
1748
1749static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
1750 struct task_struct *tsk, bool guarantee)
1751{
1752 struct css_set *oldcg;
1753 struct css_set *newcg;
1754
1755
1756
1757
1758
1759
1760 task_lock(tsk);
1761 oldcg = tsk->cgroups;
1762 get_css_set(oldcg);
1763 task_unlock(tsk);
1764
1765
1766 if (guarantee) {
1767
1768 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
1769 read_lock(&css_set_lock);
1770 newcg = find_existing_css_set(oldcg, cgrp, template);
1771 BUG_ON(!newcg);
1772 get_css_set(newcg);
1773 read_unlock(&css_set_lock);
1774 } else {
1775 might_sleep();
1776
1777 newcg = find_css_set(oldcg, cgrp);
1778 if (!newcg) {
1779 put_css_set(oldcg);
1780 return -ENOMEM;
1781 }
1782 }
1783 put_css_set(oldcg);
1784
1785
1786 task_lock(tsk);
1787 if (tsk->flags & PF_EXITING) {
1788 task_unlock(tsk);
1789 put_css_set(newcg);
1790 return -ESRCH;
1791 }
1792 rcu_assign_pointer(tsk->cgroups, newcg);
1793 task_unlock(tsk);
1794
1795
1796 write_lock(&css_set_lock);
1797 if (!list_empty(&tsk->cg_list))
1798 list_move(&tsk->cg_list, &newcg->tasks);
1799 write_unlock(&css_set_lock);
1800
1801
1802
1803
1804
1805
1806 put_css_set(oldcg);
1807
1808 set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
1809 return 0;
1810}
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1821{
1822 int retval;
1823 struct cgroup_subsys *ss, *failed_ss = NULL;
1824 struct cgroup *oldcgrp;
1825 struct cgroupfs_root *root = cgrp->root;
1826
1827
1828 oldcgrp = task_cgroup_from_root(tsk, root);
1829 if (cgrp == oldcgrp)
1830 return 0;
1831
1832 for_each_subsys(root, ss) {
1833 if (ss->can_attach) {
1834 retval = ss->can_attach(ss, cgrp, tsk);
1835 if (retval) {
1836
1837
1838
1839
1840
1841
1842 failed_ss = ss;
1843 goto out;
1844 }
1845 }
1846 if (ss->can_attach_task) {
1847 retval = ss->can_attach_task(cgrp, tsk);
1848 if (retval) {
1849 failed_ss = ss;
1850 goto out;
1851 }
1852 }
1853 }
1854
1855 retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false);
1856 if (retval)
1857 goto out;
1858
1859 for_each_subsys(root, ss) {
1860 if (ss->pre_attach)
1861 ss->pre_attach(cgrp);
1862 if (ss->attach_task)
1863 ss->attach_task(cgrp, tsk);
1864 if (ss->attach)
1865 ss->attach(ss, cgrp, oldcgrp, tsk);
1866 }
1867
1868 synchronize_rcu();
1869
1870
1871
1872
1873
1874 cgroup_wakeup_rmdir_waiter(cgrp);
1875out:
1876 if (retval) {
1877 for_each_subsys(root, ss) {
1878 if (ss == failed_ss)
1879
1880
1881
1882
1883
1884
1885 break;
1886 if (ss->cancel_attach)
1887 ss->cancel_attach(ss, cgrp, tsk);
1888 }
1889 }
1890 return retval;
1891}
1892
1893
1894
1895
1896
1897
1898int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
1899{
1900 struct cgroupfs_root *root;
1901 int retval = 0;
1902
1903 cgroup_lock();
1904 for_each_active_root(root) {
1905 struct cgroup *from_cg = task_cgroup_from_root(from, root);
1906
1907 retval = cgroup_attach_task(from_cg, tsk);
1908 if (retval)
1909 break;
1910 }
1911 cgroup_unlock();
1912
1913 return retval;
1914}
1915EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
1916
1917
1918
1919
1920
1921
1922
1923struct cg_list_entry {
1924 struct css_set *cg;
1925 struct list_head links;
1926};
1927
1928static bool css_set_check_fetched(struct cgroup *cgrp,
1929 struct task_struct *tsk, struct css_set *cg,
1930 struct list_head *newcg_list)
1931{
1932 struct css_set *newcg;
1933 struct cg_list_entry *cg_entry;
1934 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
1935
1936 read_lock(&css_set_lock);
1937 newcg = find_existing_css_set(cg, cgrp, template);
1938 if (newcg)
1939 get_css_set(newcg);
1940 read_unlock(&css_set_lock);
1941
1942
1943 if (!newcg)
1944 return false;
1945
1946 list_for_each_entry(cg_entry, newcg_list, links) {
1947 if (cg_entry->cg == newcg) {
1948 put_css_set(newcg);
1949 return true;
1950 }
1951 }
1952
1953
1954 put_css_set(newcg);
1955 return false;
1956}
1957
1958
1959
1960
1961
1962static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg,
1963 struct list_head *newcg_list)
1964{
1965 struct css_set *newcg;
1966 struct cg_list_entry *cg_entry;
1967
1968
1969 newcg = find_css_set(cg, cgrp);
1970 if (!newcg)
1971 return -ENOMEM;
1972
1973 cg_entry = kmalloc(sizeof(struct cg_list_entry), GFP_KERNEL);
1974 if (!cg_entry) {
1975 put_css_set(newcg);
1976 return -ENOMEM;
1977 }
1978 cg_entry->cg = newcg;
1979 list_add(&cg_entry->links, newcg_list);
1980 return 0;
1981}
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
1992{
1993 int retval, i, group_size;
1994 struct cgroup_subsys *ss, *failed_ss = NULL;
1995 bool cancel_failed_ss = false;
1996
1997 struct cgroup *oldcgrp = NULL;
1998 struct css_set *oldcg;
1999 struct cgroupfs_root *root = cgrp->root;
2000
2001 struct task_struct *tsk;
2002 struct flex_array *group;
2003
2004
2005
2006
2007
2008 struct list_head newcg_list;
2009 struct cg_list_entry *cg_entry, *temp_nobe;
2010
2011
2012
2013
2014
2015
2016
2017
2018 group_size = get_nr_threads(leader);
2019
2020 group = flex_array_alloc(sizeof(struct task_struct *), group_size,
2021 GFP_KERNEL);
2022 if (!group)
2023 return -ENOMEM;
2024
2025 retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL);
2026 if (retval)
2027 goto out_free_group_list;
2028
2029
2030 rcu_read_lock();
2031 if (!thread_group_leader(leader)) {
2032
2033
2034
2035
2036
2037
2038
2039 rcu_read_unlock();
2040 retval = -EAGAIN;
2041 goto out_free_group_list;
2042 }
2043
2044 tsk = leader;
2045 i = 0;
2046 do {
2047
2048 BUG_ON(i >= group_size);
2049 get_task_struct(tsk);
2050
2051
2052
2053
2054 retval = flex_array_put_ptr(group, i, tsk, GFP_ATOMIC);
2055 BUG_ON(retval != 0);
2056 i++;
2057 } while_each_thread(leader, tsk);
2058
2059 group_size = i;
2060 rcu_read_unlock();
2061
2062
2063
2064
2065 for_each_subsys(root, ss) {
2066 if (ss->can_attach) {
2067 retval = ss->can_attach(ss, cgrp, leader);
2068 if (retval) {
2069 failed_ss = ss;
2070 goto out_cancel_attach;
2071 }
2072 }
2073
2074 if (ss->can_attach_task) {
2075
2076 for (i = 0; i < group_size; i++) {
2077 tsk = flex_array_get_ptr(group, i);
2078 retval = ss->can_attach_task(cgrp, tsk);
2079 if (retval) {
2080 failed_ss = ss;
2081 cancel_failed_ss = true;
2082 goto out_cancel_attach;
2083 }
2084 }
2085 }
2086 }
2087
2088
2089
2090
2091
2092 INIT_LIST_HEAD(&newcg_list);
2093 for (i = 0; i < group_size; i++) {
2094 tsk = flex_array_get_ptr(group, i);
2095
2096 oldcgrp = task_cgroup_from_root(tsk, root);
2097 if (cgrp == oldcgrp)
2098 continue;
2099
2100 task_lock(tsk);
2101 if (tsk->flags & PF_EXITING) {
2102
2103 task_unlock(tsk);
2104 continue;
2105 }
2106 oldcg = tsk->cgroups;
2107 get_css_set(oldcg);
2108 task_unlock(tsk);
2109
2110 if (css_set_check_fetched(cgrp, tsk, oldcg, &newcg_list)) {
2111
2112 put_css_set(oldcg);
2113 } else {
2114
2115 retval = css_set_prefetch(cgrp, oldcg, &newcg_list);
2116 put_css_set(oldcg);
2117 if (retval)
2118 goto out_list_teardown;
2119 }
2120 }
2121
2122
2123
2124
2125
2126
2127
2128 for_each_subsys(root, ss) {
2129 if (ss->pre_attach)
2130 ss->pre_attach(cgrp);
2131 }
2132 for (i = 0; i < group_size; i++) {
2133 tsk = flex_array_get_ptr(group, i);
2134
2135 oldcgrp = task_cgroup_from_root(tsk, root);
2136 if (cgrp == oldcgrp)
2137 continue;
2138
2139 for_each_subsys(root, ss) {
2140 if (ss->attach_task)
2141 ss->attach_task(cgrp, tsk);
2142 }
2143
2144 retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true);
2145 BUG_ON(retval != 0 && retval != -ESRCH);
2146 }
2147
2148
2149
2150
2151
2152
2153
2154 for_each_subsys(root, ss) {
2155 if (ss->attach)
2156 ss->attach(ss, cgrp, oldcgrp, leader);
2157 }
2158
2159
2160
2161
2162 synchronize_rcu();
2163 cgroup_wakeup_rmdir_waiter(cgrp);
2164 retval = 0;
2165out_list_teardown:
2166
2167 list_for_each_entry_safe(cg_entry, temp_nobe, &newcg_list, links) {
2168 list_del(&cg_entry->links);
2169 put_css_set(cg_entry->cg);
2170 kfree(cg_entry);
2171 }
2172out_cancel_attach:
2173
2174 if (retval) {
2175 for_each_subsys(root, ss) {
2176 if (ss == failed_ss) {
2177 if (cancel_failed_ss && ss->cancel_attach)
2178 ss->cancel_attach(ss, cgrp, leader);
2179 break;
2180 }
2181 if (ss->cancel_attach)
2182 ss->cancel_attach(ss, cgrp, leader);
2183 }
2184 }
2185
2186 for (i = 0; i < group_size; i++) {
2187 tsk = flex_array_get_ptr(group, i);
2188 put_task_struct(tsk);
2189 }
2190out_free_group_list:
2191 flex_array_free(group);
2192 return retval;
2193}
2194
2195
2196
2197
2198
2199
2200static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
2201{
2202 struct task_struct *tsk;
2203 const struct cred *cred = current_cred(), *tcred;
2204 int ret;
2205
2206 if (!cgroup_lock_live_group(cgrp))
2207 return -ENODEV;
2208
2209 if (pid) {
2210 rcu_read_lock();
2211 tsk = find_task_by_vpid(pid);
2212 if (!tsk) {
2213 rcu_read_unlock();
2214 cgroup_unlock();
2215 return -ESRCH;
2216 }
2217 if (threadgroup) {
2218
2219
2220
2221
2222
2223
2224 tsk = tsk->group_leader;
2225 } else if (tsk->flags & PF_EXITING) {
2226
2227 rcu_read_unlock();
2228 cgroup_unlock();
2229 return -ESRCH;
2230 }
2231
2232
2233
2234
2235
2236 tcred = __task_cred(tsk);
2237 if (cred->euid &&
2238 cred->euid != tcred->uid &&
2239 cred->euid != tcred->suid) {
2240 rcu_read_unlock();
2241 cgroup_unlock();
2242 return -EACCES;
2243 }
2244 get_task_struct(tsk);
2245 rcu_read_unlock();
2246 } else {
2247 if (threadgroup)
2248 tsk = current->group_leader;
2249 else
2250 tsk = current;
2251 get_task_struct(tsk);
2252 }
2253
2254 if (threadgroup) {
2255 threadgroup_fork_write_lock(tsk);
2256 ret = cgroup_attach_proc(cgrp, tsk);
2257 threadgroup_fork_write_unlock(tsk);
2258 } else {
2259 ret = cgroup_attach_task(cgrp, tsk);
2260 }
2261 put_task_struct(tsk);
2262 cgroup_unlock();
2263 return ret;
2264}
2265
2266static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
2267{
2268 return attach_task_by_pid(cgrp, pid, false);
2269}
2270
2271static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
2272{
2273 int ret;
2274 do {
2275
2276
2277
2278
2279
2280 ret = attach_task_by_pid(cgrp, tgid, true);
2281 } while (ret == -EAGAIN);
2282 return ret;
2283}
2284
2285
2286
2287
2288
2289
2290
2291
2292bool cgroup_lock_live_group(struct cgroup *cgrp)
2293{
2294 mutex_lock(&cgroup_mutex);
2295 if (cgroup_is_removed(cgrp)) {
2296 mutex_unlock(&cgroup_mutex);
2297 return false;
2298 }
2299 return true;
2300}
2301EXPORT_SYMBOL_GPL(cgroup_lock_live_group);
2302
2303static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
2304 const char *buffer)
2305{
2306 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
2307 if (strlen(buffer) >= PATH_MAX)
2308 return -EINVAL;
2309 if (!cgroup_lock_live_group(cgrp))
2310 return -ENODEV;
2311 strcpy(cgrp->root->release_agent_path, buffer);
2312 cgroup_unlock();
2313 return 0;
2314}
2315
2316static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
2317 struct seq_file *seq)
2318{
2319 if (!cgroup_lock_live_group(cgrp))
2320 return -ENODEV;
2321 seq_puts(seq, cgrp->root->release_agent_path);
2322 seq_putc(seq, '\n');
2323 cgroup_unlock();
2324 return 0;
2325}
2326
2327
2328#define CGROUP_LOCAL_BUFFER_SIZE 64
2329
2330static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
2331 struct file *file,
2332 const char __user *userbuf,
2333 size_t nbytes, loff_t *unused_ppos)
2334{
2335 char buffer[CGROUP_LOCAL_BUFFER_SIZE];
2336 int retval = 0;
2337 char *end;
2338
2339 if (!nbytes)
2340 return -EINVAL;
2341 if (nbytes >= sizeof(buffer))
2342 return -E2BIG;
2343 if (copy_from_user(buffer, userbuf, nbytes))
2344 return -EFAULT;
2345
2346 buffer[nbytes] = 0;
2347 if (cft->write_u64) {
2348 u64 val = simple_strtoull(strstrip(buffer), &end, 0);
2349 if (*end)
2350 return -EINVAL;
2351 retval = cft->write_u64(cgrp, cft, val);
2352 } else {
2353 s64 val = simple_strtoll(strstrip(buffer), &end, 0);
2354 if (*end)
2355 return -EINVAL;
2356 retval = cft->write_s64(cgrp, cft, val);
2357 }
2358 if (!retval)
2359 retval = nbytes;
2360 return retval;
2361}
2362
2363static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
2364 struct file *file,
2365 const char __user *userbuf,
2366 size_t nbytes, loff_t *unused_ppos)
2367{
2368 char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
2369 int retval = 0;
2370 size_t max_bytes = cft->max_write_len;
2371 char *buffer = local_buffer;
2372
2373 if (!max_bytes)
2374 max_bytes = sizeof(local_buffer) - 1;
2375 if (nbytes >= max_bytes)
2376 return -E2BIG;
2377
2378 if (nbytes >= sizeof(local_buffer)) {
2379 buffer = kmalloc(nbytes + 1, GFP_KERNEL);
2380 if (buffer == NULL)
2381 return -ENOMEM;
2382 }
2383 if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
2384 retval = -EFAULT;
2385 goto out;
2386 }
2387
2388 buffer[nbytes] = 0;
2389 retval = cft->write_string(cgrp, cft, strstrip(buffer));
2390 if (!retval)
2391 retval = nbytes;
2392out:
2393 if (buffer != local_buffer)
2394 kfree(buffer);
2395 return retval;
2396}
2397
2398static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
2399 size_t nbytes, loff_t *ppos)
2400{
2401 struct cftype *cft = __d_cft(file->f_dentry);
2402 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2403
2404 if (cgroup_is_removed(cgrp))
2405 return -ENODEV;
2406 if (cft->write)
2407 return cft->write(cgrp, cft, file, buf, nbytes, ppos);
2408 if (cft->write_u64 || cft->write_s64)
2409 return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
2410 if (cft->write_string)
2411 return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
2412 if (cft->trigger) {
2413 int ret = cft->trigger(cgrp, (unsigned int)cft->private);
2414 return ret ? ret : nbytes;
2415 }
2416 return -EINVAL;
2417}
2418
2419static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
2420 struct file *file,
2421 char __user *buf, size_t nbytes,
2422 loff_t *ppos)
2423{
2424 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2425 u64 val = cft->read_u64(cgrp, cft);
2426 int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
2427
2428 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2429}
2430
2431static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
2432 struct file *file,
2433 char __user *buf, size_t nbytes,
2434 loff_t *ppos)
2435{
2436 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2437 s64 val = cft->read_s64(cgrp, cft);
2438 int len = sprintf(tmp, "%lld\n", (long long) val);
2439
2440 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2441}
2442
2443static ssize_t cgroup_file_read(struct file *file, char __user *buf,
2444 size_t nbytes, loff_t *ppos)
2445{
2446 struct cftype *cft = __d_cft(file->f_dentry);
2447 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2448
2449 if (cgroup_is_removed(cgrp))
2450 return -ENODEV;
2451
2452 if (cft->read)
2453 return cft->read(cgrp, cft, file, buf, nbytes, ppos);
2454 if (cft->read_u64)
2455 return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
2456 if (cft->read_s64)
2457 return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
2458 return -EINVAL;
2459}
2460
2461
2462
2463
2464
2465
/*
 * Per-open state for control files served through seq_file; allocated in
 * cgroup_file_open() and freed in cgroup_seqfile_release().
 */
struct cgroup_seqfile_state {
	struct cftype *cft;		/* control file being read */
	struct cgroup *cgroup;		/* cgroup the file belongs to */
};
2470
2471static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
2472{
2473 struct seq_file *sf = cb->state;
2474 return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
2475}
2476
2477static int cgroup_seqfile_show(struct seq_file *m, void *arg)
2478{
2479 struct cgroup_seqfile_state *state = m->private;
2480 struct cftype *cft = state->cft;
2481 if (cft->read_map) {
2482 struct cgroup_map_cb cb = {
2483 .fill = cgroup_map_add,
2484 .state = m,
2485 };
2486 return cft->read_map(state->cgroup, cft, &cb);
2487 }
2488 return cft->read_seq_string(state->cgroup, cft, m);
2489}
2490
2491static int cgroup_seqfile_release(struct inode *inode, struct file *file)
2492{
2493 struct seq_file *seq = file->private_data;
2494 kfree(seq->private);
2495 return single_release(inode, file);
2496}
2497
2498static const struct file_operations cgroup_seqfile_operations = {
2499 .read = seq_read,
2500 .write = cgroup_file_write,
2501 .llseek = seq_lseek,
2502 .release = cgroup_seqfile_release,
2503};
2504
2505static int cgroup_file_open(struct inode *inode, struct file *file)
2506{
2507 int err;
2508 struct cftype *cft;
2509
2510 err = generic_file_open(inode, file);
2511 if (err)
2512 return err;
2513 cft = __d_cft(file->f_dentry);
2514
2515 if (cft->read_map || cft->read_seq_string) {
2516 struct cgroup_seqfile_state *state =
2517 kzalloc(sizeof(*state), GFP_USER);
2518 if (!state)
2519 return -ENOMEM;
2520 state->cft = cft;
2521 state->cgroup = __d_cgrp(file->f_dentry->d_parent);
2522 file->f_op = &cgroup_seqfile_operations;
2523 err = single_open(file, cgroup_seqfile_show, state);
2524 if (err < 0)
2525 kfree(state);
2526 } else if (cft->open)
2527 err = cft->open(inode, file);
2528 else
2529 err = 0;
2530
2531 return err;
2532}
2533
2534static int cgroup_file_release(struct inode *inode, struct file *file)
2535{
2536 struct cftype *cft = __d_cft(file->f_dentry);
2537 if (cft->release)
2538 return cft->release(inode, file);
2539 return 0;
2540}
2541
2542
2543
2544
2545static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
2546 struct inode *new_dir, struct dentry *new_dentry)
2547{
2548 if (!S_ISDIR(old_dentry->d_inode->i_mode))
2549 return -ENOTDIR;
2550 if (new_dentry->d_inode)
2551 return -EEXIST;
2552 if (old_dir != new_dir)
2553 return -EIO;
2554 return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
2555}
2556
2557static const struct file_operations cgroup_file_operations = {
2558 .read = cgroup_file_read,
2559 .write = cgroup_file_write,
2560 .llseek = generic_file_llseek,
2561 .open = cgroup_file_open,
2562 .release = cgroup_file_release,
2563};
2564
2565static const struct inode_operations cgroup_dir_inode_operations = {
2566 .lookup = cgroup_lookup,
2567 .mkdir = cgroup_mkdir,
2568 .rmdir = cgroup_rmdir,
2569 .rename = cgroup_rename,
2570};
2571
2572static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
2573{
2574 if (dentry->d_name.len > NAME_MAX)
2575 return ERR_PTR(-ENAMETOOLONG);
2576 d_add(dentry, NULL);
2577 return NULL;
2578}
2579
2580
2581
2582
2583static inline struct cftype *__file_cft(struct file *file)
2584{
2585 if (file->f_dentry->d_inode->i_fop != &cgroup_file_operations)
2586 return ERR_PTR(-EINVAL);
2587 return __d_cft(file->f_dentry);
2588}
2589
2590static int cgroup_create_file(struct dentry *dentry, mode_t mode,
2591 struct super_block *sb)
2592{
2593 struct inode *inode;
2594
2595 if (!dentry)
2596 return -ENOENT;
2597 if (dentry->d_inode)
2598 return -EEXIST;
2599
2600 inode = cgroup_new_inode(mode, sb);
2601 if (!inode)
2602 return -ENOMEM;
2603
2604 if (S_ISDIR(mode)) {
2605 inode->i_op = &cgroup_dir_inode_operations;
2606 inode->i_fop = &simple_dir_operations;
2607
2608
2609 inc_nlink(inode);
2610
2611
2612
2613 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
2614 } else if (S_ISREG(mode)) {
2615 inode->i_size = 0;
2616 inode->i_fop = &cgroup_file_operations;
2617 }
2618 d_instantiate(dentry, inode);
2619 dget(dentry);
2620 return 0;
2621}
2622
2623
2624
2625
2626
2627
2628
2629
2630static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
2631 mode_t mode)
2632{
2633 struct dentry *parent;
2634 int error = 0;
2635
2636 parent = cgrp->parent->dentry;
2637 error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
2638 if (!error) {
2639 dentry->d_fsdata = cgrp;
2640 inc_nlink(parent->d_inode);
2641 rcu_assign_pointer(cgrp->dentry, dentry);
2642 dget(dentry);
2643 }
2644 dput(dentry);
2645
2646 return error;
2647}
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658static mode_t cgroup_file_mode(const struct cftype *cft)
2659{
2660 mode_t mode = 0;
2661
2662 if (cft->mode)
2663 return cft->mode;
2664
2665 if (cft->read || cft->read_u64 || cft->read_s64 ||
2666 cft->read_map || cft->read_seq_string)
2667 mode |= S_IRUGO;
2668
2669 if (cft->write || cft->write_u64 || cft->write_s64 ||
2670 cft->write_string || cft->trigger)
2671 mode |= S_IWUSR;
2672
2673 return mode;
2674}
2675
2676int cgroup_add_file(struct cgroup *cgrp,
2677 struct cgroup_subsys *subsys,
2678 const struct cftype *cft)
2679{
2680 struct dentry *dir = cgrp->dentry;
2681 struct dentry *dentry;
2682 int error;
2683 mode_t mode;
2684
2685 char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
2686 if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
2687 strcpy(name, subsys->name);
2688 strcat(name, ".");
2689 }
2690 strcat(name, cft->name);
2691 BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
2692 dentry = lookup_one_len(name, dir, strlen(name));
2693 if (!IS_ERR(dentry)) {
2694 mode = cgroup_file_mode(cft);
2695 error = cgroup_create_file(dentry, mode | S_IFREG,
2696 cgrp->root->sb);
2697 if (!error)
2698 dentry->d_fsdata = (void *)cft;
2699 dput(dentry);
2700 } else
2701 error = PTR_ERR(dentry);
2702 return error;
2703}
2704EXPORT_SYMBOL_GPL(cgroup_add_file);
2705
2706int cgroup_add_files(struct cgroup *cgrp,
2707 struct cgroup_subsys *subsys,
2708 const struct cftype cft[],
2709 int count)
2710{
2711 int i, err;
2712 for (i = 0; i < count; i++) {
2713 err = cgroup_add_file(cgrp, subsys, &cft[i]);
2714 if (err)
2715 return err;
2716 }
2717 return 0;
2718}
2719EXPORT_SYMBOL_GPL(cgroup_add_files);
2720
2721
2722
2723
2724
2725
2726
2727int cgroup_task_count(const struct cgroup *cgrp)
2728{
2729 int count = 0;
2730 struct cg_cgroup_link *link;
2731
2732 read_lock(&css_set_lock);
2733 list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
2734 count += atomic_read(&link->cg->refcount);
2735 }
2736 read_unlock(&css_set_lock);
2737 return count;
2738}
2739
2740
2741
2742
2743
2744static void cgroup_advance_iter(struct cgroup *cgrp,
2745 struct cgroup_iter *it)
2746{
2747 struct list_head *l = it->cg_link;
2748 struct cg_cgroup_link *link;
2749 struct css_set *cg;
2750
2751
2752 do {
2753 l = l->next;
2754 if (l == &cgrp->css_sets) {
2755 it->cg_link = NULL;
2756 return;
2757 }
2758 link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
2759 cg = link->cg;
2760 } while (list_empty(&cg->tasks));
2761 it->cg_link = l;
2762 it->task = cg->tasks.next;
2763}
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774static void cgroup_enable_task_cg_lists(void)
2775{
2776 struct task_struct *p, *g;
2777 write_lock(&css_set_lock);
2778 use_task_css_set_links = 1;
2779 do_each_thread(g, p) {
2780 task_lock(p);
2781
2782
2783
2784
2785
2786 if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
2787 list_add(&p->cg_list, &p->cgroups->tasks);
2788 task_unlock(p);
2789 } while_each_thread(g, p);
2790 write_unlock(&css_set_lock);
2791}
2792
2793void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
2794{
2795
2796
2797
2798
2799
2800 if (!use_task_css_set_links)
2801 cgroup_enable_task_cg_lists();
2802
2803 read_lock(&css_set_lock);
2804 it->cg_link = &cgrp->css_sets;
2805 cgroup_advance_iter(cgrp, it);
2806}
2807
2808struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
2809 struct cgroup_iter *it)
2810{
2811 struct task_struct *res;
2812 struct list_head *l = it->task;
2813 struct cg_cgroup_link *link;
2814
2815
2816 if (!it->cg_link)
2817 return NULL;
2818 res = list_entry(l, struct task_struct, cg_list);
2819
2820 l = l->next;
2821 link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
2822 if (l == &link->cg->tasks) {
2823
2824
2825 cgroup_advance_iter(cgrp, it);
2826 } else {
2827 it->task = l;
2828 }
2829 return res;
2830}
2831
2832void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
2833{
2834 read_unlock(&css_set_lock);
2835}
2836
2837static inline int started_after_time(struct task_struct *t1,
2838 struct timespec *time,
2839 struct task_struct *t2)
2840{
2841 int start_diff = timespec_compare(&t1->start_time, time);
2842 if (start_diff > 0) {
2843 return 1;
2844 } else if (start_diff < 0) {
2845 return 0;
2846 } else {
2847
2848
2849
2850
2851
2852
2853
2854
2855 return t1 > t2;
2856 }
2857}
2858
2859
2860
2861
2862
2863
2864static inline int started_after(void *p1, void *p2)
2865{
2866 struct task_struct *t1 = p1;
2867 struct task_struct *t2 = p2;
2868 return started_after_time(t1, &t2->start_time, t2);
2869}
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898int cgroup_scan_tasks(struct cgroup_scanner *scan)
2899{
2900 int retval, i;
2901 struct cgroup_iter it;
2902 struct task_struct *p, *dropped;
2903
2904 struct task_struct *latest_task = NULL;
2905 struct ptr_heap tmp_heap;
2906 struct ptr_heap *heap;
2907 struct timespec latest_time = { 0, 0 };
2908
2909 if (scan->heap) {
2910
2911 heap = scan->heap;
2912 heap->gt = &started_after;
2913 } else {
2914
2915 heap = &tmp_heap;
2916 retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
2917 if (retval)
2918
2919 return retval;
2920 }
2921
2922 again:
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935 heap->size = 0;
2936 cgroup_iter_start(scan->cg, &it);
2937 while ((p = cgroup_iter_next(scan->cg, &it))) {
2938
2939
2940
2941
2942 if (scan->test_task && !scan->test_task(p, scan))
2943 continue;
2944
2945
2946
2947
2948 if (!started_after_time(p, &latest_time, latest_task))
2949 continue;
2950 dropped = heap_insert(heap, p);
2951 if (dropped == NULL) {
2952
2953
2954
2955
2956 get_task_struct(p);
2957 } else if (dropped != p) {
2958
2959
2960
2961
2962 get_task_struct(p);
2963 put_task_struct(dropped);
2964 }
2965
2966
2967
2968
2969 }
2970 cgroup_iter_end(scan->cg, &it);
2971
2972 if (heap->size) {
2973 for (i = 0; i < heap->size; i++) {
2974 struct task_struct *q = heap->ptrs[i];
2975 if (i == 0) {
2976 latest_time = q->start_time;
2977 latest_task = q;
2978 }
2979
2980 scan->process_task(q, scan);
2981 put_task_struct(q);
2982 }
2983
2984
2985
2986
2987
2988
2989
2990 goto again;
2991 }
2992 if (heap == &tmp_heap)
2993 heap_free(&tmp_heap);
2994 return 0;
2995}
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
3013static void *pidlist_allocate(int count)
3014{
3015 if (PIDLIST_TOO_LARGE(count))
3016 return vmalloc(count * sizeof(pid_t));
3017 else
3018 return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
3019}
/* Free a pid list with the routine matching the way it was allocated. */
static void pidlist_free(void *p)
{
	if (!is_vmalloc_addr(p)) {
		kfree(p);
		return;
	}
	vfree(p);
}
3027static void *pidlist_resize(void *p, int newcount)
3028{
3029 void *newlist;
3030
3031 if (is_vmalloc_addr(p)) {
3032 newlist = vmalloc(newcount * sizeof(pid_t));
3033 if (!newlist)
3034 return NULL;
3035 memcpy(newlist, p, newcount * sizeof(pid_t));
3036 vfree(p);
3037 } else {
3038 newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL);
3039 }
3040 return newlist;
3041}
3042
3043
3044
3045
3046
3047
3048
3049
3050#define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new))
3051static int pidlist_uniq(pid_t **p, int length)
3052{
3053 int src, dest = 1;
3054 pid_t *list = *p;
3055 pid_t *newlist;
3056
3057
3058
3059
3060
3061 if (length == 0 || length == 1)
3062 return length;
3063
3064 for (src = 1; src < length; src++) {
3065
3066 while (list[src] == list[src-1]) {
3067 src++;
3068 if (src == length)
3069 goto after;
3070 }
3071
3072 list[dest] = list[src];
3073 dest++;
3074 }
3075after:
3076
3077
3078
3079
3080
3081 if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
3082 newlist = pidlist_resize(list, dest);
3083 if (newlist)
3084 *p = newlist;
3085 }
3086 return dest;
3087}
3088
/*
 * Comparison callback for sorting pid arrays: returns the difference
 * between the pid at @a and the pid at @b.
 */
static int cmppid(const void *a, const void *b)
{
	const pid_t lhs = *(pid_t *)a;
	const pid_t rhs = *(pid_t *)b;

	return lhs - rhs;
}
3093
3094
3095
3096
3097
3098
3099
3100static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
3101 enum cgroup_filetype type)
3102{
3103 struct cgroup_pidlist *l;
3104
3105 struct pid_namespace *ns = current->nsproxy->pid_ns;
3106
3107
3108
3109
3110
3111
3112
3113 mutex_lock(&cgrp->pidlist_mutex);
3114 list_for_each_entry(l, &cgrp->pidlists, links) {
3115 if (l->key.type == type && l->key.ns == ns) {
3116
3117 down_write(&l->mutex);
3118 mutex_unlock(&cgrp->pidlist_mutex);
3119 return l;
3120 }
3121 }
3122
3123 l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
3124 if (!l) {
3125 mutex_unlock(&cgrp->pidlist_mutex);
3126 return l;
3127 }
3128 init_rwsem(&l->mutex);
3129 down_write(&l->mutex);
3130 l->key.type = type;
3131 l->key.ns = get_pid_ns(ns);
3132 l->use_count = 0;
3133 l->list = NULL;
3134 l->owner = cgrp;
3135 list_add(&l->links, &cgrp->pidlists);
3136 mutex_unlock(&cgrp->pidlist_mutex);
3137 return l;
3138}
3139
3140
3141
3142
3143static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
3144 struct cgroup_pidlist **lp)
3145{
3146 pid_t *array;
3147 int length;
3148 int pid, n = 0;
3149 struct cgroup_iter it;
3150 struct task_struct *tsk;
3151 struct cgroup_pidlist *l;
3152
3153
3154
3155
3156
3157
3158
3159 length = cgroup_task_count(cgrp);
3160 array = pidlist_allocate(length);
3161 if (!array)
3162 return -ENOMEM;
3163
3164 cgroup_iter_start(cgrp, &it);
3165 while ((tsk = cgroup_iter_next(cgrp, &it))) {
3166 if (unlikely(n == length))
3167 break;
3168
3169 if (type == CGROUP_FILE_PROCS)
3170 pid = task_tgid_vnr(tsk);
3171 else
3172 pid = task_pid_vnr(tsk);
3173 if (pid > 0)
3174 array[n++] = pid;
3175 }
3176 cgroup_iter_end(cgrp, &it);
3177 length = n;
3178
3179 sort(array, length, sizeof(pid_t), cmppid, NULL);
3180 if (type == CGROUP_FILE_PROCS)
3181 length = pidlist_uniq(&array, length);
3182 l = cgroup_pidlist_find(cgrp, type);
3183 if (!l) {
3184 pidlist_free(array);
3185 return -ENOMEM;
3186 }
3187
3188 pidlist_free(l->list);
3189 l->list = array;
3190 l->length = length;
3191 l->use_count++;
3192 up_write(&l->mutex);
3193 *lp = l;
3194 return 0;
3195}
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
3207{
3208 int ret = -EINVAL;
3209 struct cgroup *cgrp;
3210 struct cgroup_iter it;
3211 struct task_struct *tsk;
3212
3213
3214
3215
3216
3217 if (dentry->d_sb->s_op != &cgroup_ops ||
3218 !S_ISDIR(dentry->d_inode->i_mode))
3219 goto err;
3220
3221 ret = 0;
3222 cgrp = dentry->d_fsdata;
3223
3224 cgroup_iter_start(cgrp, &it);
3225 while ((tsk = cgroup_iter_next(cgrp, &it))) {
3226 switch (tsk->state) {
3227 case TASK_RUNNING:
3228 stats->nr_running++;
3229 break;
3230 case TASK_INTERRUPTIBLE:
3231 stats->nr_sleeping++;
3232 break;
3233 case TASK_UNINTERRUPTIBLE:
3234 stats->nr_uninterruptible++;
3235 break;
3236 case TASK_STOPPED:
3237 stats->nr_stopped++;
3238 break;
3239 default:
3240 if (delayacct_is_task_waiting_on_io(tsk))
3241 stats->nr_io_wait++;
3242 break;
3243 }
3244 }
3245 cgroup_iter_end(cgrp, &it);
3246
3247err:
3248 return ret;
3249}
3250
3251
3252
3253
3254
3255
3256
3257
3258static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
3259{
3260
3261
3262
3263
3264
3265
3266 struct cgroup_pidlist *l = s->private;
3267 int index = 0, pid = *pos;
3268 int *iter;
3269
3270 down_read(&l->mutex);
3271 if (pid) {
3272 int end = l->length;
3273
3274 while (index < end) {
3275 int mid = (index + end) / 2;
3276 if (l->list[mid] == pid) {
3277 index = mid;
3278 break;
3279 } else if (l->list[mid] <= pid)
3280 index = mid + 1;
3281 else
3282 end = mid;
3283 }
3284 }
3285
3286 if (index >= l->length)
3287 return NULL;
3288
3289 iter = l->list + index;
3290 *pos = *iter;
3291 return iter;
3292}
3293
3294static void cgroup_pidlist_stop(struct seq_file *s, void *v)
3295{
3296 struct cgroup_pidlist *l = s->private;
3297 up_read(&l->mutex);
3298}
3299
3300static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
3301{
3302 struct cgroup_pidlist *l = s->private;
3303 pid_t *p = v;
3304 pid_t *end = l->list + l->length;
3305
3306
3307
3308
3309 p++;
3310 if (p >= end) {
3311 return NULL;
3312 } else {
3313 *pos = *p;
3314 return p;
3315 }
3316}
3317
/*
 * seq_file ->show for the pid listings: prints the pid that @v points at,
 * one per line, and hands back seq_printf()'s return value.
 */
static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
	int *pid = v;

	return seq_printf(s, "%d\n", *pid);
}
3322
3323
3324
3325
3326
3327static const struct seq_operations cgroup_pidlist_seq_operations = {
3328 .start = cgroup_pidlist_start,
3329 .stop = cgroup_pidlist_stop,
3330 .next = cgroup_pidlist_next,
3331 .show = cgroup_pidlist_show,
3332};
3333
3334static void cgroup_release_pid_array(struct cgroup_pidlist *l)
3335{
3336
3337
3338
3339
3340
3341
3342 mutex_lock(&l->owner->pidlist_mutex);
3343 down_write(&l->mutex);
3344 BUG_ON(!l->use_count);
3345 if (!--l->use_count) {
3346
3347 list_del(&l->links);
3348 mutex_unlock(&l->owner->pidlist_mutex);
3349 pidlist_free(l->list);
3350 put_pid_ns(l->key.ns);
3351 up_write(&l->mutex);
3352 kfree(l);
3353 return;
3354 }
3355 mutex_unlock(&l->owner->pidlist_mutex);
3356 up_write(&l->mutex);
3357}
3358
3359static int cgroup_pidlist_release(struct inode *inode, struct file *file)
3360{
3361 struct cgroup_pidlist *l;
3362 if (!(file->f_mode & FMODE_READ))
3363 return 0;
3364
3365
3366
3367
3368 l = ((struct seq_file *)file->private_data)->private;
3369 cgroup_release_pid_array(l);
3370 return seq_release(inode, file);
3371}
3372
3373static const struct file_operations cgroup_pidlist_operations = {
3374 .read = seq_read,
3375 .llseek = seq_lseek,
3376 .write = cgroup_file_write,
3377 .release = cgroup_pidlist_release,
3378};
3379
3380
3381
3382
3383
3384
3385
3386static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
3387{
3388 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
3389 struct cgroup_pidlist *l;
3390 int retval;
3391
3392
3393 if (!(file->f_mode & FMODE_READ))
3394 return 0;
3395
3396
3397 retval = pidlist_array_load(cgrp, type, &l);
3398 if (retval)
3399 return retval;
3400
3401 file->f_op = &cgroup_pidlist_operations;
3402
3403 retval = seq_open(file, &cgroup_pidlist_seq_operations);
3404 if (retval) {
3405 cgroup_release_pid_array(l);
3406 return retval;
3407 }
3408 ((struct seq_file *)file->private_data)->private = l;
3409 return 0;
3410}
3411static int cgroup_tasks_open(struct inode *unused, struct file *file)
3412{
3413 return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
3414}
3415static int cgroup_procs_open(struct inode *unused, struct file *file)
3416{
3417 return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
3418}
3419
3420static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
3421 struct cftype *cft)
3422{
3423 return notify_on_release(cgrp);
3424}
3425
3426static int cgroup_write_notify_on_release(struct cgroup *cgrp,
3427 struct cftype *cft,
3428 u64 val)
3429{
3430 clear_bit(CGRP_RELEASABLE, &cgrp->flags);
3431 if (val)
3432 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3433 else
3434 clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3435 return 0;
3436}
3437
3438
3439
3440
3441
3442
3443static void cgroup_event_remove(struct work_struct *work)
3444{
3445 struct cgroup_event *event = container_of(work, struct cgroup_event,
3446 remove);
3447 struct cgroup *cgrp = event->cgrp;
3448
3449 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
3450
3451 eventfd_ctx_put(event->eventfd);
3452 kfree(event);
3453 dput(cgrp->dentry);
3454}
3455
3456
3457
3458
3459
3460
3461static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
3462 int sync, void *key)
3463{
3464 struct cgroup_event *event = container_of(wait,
3465 struct cgroup_event, wait);
3466 struct cgroup *cgrp = event->cgrp;
3467 unsigned long flags = (unsigned long)key;
3468
3469 if (flags & POLLHUP) {
3470 __remove_wait_queue(event->wqh, &event->wait);
3471 spin_lock(&cgrp->event_list_lock);
3472 list_del(&event->list);
3473 spin_unlock(&cgrp->event_list_lock);
3474
3475
3476
3477
3478 schedule_work(&event->remove);
3479 }
3480
3481 return 0;
3482}
3483
3484static void cgroup_event_ptable_queue_proc(struct file *file,
3485 wait_queue_head_t *wqh, poll_table *pt)
3486{
3487 struct cgroup_event *event = container_of(pt,
3488 struct cgroup_event, pt);
3489
3490 event->wqh = wqh;
3491 add_wait_queue(wqh, &event->wait);
3492}
3493
3494
3495
3496
3497
3498
3499
3500static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
3501 const char *buffer)
3502{
3503 struct cgroup_event *event = NULL;
3504 unsigned int efd, cfd;
3505 struct file *efile = NULL;
3506 struct file *cfile = NULL;
3507 char *endp;
3508 int ret;
3509
3510 efd = simple_strtoul(buffer, &endp, 10);
3511 if (*endp != ' ')
3512 return -EINVAL;
3513 buffer = endp + 1;
3514
3515 cfd = simple_strtoul(buffer, &endp, 10);
3516 if ((*endp != ' ') && (*endp != '\0'))
3517 return -EINVAL;
3518 buffer = endp + 1;
3519
3520 event = kzalloc(sizeof(*event), GFP_KERNEL);
3521 if (!event)
3522 return -ENOMEM;
3523 event->cgrp = cgrp;
3524 INIT_LIST_HEAD(&event->list);
3525 init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
3526 init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
3527 INIT_WORK(&event->remove, cgroup_event_remove);
3528
3529 efile = eventfd_fget(efd);
3530 if (IS_ERR(efile)) {
3531 ret = PTR_ERR(efile);
3532 goto fail;
3533 }
3534
3535 event->eventfd = eventfd_ctx_fileget(efile);
3536 if (IS_ERR(event->eventfd)) {
3537 ret = PTR_ERR(event->eventfd);
3538 goto fail;
3539 }
3540
3541 cfile = fget(cfd);
3542 if (!cfile) {
3543 ret = -EBADF;
3544 goto fail;
3545 }
3546
3547
3548
3549 ret = inode_permission(cfile->f_path.dentry->d_inode, MAY_READ);
3550 if (ret < 0)
3551 goto fail;
3552
3553 event->cft = __file_cft(cfile);
3554 if (IS_ERR(event->cft)) {
3555 ret = PTR_ERR(event->cft);
3556 goto fail;
3557 }
3558
3559 if (!event->cft->register_event || !event->cft->unregister_event) {
3560 ret = -EINVAL;
3561 goto fail;
3562 }
3563
3564 ret = event->cft->register_event(cgrp, event->cft,
3565 event->eventfd, buffer);
3566 if (ret)
3567 goto fail;
3568
3569 if (efile->f_op->poll(efile, &event->pt) & POLLHUP) {
3570 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
3571 ret = 0;
3572 goto fail;
3573 }
3574
3575
3576
3577
3578
3579
3580 dget(cgrp->dentry);
3581
3582 spin_lock(&cgrp->event_list_lock);
3583 list_add(&event->list, &cgrp->event_list);
3584 spin_unlock(&cgrp->event_list_lock);
3585
3586 fput(cfile);
3587 fput(efile);
3588
3589 return 0;
3590
3591fail:
3592 if (cfile)
3593 fput(cfile);
3594
3595 if (event && event->eventfd && !IS_ERR(event->eventfd))
3596 eventfd_ctx_put(event->eventfd);
3597
3598 if (!IS_ERR_OR_NULL(efile))
3599 fput(efile);
3600
3601 kfree(event);
3602
3603 return ret;
3604}
3605
3606static u64 cgroup_clone_children_read(struct cgroup *cgrp,
3607 struct cftype *cft)
3608{
3609 return clone_children(cgrp);
3610}
3611
3612static int cgroup_clone_children_write(struct cgroup *cgrp,
3613 struct cftype *cft,
3614 u64 val)
3615{
3616 if (val)
3617 set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3618 else
3619 clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3620 return 0;
3621}
3622
3623
3624
3625
3626
3627#define CGROUP_FILE_GENERIC_PREFIX "cgroup."
3628static struct cftype files[] = {
3629 {
3630 .name = "tasks",
3631 .open = cgroup_tasks_open,
3632 .write_u64 = cgroup_tasks_write,
3633 .release = cgroup_pidlist_release,
3634 .mode = S_IRUGO | S_IWUSR,
3635 },
3636 {
3637 .name = CGROUP_FILE_GENERIC_PREFIX "procs",
3638 .open = cgroup_procs_open,
3639 .write_u64 = cgroup_procs_write,
3640 .release = cgroup_pidlist_release,
3641 .mode = S_IRUGO | S_IWUSR,
3642 },
3643 {
3644 .name = "notify_on_release",
3645 .read_u64 = cgroup_read_notify_on_release,
3646 .write_u64 = cgroup_write_notify_on_release,
3647 },
3648 {
3649 .name = CGROUP_FILE_GENERIC_PREFIX "event_control",
3650 .write_string = cgroup_write_event_control,
3651 .mode = S_IWUGO,
3652 },
3653 {
3654 .name = "cgroup.clone_children",
3655 .read_u64 = cgroup_clone_children_read,
3656 .write_u64 = cgroup_clone_children_write,
3657 },
3658};
3659
3660static struct cftype cft_release_agent = {
3661 .name = "release_agent",
3662 .read_seq_string = cgroup_release_agent_show,
3663 .write_string = cgroup_release_agent_write,
3664 .max_write_len = PATH_MAX,
3665};
3666
3667static int cgroup_populate_dir(struct cgroup *cgrp)
3668{
3669 int err;
3670 struct cgroup_subsys *ss;
3671
3672
3673 cgroup_clear_directory(cgrp->dentry);
3674
3675 err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
3676 if (err < 0)
3677 return err;
3678
3679 if (cgrp == cgrp->top_cgroup) {
3680 if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0)
3681 return err;
3682 }
3683
3684 for_each_subsys(cgrp->root, ss) {
3685 if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
3686 return err;
3687 }
3688
3689 for_each_subsys(cgrp->root, ss) {
3690 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
3691
3692
3693
3694
3695
3696 if (css->id)
3697 rcu_assign_pointer(css->id->css, css);
3698 }
3699
3700 return 0;
3701}
3702
3703static void init_cgroup_css(struct cgroup_subsys_state *css,
3704 struct cgroup_subsys *ss,
3705 struct cgroup *cgrp)
3706{
3707 css->cgroup = cgrp;
3708 atomic_set(&css->refcnt, 1);
3709 css->flags = 0;
3710 css->id = NULL;
3711 if (cgrp == dummytop)
3712 set_bit(CSS_ROOT, &css->flags);
3713 BUG_ON(cgrp->subsys[ss->subsys_id]);
3714 cgrp->subsys[ss->subsys_id] = css;
3715}
3716
3717static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
3718{
3719
3720 int i;
3721
3722
3723
3724
3725
3726 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3727 struct cgroup_subsys *ss = subsys[i];
3728 if (ss == NULL)
3729 continue;
3730 if (ss->root == root)
3731 mutex_lock(&ss->hierarchy_mutex);
3732 }
3733}
3734
3735static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
3736{
3737 int i;
3738
3739 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3740 struct cgroup_subsys *ss = subsys[i];
3741 if (ss == NULL)
3742 continue;
3743 if (ss->root == root)
3744 mutex_unlock(&ss->hierarchy_mutex);
3745 }
3746}
3747
3748
3749
3750
3751
3752
3753
3754
3755
/*
 * cgroup_create - create a child cgroup and its directory
 * @parent: cgroup the new one is created under
 * @dentry: dentry for the new directory
 * @mode: mode passed on to cgroup_create_dir()
 *
 * Allocates the cgroup, lets every subsystem of the hierarchy create its
 * state for it, links it under @parent and creates and populates its
 * directory.
 *
 * Returns 0 on success, -ENOMEM if the cgroup cannot be allocated, or the
 * error from a subsystem's ->create(), alloc_css_id() or cgroup_create_dir().
 * A failure of cgroup_populate_dir() is NOT reported: its result is stored
 * but never checked, and 0 is returned regardless.
 */
static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
			     mode_t mode)
{
	struct cgroup *cgrp;
	struct cgroupfs_root *root = parent->root;
	int err = 0;
	struct cgroup_subsys *ss;
	struct super_block *sb = root->sb;

	cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
	if (!cgrp)
		return -ENOMEM;

	/*
	 * Take an extra active reference on the superblock for the new
	 * cgroup; the error path below gives it back via deactivate_super().
	 */
	atomic_inc(&sb->s_active);

	mutex_lock(&cgroup_mutex);

	init_cgroup_housekeeping(cgrp);

	cgrp->parent = parent;
	cgrp->root = parent->root;
	cgrp->top_cgroup = parent->top_cgroup;

	/* the child inherits both flags from its parent */
	if (notify_on_release(parent))
		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);

	if (clone_children(parent))
		set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);

	for_each_subsys(root, ss) {
		struct cgroup_subsys_state *css = ss->create(ss, cgrp);

		if (IS_ERR(css)) {
			err = PTR_ERR(css);
			goto err_destroy;
		}
		init_cgroup_css(css, ss, cgrp);
		if (ss->use_id) {
			err = alloc_css_id(ss, parent, cgrp);
			if (err)
				goto err_destroy;
		}
		/* only called when the parent has clone_children set */
		if (clone_children(parent) && ss->post_clone)
			ss->post_clone(ss, cgrp);
	}

	/* link the new cgroup into the parent's list of children */
	cgroup_lock_hierarchy(root);
	list_add(&cgrp->sibling, &cgrp->parent->children);
	cgroup_unlock_hierarchy(root);
	root->number_of_cgroups++;

	err = cgroup_create_dir(cgrp, dentry, mode);
	if (err < 0)
		goto err_remove;

	/*
	 * The new directory's i_mutex must be held at this point; it is
	 * released at the end of the success path below.
	 * NOTE(review): presumably taken inside cgroup_create_dir() - confirm.
	 */
	BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));

	/* result deliberately not acted upon; see the function comment */
	err = cgroup_populate_dir(cgrp);

	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);

	return 0;

 err_remove:

	/* undo the linking under the parent */
	cgroup_lock_hierarchy(root);
	list_del(&cgrp->sibling);
	cgroup_unlock_hierarchy(root);
	root->number_of_cgroups--;

 err_destroy:

	/* destroy only the subsystem states that were actually installed */
	for_each_subsys(root, ss) {
		if (cgrp->subsys[ss->subsys_id])
			ss->destroy(ss, cgrp);
	}

	mutex_unlock(&cgroup_mutex);

	/* drop the superblock reference taken above */
	deactivate_super(sb);

	kfree(cgrp);
	return err;
}
3850
3851static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
3852{
3853 struct cgroup *c_parent = dentry->d_parent->d_fsdata;
3854
3855
3856 return cgroup_create(c_parent, dentry, mode | S_IFDIR);
3857}
3858
3859static int cgroup_has_css_refs(struct cgroup *cgrp)
3860{
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870 int i;
3871
3872
3873
3874
3875
3876 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3877 struct cgroup_subsys *ss = subsys[i];
3878 struct cgroup_subsys_state *css;
3879
3880 if (ss == NULL || ss->root != cgrp->root)
3881 continue;
3882 css = cgrp->subsys[ss->subsys_id];
3883
3884
3885
3886
3887
3888
3889 if (css && (atomic_read(&css->refcnt) > 1))
3890 return 1;
3891 }
3892 return 0;
3893}
3894
3895
3896
3897
3898
3899
3900
/*
 * cgroup_clear_css_refs - try to drop every css of @cgrp from 1 ref to 0
 *
 * For each subsystem of the hierarchy the css reference count is moved from
 * 1 to 0 with a compare-and-exchange.  If any css is found with more than
 * one reference the whole attempt fails and every count that was already
 * zeroed is put back to 1.  On success CSS_REMOVED is set on every css.
 *
 * Runs with local interrupts disabled.  Returns non-zero on success and 0
 * on failure.
 */
static int cgroup_clear_css_refs(struct cgroup *cgrp)
{
	struct cgroup_subsys *ss;
	unsigned long flags;
	bool failed = false;
	local_irq_save(flags);
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		int refcnt;
		while (1) {
			/* someone else holds a reference: give up */
			refcnt = atomic_read(&css->refcnt);
			if (refcnt > 1) {
				failed = true;
				goto done;
			}
			/* a count of 0 is never expected at this point */
			BUG_ON(!refcnt);
			/*
			 * Drop the count to 0 only if it has not changed since
			 * it was read; otherwise re-read and try again.
			 */
			if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt)
				break;
			cpu_relax();
		}
	}
 done:
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		if (failed) {
			/*
			 * Roll back: restore the base reference on every css
			 * that the loop above managed to zero.
			 */
			if (!atomic_read(&css->refcnt))
				atomic_set(&css->refcnt, 1);
		} else {
			/* commit: mark the css as removed */
			set_bit(CSS_REMOVED, &css->flags);
		}
	}
	local_irq_restore(flags);
	return !failed;
}
3947
/*
 * cgroup_rmdir - ->rmdir for cgroup directories
 * @unused_dir: not used
 * @dentry: dentry of the cgroup directory being removed
 *
 * Removes the cgroup once it has no tasks (cgrp->count is zero), no children
 * and no css references beyond the base ones.  While css references remain,
 * the caller sleeps interruptibly on cgroup_rmdir_waitq and then retries
 * from the top.
 *
 * Returns 0 on success, -EBUSY if the cgroup still has tasks or children,
 * -EINTR if a signal is pending after waiting, or the error returned by
 * cgroup_call_pre_destroy().
 */
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
{
	struct cgroup *cgrp = dentry->d_fsdata;
	struct dentry *d;
	struct cgroup *parent;
	DEFINE_WAIT(wait);
	struct cgroup_event *event, *tmp;
	int ret;

	/* first pass: cheap busy checks under cgroup_mutex */
again:
	mutex_lock(&cgroup_mutex);
	if (atomic_read(&cgrp->count) != 0) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	if (!list_empty(&cgrp->children)) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	mutex_unlock(&cgroup_mutex);

	/*
	 * Flag that an rmdir is in progress before calling the pre_destroy
	 * hooks; the flag is tested again below before going to sleep.
	 */
	set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	/* pre_destroy hooks are called without cgroup_mutex held */
	ret = cgroup_call_pre_destroy(cgrp);
	if (ret) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		return ret;
	}

	/* re-check under the mutex: things may have changed meanwhile */
	mutex_lock(&cgroup_mutex);
	parent = cgrp->parent;
	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
	if (!cgroup_clear_css_refs(cgrp)) {
		mutex_unlock(&cgroup_mutex);
		/*
		 * Sleep only if the flag is still set; if it has been cleared
		 * in the meantime, retry right away.
		 */
		if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
			schedule();
		finish_wait(&cgroup_rmdir_waitq, &wait);
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		if (signal_pending(current))
			return -EINTR;
		goto again;
	}
	/* all css refs are gone: no more waiting, the removal goes ahead */
	finish_wait(&cgroup_rmdir_waitq, &wait);
	clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	/* mark removed and take the cgroup off the release list, if queued */
	spin_lock(&release_list_lock);
	set_bit(CGRP_REMOVED, &cgrp->flags);
	if (!list_empty(&cgrp->release_list))
		list_del_init(&cgrp->release_list);
	spin_unlock(&release_list_lock);

	cgroup_lock_hierarchy(cgrp->root);
	/* unlink from the parent's list of children */
	list_del_init(&cgrp->sibling);
	cgroup_unlock_hierarchy(cgrp->root);

	/* hold the dentry across the directory removal */
	d = dget(cgrp->dentry);

	cgroup_d_remove_dir(d);
	dput(d);

	/* the parent may have become releasable by losing this child */
	set_bit(CGRP_RELEASABLE, &parent->flags);
	check_for_release(parent);

	/*
	 * Detach every event still registered on this cgroup: take it off the
	 * list and off its eventfd's wait queue, signal the eventfd, and leave
	 * the rest of the teardown to the event's work item.
	 */
	spin_lock(&cgrp->event_list_lock);
	list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
		list_del(&event->list);
		remove_wait_queue(event->wqh, &event->wait);
		eventfd_signal(event->eventfd, 1);
		schedule_work(&event->remove);
	}
	spin_unlock(&cgrp->event_list_lock);

	mutex_unlock(&cgroup_mutex);
	return 0;
}
4053
4054static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
4055{
4056 struct cgroup_subsys_state *css;
4057
4058 printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
4059
4060
4061 list_add(&ss->sibling, &rootnode.subsys_list);
4062 ss->root = &rootnode;
4063 css = ss->create(ss, dummytop);
4064
4065 BUG_ON(IS_ERR(css));
4066 init_cgroup_css(css, ss, dummytop);
4067
4068
4069
4070
4071
4072 init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
4073
4074 need_forkexit_callback |= ss->fork || ss->exit;
4075
4076
4077
4078
4079 BUG_ON(!list_empty(&init_task.tasks));
4080
4081 mutex_init(&ss->hierarchy_mutex);
4082 lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
4083 ss->active = 1;
4084
4085
4086
4087 BUG_ON(ss->module);
4088}
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
4100{
4101 int i;
4102 struct cgroup_subsys_state *css;
4103
4104
4105 if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
4106 ss->create == NULL || ss->destroy == NULL)
4107 return -EINVAL;
4108
4109
4110
4111
4112
4113
4114
4115 if (ss->fork || ss->exit)
4116 return -EINVAL;
4117
4118
4119
4120
4121
4122 if (ss->module == NULL) {
4123
4124 BUG_ON(ss->subsys_id >= CGROUP_BUILTIN_SUBSYS_COUNT);
4125 BUG_ON(subsys[ss->subsys_id] != ss);
4126 return 0;
4127 }
4128
4129
4130
4131
4132
4133 mutex_lock(&cgroup_mutex);
4134
4135 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
4136 if (subsys[i] == NULL)
4137 break;
4138 }
4139 if (i == CGROUP_SUBSYS_COUNT) {
4140
4141 mutex_unlock(&cgroup_mutex);
4142 return -EBUSY;
4143 }
4144
4145 ss->subsys_id = i;
4146 subsys[i] = ss;
4147
4148
4149
4150
4151
4152 css = ss->create(ss, dummytop);
4153 if (IS_ERR(css)) {
4154
4155 subsys[i] = NULL;
4156 mutex_unlock(&cgroup_mutex);
4157 return PTR_ERR(css);
4158 }
4159
4160 list_add(&ss->sibling, &rootnode.subsys_list);
4161 ss->root = &rootnode;
4162
4163
4164 init_cgroup_css(css, ss, dummytop);
4165
4166 if (ss->use_id) {
4167 int ret = cgroup_init_idr(ss, css);
4168 if (ret) {
4169 dummytop->subsys[ss->subsys_id] = NULL;
4170 ss->destroy(ss, dummytop);
4171 subsys[i] = NULL;
4172 mutex_unlock(&cgroup_mutex);
4173 return ret;
4174 }
4175 }
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185 write_lock(&css_set_lock);
4186 for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
4187 struct css_set *cg;
4188 struct hlist_node *node, *tmp;
4189 struct hlist_head *bucket = &css_set_table[i], *new_bucket;
4190
4191 hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) {
4192
4193 if (cg->subsys[ss->subsys_id])
4194 continue;
4195
4196 hlist_del(&cg->hlist);
4197
4198 cg->subsys[ss->subsys_id] = css;
4199
4200 new_bucket = css_set_hash(cg->subsys);
4201 hlist_add_head(&cg->hlist, new_bucket);
4202 }
4203 }
4204 write_unlock(&css_set_lock);
4205
4206 mutex_init(&ss->hierarchy_mutex);
4207 lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
4208 ss->active = 1;
4209
4210
4211 mutex_unlock(&cgroup_mutex);
4212 return 0;
4213}
4214EXPORT_SYMBOL_GPL(cgroup_load_subsys);
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224void cgroup_unload_subsys(struct cgroup_subsys *ss)
4225{
4226 struct cg_cgroup_link *link;
4227 struct hlist_head *hhead;
4228
4229 BUG_ON(ss->module == NULL);
4230
4231
4232
4233
4234
4235
4236 BUG_ON(ss->root != &rootnode);
4237
4238 mutex_lock(&cgroup_mutex);
4239
4240 BUG_ON(ss->subsys_id < CGROUP_BUILTIN_SUBSYS_COUNT);
4241 subsys[ss->subsys_id] = NULL;
4242
4243
4244 list_del_init(&ss->sibling);
4245
4246
4247
4248
4249
4250 write_lock(&css_set_lock);
4251 list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) {
4252 struct css_set *cg = link->cg;
4253
4254 hlist_del(&cg->hlist);
4255 BUG_ON(!cg->subsys[ss->subsys_id]);
4256 cg->subsys[ss->subsys_id] = NULL;
4257 hhead = css_set_hash(cg->subsys);
4258 hlist_add_head(&cg->hlist, hhead);
4259 }
4260 write_unlock(&css_set_lock);
4261
4262
4263
4264
4265
4266
4267
4268 ss->destroy(ss, dummytop);
4269 dummytop->subsys[ss->subsys_id] = NULL;
4270
4271 mutex_unlock(&cgroup_mutex);
4272}
4273EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
4274
4275
4276
4277
4278
4279
4280
4281int __init cgroup_init_early(void)
4282{
4283 int i;
4284 atomic_set(&init_css_set.refcount, 1);
4285 INIT_LIST_HEAD(&init_css_set.cg_links);
4286 INIT_LIST_HEAD(&init_css_set.tasks);
4287 INIT_HLIST_NODE(&init_css_set.hlist);
4288 css_set_count = 1;
4289 init_cgroup_root(&rootnode);
4290 root_count = 1;
4291 init_task.cgroups = &init_css_set;
4292
4293 init_css_set_link.cg = &init_css_set;
4294 init_css_set_link.cgrp = dummytop;
4295 list_add(&init_css_set_link.cgrp_link_list,
4296 &rootnode.top_cgroup.css_sets);
4297 list_add(&init_css_set_link.cg_link_list,
4298 &init_css_set.cg_links);
4299
4300 for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
4301 INIT_HLIST_HEAD(&css_set_table[i]);
4302
4303
4304 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4305 struct cgroup_subsys *ss = subsys[i];
4306
4307 BUG_ON(!ss->name);
4308 BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
4309 BUG_ON(!ss->create);
4310 BUG_ON(!ss->destroy);
4311 if (ss->subsys_id != i) {
4312 printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
4313 ss->name, ss->subsys_id);
4314 BUG();
4315 }
4316
4317 if (ss->early_init)
4318 cgroup_init_subsys(ss);
4319 }
4320 return 0;
4321}
4322
4323
4324
4325
4326
4327
4328
4329int __init cgroup_init(void)
4330{
4331 int err;
4332 int i;
4333 struct hlist_head *hhead;
4334
4335 err = bdi_init(&cgroup_backing_dev_info);
4336 if (err)
4337 return err;
4338
4339
4340 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4341 struct cgroup_subsys *ss = subsys[i];
4342 if (!ss->early_init)
4343 cgroup_init_subsys(ss);
4344 if (ss->use_id)
4345 cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
4346 }
4347
4348
4349 hhead = css_set_hash(init_css_set.subsys);
4350 hlist_add_head(&init_css_set.hlist, hhead);
4351 BUG_ON(!init_root_id(&rootnode));
4352
4353 cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
4354 if (!cgroup_kobj) {
4355 err = -ENOMEM;
4356 goto out;
4357 }
4358
4359 err = register_filesystem(&cgroup_fs_type);
4360 if (err < 0) {
4361 kobject_put(cgroup_kobj);
4362 goto out;
4363 }
4364
4365 proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);
4366
4367out:
4368 if (err)
4369 bdi_destroy(&cgroup_backing_dev_info);
4370
4371 return err;
4372}
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387static int proc_cgroup_show(struct seq_file *m, void *v)
4388{
4389 struct pid *pid;
4390 struct task_struct *tsk;
4391 char *buf;
4392 int retval;
4393 struct cgroupfs_root *root;
4394
4395 retval = -ENOMEM;
4396 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
4397 if (!buf)
4398 goto out;
4399
4400 retval = -ESRCH;
4401 pid = m->private;
4402 tsk = get_pid_task(pid, PIDTYPE_PID);
4403 if (!tsk)
4404 goto out_free;
4405
4406 retval = 0;
4407
4408 mutex_lock(&cgroup_mutex);
4409
4410 for_each_active_root(root) {
4411 struct cgroup_subsys *ss;
4412 struct cgroup *cgrp;
4413 int count = 0;
4414
4415 seq_printf(m, "%d:", root->hierarchy_id);
4416 for_each_subsys(root, ss)
4417 seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
4418 if (strlen(root->name))
4419 seq_printf(m, "%sname=%s", count ? "," : "",
4420 root->name);
4421 seq_putc(m, ':');
4422 cgrp = task_cgroup_from_root(tsk, root);
4423 retval = cgroup_path(cgrp, buf, PAGE_SIZE);
4424 if (retval < 0)
4425 goto out_unlock;
4426 seq_puts(m, buf);
4427 seq_putc(m, '\n');
4428 }
4429
4430out_unlock:
4431 mutex_unlock(&cgroup_mutex);
4432 put_task_struct(tsk);
4433out_free:
4434 kfree(buf);
4435out:
4436 return retval;
4437}
4438
4439static int cgroup_open(struct inode *inode, struct file *file)
4440{
4441 struct pid *pid = PROC_I(inode)->pid;
4442 return single_open(file, proc_cgroup_show, pid);
4443}
4444
4445const struct file_operations proc_cgroup_operations = {
4446 .open = cgroup_open,
4447 .read = seq_read,
4448 .llseek = seq_lseek,
4449 .release = single_release,
4450};
4451
4452
4453static int proc_cgroupstats_show(struct seq_file *m, void *v)
4454{
4455 int i;
4456
4457 seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
4458
4459
4460
4461
4462
4463 mutex_lock(&cgroup_mutex);
4464 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4465 struct cgroup_subsys *ss = subsys[i];
4466 if (ss == NULL)
4467 continue;
4468 seq_printf(m, "%s\t%d\t%d\t%d\n",
4469 ss->name, ss->root->hierarchy_id,
4470 ss->root->number_of_cgroups, !ss->disabled);
4471 }
4472 mutex_unlock(&cgroup_mutex);
4473 return 0;
4474}
4475
4476static int cgroupstats_open(struct inode *inode, struct file *file)
4477{
4478 return single_open(file, proc_cgroupstats_show, NULL);
4479}
4480
4481static const struct file_operations proc_cgroupstats_operations = {
4482 .open = cgroupstats_open,
4483 .read = seq_read,
4484 .llseek = seq_lseek,
4485 .release = single_release,
4486};
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504void cgroup_fork(struct task_struct *child)
4505{
4506 task_lock(current);
4507 child->cgroups = current->cgroups;
4508 get_css_set(child->cgroups);
4509 task_unlock(current);
4510 INIT_LIST_HEAD(&child->cg_list);
4511}
4512
4513
4514
4515
4516
4517
4518
4519
4520
4521void cgroup_fork_callbacks(struct task_struct *child)
4522{
4523 if (need_forkexit_callback) {
4524 int i;
4525
4526
4527
4528
4529
4530 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4531 struct cgroup_subsys *ss = subsys[i];
4532 if (ss->fork)
4533 ss->fork(ss, child);
4534 }
4535 }
4536}
4537
4538
4539
4540
4541
4542
4543
4544
4545
4546
4547void cgroup_post_fork(struct task_struct *child)
4548{
4549 if (use_task_css_set_links) {
4550 write_lock(&css_set_lock);
4551 task_lock(child);
4552 if (list_empty(&child->cg_list))
4553 list_add(&child->cg_list, &child->cgroups->tasks);
4554 task_unlock(child);
4555 write_unlock(&css_set_lock);
4556 }
4557}
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
4592
4593void cgroup_exit(struct task_struct *tsk, int run_callbacks)
4594{
4595 struct css_set *cg;
4596 int i;
4597
4598
4599
4600
4601
4602
4603 if (!list_empty(&tsk->cg_list)) {
4604 write_lock(&css_set_lock);
4605 if (!list_empty(&tsk->cg_list))
4606 list_del_init(&tsk->cg_list);
4607 write_unlock(&css_set_lock);
4608 }
4609
4610
4611 task_lock(tsk);
4612 cg = tsk->cgroups;
4613 tsk->cgroups = &init_css_set;
4614
4615 if (run_callbacks && need_forkexit_callback) {
4616
4617
4618
4619
4620 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4621 struct cgroup_subsys *ss = subsys[i];
4622 if (ss->exit) {
4623 struct cgroup *old_cgrp =
4624 rcu_dereference_raw(cg->subsys[i])->cgroup;
4625 struct cgroup *cgrp = task_cgroup(tsk, i);
4626 ss->exit(ss, cgrp, old_cgrp, tsk);
4627 }
4628 }
4629 }
4630 task_unlock(tsk);
4631
4632 if (cg)
4633 put_css_set_taskexit(cg);
4634}
4635
4636
4637
4638
4639
4640
4641
4642
4643
4644
4645
4646
4647
4648
4649int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
4650{
4651 int ret;
4652 struct cgroup *target;
4653
4654 if (cgrp == dummytop)
4655 return 1;
4656
4657 target = task_cgroup_from_root(task, cgrp->root);
4658 while (cgrp != target && cgrp!= cgrp->top_cgroup)
4659 cgrp = cgrp->parent;
4660 ret = (cgrp == target);
4661 return ret;
4662}
4663
4664static void check_for_release(struct cgroup *cgrp)
4665{
4666
4667
4668 if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
4669 && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
4670
4671
4672
4673 int need_schedule_work = 0;
4674 spin_lock(&release_list_lock);
4675 if (!cgroup_is_removed(cgrp) &&
4676 list_empty(&cgrp->release_list)) {
4677 list_add(&cgrp->release_list, &release_list);
4678 need_schedule_work = 1;
4679 }
4680 spin_unlock(&release_list_lock);
4681 if (need_schedule_work)
4682 schedule_work(&release_agent_work);
4683 }
4684}
4685
4686
4687void __css_put(struct cgroup_subsys_state *css, int count)
4688{
4689 struct cgroup *cgrp = css->cgroup;
4690 int val;
4691 rcu_read_lock();
4692 val = atomic_sub_return(count, &css->refcnt);
4693 if (val == 1) {
4694 if (notify_on_release(cgrp)) {
4695 set_bit(CGRP_RELEASABLE, &cgrp->flags);
4696 check_for_release(cgrp);
4697 }
4698 cgroup_wakeup_rmdir_waiter(cgrp);
4699 }
4700 rcu_read_unlock();
4701 WARN_ON_ONCE(val < 1);
4702}
4703EXPORT_SYMBOL_GPL(__css_put);
4704
4705
4706
4707
4708
4709
4710
4711
4712
4713
4714
4715
4716
4717
4718
4719
4720
4721
4722
4723
4724
4725
4726
4727
4728static void cgroup_release_agent(struct work_struct *work)
4729{
4730 BUG_ON(work != &release_agent_work);
4731 mutex_lock(&cgroup_mutex);
4732 spin_lock(&release_list_lock);
4733 while (!list_empty(&release_list)) {
4734 char *argv[3], *envp[3];
4735 int i;
4736 char *pathbuf = NULL, *agentbuf = NULL;
4737 struct cgroup *cgrp = list_entry(release_list.next,
4738 struct cgroup,
4739 release_list);
4740 list_del_init(&cgrp->release_list);
4741 spin_unlock(&release_list_lock);
4742 pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
4743 if (!pathbuf)
4744 goto continue_free;
4745 if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
4746 goto continue_free;
4747 agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
4748 if (!agentbuf)
4749 goto continue_free;
4750
4751 i = 0;
4752 argv[i++] = agentbuf;
4753 argv[i++] = pathbuf;
4754 argv[i] = NULL;
4755
4756 i = 0;
4757
4758 envp[i++] = "HOME=/";
4759 envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
4760 envp[i] = NULL;
4761
4762
4763
4764
4765 mutex_unlock(&cgroup_mutex);
4766 call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
4767 mutex_lock(&cgroup_mutex);
4768 continue_free:
4769 kfree(pathbuf);
4770 kfree(agentbuf);
4771 spin_lock(&release_list_lock);
4772 }
4773 spin_unlock(&release_list_lock);
4774 mutex_unlock(&cgroup_mutex);
4775}
4776
4777static int __init cgroup_disable(char *str)
4778{
4779 int i;
4780 char *token;
4781
4782 while ((token = strsep(&str, ",")) != NULL) {
4783 if (!*token)
4784 continue;
4785
4786
4787
4788
4789 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4790 struct cgroup_subsys *ss = subsys[i];
4791
4792 if (!strcmp(token, ss->name)) {
4793 ss->disabled = 1;
4794 printk(KERN_INFO "Disabling %s control group"
4795 " subsystem\n", ss->name);
4796 break;
4797 }
4798 }
4799 }
4800 return 1;
4801}
4802__setup("cgroup_disable=", cgroup_disable);
4803
4804
4805
4806
4807
4808
4809
4810
4811unsigned short css_id(struct cgroup_subsys_state *css)
4812{
4813 struct css_id *cssid;
4814
4815
4816
4817
4818
4819
4820 cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt));
4821
4822 if (cssid)
4823 return cssid->id;
4824 return 0;
4825}
4826EXPORT_SYMBOL_GPL(css_id);
4827
4828unsigned short css_depth(struct cgroup_subsys_state *css)
4829{
4830 struct css_id *cssid;
4831
4832 cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt));
4833
4834 if (cssid)
4835 return cssid->depth;
4836 return 0;
4837}
4838EXPORT_SYMBOL_GPL(css_depth);
4839
4840
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851
4852
4853bool css_is_ancestor(struct cgroup_subsys_state *child,
4854 const struct cgroup_subsys_state *root)
4855{
4856 struct css_id *child_id;
4857 struct css_id *root_id;
4858 bool ret = true;
4859
4860 rcu_read_lock();
4861 child_id = rcu_dereference(child->id);
4862 root_id = rcu_dereference(root->id);
4863 if (!child_id
4864 || !root_id
4865 || (child_id->depth < root_id->depth)
4866 || (child_id->stack[root_id->depth] != root_id->id))
4867 ret = false;
4868 rcu_read_unlock();
4869 return ret;
4870}
4871
4872void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
4873{
4874 struct css_id *id = css->id;
4875
4876 if (!id)
4877 return;
4878
4879 BUG_ON(!ss->use_id);
4880
4881 rcu_assign_pointer(id->css, NULL);
4882 rcu_assign_pointer(css->id, NULL);
4883 spin_lock(&ss->id_lock);
4884 idr_remove(&ss->idr, id->id);
4885 spin_unlock(&ss->id_lock);
4886 kfree_rcu(id, rcu_head);
4887}
4888EXPORT_SYMBOL_GPL(free_css_id);
4889
4890
4891
4892
4893
4894
4895static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
4896{
4897 struct css_id *newid;
4898 int myid, error, size;
4899
4900 BUG_ON(!ss->use_id);
4901
4902 size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
4903 newid = kzalloc(size, GFP_KERNEL);
4904 if (!newid)
4905 return ERR_PTR(-ENOMEM);
4906
4907 if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) {
4908 error = -ENOMEM;
4909 goto err_out;
4910 }
4911 spin_lock(&ss->id_lock);
4912
4913 error = idr_get_new_above(&ss->idr, newid, 1, &myid);
4914 spin_unlock(&ss->id_lock);
4915
4916
4917 if (error) {
4918 error = -ENOSPC;
4919 goto err_out;
4920 }
4921 if (myid > CSS_ID_MAX)
4922 goto remove_idr;
4923
4924 newid->id = myid;
4925 newid->depth = depth;
4926 return newid;
4927remove_idr:
4928 error = -ENOSPC;
4929 spin_lock(&ss->id_lock);
4930 idr_remove(&ss->idr, myid);
4931 spin_unlock(&ss->id_lock);
4932err_out:
4933 kfree(newid);
4934 return ERR_PTR(error);
4935
4936}
4937
4938static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
4939 struct cgroup_subsys_state *rootcss)
4940{
4941 struct css_id *newid;
4942
4943 spin_lock_init(&ss->id_lock);
4944 idr_init(&ss->idr);
4945
4946 newid = get_new_cssid(ss, 0);
4947 if (IS_ERR(newid))
4948 return PTR_ERR(newid);
4949
4950 newid->stack[0] = newid->id;
4951 newid->css = rootcss;
4952 rootcss->id = newid;
4953 return 0;
4954}
4955
4956static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
4957 struct cgroup *child)
4958{
4959 int subsys_id, i, depth = 0;
4960 struct cgroup_subsys_state *parent_css, *child_css;
4961 struct css_id *child_id, *parent_id;
4962
4963 subsys_id = ss->subsys_id;
4964 parent_css = parent->subsys[subsys_id];
4965 child_css = child->subsys[subsys_id];
4966 parent_id = parent_css->id;
4967 depth = parent_id->depth + 1;
4968
4969 child_id = get_new_cssid(ss, depth);
4970 if (IS_ERR(child_id))
4971 return PTR_ERR(child_id);
4972
4973 for (i = 0; i < depth; i++)
4974 child_id->stack[i] = parent_id->stack[i];
4975 child_id->stack[depth] = child_id->id;
4976
4977
4978
4979
4980 rcu_assign_pointer(child_css->id, child_id);
4981
4982 return 0;
4983}
4984
4985
4986
4987
4988
4989
4990
4991
4992
4993struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
4994{
4995 struct css_id *cssid = NULL;
4996
4997 BUG_ON(!ss->use_id);
4998 cssid = idr_find(&ss->idr, id);
4999
5000 if (unlikely(!cssid))
5001 return NULL;
5002
5003 return rcu_dereference(cssid->css);
5004}
5005EXPORT_SYMBOL_GPL(css_lookup);
5006
5007
5008
5009
5010
5011
5012
5013
5014
5015
5016
5017struct cgroup_subsys_state *
5018css_get_next(struct cgroup_subsys *ss, int id,
5019 struct cgroup_subsys_state *root, int *foundid)
5020{
5021 struct cgroup_subsys_state *ret = NULL;
5022 struct css_id *tmp;
5023 int tmpid;
5024 int rootid = css_id(root);
5025 int depth = css_depth(root);
5026
5027 if (!rootid)
5028 return NULL;
5029
5030 BUG_ON(!ss->use_id);
5031
5032 tmpid = id;
5033 while (1) {
5034
5035
5036
5037
5038 spin_lock(&ss->id_lock);
5039 tmp = idr_get_next(&ss->idr, &tmpid);
5040 spin_unlock(&ss->id_lock);
5041
5042 if (!tmp)
5043 break;
5044 if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
5045 ret = rcu_dereference(tmp->css);
5046 if (ret) {
5047 *foundid = tmpid;
5048 break;
5049 }
5050 }
5051
5052 tmpid = tmpid + 1;
5053 }
5054 return ret;
5055}
5056
5057
5058
5059
5060struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
5061{
5062 struct cgroup *cgrp;
5063 struct inode *inode;
5064 struct cgroup_subsys_state *css;
5065
5066 inode = f->f_dentry->d_inode;
5067
5068 if (inode->i_op != &cgroup_dir_inode_operations)
5069 return ERR_PTR(-EBADF);
5070
5071 if (id < 0 || id >= CGROUP_SUBSYS_COUNT)
5072 return ERR_PTR(-EINVAL);
5073
5074
5075 cgrp = __d_cgrp(f->f_dentry);
5076 css = cgrp->subsys[id];
5077 return css ? css : ERR_PTR(-ENOENT);
5078}
5079
5080#ifdef CONFIG_CGROUP_DEBUG
5081static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
5082 struct cgroup *cont)
5083{
5084 struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
5085
5086 if (!css)
5087 return ERR_PTR(-ENOMEM);
5088
5089 return css;
5090}
5091
5092static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
5093{
5094 kfree(cont->subsys[debug_subsys_id]);
5095}
5096
5097static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
5098{
5099 return atomic_read(&cont->count);
5100}
5101
5102static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft)
5103{
5104 return cgroup_task_count(cont);
5105}
5106
5107static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
5108{
5109 return (u64)(unsigned long)current->cgroups;
5110}
5111
5112static u64 current_css_set_refcount_read(struct cgroup *cont,
5113 struct cftype *cft)
5114{
5115 u64 count;
5116
5117 rcu_read_lock();
5118 count = atomic_read(¤t->cgroups->refcount);
5119 rcu_read_unlock();
5120 return count;
5121}
5122
5123static int current_css_set_cg_links_read(struct cgroup *cont,
5124 struct cftype *cft,
5125 struct seq_file *seq)
5126{
5127 struct cg_cgroup_link *link;
5128 struct css_set *cg;
5129
5130 read_lock(&css_set_lock);
5131 rcu_read_lock();
5132 cg = rcu_dereference(current->cgroups);
5133 list_for_each_entry(link, &cg->cg_links, cg_link_list) {
5134 struct cgroup *c = link->cgrp;
5135 const char *name;
5136
5137 if (c->dentry)
5138 name = c->dentry->d_name.name;
5139 else
5140 name = "?";
5141 seq_printf(seq, "Root %d group %s\n",
5142 c->root->hierarchy_id, name);
5143 }
5144 rcu_read_unlock();
5145 read_unlock(&css_set_lock);
5146 return 0;
5147}
5148
5149#define MAX_TASKS_SHOWN_PER_CSS 25
5150static int cgroup_css_links_read(struct cgroup *cont,
5151 struct cftype *cft,
5152 struct seq_file *seq)
5153{
5154 struct cg_cgroup_link *link;
5155
5156 read_lock(&css_set_lock);
5157 list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
5158 struct css_set *cg = link->cg;
5159 struct task_struct *task;
5160 int count = 0;
5161 seq_printf(seq, "css_set %p\n", cg);
5162 list_for_each_entry(task, &cg->tasks, cg_list) {
5163 if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
5164 seq_puts(seq, " ...\n");
5165 break;
5166 } else {
5167 seq_printf(seq, " task %d\n",
5168 task_pid_vnr(task));
5169 }
5170 }
5171 }
5172 read_unlock(&css_set_lock);
5173 return 0;
5174}
5175
5176static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
5177{
5178 return test_bit(CGRP_RELEASABLE, &cgrp->flags);
5179}
5180
5181static struct cftype debug_files[] = {
5182 {
5183 .name = "cgroup_refcount",
5184 .read_u64 = cgroup_refcount_read,
5185 },
5186 {
5187 .name = "taskcount",
5188 .read_u64 = debug_taskcount_read,
5189 },
5190
5191 {
5192 .name = "current_css_set",
5193 .read_u64 = current_css_set_read,
5194 },
5195
5196 {
5197 .name = "current_css_set_refcount",
5198 .read_u64 = current_css_set_refcount_read,
5199 },
5200
5201 {
5202 .name = "current_css_set_cg_links",
5203 .read_seq_string = current_css_set_cg_links_read,
5204 },
5205
5206 {
5207 .name = "cgroup_css_links",
5208 .read_seq_string = cgroup_css_links_read,
5209 },
5210
5211 {
5212 .name = "releasable",
5213 .read_u64 = releasable_read,
5214 },
5215};
5216
5217static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
5218{
5219 return cgroup_add_files(cont, ss, debug_files,
5220 ARRAY_SIZE(debug_files));
5221}
5222
5223struct cgroup_subsys debug_subsys = {
5224 .name = "debug",
5225 .create = debug_create,
5226 .destroy = debug_destroy,
5227 .populate = debug_populate,
5228 .subsys_id = debug_subsys_id,
5229};
5230#endif
5231