1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include <linux/cgroup.h>
30#include <linux/cred.h>
31#include <linux/ctype.h>
32#include <linux/errno.h>
33#include <linux/fs.h>
34#include <linux/init_task.h>
35#include <linux/kernel.h>
36#include <linux/list.h>
37#include <linux/mm.h>
38#include <linux/mutex.h>
39#include <linux/mount.h>
40#include <linux/pagemap.h>
41#include <linux/proc_fs.h>
42#include <linux/rcupdate.h>
43#include <linux/sched.h>
44#include <linux/backing-dev.h>
45#include <linux/seq_file.h>
46#include <linux/slab.h>
47#include <linux/magic.h>
48#include <linux/spinlock.h>
49#include <linux/string.h>
50#include <linux/sort.h>
51#include <linux/kmod.h>
52#include <linux/module.h>
53#include <linux/delayacct.h>
54#include <linux/cgroupstats.h>
55#include <linux/hash.h>
56#include <linux/namei.h>
57#include <linux/pid_namespace.h>
58#include <linux/idr.h>
59#include <linux/vmalloc.h>
60#include <linux/eventfd.h>
61#include <linux/poll.h>
62#include <linux/flex_array.h>
63
64#include <linux/atomic.h>
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
/*
 * cgroup_mutex is the master lock.  Any modification to cgroup or its
 * hierarchy must be performed while holding it.
 */
static DEFINE_MUTEX(cgroup_mutex);

/*
 * cgroup_root_mutex protects cgroupfs_root state (e.g. the
 * release_agent_path) on its own; nests inside cgroup_mutex.
 */
static DEFINE_MUTEX(cgroup_root_mutex);

/*
 * Generate an array of cgroup subsystem pointers by expanding SUBSYS()
 * for every entry in <linux/cgroup_subsys.h>.
 */
#define SUBSYS(_x) &_x ## _subsys,
static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
#include <linux/cgroup_subsys.h>
};

/* Maximum length of a hierarchy's name, including the trailing NUL */
#define MAX_CGROUP_ROOT_NAMELEN 64
97
98
99
100
101
102
/*
 * A cgroupfs_root represents the root of a cgroup hierarchy, and may be
 * associated with a superblock to form an active hierarchy.
 */
struct cgroupfs_root {
	struct super_block *sb;

	/*
	 * The bitmask of subsystems intended to be attached to this
	 * hierarchy
	 */
	unsigned long subsys_bits;

	/* Unique id for this hierarchy. */
	int hierarchy_id;

	/* The bitmask of subsystems currently attached to this hierarchy */
	unsigned long actual_subsys_bits;

	/* A list running through the attached subsystems */
	struct list_head subsys_list;

	/* The root cgroup for this hierarchy */
	struct cgroup top_cgroup;

	/* Tracks how many cgroups are currently defined in hierarchy. */
	int number_of_cgroups;

	/* A list running through the active hierarchies */
	struct list_head root_list;

	/* Hierarchy-specific flags (ROOT_* bits) */
	unsigned long flags;

	/* The path to use for release notifications. */
	char release_agent_path[PATH_MAX];

	/* The name for this hierarchy - may be empty */
	char name[MAX_CGROUP_ROOT_NAMELEN];
};

/*
 * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
 * subsystems that are otherwise unattached - it never has more than a
 * single cgroup, and all tasks are part of that cgroup.
 */
static struct cgroupfs_root rootnode;
146
147
148
149
150
/* Upper bound for css ids handed out by the per-subsystem idr */
#define CSS_ID_MAX	(65535)
struct css_id {
	/*
	 * The css to which this ID points. This pointer is set to valid value
	 * after cgroup is populated. If cgroup is removed, this will be NULL.
	 * This pointer is expected to be RCU-safe because destroy()
	 * is called after synchronize_rcu(). But for safe use, css_tryget()
	 * should be used for avoiding race.
	 */
	struct cgroup_subsys_state __rcu *css;
	/*
	 * ID of this css.
	 */
	unsigned short id;
	/*
	 * Depth in hierarchy which this ID belongs to.
	 */
	unsigned short depth;
	/*
	 * ID is freed by RCU. (and lookup routine is RCU safe.)
	 */
	struct rcu_head rcu_head;
	/*
	 * Hierarchy of CSS ID belongs to.
	 */
	unsigned short stack[0]; /* flexible-array style: length is depth+1 */
};
178
179
180
181
/*
 * cgroup_event represents events which userspace want to receive.
 */
struct cgroup_event {
	/*
	 * Cgroup which the event belongs to.
	 */
	struct cgroup *cgrp;
	/*
	 * Control file which the event associated.
	 */
	struct cftype *cft;
	/*
	 * eventfd to signal userspace about the event.
	 */
	struct eventfd_ctx *eventfd;
	/*
	 * Each of these stored in a list by the cgroup.
	 */
	struct list_head list;
	/*
	 * All fields below needed to unregister event when
	 * userspace closes eventfd.
	 */
	poll_table pt;
	wait_queue_head_t *wqh;
	wait_queue_t wait;
	struct work_struct remove;
};

/* The list of hierarchy roots */

static LIST_HEAD(roots);
static int root_count;

/* Hierarchy id allocation; protected by hierarchy_id_lock. */
static DEFINE_IDA(hierarchy_ida);
static int next_hierarchy_id;
static DEFINE_SPINLOCK(hierarchy_id_lock);

/* dummytop is a shorthand for the dummy hierarchy's top cgroup */
#define dummytop (&rootnode.top_cgroup)

/*
 * This flag indicates whether tasks in the fork and exit paths should
 * check for fork/exit handlers to call. This avoids us having to do
 * extra work in the fork/exit path if none of the subsystems need to
 * be called.
 */
static int need_forkexit_callback __read_mostly;
227
#ifdef CONFIG_PROVE_LOCKING
/* With lockdep, report whether the *current task* holds cgroup_mutex. */
int cgroup_lock_is_held(void)
{
	return lockdep_is_held(&cgroup_mutex);
}
#else
/* Without lockdep, the best we can do is "somebody holds the mutex". */
int cgroup_lock_is_held(void)
{
	return mutex_is_locked(&cgroup_mutex);
}
#endif

EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
241
242
243inline int cgroup_is_removed(const struct cgroup *cgrp)
244{
245 return test_bit(CGRP_REMOVED, &cgrp->flags);
246}
247
248
/* bits in struct cgroupfs_root flags field */
enum {
	ROOT_NOPREFIX, /* mounted subsystems have no named prefix */
};
252
253static int cgroup_is_releasable(const struct cgroup *cgrp)
254{
255 const int bits =
256 (1 << CGRP_RELEASABLE) |
257 (1 << CGRP_NOTIFY_ON_RELEASE);
258 return (cgrp->flags & bits) == bits;
259}
260
261static int notify_on_release(const struct cgroup *cgrp)
262{
263 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
264}
265
266static int clone_children(const struct cgroup *cgrp)
267{
268 return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
269}
270
271
272
273
274
/*
 * for_each_subsys() allows you to iterate on each subsystem attached to
 * an active hierarchy
 */
#define for_each_subsys(_root, _ss) \
list_for_each_entry(_ss, &_root->subsys_list, sibling)

/* for_each_active_root() allows you to iterate across the active hierarchies */
#define for_each_active_root(_root) \
list_for_each_entry(_root, &roots, root_list)

/* the list of cgroups eligible for automatic release. Protected by
 * release_list_lock */
static LIST_HEAD(release_list);
static DEFINE_RAW_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
289
290
/* Link structure for associating css_set objects with cgroups */
struct cg_cgroup_link {
	/*
	 * List running through cg_cgroup_links associated with a
	 * cgroup, anchored on cgroup->css_sets
	 */
	struct list_head cgrp_link_list;
	struct cgroup *cgrp;
	/*
	 * List running through cg_cgroup_links pointing at a
	 * single css_set object, anchored on css_set->cg_links
	 */
	struct list_head cg_link_list;
	struct css_set *cg;
};

/*
 * The default css_set - used by init and its children prior to any
 * hierarchies being mounted. It contains a pointer to the root state
 * for each subsystem. Also used to anchor the list of css_sets. Not
 * reference-counted, to improve performance when child cgroups
 * haven't been created.
 */

static struct css_set init_css_set;
static struct cg_cgroup_link init_css_set_link;

static int cgroup_init_idr(struct cgroup_subsys *ss,
			   struct cgroup_subsys_state *css);

/*
 * css_set_lock protects the list of css_set objects, and the chain of
 * tasks off each css_set.
 */
static DEFINE_RWLOCK(css_set_lock);
static int css_set_count;

/*
 * hash table for cgroup groups. This improves the performance to find
 * an existing css_set. This hash doesn't (currently) take into
 * account cgroups in empty hierarchies.
 */
#define CSS_SET_HASH_BITS	7
#define CSS_SET_TABLE_SIZE	(1 << CSS_SET_HASH_BITS)
static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
333
334static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
335{
336 int i;
337 int index;
338 unsigned long tmp = 0UL;
339
340 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
341 tmp += (unsigned long)css[i];
342 tmp = (tmp >> 16) ^ tmp;
343
344 index = hash_long(tmp, CSS_SET_HASH_BITS);
345
346 return &css_set_table[index];
347}
348
349
350
351
352
/* Set to 1 once tasks need their css_set links maintained (see callers
 * outside this chunk); lets the fork/exit fast path skip the linkage. */
static int use_task_css_set_links __read_mostly;

/*
 * __put_css_set - drop a reference on a css_set
 * @cg: the css_set
 * @taskexit: non-zero when called from a task-exit path; the cgroups are
 *	      then marked CGRP_RELEASABLE before checking for release.
 *
 * When the last reference goes away, unhashes the css_set, drops the
 * cgroup refcounts held through its links, and frees it via RCU.
 */
static void __put_css_set(struct css_set *cg, int taskexit)
{
	struct cg_cgroup_link *link;
	struct cg_cgroup_link *saved_link;

	/*
	 * Ensure that the refcount doesn't hit zero while any readers
	 * can see it. Similar to atomic_dec_and_lock(), but for an
	 * rwlock
	 */
	if (atomic_add_unless(&cg->refcount, -1, 1))
		return;
	write_lock(&css_set_lock);
	if (!atomic_dec_and_test(&cg->refcount)) {
		/* someone re-got a reference under the lock; not ours */
		write_unlock(&css_set_lock);
		return;
	}

	/* This css_set is dead. unlink it and release cgroup refcounts */
	hlist_del(&cg->hlist);
	css_set_count--;

	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
				 cg_link_list) {
		struct cgroup *cgrp = link->cgrp;
		list_del(&link->cg_link_list);
		list_del(&link->cgrp_link_list);
		if (atomic_dec_and_test(&cgrp->count) &&
		    notify_on_release(cgrp)) {
			if (taskexit)
				set_bit(CGRP_RELEASABLE, &cgrp->flags);
			check_for_release(cgrp);
		}

		kfree(link);
	}

	write_unlock(&css_set_lock);
	kfree_rcu(cg, rcu_head);
}
394
395
396
397
/*
 * refcounted get/put for css_set objects
 */
static inline void get_css_set(struct css_set *cg)
{
	atomic_inc(&cg->refcount);
}

/* Drop a reference taken by get_css_set(); never marks cgroups releasable. */
static inline void put_css_set(struct css_set *cg)
{
	__put_css_set(cg, 0);
}

/* Drop a reference from the task-exit path; may mark cgroups releasable. */
static inline void put_css_set_taskexit(struct css_set *cg)
{
	__put_css_set(cg, 1);
}
412
413
414
415
416
417
418
419
420
421
422
/*
 * compare_css_sets - helper function for find_existing_css_set().
 * @cg: candidate css_set being tested
 * @old_cg: existing css_set for a task
 * @new_cgrp: cgroup that's being entered by the task
 * @template: desired set of css pointers in css_set (pre-calculated)
 *
 * Returns true if "cg" matches "old_cg" except for the hierarchy
 * which "new_cgrp" belongs to, for which it should match "new_cgrp".
 */
static bool compare_css_sets(struct css_set *cg,
			     struct css_set *old_cg,
			     struct cgroup *new_cgrp,
			     struct cgroup_subsys_state *template[])
{
	struct list_head *l1, *l2;

	if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
		/* Not all subsystems matched */
		return false;
	}

	/*
	 * Compare cgroup pointers in order to distinguish between
	 * different cgroups in hierarchies. As different cgroups may
	 * share the same set of subsystem states, the memcmp above is
	 * not sufficient on its own; walk the link lists in parallel.
	 */
	l1 = &cg->cg_links;
	l2 = &old_cg->cg_links;
	while (1) {
		struct cg_cgroup_link *cgl1, *cgl2;
		struct cgroup *cg1, *cg2;

		l1 = l1->next;
		l2 = l2->next;
		/* See if we reached the end - both lists are equal length. */
		if (l1 == &cg->cg_links) {
			BUG_ON(l2 != &old_cg->cg_links);
			break;
		} else {
			BUG_ON(l2 == &old_cg->cg_links);
		}
		/* Locate the cgroups associated with these links. */
		cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
		cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
		cg1 = cgl1->cgrp;
		cg2 = cgl2->cgrp;
		/* Hierarchies should be linked in the same order. */
		BUG_ON(cg1->root != cg2->root);

		/*
		 * If this hierarchy is the hierarchy of the cgroup
		 * that's changing, then we need to check that this
		 * css_set points to the new cgroup; if it's any other
		 * hierarchy, then this css_set should point to the
		 * same cgroup as the old css_set.
		 */
		if (cg1->root == new_cgrp->root) {
			if (cg1 != new_cgrp)
				return false;
		} else {
			if (cg1 != cg2)
				return false;
		}
	}
	return true;
}
484
485
486
487
488
489
490
491
492
493
494
495
496
497
/*
 * find_existing_css_set() is a helper for find_css_set(), and checks to
 * see whether an existing css_set is suitable.
 *
 * oldcg: the cgroup group that we're using before the cgroup transition
 * cgrp: the cgroup that we're moving into
 * template: location in which to build the desired set of subsystem
 *	     state objects for the new cgroup group
 */
static struct css_set *find_existing_css_set(
	struct css_set *oldcg,
	struct cgroup *cgrp,
	struct cgroup_subsys_state *template[])
{
	int i;
	struct cgroupfs_root *root = cgrp->root;
	struct hlist_head *hhead;
	struct hlist_node *node;
	struct css_set *cg;

	/*
	 * Build the set of subsystem state objects that we want to see in the
	 * new css_set.
	 */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		if (root->subsys_bits & (1UL << i)) {
			/* Subsystem is in this hierarchy. So we want
			 * the subsystem state from the new
			 * cgroup */
			template[i] = cgrp->subsys[i];
		} else {
			/* Subsystem is not in this hierarchy, so we
			 * don't want to change the subsystem state */
			template[i] = oldcg->subsys[i];
		}
	}

	hhead = css_set_hash(template);
	hlist_for_each_entry(cg, node, hhead, hlist) {
		if (!compare_css_sets(cg, oldcg, cgrp, template))
			continue;

		/* This css_set matches what we need */
		return cg;
	}

	/* No existing cgroup group matched */
	return NULL;
}
539
540static void free_cg_links(struct list_head *tmp)
541{
542 struct cg_cgroup_link *link;
543 struct cg_cgroup_link *saved_link;
544
545 list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
546 list_del(&link->cgrp_link_list);
547 kfree(link);
548 }
549}
550
551
552
553
554
555
556static int allocate_cg_links(int count, struct list_head *tmp)
557{
558 struct cg_cgroup_link *link;
559 int i;
560 INIT_LIST_HEAD(tmp);
561 for (i = 0; i < count; i++) {
562 link = kmalloc(sizeof(*link), GFP_KERNEL);
563 if (!link) {
564 free_cg_links(tmp);
565 return -ENOMEM;
566 }
567 list_add(&link->cgrp_link_list, tmp);
568 }
569 return 0;
570}
571
572
573
574
575
576
577
578static void link_css_set(struct list_head *tmp_cg_links,
579 struct css_set *cg, struct cgroup *cgrp)
580{
581 struct cg_cgroup_link *link;
582
583 BUG_ON(list_empty(tmp_cg_links));
584 link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
585 cgrp_link_list);
586 link->cg = cg;
587 link->cgrp = cgrp;
588 atomic_inc(&cgrp->count);
589 list_move(&link->cgrp_link_list, &cgrp->css_sets);
590
591
592
593
594 list_add_tail(&link->cg_link_list, &cg->cg_links);
595}
596
597
598
599
600
601
602
603
/*
 * find_css_set() takes an existing cgroup group and a
 * cgroup object, and returns a css_set object that's
 * equivalent to the old group, but with the given cgroup
 * substituted into the appropriate hierarchy. Must be called with
 * cgroup_mutex held
 */
static struct css_set *find_css_set(
	struct css_set *oldcg, struct cgroup *cgrp)
{
	struct css_set *res;
	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];

	struct list_head tmp_cg_links;

	struct hlist_head *hhead;
	struct cg_cgroup_link *link;

	/* First see if we already have a cgroup group that matches
	 * the desired set */
	read_lock(&css_set_lock);
	res = find_existing_css_set(oldcg, cgrp, template);
	if (res)
		get_css_set(res);
	read_unlock(&css_set_lock);

	if (res)
		return res;

	res = kmalloc(sizeof(*res), GFP_KERNEL);
	if (!res)
		return NULL;

	/* Allocate all the cg_cgroup_link objects that we'll need */
	if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
		kfree(res);
		return NULL;
	}

	atomic_set(&res->refcount, 1);
	INIT_LIST_HEAD(&res->cg_links);
	INIT_LIST_HEAD(&res->tasks);
	INIT_HLIST_NODE(&res->hlist);

	/* Copy the set of subsystem state objects generated in
	 * find_existing_css_set() */
	memcpy(res->subsys, template, sizeof(res->subsys));

	write_lock(&css_set_lock);
	/* Add reference counts and links from the new css_set. */
	list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
		struct cgroup *c = link->cgrp;
		if (c->root == cgrp->root)
			c = cgrp;
		link_css_set(&tmp_cg_links, res, c);
	}

	BUG_ON(!list_empty(&tmp_cg_links));

	css_set_count++;

	/* Add this cgroup group to the hash table */
	hhead = css_set_hash(res->subsys);
	hlist_add_head(&res->hlist, hhead);

	write_unlock(&css_set_lock);

	return res;
}
666
667
668
669
670
/*
 * Return the cgroup for "task" from the given hierarchy. Must be
 * called with cgroup_mutex held.
 */
static struct cgroup *task_cgroup_from_root(struct task_struct *task,
					    struct cgroupfs_root *root)
{
	struct css_set *css;
	struct cgroup *res = NULL;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));
	read_lock(&css_set_lock);
	/*
	 * No need to lock the task - since we hold cgroup_mutex the
	 * task can't change groups, so we can just look at its
	 * css_set directly.
	 */
	css = task->cgroups;
	if (css == &init_css_set) {
		/* init_css_set keeps no cg_links; answer is the root cgroup */
		res = &root->top_cgroup;
	} else {
		struct cg_cgroup_link *link;
		list_for_each_entry(link, &css->cg_links, cg_link_list) {
			struct cgroup *c = link->cgrp;
			if (c->root == root) {
				res = c;
				break;
			}
		}
	}
	read_unlock(&css_set_lock);
	/* every css_set has a cgroup in every active hierarchy */
	BUG_ON(!res);
	return res;
}
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
/**
 * cgroup_lock - lock out any changes to cgroup structures
 */
void cgroup_lock(void)
{
	mutex_lock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_lock);

/**
 * cgroup_unlock - release lock on cgroup changes
 *
 * Undo the lock taken in a previous cgroup_lock() call.
 */
void cgroup_unlock(void)
{
	mutex_unlock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_unlock);
772
773
774
775
776
777
778
779
/*
 * Forward declarations, required because of the cyclic reference loop:
 * cgroup_mkdir -> cgroup_create -> cgroup_populate_dir ->
 * cgroup_add_file -> cgroup_create_file -> cgroup_dir_inode_operations
 * -> cgroup_mkdir.
 */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp);
static const struct inode_operations cgroup_dir_inode_operations;
static const struct file_operations proc_cgroupstats_operations;

/* cgroup files do no real I/O: no dirty accounting or writeback needed */
static struct backing_dev_info cgroup_backing_dev_info = {
	.name		= "cgroup",
	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
};

static int alloc_css_id(struct cgroup_subsys *ss,
			struct cgroup *parent, struct cgroup *child);
794
795static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
796{
797 struct inode *inode = new_inode(sb);
798
799 if (inode) {
800 inode->i_ino = get_next_ino();
801 inode->i_mode = mode;
802 inode->i_uid = current_fsuid();
803 inode->i_gid = current_fsgid();
804 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
805 inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
806 }
807 return inode;
808}
809
810
811
812
813
814static int cgroup_call_pre_destroy(struct cgroup *cgrp)
815{
816 struct cgroup_subsys *ss;
817 int ret = 0;
818
819 for_each_subsys(cgrp->root, ss)
820 if (ss->pre_destroy) {
821 ret = ss->pre_destroy(ss, cgrp);
822 if (ret)
823 break;
824 }
825
826 return ret;
827}
828
static void cgroup_diput(struct dentry *dentry, struct inode *inode)
{
	/* is dentry a directory ? if so, kfree() associated cgroup */
	if (S_ISDIR(inode->i_mode)) {
		struct cgroup *cgrp = dentry->d_fsdata;
		struct cgroup_subsys *ss;
		BUG_ON(!(cgroup_is_removed(cgrp)));
		/* It's possible for external users to be holding css
		 * reference counts on a cgroup; css_put() needs to
		 * be able to access the cgroup after decrementing
		 * the reference count in order to know if it needs to
		 * queue the cgroup to be handled by the release
		 * agent */
		synchronize_rcu();

		mutex_lock(&cgroup_mutex);
		/*
		 * Release the subsystem state objects.
		 */
		for_each_subsys(cgrp->root, ss)
			ss->destroy(ss, cgrp);

		cgrp->root->number_of_cgroups--;
		mutex_unlock(&cgroup_mutex);

		/*
		 * Drop the active superblock reference that we took when we
		 * created the cgroup
		 */
		deactivate_super(cgrp->root->sb);

		/*
		 * if we're getting rid of the cgroup, refcount should ensure
		 * that there are no pidlists left.
		 */
		BUG_ON(!list_empty(&cgrp->pidlists));

		kfree_rcu(cgrp, rcu_head);
	}
	iput(inode);
}
870
/*
 * d_delete() callback: cgroup dentries should never be kept in the
 * dcache once unused, so always tell the VFS to delete them.
 */
static int cgroup_delete(const struct dentry *d)
{
	return 1;
}
875
876static void remove_dir(struct dentry *d)
877{
878 struct dentry *parent = dget(d->d_parent);
879
880 d_delete(d);
881 simple_rmdir(parent->d_inode, d);
882 dput(parent);
883}
884
/* Remove every (non-directory) child file of a cgroup directory. */
static void cgroup_clear_directory(struct dentry *dentry)
{
	struct list_head *node;

	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
	spin_lock(&dentry->d_lock);
	node = dentry->d_subdirs.next;
	while (node != &dentry->d_subdirs) {
		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);

		spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
		list_del_init(node);
		if (d->d_inode) {
			/* This should never be called on a cgroup
			 * directory with child cgroups */
			BUG_ON(d->d_inode->i_mode & S_IFDIR);
			dget_dlock(d);
			spin_unlock(&d->d_lock);
			spin_unlock(&dentry->d_lock);
			d_delete(d);
			simple_unlink(dentry->d_inode, d);
			dput(d);
			/* the parent lock was dropped for the unlink;
			 * re-take it and restart from the list head */
			spin_lock(&dentry->d_lock);
		} else
			spin_unlock(&d->d_lock);
		node = dentry->d_subdirs.next;
	}
	spin_unlock(&dentry->d_lock);
}
914
915
916
917
/*
 * NOTE : the dentry must have been dget()'ed
 */
static void cgroup_d_remove_dir(struct dentry *dentry)
{
	struct dentry *parent;

	cgroup_clear_directory(dentry);

	/* detach from the parent's child list under both d_locks */
	parent = dentry->d_parent;
	spin_lock(&parent->d_lock);
	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
	list_del_init(&dentry->d_u.d_child);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&parent->d_lock);
	remove_dir(dentry);
}
932
933
934
935
936
937
938
939
940
/*
 * Queue for tasks sleeping in rmdir() on a cgroup whose css refcounts
 * haven't dropped to zero yet.  CGRP_WAIT_ON_RMDIR marks that someone
 * is waiting.
 */
static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);

/* Wake any rmdir() waiters if the wait flag was set on @cgrp. */
static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
{
	if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
		wake_up_all(&cgroup_rmdir_waitq);
}

/* Pin @css so that a concurrent rmdir of its cgroup must wait for us. */
void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
{
	css_get(css);
}

/* Undo cgroup_exclude_rmdir() and let any pending rmdir retry. */
void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
{
	cgroup_wakeup_rmdir_waiter(css->cgroup);
	css_put(css);
}
959
960
961
962
963
964
/*
 * Attach/detach subsystems so that exactly the set in @final_bits is
 * bound to @root.  Called with cgroup_mutex and cgroup_root_mutex held;
 * only legal while the hierarchy has no child cgroups.
 */
static int rebind_subsystems(struct cgroupfs_root *root,
			      unsigned long final_bits)
{
	unsigned long added_bits, removed_bits;
	struct cgroup *cgrp = &root->top_cgroup;
	int i;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));
	BUG_ON(!mutex_is_locked(&cgroup_root_mutex));

	removed_bits = root->actual_subsys_bits & ~final_bits;
	added_bits = final_bits & ~root->actual_subsys_bits;
	/* Check that any added subsystems are currently free */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		unsigned long bit = 1UL << i;
		struct cgroup_subsys *ss = subsys[i];
		if (!(bit & added_bits))
			continue;
		/*
		 * Nobody should tell us to do a subsys that doesn't exist:
		 * parse_cgroupfs_options should catch that case and refcounts
		 * ensure that subsystems won't disappear once selected.
		 */
		BUG_ON(ss == NULL);
		if (ss->root != &rootnode) {
			/* Subsystem isn't free */
			return -EBUSY;
		}
	}

	/*
	 * Currently we don't handle adding/removing subsystems when
	 * any child cgroups exist. This is theoretically supportable
	 * but involves complex error handling, so it's being left until
	 * later
	 */
	if (root->number_of_cgroups > 1)
		return -EBUSY;

	/* Process each subsystem */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		unsigned long bit = 1UL << i;
		if (bit & added_bits) {
			/* We're binding this subsystem to this hierarchy */
			BUG_ON(ss == NULL);
			BUG_ON(cgrp->subsys[i]);
			BUG_ON(!dummytop->subsys[i]);
			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
			mutex_lock(&ss->hierarchy_mutex);
			cgrp->subsys[i] = dummytop->subsys[i];
			cgrp->subsys[i]->cgroup = cgrp;
			list_move(&ss->sibling, &root->subsys_list);
			ss->root = root;
			if (ss->bind)
				ss->bind(ss, cgrp);
			mutex_unlock(&ss->hierarchy_mutex);
			/* refcount was already taken, and we're keeping it */
		} else if (bit & removed_bits) {
			/* We're removing this subsystem */
			BUG_ON(ss == NULL);
			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
			mutex_lock(&ss->hierarchy_mutex);
			if (ss->bind)
				ss->bind(ss, dummytop);
			dummytop->subsys[i]->cgroup = dummytop;
			cgrp->subsys[i] = NULL;
			subsys[i]->root = &rootnode;
			list_move(&ss->sibling, &rootnode.subsys_list);
			mutex_unlock(&ss->hierarchy_mutex);
			/* subsystem is now free - drop reference on module */
			module_put(ss->module);
		} else if (bit & final_bits) {
			/* Subsystem state should already exist */
			BUG_ON(ss == NULL);
			BUG_ON(!cgrp->subsys[i]);
			/*
			 * a refcount was taken, but we already had one, so
			 * drop the extra reference.
			 */
			module_put(ss->module);
#ifdef CONFIG_MODULE_UNLOAD
			BUG_ON(ss->module && !module_refcount(ss->module));
#endif
		} else {
			/* Subsystem state shouldn't exist */
			BUG_ON(cgrp->subsys[i]);
		}
	}
	root->subsys_bits = root->actual_subsys_bits = final_bits;
	synchronize_rcu();

	return 0;
}
1058
/*
 * Show the mount options for this hierarchy (for /proc/mounts et al).
 * cgroup_root_mutex keeps the root's option state stable while we
 * format it.
 */
static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
{
	struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
	struct cgroup_subsys *ss;

	mutex_lock(&cgroup_root_mutex);
	for_each_subsys(root, ss)
		seq_printf(seq, ",%s", ss->name);
	if (test_bit(ROOT_NOPREFIX, &root->flags))
		seq_puts(seq, ",noprefix");
	if (strlen(root->release_agent_path))
		seq_printf(seq, ",release_agent=%s", root->release_agent_path);
	if (clone_children(&root->top_cgroup))
		seq_puts(seq, ",clone_children");
	if (strlen(root->name))
		seq_printf(seq, ",name=%s", root->name);
	mutex_unlock(&cgroup_root_mutex);
	return 0;
}
1078
/* Parsed mount options; release_agent and name are kstrndup'ed and must
 * be kfree'd by the caller. */
struct cgroup_sb_opts {
	unsigned long subsys_bits;	/* subsystems selected by name/"all" */
	unsigned long flags;		/* ROOT_* bits, e.g. noprefix */
	char *release_agent;
	bool clone_children;
	char *name;
	/* User explicitly requested empty subsystem */
	bool none;

	/* Candidate root; filled in by cgroup_mount() before sget() */
	struct cgroupfs_root *new_root;

};
1091
1092
1093
1094
1095
1096
1097
/*
 * Convert a hierarchy specifier into a bitmask of subsystems and
 * flags. Call with cgroup_mutex held to protect the subsys[]
 * array. This function takes refcounts on subsystems to be used, unless it
 * returns error, in which case no refcounts are taken.
 */
static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
{
	char *token, *o = data;
	bool all_ss = false, one_ss = false;
	unsigned long mask = (unsigned long)-1;
	int i;
	bool module_pin_failed = false;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));

#ifdef CONFIG_CPUSETS
	mask = ~(1UL << cpuset_subsys_id);
#endif

	memset(opts, 0, sizeof(*opts));

	while ((token = strsep(&o, ",")) != NULL) {
		if (!*token)
			return -EINVAL;
		if (!strcmp(token, "none")) {
			/* Explicitly have no subsystems */
			opts->none = true;
			continue;
		}
		if (!strcmp(token, "all")) {
			/* Mutually exclusive option 'all' + subsystem name */
			if (one_ss)
				return -EINVAL;
			all_ss = true;
			continue;
		}
		if (!strcmp(token, "noprefix")) {
			set_bit(ROOT_NOPREFIX, &opts->flags);
			continue;
		}
		if (!strcmp(token, "clone_children")) {
			opts->clone_children = true;
			continue;
		}
		if (!strncmp(token, "release_agent=", 14)) {
			/* Specifying two release agents is forbidden */
			if (opts->release_agent)
				return -EINVAL;
			opts->release_agent =
				kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
			if (!opts->release_agent)
				return -ENOMEM;
			continue;
		}
		if (!strncmp(token, "name=", 5)) {
			const char *name = token + 5;
			/* Can't specify an empty name */
			if (!strlen(name))
				return -EINVAL;
			/* Must match [\w.-]+ */
			for (i = 0; i < strlen(name); i++) {
				char c = name[i];
				if (isalnum(c))
					continue;
				if ((c == '.') || (c == '-') || (c == '_'))
					continue;
				return -EINVAL;
			}
			/* Specifying two names is forbidden */
			if (opts->name)
				return -EINVAL;
			opts->name = kstrndup(name,
					      MAX_CGROUP_ROOT_NAMELEN - 1,
					      GFP_KERNEL);
			if (!opts->name)
				return -ENOMEM;

			continue;
		}

		/* Otherwise the token must be a subsystem name */
		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss == NULL)
				continue;
			if (strcmp(token, ss->name))
				continue;
			if (ss->disabled)
				continue;

			/* Mutually exclusive option 'all' + subsystem name */
			if (all_ss)
				return -EINVAL;
			set_bit(i, &opts->subsys_bits);
			one_ss = true;

			break;
		}
		if (i == CGROUP_SUBSYS_COUNT)
			return -ENOENT;
	}

	/*
	 * If the 'all' option was specified select all the subsystems,
	 * otherwise if 'none', 'name=' and a subsystem name options
	 * were not specified, let's default the same behavior as using
	 * 'all' option.
	 */
	if (all_ss || (!one_ss && !opts->none && !opts->name)) {
		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss == NULL)
				continue;
			if (ss->disabled)
				continue;
			set_bit(i, &opts->subsys_bits);
		}
	}

	/* Consistency checks */

	/*
	 * Option noprefix was introduced just for backward compatibility
	 * with the old cpuset, so we allow noprefix only if mounting just
	 * the cpuset subsystem.
	 */
	if (test_bit(ROOT_NOPREFIX, &opts->flags) &&
	    (opts->subsys_bits & mask))
		return -EINVAL;

	/* Can't specify "none" and some subsystems */
	if (opts->subsys_bits && opts->none)
		return -EINVAL;

	/*
	 * We either have to specify by name or by subsystems. (So all
	 * empty hierarchies must have a name).
	 */
	if (!opts->subsys_bits && !opts->name)
		return -EINVAL;

	/*
	 * Grab references on all the modules we'll need, so the subsystems
	 * don't dance around before rebind_subsystems attaches them. This may
	 * take duplicate reference counts on a subsystem that's already used,
	 * but rebind_subsystems handles this case.
	 */
	for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
		unsigned long bit = 1UL << i;

		if (!(bit & opts->subsys_bits))
			continue;
		if (!try_module_get(subsys[i]->module)) {
			module_pin_failed = true;
			break;
		}
	}
	if (module_pin_failed) {
		/*
		 * oops, one of the modules was going away. this means that we
		 * raced with a module_delete call, and to the user this is
		 * essentially a "subsystem doesn't exist" case.
		 */
		for (i--; i >= CGROUP_BUILTIN_SUBSYS_COUNT; i--) {
			/* drop refcounts only on the ones we took */
			unsigned long bit = 1UL << i;

			if (!(bit & opts->subsys_bits))
				continue;
			module_put(subsys[i]->module);
		}
		return -ENOENT;
	}

	return 0;
}
1268
1269static void drop_parsed_module_refcounts(unsigned long subsys_bits)
1270{
1271 int i;
1272 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
1273 unsigned long bit = 1UL << i;
1274
1275 if (!(bit & subsys_bits))
1276 continue;
1277 module_put(subsys[i]->module);
1278 }
1279}
1280
/*
 * Remount handler: may change the bound subsystems and the release
 * agent, but flags and name must stay the same.
 */
static int cgroup_remount(struct super_block *sb, int *flags, char *data)
{
	int ret = 0;
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	struct cgroup_sb_opts opts;

	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
	mutex_lock(&cgroup_mutex);
	mutex_lock(&cgroup_root_mutex);

	/* See what subsystems are wanted */
	ret = parse_cgroupfs_options(data, &opts);
	if (ret)
		goto out_unlock;

	/* Don't allow flags or name to change at remount */
	if (opts.flags != root->flags ||
	    (opts.name && strcmp(opts.name, root->name))) {
		ret = -EINVAL;
		drop_parsed_module_refcounts(opts.subsys_bits);
		goto out_unlock;
	}

	ret = rebind_subsystems(root, opts.subsys_bits);
	if (ret) {
		drop_parsed_module_refcounts(opts.subsys_bits);
		goto out_unlock;
	}

	/* (re)populate subsystem files */
	cgroup_populate_dir(cgrp);

	if (opts.release_agent)
		strcpy(root->release_agent_path, opts.release_agent);
 out_unlock:
	kfree(opts.release_agent);
	kfree(opts.name);
	mutex_unlock(&cgroup_root_mutex);
	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
	return ret;
}
1324
/* Superblock operations: everything generic except show/remount. */
static const struct super_operations cgroup_ops = {
	.statfs = simple_statfs,
	.drop_inode = generic_delete_inode,
	.show_options = cgroup_show_options,
	.remount_fs = cgroup_remount,
};
1331
/* Initialise the lists and locks every cgroup carries. */
static void init_cgroup_housekeeping(struct cgroup *cgrp)
{
	INIT_LIST_HEAD(&cgrp->sibling);
	INIT_LIST_HEAD(&cgrp->children);
	INIT_LIST_HEAD(&cgrp->css_sets);
	INIT_LIST_HEAD(&cgrp->release_list);
	INIT_LIST_HEAD(&cgrp->pidlists);
	mutex_init(&cgrp->pidlist_mutex);
	INIT_LIST_HEAD(&cgrp->event_list);
	spin_lock_init(&cgrp->event_list_lock);
}
1343
1344static void init_cgroup_root(struct cgroupfs_root *root)
1345{
1346 struct cgroup *cgrp = &root->top_cgroup;
1347 INIT_LIST_HEAD(&root->subsys_list);
1348 INIT_LIST_HEAD(&root->root_list);
1349 root->number_of_cgroups = 1;
1350 cgrp->root = root;
1351 cgrp->top_cgroup = cgrp;
1352 init_cgroup_housekeeping(cgrp);
1353}
1354
/*
 * Allocate a unique hierarchy id for @root, searching from
 * next_hierarchy_id so ids are not immediately recycled.  Returns false
 * only when the ida could not be pre-loaded (out of memory).
 */
static bool init_root_id(struct cgroupfs_root *root)
{
	int ret = 0;

	do {
		if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL))
			return false;
		spin_lock(&hierarchy_id_lock);
		/* Try to allocate the next unused ID */
		ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
					&root->hierarchy_id);
		if (ret == -ENOSPC)
			/* Try again starting from 0 */
			ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id);
		if (!ret) {
			next_hierarchy_id = root->hierarchy_id + 1;
		} else if (ret != -EAGAIN) {
			/* only -EAGAIN (pre-load raced away) is retryable */
			BUG_ON(ret);
		}
		spin_unlock(&hierarchy_id_lock);
	} while (ret);
	return true;
}
1379
1380static int cgroup_test_super(struct super_block *sb, void *data)
1381{
1382 struct cgroup_sb_opts *opts = data;
1383 struct cgroupfs_root *root = sb->s_fs_info;
1384
1385
1386 if (opts->name && strcmp(opts->name, root->name))
1387 return 0;
1388
1389
1390
1391
1392
1393 if ((opts->subsys_bits || opts->none)
1394 && (opts->subsys_bits != root->subsys_bits))
1395 return 0;
1396
1397 return 1;
1398}
1399
1400static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
1401{
1402 struct cgroupfs_root *root;
1403
1404 if (!opts->subsys_bits && !opts->none)
1405 return NULL;
1406
1407 root = kzalloc(sizeof(*root), GFP_KERNEL);
1408 if (!root)
1409 return ERR_PTR(-ENOMEM);
1410
1411 if (!init_root_id(root)) {
1412 kfree(root);
1413 return ERR_PTR(-ENOMEM);
1414 }
1415 init_cgroup_root(root);
1416
1417 root->subsys_bits = opts->subsys_bits;
1418 root->flags = opts->flags;
1419 if (opts->release_agent)
1420 strcpy(root->release_agent_path, opts->release_agent);
1421 if (opts->name)
1422 strcpy(root->name, opts->name);
1423 if (opts->clone_children)
1424 set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags);
1425 return root;
1426}
1427
/*
 * Free a root created by cgroup_root_from_opts(), returning its
 * hierarchy id.  NULL is tolerated.
 */
static void cgroup_drop_root(struct cgroupfs_root *root)
{
	if (!root)
		return;

	BUG_ON(!root->hierarchy_id);
	spin_lock(&hierarchy_id_lock);
	ida_remove(&hierarchy_ida, root->hierarchy_id);
	spin_unlock(&hierarchy_id_lock);
	kfree(root);
}
1439
1440static int cgroup_set_super(struct super_block *sb, void *data)
1441{
1442 int ret;
1443 struct cgroup_sb_opts *opts = data;
1444
1445
1446 if (!opts->new_root)
1447 return -EINVAL;
1448
1449 BUG_ON(!opts->subsys_bits && !opts->none);
1450
1451 ret = set_anon_super(sb, NULL);
1452 if (ret)
1453 return ret;
1454
1455 sb->s_fs_info = opts->new_root;
1456 opts->new_root->sb = sb;
1457
1458 sb->s_blocksize = PAGE_CACHE_SIZE;
1459 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1460 sb->s_magic = CGROUP_SUPER_MAGIC;
1461 sb->s_op = &cgroup_ops;
1462
1463 return 0;
1464}
1465
/* Create the root directory inode/dentry for a new cgroup superblock. */
static int cgroup_get_rootdir(struct super_block *sb)
{
	static const struct dentry_operations cgroup_dops = {
		.d_iput = cgroup_diput,
		.d_delete = cgroup_delete,
	};

	struct inode *inode =
		cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
	struct dentry *dentry;

	if (!inode)
		return -ENOMEM;

	inode->i_fop = &simple_dir_operations;
	inode->i_op = &cgroup_dir_inode_operations;
	/* directories start off with i_nlink == 2 (for "." entry) */
	inc_nlink(inode);
	dentry = d_alloc_root(inode);
	if (!dentry) {
		iput(inode);
		return -ENOMEM;
	}
	sb->s_root = dentry;
	/* for everything else we want ->d_op set */
	sb->s_d_op = &cgroup_dops;
	return 0;
}
1494
/* Mount entry point: find or create the hierarchy described by @data. */
static struct dentry *cgroup_mount(struct file_system_type *fs_type,
				   int flags, const char *unused_dev_name,
				   void *data)
{
	struct cgroup_sb_opts opts;
	struct cgroupfs_root *root;
	int ret = 0;
	struct super_block *sb;
	struct cgroupfs_root *new_root;
	struct inode *inode;

	/* First find the desired set of subsystems */
	mutex_lock(&cgroup_mutex);
	ret = parse_cgroupfs_options(data, &opts);
	mutex_unlock(&cgroup_mutex);
	if (ret)
		goto out_err;

	/*
	 * Allocate a new cgroup root. We may not need it if we're
	 * reusing an existing hierarchy.
	 */
	new_root = cgroup_root_from_opts(&opts);
	if (IS_ERR(new_root)) {
		ret = PTR_ERR(new_root);
		goto drop_modules;
	}
	opts.new_root = new_root;

	/* Locate an existing or create a new sb for this hierarchy */
	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
	if (IS_ERR(sb)) {
		ret = PTR_ERR(sb);
		cgroup_drop_root(opts.new_root);
		goto drop_modules;
	}

	root = sb->s_fs_info;
	BUG_ON(!root);
	if (root == opts.new_root) {
		/* We used the new root structure, so this is a new hierarchy */
		struct list_head tmp_cg_links;
		struct cgroup *root_cgrp = &root->top_cgroup;
		struct cgroupfs_root *existing_root;
		const struct cred *cred;
		int i;

		BUG_ON(sb->s_root != NULL);

		ret = cgroup_get_rootdir(sb);
		if (ret)
			goto drop_new_super;
		inode = sb->s_root->d_inode;

		mutex_lock(&inode->i_mutex);
		mutex_lock(&cgroup_mutex);
		mutex_lock(&cgroup_root_mutex);

		/* a named hierarchy must not clash with an active one */
		ret = -EBUSY;
		if (strlen(root->name))
			for_each_active_root(existing_root)
				if (!strcmp(existing_root->name, root->name))
					goto unlock_drop;

		/*
		 * We're accessing css_set_count without locking
		 * css_set_lock here, but that's OK - it can only be
		 * increased by someone holding cgroup_lock, and
		 * that's us. The worst that can happen is that we
		 * have some link structures left over
		 */
		ret = allocate_cg_links(css_set_count, &tmp_cg_links);
		if (ret)
			goto unlock_drop;

		ret = rebind_subsystems(root, root->subsys_bits);
		if (ret == -EBUSY) {
			free_cg_links(&tmp_cg_links);
			goto unlock_drop;
		}
		/*
		 * There must be no failure case after here, since rebinding
		 * takes care of subsystems' refcounts, which are explicitly
		 * dropped in the failure exit path.
		 */

		/* EBUSY should be the only error here */
		BUG_ON(ret);

		list_add(&root->root_list, &roots);
		root_count++;

		sb->s_root->d_fsdata = root_cgrp;
		root->top_cgroup.dentry = sb->s_root;

		/* Link the top cgroup in this hierarchy into all
		 * the css_set objects */
		write_lock(&css_set_lock);
		for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
			struct hlist_head *hhead = &css_set_table[i];
			struct hlist_node *node;
			struct css_set *cg;

			hlist_for_each_entry(cg, node, hhead, hlist)
				link_css_set(&tmp_cg_links, cg, root_cgrp);
		}
		write_unlock(&css_set_lock);

		free_cg_links(&tmp_cg_links);

		BUG_ON(!list_empty(&root_cgrp->sibling));
		BUG_ON(!list_empty(&root_cgrp->children));
		BUG_ON(root->number_of_cgroups != 1);

		/* populate as full-privilege init, not the mounter */
		cred = override_creds(&init_cred);
		cgroup_populate_dir(root_cgrp);
		revert_creds(cred);
		mutex_unlock(&cgroup_root_mutex);
		mutex_unlock(&cgroup_mutex);
		mutex_unlock(&inode->i_mutex);
	} else {
		/*
		 * We re-used an existing hierarchy - the new root (if
		 * any) is not needed
		 */
		cgroup_drop_root(opts.new_root);
		/* no subsys rebinding happened, so drop the parsed refs */
		drop_parsed_module_refcounts(opts.subsys_bits);
	}

	kfree(opts.release_agent);
	kfree(opts.name);
	return dget(sb->s_root);

 unlock_drop:
	mutex_unlock(&cgroup_root_mutex);
	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&inode->i_mutex);
 drop_new_super:
	deactivate_locked_super(sb);
 drop_modules:
	drop_parsed_module_refcounts(opts.subsys_bits);
 out_err:
	kfree(opts.release_agent);
	kfree(opts.name);
	return ERR_PTR(ret);
}
1643
/* Tear down a hierarchy when its last mount reference goes away. */
static void cgroup_kill_sb(struct super_block *sb) {
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	int ret;
	struct cg_cgroup_link *link;
	struct cg_cgroup_link *saved_link;

	BUG_ON(!root);

	/* only an empty hierarchy (top cgroup alone) can be killed */
	BUG_ON(root->number_of_cgroups != 1);
	BUG_ON(!list_empty(&cgrp->children));
	BUG_ON(!list_empty(&cgrp->sibling));

	mutex_lock(&cgroup_mutex);
	mutex_lock(&cgroup_root_mutex);

	/* Rebind all subsystems back to the default hierarchy */
	ret = rebind_subsystems(root, 0);
	/* Shouldn't be able to fail ... */
	BUG_ON(ret);

	/*
	 * Release all the links from css_sets to this hierarchy's
	 * root cgroup
	 */
	write_lock(&css_set_lock);

	list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
				 cgrp_link_list) {
		list_del(&link->cg_link_list);
		list_del(&link->cgrp_link_list);
		kfree(link);
	}
	write_unlock(&css_set_lock);

	if (!list_empty(&root->root_list)) {
		list_del(&root->root_list);
		root_count--;
	}

	mutex_unlock(&cgroup_root_mutex);
	mutex_unlock(&cgroup_mutex);

	kill_litter_super(sb);
	cgroup_drop_root(root);
}
1690
/* Filesystem type backing every mounted cgroup hierarchy. */
static struct file_system_type cgroup_fs_type = {
	.name = "cgroup",
	.mount = cgroup_mount,
	.kill_sb = cgroup_kill_sb,
};
1696
/* kobject for the cgroup filesystem; created outside this view (presumably at init) */
static struct kobject *cgroup_kobj;
1698
1699static inline struct cgroup *__d_cgrp(struct dentry *dentry)
1700{
1701 return dentry->d_fsdata;
1702}
1703
1704static inline struct cftype *__d_cft(struct dentry *dentry)
1705{
1706 return dentry->d_fsdata;
1707}
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
/**
 * cgroup_path - generate the path of a cgroup
 * @cgrp: the cgroup in question
 * @buf: the buffer to write the path into
 * @buflen: the length of the buffer
 *
 * Builds the path back-to-front from the cgroup's dentry chain, then
 * slides it to the start of @buf.  Returns 0 on success, -ENAMETOOLONG
 * if the path doesn't fit.  The dentry is fetched with
 * rcu_dereference_check() against cgroup_lock, so call with cgroup_mutex
 * held or under RCU protection.
 */
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
{
	char *start;
	struct dentry *dentry = rcu_dereference_check(cgrp->dentry,
						      cgroup_lock_is_held());

	if (!dentry || cgrp == dummytop) {
		/*
		 * Inactive subsystems have no dentry for their root
		 * cgroup
		 */
		strcpy(buf, "/");
		return 0;
	}

	start = buf + buflen;

	/* write components right-to-left, starting from the terminator */
	*--start = '\0';
	for (;;) {
		int len = dentry->d_name.len;

		if ((start -= len) < buf)
			return -ENAMETOOLONG;
		memcpy(start, dentry->d_name.name, len);
		cgrp = cgrp->parent;
		if (!cgrp)
			break;

		dentry = rcu_dereference_check(cgrp->dentry,
					       cgroup_lock_is_held());
		/*
		 * The root's own dentry name supplies the leading "/", so
		 * don't insert a separator just below it.
		 */
		if (!cgrp->parent)
			continue;
		if (--start < buf)
			return -ENAMETOOLONG;
		*start = '/';
	}
	memmove(buf, start, buf + buflen - start);
	return 0;
}
EXPORT_SYMBOL_GPL(cgroup_path);
1759
1760
1761
1762
/*
 * A task together with the cgroup it is being migrated away from; the
 * unit stored in a cgroup_taskset during attach operations.
 */
struct task_and_cgroup {
	struct task_struct *task;
	struct cgroup *cgrp;
};
1767
/*
 * Set of tasks handed to subsystem can_attach/attach/cancel_attach
 * methods: either one task (@single, when tc_array is NULL) or a flex
 * array of task_and_cgroup entries.  @idx is the iteration cursor;
 * @cur_cgrp records the source cgroup of the task last returned by the
 * taskset iterators below.
 */
struct cgroup_taskset {
	struct task_and_cgroup single;
	struct flex_array *tc_array;
	int tc_array_len;
	int idx;
	struct cgroup *cur_cgrp;
};
1775
1776
1777
1778
1779
1780
1781
1782struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
1783{
1784 if (tset->tc_array) {
1785 tset->idx = 0;
1786 return cgroup_taskset_next(tset);
1787 } else {
1788 tset->cur_cgrp = tset->single.cgrp;
1789 return tset->single.task;
1790 }
1791}
1792EXPORT_SYMBOL_GPL(cgroup_taskset_first);
1793
1794
1795
1796
1797
1798
1799
1800
1801struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
1802{
1803 struct task_and_cgroup *tc;
1804
1805 if (!tset->tc_array || tset->idx >= tset->tc_array_len)
1806 return NULL;
1807
1808 tc = flex_array_get(tset->tc_array, tset->idx++);
1809 tset->cur_cgrp = tc->cgrp;
1810 return tc->task;
1811}
1812EXPORT_SYMBOL_GPL(cgroup_taskset_next);
1813
1814
1815
1816
1817
1818
1819
1820
1821
/**
 * cgroup_taskset_cur_cgroup - return the matching cgroup for the current task
 * @tset: taskset of interest
 *
 * Returns the cgroup recorded for the task most recently returned by
 * cgroup_taskset_first()/cgroup_taskset_next().
 */
struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset)
{
	return tset->cur_cgrp;
}
EXPORT_SYMBOL_GPL(cgroup_taskset_cur_cgroup);
1827
1828
1829
1830
1831
1832int cgroup_taskset_size(struct cgroup_taskset *tset)
1833{
1834 return tset->tc_array ? tset->tc_array_len : 1;
1835}
1836EXPORT_SYMBOL_GPL(cgroup_taskset_size);
1837
1838
1839
1840
1841
1842
1843
1844
1845
/*
 * cgroup_task_migrate - move a task from one cgroup to another.
 *
 * @guarantee is set if the caller promises that a new css_set for the
 * task already exists; in that case the existing set is looked up and
 * this function cannot fail.  If not set, this function might sleep and
 * can fail with -ENOMEM.  Must be called with cgroup_mutex and the
 * threadgroup lock held.
 */
static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
			       struct task_struct *tsk, bool guarantee)
{
	struct css_set *oldcg;
	struct css_set *newcg;

	/*
	 * We are synchronized through threadgroup_lock() against PF_EXITING
	 * setting such that we can't race against cgroup_exit() changing the
	 * css_set to init_css_set and dropping the old one.
	 */
	WARN_ON_ONCE(tsk->flags & PF_EXITING);
	oldcg = tsk->cgroups;

	/* locate or allocate a new css_set for this task. */
	if (guarantee) {
		/* we know the css_set we want already exists. */
		struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
		read_lock(&css_set_lock);
		newcg = find_existing_css_set(oldcg, cgrp, template);
		BUG_ON(!newcg);
		get_css_set(newcg);
		read_unlock(&css_set_lock);
	} else {
		might_sleep();
		/* find_css_set will give us newcg already referenced. */
		newcg = find_css_set(oldcg, cgrp);
		if (!newcg)
			return -ENOMEM;
	}

	task_lock(tsk);
	rcu_assign_pointer(tsk->cgroups, newcg);
	task_unlock(tsk);

	/* Update the css_set linked lists if we're using them */
	write_lock(&css_set_lock);
	if (!list_empty(&tsk->cg_list))
		list_move(&tsk->cg_list, &newcg->tasks);
	write_unlock(&css_set_lock);

	/*
	 * We just gained a reference on oldcg by taking it from the task. As
	 * trading it for newcg is protected by cgroup_mutex, we're safe to
	 * drop it here; it will be freed under RCU.
	 */
	put_css_set(oldcg);

	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
	return 0;
}
1897
1898
1899
1900
1901
1902
1903
1904
1905
/**
 * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
 * @cgrp: the cgroup the task is attaching to
 * @tsk: the task to be attached
 *
 * Call with cgroup_mutex and the threadgroup lock held.  Consults every
 * bound subsystem's can_attach() before migrating, and rolls back with
 * cancel_attach() on the subsystems whose can_attach() had already
 * succeeded if anything fails.
 */
int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
	int retval;
	struct cgroup_subsys *ss, *failed_ss = NULL;
	struct cgroup *oldcgrp;
	struct cgroupfs_root *root = cgrp->root;
	struct cgroup_taskset tset = { };

	/* @tsk either already exited or can't exit until the end */
	if (tsk->flags & PF_EXITING)
		return -ESRCH;

	/* Nothing to do if the task is already in that cgroup */
	oldcgrp = task_cgroup_from_root(tsk, root);
	if (cgrp == oldcgrp)
		return 0;

	tset.single.task = tsk;
	tset.single.cgrp = oldcgrp;

	for_each_subsys(root, ss) {
		if (ss->can_attach) {
			retval = ss->can_attach(ss, cgrp, &tset);
			if (retval) {
				/*
				 * Remember on which subsystem the can_attach()
				 * failed, so that we only call cancel_attach()
				 * against the subsystems whose can_attach()
				 * succeeded. (See below)
				 */
				failed_ss = ss;
				goto out;
			}
		}
	}

	retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false);
	if (retval)
		goto out;

	for_each_subsys(root, ss) {
		if (ss->attach)
			ss->attach(ss, cgrp, &tset);
	}

	synchronize_rcu();

	/*
	 * wake up rmdir() waiter. the rmdir should fail since the cgroup
	 * is no longer empty.
	 */
	cgroup_wakeup_rmdir_waiter(cgrp);
out:
	if (retval) {
		for_each_subsys(root, ss) {
			if (ss == failed_ss)
				/*
				 * This subsystem was the one that failed the
				 * can_attach() check earlier, so we don't need
				 * to call cancel_attach() against it or any
				 * remaining subsystems.
				 */
				break;
			if (ss->cancel_attach)
				ss->cancel_attach(ss, cgrp, &tset);
		}
	}
	return retval;
}
1975
1976
1977
1978
1979
1980
1981int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
1982{
1983 struct cgroupfs_root *root;
1984 int retval = 0;
1985
1986 cgroup_lock();
1987 for_each_active_root(root) {
1988 struct cgroup *from_cg = task_cgroup_from_root(from, root);
1989
1990 retval = cgroup_attach_task(from_cg, tsk);
1991 if (retval)
1992 break;
1993 }
1994 cgroup_unlock();
1995
1996 return retval;
1997}
1998EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
1999
2000
2001
2002
2003
2004
2005
/*
 * List node used by cgroup_attach_proc() to remember a prefetched
 * css_set reference, so the later per-thread commit step cannot fail
 * on allocation.
 */
struct cg_list_entry {
	struct css_set *cg;
	struct list_head links;
};
2010
2011static bool css_set_check_fetched(struct cgroup *cgrp,
2012 struct task_struct *tsk, struct css_set *cg,
2013 struct list_head *newcg_list)
2014{
2015 struct css_set *newcg;
2016 struct cg_list_entry *cg_entry;
2017 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
2018
2019 read_lock(&css_set_lock);
2020 newcg = find_existing_css_set(cg, cgrp, template);
2021 read_unlock(&css_set_lock);
2022
2023
2024 if (!newcg)
2025 return false;
2026
2027 list_for_each_entry(cg_entry, newcg_list, links)
2028 if (cg_entry->cg == newcg)
2029 return true;
2030
2031
2032 return false;
2033}
2034
2035
2036
2037
2038
2039static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg,
2040 struct list_head *newcg_list)
2041{
2042 struct css_set *newcg;
2043 struct cg_list_entry *cg_entry;
2044
2045
2046 newcg = find_css_set(cg, cgrp);
2047 if (!newcg)
2048 return -ENOMEM;
2049
2050 cg_entry = kmalloc(sizeof(struct cg_list_entry), GFP_KERNEL);
2051 if (!cg_entry) {
2052 put_css_set(newcg);
2053 return -ENOMEM;
2054 }
2055 cg_entry->cg = newcg;
2056 list_add(&cg_entry->links, newcg_list);
2057 return 0;
2058}
2059
2060
2061
2062
2063
2064
2065
2066
2067
/**
 * cgroup_attach_proc - attach all threads in a threadgroup to a cgroup
 * @cgrp: the cgroup to attach to
 * @leader: the threadgroup leader task_struct of the group to be attached
 *
 * Call holding cgroup_mutex and the threadgroup lock of @leader.  Takes
 * a snapshot of the group into a flex array, checks can_attach() for
 * every bound subsystem, prefetches all needed css_sets, and only then
 * commits the migration (which cannot fail after that point).
 */
static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
{
	int retval, i, group_size;
	struct cgroup_subsys *ss, *failed_ss = NULL;
	/* guaranteed to be initialized later, but the compiler needs this */
	struct css_set *oldcg;
	struct cgroupfs_root *root = cgrp->root;
	/* threadgroup list cursor and array */
	struct task_struct *tsk;
	struct task_and_cgroup *tc;
	struct flex_array *group;
	struct cgroup_taskset tset = { };
	/*
	 * we need to make sure we have css_sets for all the tasks we're
	 * going to move -before- we actually start moving them, so that in
	 * case we get an ENOMEM we can bail out before making any changes.
	 */
	struct list_head newcg_list;
	struct cg_list_entry *cg_entry, *temp_nobe;

	/*
	 * step 0: build an array of all threads in the group - threadgroup
	 * lock prevents new threads from appearing, and if threads exit,
	 * this will just be an over-estimate.
	 */
	group_size = get_nr_threads(leader);
	/* flex_array supports very large thread-groups better than kmalloc. */
	group = flex_array_alloc(sizeof(*tc), group_size, GFP_KERNEL);
	if (!group)
		return -ENOMEM;
	/* pre-allocate so the loop below can fill slots with GFP_ATOMIC */
	retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL);
	if (retval)
		goto out_free_group_list;

	/* prevent changes to the threadgroup list while we take a snapshot. */
	read_lock(&tasklist_lock);
	if (!thread_group_leader(leader)) {
		/*
		 * a race with de_thread from another thread's exec() may
		 * strip us of our leadership; if this happens, there is no
		 * choice but to throw this task away and try again (from
		 * cgroup_procs_write, which retries on -EAGAIN).
		 */
		read_unlock(&tasklist_lock);
		retval = -EAGAIN;
		goto out_free_group_list;
	}

	tsk = leader;
	i = 0;
	do {
		struct task_and_cgroup ent;

		/* @tsk either already exited or can't exit until the end */
		if (tsk->flags & PF_EXITING)
			continue;

		/* as per above, nr_threads may decrease, but not increase. */
		BUG_ON(i >= group_size);
		/*
		 * GFP_ATOMIC is safe here only because of the prealloc
		 * above; no allocation actually happens.
		 */
		ent.task = tsk;
		ent.cgrp = task_cgroup_from_root(tsk, root);
		/* nothing to do if this task is already in the cgroup */
		if (ent.cgrp == cgrp)
			continue;
		retval = flex_array_put(group, i, &ent, GFP_ATOMIC);
		BUG_ON(retval != 0);
		i++;
	} while_each_thread(leader, tsk);
	/* remember the number of threads in the array for later. */
	group_size = i;
	tset.tc_array = group;
	tset.tc_array_len = group_size;
	read_unlock(&tasklist_lock);

	/* methods shouldn't be called if no task is actually migrating */
	retval = 0;
	if (!group_size)
		goto out_free_group_list;

	/*
	 * step 1: check that we can legitimately attach to the cgroup.
	 */
	for_each_subsys(root, ss) {
		if (ss->can_attach) {
			retval = ss->can_attach(ss, cgrp, &tset);
			if (retval) {
				failed_ss = ss;
				goto out_cancel_attach;
			}
		}
	}

	/*
	 * step 2: make sure css_sets exist for all threads to be migrated.
	 * we use find_css_set, which allocates a new one if necessary.
	 */
	INIT_LIST_HEAD(&newcg_list);
	for (i = 0; i < group_size; i++) {
		tc = flex_array_get(group, i);
		oldcg = tc->task->cgroups;

		/* if we don't already have it in the list get a new one */
		if (!css_set_check_fetched(cgrp, tc->task, oldcg,
					   &newcg_list)) {
			retval = css_set_prefetch(cgrp, oldcg, &newcg_list);
			if (retval)
				goto out_list_teardown;
		}
	}

	/*
	 * step 3: now that we're guaranteed success wrt the css_sets, move
	 * all tasks to the new cgroup.  There are no failure cases after
	 * here, so this is the commit point.
	 */
	for (i = 0; i < group_size; i++) {
		tc = flex_array_get(group, i);
		retval = cgroup_task_migrate(cgrp, tc->cgrp, tc->task, true);
		BUG_ON(retval);
	}

	/*
	 * step 4: do subsystem attach callbacks.
	 */
	for_each_subsys(root, ss) {
		if (ss->attach)
			ss->attach(ss, cgrp, &tset);
	}

	/*
	 * step 5: success! and cleanup
	 */
	synchronize_rcu();
	cgroup_wakeup_rmdir_waiter(cgrp);
	retval = 0;
out_list_teardown:
	/* clean up the list of prefetched css_sets. */
	list_for_each_entry_safe(cg_entry, temp_nobe, &newcg_list, links) {
		list_del(&cg_entry->links);
		put_css_set(cg_entry->cg);
		kfree(cg_entry);
	}
out_cancel_attach:
	/* same deal as in cgroup_attach_task */
	if (retval) {
		for_each_subsys(root, ss) {
			if (ss == failed_ss)
				break;
			if (ss->cancel_attach)
				ss->cancel_attach(ss, cgrp, &tset);
		}
	}
out_free_group_list:
	flex_array_free(group);
	return retval;
}
2233
2234
2235
2236
2237
2238
/*
 * Find the task with the given pid in the caller's pid namespace (or use
 * current if @pid is 0), check permissions against its credentials, then
 * attach either the task alone or its whole threadgroup to @cgrp.
 */
static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
{
	struct task_struct *tsk;
	const struct cred *cred = current_cred(), *tcred;
	int ret;

	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;

	if (pid) {
		rcu_read_lock();
		tsk = find_task_by_vpid(pid);
		if (!tsk) {
			rcu_read_unlock();
			cgroup_unlock();
			return -ESRCH;
		}
		if (threadgroup) {
			/*
			 * RCU protects this access, since tsk was found in the
			 * tid map.  A race with de_thread may cause
			 * group_leader to stop being the leader, but
			 * cgroup_attach_proc will detect it later.
			 */
			tsk = tsk->group_leader;
		}
		/*
		 * even if we're attaching all tasks in the thread group, we
		 * only need to check permissions on one of them.
		 */
		tcred = __task_cred(tsk);
		if (cred->euid &&
		    cred->euid != tcred->uid &&
		    cred->euid != tcred->suid) {
			rcu_read_unlock();
			cgroup_unlock();
			return -EACCES;
		}
		get_task_struct(tsk);
		rcu_read_unlock();
	} else {
		if (threadgroup)
			tsk = current->group_leader;
		else
			tsk = current;
		get_task_struct(tsk);
	}

	threadgroup_lock(tsk);

	if (threadgroup)
		ret = cgroup_attach_proc(cgrp, tsk);
	else
		ret = cgroup_attach_task(cgrp, tsk);

	threadgroup_unlock(tsk);

	put_task_struct(tsk);
	cgroup_unlock();
	return ret;
}
2300
/* "tasks" file write handler: attach one task (0 means current) to @cgrp. */
static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
{
	return attach_task_by_pid(cgrp, pid, false);
}
2305
2306static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
2307{
2308 int ret;
2309 do {
2310
2311
2312
2313
2314
2315 ret = attach_task_by_pid(cgrp, tgid, true);
2316 } while (ret == -EAGAIN);
2317 return ret;
2318}
2319
2320
2321
2322
2323
2324
2325
2326
/**
 * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
 * @cgrp: the cgroup to be checked for liveness
 *
 * On success, returns true with cgroup_mutex held; the lock should later
 * be released with cgroup_unlock().  On failure returns false with no
 * lock held.
 */
bool cgroup_lock_live_group(struct cgroup *cgrp)
{
	mutex_lock(&cgroup_mutex);
	if (cgroup_is_removed(cgrp)) {
		mutex_unlock(&cgroup_mutex);
		return false;
	}
	return true;
}
EXPORT_SYMBOL_GPL(cgroup_lock_live_group);
2337
/*
 * "release_agent" write handler: store the new path on the hierarchy
 * root.  Length is validated before any lock is taken; the copy itself
 * is serialized by cgroup_root_mutex.
 */
static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
				      const char *buffer)
{
	BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
	if (strlen(buffer) >= PATH_MAX)
		return -EINVAL;
	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;
	mutex_lock(&cgroup_root_mutex);
	strcpy(cgrp->root->release_agent_path, buffer);
	mutex_unlock(&cgroup_root_mutex);
	cgroup_unlock();
	return 0;
}
2352
/* "release_agent" read handler: emit the stored path plus a newline. */
static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
				     struct seq_file *seq)
{
	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;
	seq_puts(seq, cgrp->root->release_agent_path);
	seq_putc(seq, '\n');
	cgroup_unlock();
	return 0;
}
2363
2364
/* A buffer size big enough for numbers or short strings */
#define CGROUP_LOCAL_BUFFER_SIZE 64
2366
/*
 * Write handler for cftypes with a write_u64() or write_s64() method:
 * copy the user buffer, parse it as a (un)signed integer and pass the
 * value on.  Returns the byte count on success, or a negative errno.
 */
static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
				struct file *file,
				const char __user *userbuf,
				size_t nbytes, loff_t *unused_ppos)
{
	char buffer[CGROUP_LOCAL_BUFFER_SIZE];
	int retval = 0;
	char *end;

	if (!nbytes)
		return -EINVAL;
	if (nbytes >= sizeof(buffer))
		return -E2BIG;
	if (copy_from_user(buffer, userbuf, nbytes))
		return -EFAULT;

	buffer[nbytes] = 0; /* nul-terminate before parsing */
	if (cft->write_u64) {
		u64 val = simple_strtoull(strstrip(buffer), &end, 0);
		if (*end)
			return -EINVAL;
		retval = cft->write_u64(cgrp, cft, val);
	} else {
		s64 val = simple_strtoll(strstrip(buffer), &end, 0);
		if (*end)
			return -EINVAL;
		retval = cft->write_s64(cgrp, cft, val);
	}
	if (!retval)
		retval = nbytes;
	return retval;
}
2399
/*
 * Write handler for cftypes with a write_string() method: copy and strip
 * the user buffer, using a heap buffer when it exceeds the small local
 * one (up to cft->max_write_len).  Returns byte count or negative errno.
 */
static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
				   struct file *file,
				   const char __user *userbuf,
				   size_t nbytes, loff_t *unused_ppos)
{
	char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
	int retval = 0;
	size_t max_bytes = cft->max_write_len;
	char *buffer = local_buffer;

	if (!max_bytes)
		max_bytes = sizeof(local_buffer) - 1;
	if (nbytes >= max_bytes)
		return -E2BIG;
	/* Allocate a dynamic buffer if we need one */
	if (nbytes >= sizeof(local_buffer)) {
		buffer = kmalloc(nbytes + 1, GFP_KERNEL);
		if (buffer == NULL)
			return -ENOMEM;
	}
	if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
		retval = -EFAULT;
		goto out;
	}

	buffer[nbytes] = 0; /* nul-terminate */
	retval = cft->write_string(cgrp, cft, strstrip(buffer));
	if (!retval)
		retval = nbytes;
out:
	if (buffer != local_buffer)
		kfree(buffer);
	return retval;
}
2434
/*
 * Write dispatcher for cgroup control files: route to whichever write
 * method the cftype implements, checked in priority order.
 */
static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
				 size_t nbytes, loff_t *ppos)
{
	struct cftype *cft = __d_cft(file->f_dentry);
	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);

	if (cgroup_is_removed(cgrp))
		return -ENODEV;
	if (cft->write)
		return cft->write(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->write_u64 || cft->write_s64)
		return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->write_string)
		return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->trigger) {
		/* trigger files ignore the written data entirely */
		int ret = cft->trigger(cgrp, (unsigned int)cft->private);
		return ret ? ret : nbytes;
	}
	return -EINVAL;
}
2455
2456static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
2457 struct file *file,
2458 char __user *buf, size_t nbytes,
2459 loff_t *ppos)
2460{
2461 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2462 u64 val = cft->read_u64(cgrp, cft);
2463 int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
2464
2465 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2466}
2467
2468static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
2469 struct file *file,
2470 char __user *buf, size_t nbytes,
2471 loff_t *ppos)
2472{
2473 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2474 s64 val = cft->read_s64(cgrp, cft);
2475 int len = sprintf(tmp, "%lld\n", (long long) val);
2476
2477 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2478}
2479
/*
 * Read dispatcher for cgroup control files: route to whichever read
 * method the cftype implements, checked in priority order.
 */
static ssize_t cgroup_file_read(struct file *file, char __user *buf,
				size_t nbytes, loff_t *ppos)
{
	struct cftype *cft = __d_cft(file->f_dentry);
	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);

	if (cgroup_is_removed(cgrp))
		return -ENODEV;

	if (cft->read)
		return cft->read(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->read_u64)
		return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->read_s64)
		return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
	return -EINVAL;
}
2497
2498
2499
2500
2501
2502
/*
 * Per-open state for seq_file-backed control files (read_map /
 * read_seq_string); stashed in the seq_file's private pointer.
 */
struct cgroup_seqfile_state {
	struct cftype *cft;
	struct cgroup *cgroup;
};
2507
2508static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
2509{
2510 struct seq_file *sf = cb->state;
2511 return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
2512}
2513
/* seq_file show callback: delegate to read_map() or read_seq_string(). */
static int cgroup_seqfile_show(struct seq_file *m, void *arg)
{
	struct cgroup_seqfile_state *state = m->private;
	struct cftype *cft = state->cft;
	if (cft->read_map) {
		struct cgroup_map_cb cb = {
			.fill = cgroup_map_add,
			.state = m,
		};
		return cft->read_map(state->cgroup, cft, &cb);
	}
	return cft->read_seq_string(state->cgroup, cft, m);
}
2527
2528static int cgroup_seqfile_release(struct inode *inode, struct file *file)
2529{
2530 struct seq_file *seq = file->private_data;
2531 kfree(seq->private);
2532 return single_release(inode, file);
2533}
2534
/* file_operations used once a control file has been rerouted to seq_file */
static const struct file_operations cgroup_seqfile_operations = {
	.read = seq_read,
	.write = cgroup_file_write,
	.llseek = seq_lseek,
	.release = cgroup_seqfile_release,
};
2541
/*
 * Open a cgroup control file.  Files with map/seq-string readers are
 * rerouted to the seq_file operations with freshly allocated per-open
 * state; otherwise defer to the cftype's own open() if present.
 */
static int cgroup_file_open(struct inode *inode, struct file *file)
{
	int err;
	struct cftype *cft;

	err = generic_file_open(inode, file);
	if (err)
		return err;
	cft = __d_cft(file->f_dentry);

	if (cft->read_map || cft->read_seq_string) {
		struct cgroup_seqfile_state *state =
			kzalloc(sizeof(*state), GFP_USER);
		if (!state)
			return -ENOMEM;
		state->cft = cft;
		state->cgroup = __d_cgrp(file->f_dentry->d_parent);
		/* must swap f_op before single_open() wires up the seq_file */
		file->f_op = &cgroup_seqfile_operations;
		err = single_open(file, cgroup_seqfile_show, state);
		if (err < 0)
			kfree(state);
	} else if (cft->open)
		err = cft->open(inode, file);
	else
		err = 0;

	return err;
}
2570
2571static int cgroup_file_release(struct inode *inode, struct file *file)
2572{
2573 struct cftype *cft = __d_cft(file->f_dentry);
2574 if (cft->release)
2575 return cft->release(inode, file);
2576 return 0;
2577}
2578
2579
2580
2581
/*
 * cgroup_rename - Only allow simple rename of directories in place.
 */
static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
			 struct inode *new_dir, struct dentry *new_dentry)
{
	if (!S_ISDIR(old_dentry->d_inode->i_mode))
		return -ENOTDIR;
	if (new_dentry->d_inode)
		return -EEXIST;
	if (old_dir != new_dir)
		return -EIO;
	return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
}
2593
/* default file_operations for regular cgroup control files */
static const struct file_operations cgroup_file_operations = {
	.read = cgroup_file_read,
	.write = cgroup_file_write,
	.llseek = generic_file_llseek,
	.open = cgroup_file_open,
	.release = cgroup_file_release,
};
2601
/* inode_operations for cgroup hierarchy directories */
static const struct inode_operations cgroup_dir_inode_operations = {
	.lookup = cgroup_lookup,
	.mkdir = cgroup_mkdir,
	.rmdir = cgroup_rmdir,
	.rename = cgroup_rename,
};
2608
/*
 * Directory lookup: control files/dirs are created explicitly, so any
 * name not already in the dcache simply gets a negative dentry.
 */
static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
	if (dentry->d_name.len > NAME_MAX)
		return ERR_PTR(-ENAMETOOLONG);
	d_add(dentry, NULL);
	return NULL;
}
2616
2617
2618
2619
/*
 * Check if a file is a control file
 */
static inline struct cftype *__file_cft(struct file *file)
{
	if (file->f_dentry->d_inode->i_fop != &cgroup_file_operations)
		return ERR_PTR(-EINVAL);
	return __d_cft(file->f_dentry);
}
2626
/*
 * Allocate an inode of @mode for @dentry on @sb and instantiate the
 * dentry.  Directories come back with their i_mutex held (nested CHILD
 * class) so the caller can populate them without racing another mkdir.
 */
static int cgroup_create_file(struct dentry *dentry, umode_t mode,
			      struct super_block *sb)
{
	struct inode *inode;

	if (!dentry)
		return -ENOENT;
	if (dentry->d_inode)
		return -EEXIST;

	inode = cgroup_new_inode(mode, sb);
	if (!inode)
		return -ENOMEM;

	if (S_ISDIR(mode)) {
		inode->i_op = &cgroup_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;

		/* start off with i_nlink == 2 (for "." entry) */
		inc_nlink(inode);

		/* start with the directory inode held, so that we can
		 * populate it without racing with another mkdir */
		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
	} else if (S_ISREG(mode)) {
		inode->i_size = 0;
		inode->i_fop = &cgroup_file_operations;
	}
	d_instantiate(dentry, inode);
	dget(dentry); /* Extra count - pin the dentry in core */
	return 0;
}
2659
2660
2661
2662
2663
2664
2665
2666
/*
 * cgroup_create_dir - create a directory for an object.
 * @cgrp: the cgroup we create the directory for. It must have a valid
 *        ->parent field. And we are going to fill its ->dentry field.
 * @dentry: dentry of the new cgroup
 * @mode: mode to set on new directory.
 */
static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
			     umode_t mode)
{
	struct dentry *parent;
	int error = 0;

	parent = cgrp->parent->dentry;
	error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
	if (!error) {
		dentry->d_fsdata = cgrp;
		inc_nlink(parent->d_inode);
		rcu_assign_pointer(cgrp->dentry, dentry);
		dget(dentry);
	}
	dput(dentry);

	return error;
}
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695static umode_t cgroup_file_mode(const struct cftype *cft)
2696{
2697 umode_t mode = 0;
2698
2699 if (cft->mode)
2700 return cft->mode;
2701
2702 if (cft->read || cft->read_u64 || cft->read_s64 ||
2703 cft->read_map || cft->read_seq_string)
2704 mode |= S_IRUGO;
2705
2706 if (cft->write || cft->write_u64 || cft->write_s64 ||
2707 cft->write_string || cft->trigger)
2708 mode |= S_IWUSR;
2709
2710 return mode;
2711}
2712
/*
 * Create a control file for @cft in @cgrp's directory.  The file name is
 * "<subsys>.<cft-name>" unless @subsys is NULL or the hierarchy was
 * mounted with the noprefix option.  Caller must hold the directory's
 * i_mutex.
 */
int cgroup_add_file(struct cgroup *cgrp,
		       struct cgroup_subsys *subsys,
		       const struct cftype *cft)
{
	struct dentry *dir = cgrp->dentry;
	struct dentry *dentry;
	int error;
	umode_t mode;

	char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
	if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
		strcpy(name, subsys->name);
		strcat(name, ".");
	}
	strcat(name, cft->name);
	BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
	dentry = lookup_one_len(name, dir, strlen(name));
	if (!IS_ERR(dentry)) {
		mode = cgroup_file_mode(cft);
		error = cgroup_create_file(dentry, mode | S_IFREG,
						cgrp->root->sb);
		if (!error)
			dentry->d_fsdata = (void *)cft;
		dput(dentry);
	} else
		error = PTR_ERR(dentry);
	return error;
}
EXPORT_SYMBOL_GPL(cgroup_add_file);
2742
2743int cgroup_add_files(struct cgroup *cgrp,
2744 struct cgroup_subsys *subsys,
2745 const struct cftype cft[],
2746 int count)
2747{
2748 int i, err;
2749 for (i = 0; i < count; i++) {
2750 err = cgroup_add_file(cgrp, subsys, &cft[i]);
2751 if (err)
2752 return err;
2753 }
2754 return 0;
2755}
2756EXPORT_SYMBOL_GPL(cgroup_add_files);
2757
2758
2759
2760
2761
2762
2763
/**
 * cgroup_task_count - count the number of tasks in a cgroup.
 * @cgrp: the cgroup in question
 *
 * Return the number of tasks in the cgroup, summed from the refcounts of
 * all css_sets linked to it.
 */
int cgroup_task_count(const struct cgroup *cgrp)
{
	int count = 0;
	struct cg_cgroup_link *link;

	read_lock(&css_set_lock);
	list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
		count += atomic_read(&link->cg->refcount);
	}
	read_unlock(&css_set_lock);
	return count;
}
2776
2777
2778
2779
2780
/*
 * Advance a list_head iterator.  The iterator should be positioned at
 * the start of a css_set
 */
static void cgroup_advance_iter(struct cgroup *cgrp,
				struct cgroup_iter *it)
{
	struct list_head *l = it->cg_link;
	struct cg_cgroup_link *link;
	struct css_set *cg;

	/* Advance to the next non-empty css_set; NULL link ends the walk */
	do {
		l = l->next;
		if (l == &cgrp->css_sets) {
			it->cg_link = NULL;
			return;
		}
		link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
		cg = link->cg;
	} while (list_empty(&cg->tasks));
	it->cg_link = l;
	it->task = cg->tasks.next;
}
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
/*
 * To reduce the fork() overhead for systems that are not actually using
 * their cgroups capability, we don't maintain the lists running through
 * each css_set to its tasks until we see the list actually used - in
 * other words after the first call to cgroup_iter_start().
 */
static void cgroup_enable_task_cg_lists(void)
{
	struct task_struct *p, *g;
	write_lock(&css_set_lock);
	use_task_css_set_links = 1;
	do_each_thread(g, p) {
		task_lock(p);
		/*
		 * We should check if the process is exiting, otherwise
		 * it will race with cgroup_exit() in that the list
		 * entry won't be deleted though the process has exited.
		 */
		if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
			list_add(&p->cg_list, &p->cgroups->tasks);
		task_unlock(p);
	} while_each_thread(g, p);
	write_unlock(&css_set_lock);
}
2829
/*
 * Begin iterating the tasks of @cgrp.  Returns with css_set_lock held
 * for reading; the matching cgroup_iter_end() drops it.
 */
void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
	__acquires(css_set_lock)
{
	/*
	 * The first time anyone tries to iterate across a cgroup,
	 * we need to enable the list linking each css_set to its
	 * tasks, and fix up all existing tasks.
	 */
	if (!use_task_css_set_links)
		cgroup_enable_task_cg_lists();

	read_lock(&css_set_lock);
	it->cg_link = &cgrp->css_sets;
	cgroup_advance_iter(cgrp, it);
}
2845
/*
 * Return the next task in the iteration, or NULL when exhausted.
 * Caller holds css_set_lock via cgroup_iter_start().
 */
struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
				     struct cgroup_iter *it)
{
	struct task_struct *res;
	struct list_head *l = it->task;
	struct cg_cgroup_link *link;

	/* If the iterator cg is NULL, we have no tasks */
	if (!it->cg_link)
		return NULL;
	res = list_entry(l, struct task_struct, cg_list);
	/* Advance iterator to find next entry */
	l = l->next;
	link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
	if (l == &link->cg->tasks) {
		/* We reached the end of this task list - move on to
		 * the next cg_cgroup_link */
		cgroup_advance_iter(cgrp, it);
	} else {
		it->task = l;
	}
	return res;
}
2869
/* Finish an iteration started with cgroup_iter_start(); drops css_set_lock. */
void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
	__releases(css_set_lock)
{
	read_unlock(&css_set_lock);
}
2875
2876static inline int started_after_time(struct task_struct *t1,
2877 struct timespec *time,
2878 struct task_struct *t2)
2879{
2880 int start_diff = timespec_compare(&t1->start_time, time);
2881 if (start_diff > 0) {
2882 return 1;
2883 } else if (start_diff < 0) {
2884 return 0;
2885 } else {
2886
2887
2888
2889
2890
2891
2892
2893
2894 return t1 > t2;
2895 }
2896}
2897
2898
2899
2900
2901
2902
/*
 * ptr_heap comparison callback for cgroup_scan_tasks(): returns whether
 * task p1 started after task p2, with ties broken as in
 * started_after_time().
 */
static inline int started_after(void *p1, void *p2)
{
	struct task_struct *t1 = p1;
	struct task_struct *t2 = p2;
	return started_after_time(t1, &t2->start_time, t2);
}
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
/**
 * cgroup_scan_tasks - iterate though all the tasks in a cgroup
 * @scan: struct cgroup_scanner containing arguments for the scan
 *
 * Arguments include pointers to callback functions test_task() and
 * process_task().
 * Iterate through all the tasks in a cgroup, calling test_task() for each,
 * and if it returns true, call process_task() for it also.
 * The test_task pointer may be NULL, meaning always true (select all
 * tasks).
 * Effectively duplicates cgroup_iter_{start,next,end}() but does not
 * hold css_set_lock across the call to process_task().
 *
 * Note that test_task() may be called with locks held, and may in some
 * situations be called multiple times for the same task, so it should be
 * cheap.
 * If the heap pointer in the struct cgroup_scanner is non-NULL, a heap
 * has been pre-allocated and will be used for heap operations (and its
 * "gt" member will be overwritten), else a temporary heap will be used
 * (allocation of which may cause this function to fail).
 */
int cgroup_scan_tasks(struct cgroup_scanner *scan)
{
	int retval, i;
	struct cgroup_iter it;
	struct task_struct *p, *dropped;
	/* Never dereference latest_task, since it's not refcounted */
	struct task_struct *latest_task = NULL;
	struct ptr_heap tmp_heap;
	struct ptr_heap *heap;
	struct timespec latest_time = { 0, 0 };

	if (scan->heap) {
		/* The caller supplied our heap and pre-allocated its memory */
		heap = scan->heap;
		heap->gt = &started_after;
	} else {
		/* We need to allocate our own heap memory */
		heap = &tmp_heap;
		retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
		if (retval)
			/* cannot allocate the heap */
			return retval;
	}

 again:
	/*
	 * Scan tasks in the cgroup, using the scanner's "test_task" callback
	 * to determine which are of interest, and gather those into the heap
	 * so that process_task() can run without css_set_lock held.
	 * The heap is sorted by descending task start time.  If it fills up,
	 * tasks that started later are dropped, and a following pass only
	 * considers tasks that started after the latest task processed in
	 * the previous pass - this guarantees forward progress and that no
	 * task is missed.
	 */
	heap->size = 0;
	cgroup_iter_start(scan->cg, &it);
	while ((p = cgroup_iter_next(scan->cg, &it))) {
		/*
		 * Only affect tasks that qualify per the caller's callback,
		 * if he provided one
		 */
		if (scan->test_task && !scan->test_task(p, scan))
			continue;
		/*
		 * Only process tasks that started after the last task
		 * we processed
		 */
		if (!started_after_time(p, &latest_time, latest_task))
			continue;
		dropped = heap_insert(heap, p);
		if (dropped == NULL) {
			/*
			 * The new task was inserted; the heap wasn't
			 * previously full
			 */
			get_task_struct(p);
		} else if (dropped != p) {
			/*
			 * The new task was inserted, and pushed out a
			 * different task
			 */
			get_task_struct(p);
			put_task_struct(dropped);
		}
		/*
		 * Else the new task was newer than anything already in
		 * the heap and wasn't inserted
		 */
	}
	cgroup_iter_end(scan->cg, &it);

	if (heap->size) {
		for (i = 0; i < heap->size; i++) {
			struct task_struct *q = heap->ptrs[i];
			if (i == 0) {
				latest_time = q->start_time;
				latest_task = q;
			}
			/* Process the task per the caller's callback */
			scan->process_task(q, scan);
			put_task_struct(q);
		}
		/*
		 * If we had to process any tasks at all, scan again
		 * in case some of them were in the middle of forking
		 * children that didn't get processed.
		 * Not the most efficient way to do it, but it avoids
		 * having to take callback_mutex in the fork path
		 */
		goto again;
	}
	if (heap == &tmp_heap)
		heap_free(&tmp_heap);
	return 0;
}
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
/*
 * The following two functions "fix" the issue where there are more pids
 * than kmalloc will give memory for; in such cases, we use vmalloc/vfree.
 * TODO: replace with a kernel-wide solution to this problem
 */
#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
static void *pidlist_allocate(int count)
{
	if (PIDLIST_TOO_LARGE(count))
		return vmalloc(count * sizeof(pid_t));
	else
		return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
}
/* Free a buffer obtained from pidlist_allocate(), whichever pool it came from. */
static void pidlist_free(void *p)
{
	if (is_vmalloc_addr(p)) {
		vfree(p);
		return;
	}
	kfree(p);
}
/*
 * Resize a buffer obtained from pidlist_allocate().  If the allocation
 * fails, the old buffer remains valid either way.
 *
 * NOTE(review): the vmalloc path copies @newcount elements out of the
 * old buffer, so this is only safe when shrinking (its caller,
 * pidlist_uniq(), only ever shrinks) - confirm before reusing to grow.
 */
static void *pidlist_resize(void *p, int newcount)
{
	void *newlist;

	if (is_vmalloc_addr(p)) {
		newlist = vmalloc(newcount * sizeof(pid_t));
		if (!newlist)
			return NULL;
		memcpy(newlist, p, newcount * sizeof(pid_t));
		vfree(p);
	} else {
		newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL);
	}
	return newlist;
}
3081
3082
3083
3084
3085
3086
3087
3088
/*
 * pidlist_uniq - strip out all duplicate entries from a sorted pidlist.
 * If the new list is considerably smaller than the old one, realloc it
 * (a failed shrink is harmless - we just keep the bigger buffer).
 * Returns the number of unique elements; *p may be updated to point at
 * the reallocated array.
 */
#define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new))
static int pidlist_uniq(pid_t **p, int length)
{
	int src, dest = 1;
	pid_t *list = *p;
	pid_t *newlist;

	/*
	 * the 0th element is trivially unique, so src starts at 1; lists of
	 * length 0 or 1 need no work at all.
	 */
	if (length == 0 || length == 1)
		return length;
	/* src scans forward; dest points at the next unique slot */
	for (src = 1; src < length; src++) {
		/* find next unique element */
		while (list[src] == list[src-1]) {
			src++;
			if (src == length)
				goto after;
		}
		/* dest always points to where the next unique element goes */
		list[dest] = list[src];
		dest++;
	}
after:
	/*
	 * if the length difference is large enough, shrink the buffer to
	 * save memory; on failure we just stay with what we've got.
	 */
	if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
		newlist = pidlist_resize(list, dest);
		if (newlist)
			*p = newlist;
	}
	return dest;
}
3127
/* sort() comparator: orders pids numerically ascending (returns a - b). */
static int cmppid(const void *a, const void *b)
{
	pid_t x = *(const pid_t *)a;
	pid_t y = *(const pid_t *)b;

	return x - y;
}
3132
3133
3134
3135
3136
3137
3138
/*
 * Find the appropriate pidlist for our purpose (given procs vs tasks
 * mode, and the cgroup and pid namespace of the caller), creating a new
 * one if needed.  Returns with the pidlist's mutex held for writing;
 * NULL on allocation failure.
 */
static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
						  enum cgroup_filetype type)
{
	struct cgroup_pidlist *l;
	/* don't need task_nsproxy() if we're looking at ourself */
	struct pid_namespace *ns = current->nsproxy->pid_ns;

	/*
	 * We can't drop the pidlist_mutex before taking the l->mutex in case
	 * the last ref-holder is trying to remove l from the list at the
	 * same time.  Holding the pidlist_mutex precludes somebody taking
	 * whichever list we find out from under us.
	 */
	mutex_lock(&cgrp->pidlist_mutex);
	list_for_each_entry(l, &cgrp->pidlists, links) {
		if (l->key.type == type && l->key.ns == ns) {
			/* make sure l doesn't vanish out from under us */
			down_write(&l->mutex);
			mutex_unlock(&cgrp->pidlist_mutex);
			return l;
		}
	}
	/* entry not found; create a new one */
	l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
	if (!l) {
		mutex_unlock(&cgrp->pidlist_mutex);
		return l;
	}
	init_rwsem(&l->mutex);
	down_write(&l->mutex);
	l->key.type = type;
	l->key.ns = get_pid_ns(ns);
	l->use_count = 0; /* don't increment here */
	l->list = NULL;
	l->owner = cgrp;
	list_add(&l->links, &cgrp->pidlists);
	mutex_unlock(&cgrp->pidlist_mutex);
	return l;
}
3178
3179
3180
3181
/*
 * Load a cgroup's pidarray with either procs' tgids or tasks' pids
 */
static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
			      struct cgroup_pidlist **lp)
{
	pid_t *array;
	int length;
	int pid, n = 0; /* used for populating the array */
	struct cgroup_iter it;
	struct task_struct *tsk;
	struct cgroup_pidlist *l;

	/*
	 * If cgroup gets more users after we read count, we won't have
	 * enough space - tough.  This race is indistinguishable to the
	 * caller from the case that the additional cgroup users didn't
	 * show up until sometime later on.
	 */
	length = cgroup_task_count(cgrp);
	array = pidlist_allocate(length);
	if (!array)
		return -ENOMEM;
	/* now, populate the array */
	cgroup_iter_start(cgrp, &it);
	while ((tsk = cgroup_iter_next(cgrp, &it))) {
		if (unlikely(n == length))
			break;
		/* get tgid or pid for procs or tasks file respectively */
		if (type == CGROUP_FILE_PROCS)
			pid = task_tgid_vnr(tsk);
		else
			pid = task_pid_vnr(tsk);
		if (pid > 0) /* make sure to only use valid results */
			array[n++] = pid;
	}
	cgroup_iter_end(cgrp, &it);
	length = n;
	/* now sort & (if procs) strip out duplicates */
	sort(array, length, sizeof(pid_t), cmppid, NULL);
	if (type == CGROUP_FILE_PROCS)
		length = pidlist_uniq(&array, length);
	l = cgroup_pidlist_find(cgrp, type);
	if (!l) {
		pidlist_free(array);
		return -ENOMEM;
	}
	/* store array, freeing old if necessary - lock already held */
	pidlist_free(l->list);
	l->list = array;
	l->length = length;
	l->use_count++;
	up_write(&l->mutex);
	*lp = l;
	return 0;
}
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
/**
 * cgroupstats_build - build and fill cgroupstats
 * @stats: cgroupstats to fill information into
 * @dentry: A dentry entry belonging to the cgroup for which stats have
 * been requested.
 *
 * Counts, per basic scheduler state, the tasks attached to the cgroup
 * behind @dentry so taskstats can export the numbers to user space.
 * Returns 0 on success, -EINVAL if @dentry is not a cgroup directory.
 */
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
{
	int ret = -EINVAL;
	struct cgroup *cgrp;
	struct cgroup_iter it;
	struct task_struct *tsk;

	/*
	 * Validate dentry by checking the superblock operations,
	 * and make sure it's a directory.
	 */
	if (dentry->d_sb->s_op != &cgroup_ops ||
	    !S_ISDIR(dentry->d_inode->i_mode))
		goto err;

	ret = 0;
	cgrp = dentry->d_fsdata;

	cgroup_iter_start(cgrp, &it);
	while ((tsk = cgroup_iter_next(cgrp, &it))) {
		switch (tsk->state) {
		case TASK_RUNNING:
			stats->nr_running++;
			break;
		case TASK_INTERRUPTIBLE:
			stats->nr_sleeping++;
			break;
		case TASK_UNINTERRUPTIBLE:
			stats->nr_uninterruptible++;
			break;
		case TASK_STOPPED:
			stats->nr_stopped++;
			break;
		default:
			if (delayacct_is_task_waiting_on_io(tsk))
				stats->nr_io_wait++;
			break;
		}
	}
	cgroup_iter_end(cgrp, &it);

err:
	return ret;
}
3289
3290
3291
3292
3293
3294
3295
3296
/*
 * seq_file .start for pidlist files.  *pos is interpreted as a pid value
 * rather than an array index, so a restarted read stays correct even if
 * entries were removed from the list between reads.  Takes l->mutex for
 * read; cgroup_pidlist_stop() releases it.
 */
static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
{
	/*
	 * Initially we receive a position value that corresponds to
	 * one more than the last pid shown (or 0 on the first call or
	 * after a seek to the start). Use a binary-search to find the
	 * next pid to display, if any.
	 */
	struct cgroup_pidlist *l = s->private;
	int index = 0, pid = *pos;
	int *iter;

	down_read(&l->mutex);
	if (pid) {
		int end = l->length;
		/* binary search for the first entry >= pid */
		while (index < end) {
			int mid = (index + end) / 2;
			if (l->list[mid] == pid) {
				index = mid;
				break;
			} else if (l->list[mid] <= pid)
				index = mid + 1;
			else
				end = mid;
		}
	}
	/* If we're off the end of the array, we're done */
	if (index >= l->length)
		return NULL;
	/* Update the abstract position to be the actual pid that we found */
	iter = l->list + index;
	*pos = *iter;
	return iter;
}
3332
3333static void cgroup_pidlist_stop(struct seq_file *s, void *v)
3334{
3335 struct cgroup_pidlist *l = s->private;
3336 up_read(&l->mutex);
3337}
3338
3339static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
3340{
3341 struct cgroup_pidlist *l = s->private;
3342 pid_t *p = v;
3343 pid_t *end = l->list + l->length;
3344
3345
3346
3347
3348 p++;
3349 if (p >= end) {
3350 return NULL;
3351 } else {
3352 *pos = *p;
3353 return p;
3354 }
3355}
3356
/* seq_file .show: emit one pid per line. */
static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
	int pid = *(int *)v;

	return seq_printf(s, "%d\n", pid);
}
3361
3362
3363
3364
3365
/*
 * seq_file operations for iterating a sorted pidlist; shared by the
 * "tasks" and "cgroup.procs" files.
 */
static const struct seq_operations cgroup_pidlist_seq_operations = {
	.start = cgroup_pidlist_start,
	.stop = cgroup_pidlist_stop,
	.next = cgroup_pidlist_next,
	.show = cgroup_pidlist_show,
};
3372
/*
 * Drop one use_count reference on @l; the last put unlinks it from its
 * owning cgroup and frees the array, the pid namespace ref and @l itself.
 */
static void cgroup_release_pid_array(struct cgroup_pidlist *l)
{
	/*
	 * the case where we're the last user of this particular pidlist will
	 * have us remove it from the cgroup's list, which entails taking the
	 * mutex. since in pidlist_find the pidlist->lock depends on cgroup->
	 * pidlist_mutex, we have to take pidlist_mutex first.
	 */
	mutex_lock(&l->owner->pidlist_mutex);
	down_write(&l->mutex);
	BUG_ON(!l->use_count);
	if (!--l->use_count) {
		/* we're the last user if refcount is 0; remove and free */
		list_del(&l->links);
		mutex_unlock(&l->owner->pidlist_mutex);
		pidlist_free(l->list);
		put_pid_ns(l->key.ns);
		up_write(&l->mutex);
		kfree(l);
		return;
	}
	mutex_unlock(&l->owner->pidlist_mutex);
	up_write(&l->mutex);
}
3397
3398static int cgroup_pidlist_release(struct inode *inode, struct file *file)
3399{
3400 struct cgroup_pidlist *l;
3401 if (!(file->f_mode & FMODE_READ))
3402 return 0;
3403
3404
3405
3406
3407 l = ((struct seq_file *)file->private_data)->private;
3408 cgroup_release_pid_array(l);
3409 return seq_release(inode, file);
3410}
3411
/*
 * file_operations installed by cgroup_pidlist_open(); reading goes
 * through the seq_file machinery, writing attaches/moves tasks.
 */
static const struct file_operations cgroup_pidlist_operations = {
	.read = seq_read,
	.llseek = seq_lseek,
	.write = cgroup_file_write,
	.release = cgroup_pidlist_release,
};
3418
3419
3420
3421
3422
3423
3424
/*
 * The following functions handle opens on a file that displays a pidlist
 * (tasks or procs). Prepare an array of the process/thread IDs of whoever's
 * in the cgroup.
 */
static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
{
	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
	struct cgroup_pidlist *l;
	int retval;

	/* Nothing to do for write-only files */
	if (!(file->f_mode & FMODE_READ))
		return 0;

	/* have the array populated */
	retval = pidlist_array_load(cgrp, type, &l);
	if (retval)
		return retval;
	/* configure file information */
	file->f_op = &cgroup_pidlist_operations;

	retval = seq_open(file, &cgroup_pidlist_seq_operations);
	if (retval) {
		/* drop the reference pidlist_array_load() took for us */
		cgroup_release_pid_array(l);
		return retval;
	}
	((struct seq_file *)file->private_data)->private = l;
	return 0;
}
/* .open for the "tasks" file: one entry per thread (pid). */
static int cgroup_tasks_open(struct inode *unused, struct file *file)
{
	return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
}
/* .open for the "cgroup.procs" file: one entry per thread group (tgid). */
static int cgroup_procs_open(struct inode *unused, struct file *file)
{
	return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
}
3458
/* read_u64 handler for "notify_on_release": reports the flag as 0/1. */
static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
					    struct cftype *cft)
{
	return notify_on_release(cgrp);
}
3464
3465static int cgroup_write_notify_on_release(struct cgroup *cgrp,
3466 struct cftype *cft,
3467 u64 val)
3468{
3469 clear_bit(CGRP_RELEASABLE, &cgrp->flags);
3470 if (val)
3471 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3472 else
3473 clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3474 return 0;
3475}
3476
3477
3478
3479
3480
3481
/*
 * Unregister event and free resources.
 *
 * Gets called from workqueue.
 */
static void cgroup_event_remove(struct work_struct *work)
{
	struct cgroup_event *event = container_of(work, struct cgroup_event,
			remove);
	struct cgroup *cgrp = event->cgrp;

	event->cft->unregister_event(cgrp, event->cft, event->eventfd);

	eventfd_ctx_put(event->eventfd);
	kfree(event);
	/* drop the directory reference taken when the event was registered */
	dput(cgrp->dentry);
}
3494
3495
3496
3497
3498
3499
/*
 * Gets called on POLLHUP on eventfd when user closes it.
 *
 * Called with wqh->lock held and interrupts disabled.
 */
static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
		int sync, void *key)
{
	struct cgroup_event *event = container_of(wait,
			struct cgroup_event, wait);
	struct cgroup *cgrp = event->cgrp;
	unsigned long flags = (unsigned long)key;

	if (flags & POLLHUP) {
		__remove_wait_queue(event->wqh, &event->wait);
		spin_lock(&cgrp->event_list_lock);
		list_del(&event->list);
		spin_unlock(&cgrp->event_list_lock);
		/*
		 * We are in atomic context, but cgroup_event_remove() may
		 * sleep, so we have to call it in workqueue.
		 */
		schedule_work(&event->remove);
	}

	return 0;
}
3522
/*
 * poll_table callback: record the wait queue head and hook our custom
 * wakeup entry onto it so cgroup_event_wake() runs on eventfd POLLHUP.
 */
static void cgroup_event_ptable_queue_proc(struct file *file,
		wait_queue_head_t *wqh, poll_table *pt)
{
	struct cgroup_event *event = container_of(pt,
			struct cgroup_event, pt);

	event->wqh = wqh;
	add_wait_queue(wqh, &event->wait);
}
3532
3533
3534
3535
3536
3537
3538
/*
 * Parse input ("<event_fd> <control_fd> [args]") and register a new
 * cgroup event bound to the given eventfd.  The registered event is
 * delivered via the eventfd and torn down automatically when the
 * eventfd is closed (POLLHUP) or the cgroup is removed.
 */
static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
				      const char *buffer)
{
	struct cgroup_event *event = NULL;
	unsigned int efd, cfd;
	struct file *efile = NULL;
	struct file *cfile = NULL;
	char *endp;
	int ret;

	efd = simple_strtoul(buffer, &endp, 10);
	if (*endp != ' ')
		return -EINVAL;
	buffer = endp + 1;

	cfd = simple_strtoul(buffer, &endp, 10);
	if ((*endp != ' ') && (*endp != '\0'))
		return -EINVAL;
	buffer = endp + 1;   /* remainder, if any, is args for register_event */

	event = kzalloc(sizeof(*event), GFP_KERNEL);
	if (!event)
		return -ENOMEM;
	event->cgrp = cgrp;
	INIT_LIST_HEAD(&event->list);
	init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
	init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
	INIT_WORK(&event->remove, cgroup_event_remove);

	efile = eventfd_fget(efd);
	if (IS_ERR(efile)) {
		ret = PTR_ERR(efile);
		goto fail;
	}

	event->eventfd = eventfd_ctx_fileget(efile);
	if (IS_ERR(event->eventfd)) {
		ret = PTR_ERR(event->eventfd);
		goto fail;
	}

	cfile = fget(cfd);
	if (!cfile) {
		ret = -EBADF;
		goto fail;
	}

	/* the process need read permission on control file */
	/* AV: shouldn't we check that it's been opened for read instead? */
	ret = inode_permission(cfile->f_path.dentry->d_inode, MAY_READ);
	if (ret < 0)
		goto fail;

	event->cft = __file_cft(cfile);
	if (IS_ERR(event->cft)) {
		ret = PTR_ERR(event->cft);
		goto fail;
	}

	/* only files providing both callbacks support eventfd monitoring */
	if (!event->cft->register_event || !event->cft->unregister_event) {
		ret = -EINVAL;
		goto fail;
	}

	ret = event->cft->register_event(cgrp, event->cft,
			event->eventfd, buffer);
	if (ret)
		goto fail;

	/* eventfd already hung up: unwind the registration, report success */
	if (efile->f_op->poll(efile, &event->pt) & POLLHUP) {
		event->cft->unregister_event(cgrp, event->cft, event->eventfd);
		ret = 0;
		goto fail;
	}

	/*
	 * Events should be removed after rmdir of cgroup directory, but before
	 * destroying subsystem state objects. Let's take reference to cgroup
	 * directory dentry to do that.
	 */
	dget(cgrp->dentry);

	spin_lock(&cgrp->event_list_lock);
	list_add(&event->list, &cgrp->event_list);
	spin_unlock(&cgrp->event_list_lock);

	fput(cfile);
	fput(efile);

	return 0;

fail:
	if (cfile)
		fput(cfile);

	if (event && event->eventfd && !IS_ERR(event->eventfd))
		eventfd_ctx_put(event->eventfd);

	if (!IS_ERR_OR_NULL(efile))
		fput(efile);

	kfree(event);

	return ret;
}
3644
/* read_u64 handler for "cgroup.clone_children": reports the flag as 0/1. */
static u64 cgroup_clone_children_read(struct cgroup *cgrp,
					    struct cftype *cft)
{
	return clone_children(cgrp);
}
3650
3651static int cgroup_clone_children_write(struct cgroup *cgrp,
3652 struct cftype *cft,
3653 u64 val)
3654{
3655 if (val)
3656 set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3657 else
3658 clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3659 return 0;
3660}
3661
3662
3663
3664
3665
/*
 * Base control files present in every cgroup directory.  Newer files get
 * the "cgroup." prefix; "tasks" and "notify_on_release" keep their legacy
 * unprefixed names for backward compatibility.
 */
#define CGROUP_FILE_GENERIC_PREFIX "cgroup."
static struct cftype files[] = {
	{
		.name = "tasks",
		.open = cgroup_tasks_open,
		.write_u64 = cgroup_tasks_write,
		.release = cgroup_pidlist_release,
		.mode = S_IRUGO | S_IWUSR,
	},
	{
		.name = CGROUP_FILE_GENERIC_PREFIX "procs",
		.open = cgroup_procs_open,
		.write_u64 = cgroup_procs_write,
		.release = cgroup_pidlist_release,
		.mode = S_IRUGO | S_IWUSR,
	},
	{
		.name = "notify_on_release",
		.read_u64 = cgroup_read_notify_on_release,
		.write_u64 = cgroup_write_notify_on_release,
	},
	{
		.name = CGROUP_FILE_GENERIC_PREFIX "event_control",
		.write_string = cgroup_write_event_control,
		.mode = S_IWUGO,
	},
	{
		.name = "cgroup.clone_children",
		.read_u64 = cgroup_clone_children_read,
		.write_u64 = cgroup_clone_children_write,
	},
};
3698
/* "release_agent" file; only created in a hierarchy's root cgroup. */
static struct cftype cft_release_agent = {
	.name = "release_agent",
	.read_seq_string = cgroup_release_agent_show,
	.write_string = cgroup_release_agent_write,
	.max_write_len = PATH_MAX,
};
3705
/*
 * Populate a cgroup directory with the base files, the root-only
 * release_agent file, and each bound subsystem's own files, then make
 * the css objects visible through the CSS ID machinery.
 */
static int cgroup_populate_dir(struct cgroup *cgrp)
{
	int err;
	struct cgroup_subsys *ss;

	/* First clear out any existing files */
	cgroup_clear_directory(cgrp->dentry);

	err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
	if (err < 0)
		return err;

	if (cgrp == cgrp->top_cgroup) {
		if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0)
			return err;
	}

	for_each_subsys(cgrp->root, ss) {
		if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
			return err;
	}
	/* This cgroup is ready now */
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		/*
		 * Update id->css pointer and make this css visible from
		 * CSS ID functions. This pointer will be dereferenced
		 * from RCU-read-side without locks.
		 */
		if (css->id)
			rcu_assign_pointer(css->id->css, css);
	}

	return 0;
}
3741
/*
 * Initialize a freshly-created css and install it in @cgrp's subsystem
 * slot.  The slot must be empty (BUG otherwise).
 */
static void init_cgroup_css(struct cgroup_subsys_state *css,
			       struct cgroup_subsys *ss,
			       struct cgroup *cgrp)
{
	css->cgroup = cgrp;
	/* base reference held by the cgroup itself */
	atomic_set(&css->refcnt, 1);
	css->flags = 0;
	css->id = NULL;
	if (cgrp == dummytop)
		set_bit(CSS_ROOT, &css->flags);
	BUG_ON(cgrp->subsys[ss->subsys_id]);
	cgrp->subsys[ss->subsys_id] = css;
}
3755
3756static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
3757{
3758
3759 int i;
3760
3761
3762
3763
3764
3765 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3766 struct cgroup_subsys *ss = subsys[i];
3767 if (ss == NULL)
3768 continue;
3769 if (ss->root == root)
3770 mutex_lock(&ss->hierarchy_mutex);
3771 }
3772}
3773
3774static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
3775{
3776 int i;
3777
3778 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3779 struct cgroup_subsys *ss = subsys[i];
3780 if (ss == NULL)
3781 continue;
3782 if (ss->root == root)
3783 mutex_unlock(&ss->hierarchy_mutex);
3784 }
3785}
3786
3787
3788
3789
3790
3791
3792
3793
3794
/*
 * cgroup_create - create a cgroup
 * @parent: cgroup that will be parent of the new cgroup
 * @dentry: dentry of the new cgroup
 * @mode: mode to set on new inode
 *
 * Must be called with the mutex on the parent inode held.
 * Returns 0 on success or a negative errno; on failure all partially
 * created state (css objects, list linkage, superblock ref) is undone.
 */
static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
			     umode_t mode)
{
	struct cgroup *cgrp;
	struct cgroupfs_root *root = parent->root;
	int err = 0;
	struct cgroup_subsys *ss;
	struct super_block *sb = root->sb;

	cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
	if (!cgrp)
		return -ENOMEM;

	/*
	 * Grab a reference on the superblock so the superblock doesn't
	 * vanish while this cgroup exists; dropped via deactivate_super()
	 * on the failure path or when the cgroup is destroyed.
	 */
	atomic_inc(&sb->s_active);

	mutex_lock(&cgroup_mutex);

	init_cgroup_housekeeping(cgrp);

	cgrp->parent = parent;
	cgrp->root = parent->root;
	cgrp->top_cgroup = parent->top_cgroup;

	/* new children inherit these flags from the parent */
	if (notify_on_release(parent))
		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);

	if (clone_children(parent))
		set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);

	for_each_subsys(root, ss) {
		struct cgroup_subsys_state *css = ss->create(ss, cgrp);

		if (IS_ERR(css)) {
			err = PTR_ERR(css);
			goto err_destroy;
		}
		init_cgroup_css(css, ss, cgrp);
		if (ss->use_id) {
			err = alloc_css_id(ss, parent, cgrp);
			if (err)
				goto err_destroy;
		}
		/* At error, ->destroy() callback has to free assigned ID. */
		if (clone_children(parent) && ss->post_clone)
			ss->post_clone(ss, cgrp);
	}

	cgroup_lock_hierarchy(root);
	list_add(&cgrp->sibling, &cgrp->parent->children);
	cgroup_unlock_hierarchy(root);
	root->number_of_cgroups++;

	err = cgroup_create_dir(cgrp, dentry, mode);
	if (err < 0)
		goto err_remove;

	/* The cgroup directory was pre-locked for us */
	BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));

	err = cgroup_populate_dir(cgrp);
	/* If err < 0, we have a half-filled directory - oh well ;) */

	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);

	return 0;

 err_remove:
	/* unwind the list linkage done above */
	cgroup_lock_hierarchy(root);
	list_del(&cgrp->sibling);
	cgroup_unlock_hierarchy(root);
	root->number_of_cgroups--;

 err_destroy:
	/* tear down any css objects that were successfully created */
	for_each_subsys(root, ss) {
		if (cgrp->subsys[ss->subsys_id])
			ss->destroy(ss, cgrp);
	}

	mutex_unlock(&cgroup_mutex);

	/* Release the reference count that we took on the superblock */
	deactivate_super(sb);

	kfree(cgrp);
	return err;
}
3889
/* VFS mkdir hook: create a child cgroup under the parent directory. */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct cgroup *c_parent = dentry->d_parent->d_fsdata;

	/* the vfs holds inode->i_mutex already */
	return cgroup_create(c_parent, dentry, mode | S_IFDIR);
}
3897
/*
 * Returns non-zero if any of the cgroup's css objects has outstanding
 * references (refcnt > 1; 1 is the base reference held by the cgroup).
 * The reads are unlocked, so the answer is inherently racy - see the
 * comment below on why a false positive is harmless for callers.
 */
static int cgroup_has_css_refs(struct cgroup *cgrp)
{
	/*
	 * Check the reference count on each subsystem. Since we already
	 * established that there are no tasks in the cgroup, if the css
	 * refcount is also 1, then there should be no outstanding
	 * references, so the subsystem is safe to destroy.
	 */
	int i;

	/*
	 * We won't need to lock the subsys array, because the subsystems
	 * we're concerned about aren't going anywhere since our cgroup root
	 * has a reference on them.
	 */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		struct cgroup_subsys_state *css;
		/* Skip subsystems not present or not in this hierarchy */
		if (ss == NULL || ss->root != cgrp->root)
			continue;
		css = cgrp->subsys[ss->subsys_id];
		/*
		 * When called from check_for_release() it's possible
		 * that by this point the cgroup has been removed
		 * and the css deleted. But a false-positive doesn't
		 * matter, since it can only happen if the cgroup
		 * has been deleted and hence no longer needs the
		 * release agent to be called anyway.
		 */
		if (css && (atomic_read(&css->refcnt) > 1))
			return 1;
	}
	return 0;
}
3933
3934
3935
3936
3937
3938
3939
/*
 * Atomically mark all (or else none) of the cgroup's CSS objects as
 * CSS_REMOVED. Return true on success, or false if the cgroup has
 * busy subsystems. Call with cgroup_mutex held.
 */
static int cgroup_clear_css_refs(struct cgroup *cgrp)
{
	struct cgroup_subsys *ss;
	unsigned long flags;
	bool failed = false;
	/* run the whole two-phase protocol with interrupts off */
	local_irq_save(flags);
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		int refcnt;
		while (1) {
			/* We can only remove a CSS with a refcnt==1 */
			refcnt = atomic_read(&css->refcnt);
			if (refcnt > 1) {
				failed = true;
				goto done;
			}
			BUG_ON(!refcnt);
			/*
			 * Drop the refcnt to 0 while we check other
			 * subsystems. This will cause any racing
			 * css_tryget() to spin until we set the
			 * CSS_REMOVED bits or abort
			 */
			if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt)
				break;
			cpu_relax();
		}
	}
 done:
	/* second phase: commit or roll back every css we touched */
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		if (failed) {
			/*
			 * Restore old refcnt if we previously managed
			 * to clear it from 1 to 0
			 */
			if (!atomic_read(&css->refcnt))
				atomic_set(&css->refcnt, 1);
		} else {
			/* Commit the fact that the CSS is removed */
			set_bit(CSS_REMOVED, &css->flags);
		}
	}
	local_irq_restore(flags);
	return !failed;
}
3986
/*
 * VFS rmdir hook: remove an empty cgroup.  Fails with -EBUSY while the
 * cgroup still has attached tasks, children, or css references; may
 * sleep waiting for transient css references to drain and retry.
 */
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
{
	struct cgroup *cgrp = dentry->d_fsdata;
	struct dentry *d;
	struct cgroup *parent;
	DEFINE_WAIT(wait);
	struct cgroup_event *event, *tmp;
	int ret;

	/* the vfs holds both inode->i_mutex already */
again:
	mutex_lock(&cgroup_mutex);
	if (atomic_read(&cgrp->count) != 0) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	if (!list_empty(&cgrp->children)) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	mutex_unlock(&cgroup_mutex);

	/*
	 * In general, subsystem has no css->refcnt after pre_destroy().
	 * But in racy cases, subsystem may have to get css->refcnt after
	 * pre_destroy() and it makes rmdir return with -EBUSY. This sometimes
	 * makes rmdir return -EBUSY too often. To avoid that, we use waitqueue
	 * for cgroup's rmdir. CGRP_WAIT_ON_RMDIR is for synchronizing rmdir
	 * and subsystem's reference count handling. Please see css_get/put
	 * and css_tryget() and cgroup_wakeup_rmdir_waiter() implementation.
	 */
	set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	/*
	 * Call pre_destroy handlers of subsys. Notify subsystems
	 * that rmdir() request comes.
	 */
	ret = cgroup_call_pre_destroy(cgrp);
	if (ret) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		return ret;
	}

	mutex_lock(&cgroup_mutex);
	parent = cgrp->parent;
	/* re-check emptiness: pre_destroy ran without cgroup_mutex held */
	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
	if (!cgroup_clear_css_refs(cgrp)) {
		mutex_unlock(&cgroup_mutex);
		/*
		 * Because someone may call cgroup_wakeup_rmdir_waiter() before
		 * prepare_to_wait(), we need to check this flag.
		 */
		if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
			schedule();
		finish_wait(&cgroup_rmdir_waitq, &wait);
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		if (signal_pending(current))
			return -EINTR;
		goto again;
	}
	/* NO css_tryget() can success after here. */
	finish_wait(&cgroup_rmdir_waitq, &wait);
	clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	raw_spin_lock(&release_list_lock);
	set_bit(CGRP_REMOVED, &cgrp->flags);
	if (!list_empty(&cgrp->release_list))
		list_del_init(&cgrp->release_list);
	raw_spin_unlock(&release_list_lock);

	cgroup_lock_hierarchy(cgrp->root);
	/* delete this cgroup from parent->children */
	list_del_init(&cgrp->sibling);
	cgroup_unlock_hierarchy(cgrp->root);

	d = dget(cgrp->dentry);

	cgroup_d_remove_dir(d);
	dput(d);

	set_bit(CGRP_RELEASABLE, &parent->flags);
	check_for_release(parent);

	/*
	 * Unregister events and notify userspace.
	 * Notify userspace about cgroup removing only after rmdir of cgroup
	 * directory to avoid race between userspace and kernelspace.
	 */
	spin_lock(&cgrp->event_list_lock);
	list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
		list_del(&event->list);
		remove_wait_queue(event->wqh, &event->wait);
		eventfd_signal(event->eventfd, 1);
		schedule_work(&event->remove);
	}
	spin_unlock(&cgrp->event_list_lock);

	mutex_unlock(&cgroup_mutex);
	return 0;
}
4092
/*
 * Initialize a builtin subsystem at boot: create its root css on the
 * dummy hierarchy and hook it into init_css_set.  Boot-time only; no
 * locking needed.
 */
static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);

	/* Create the top cgroup state for this subsystem */
	list_add(&ss->sibling, &rootnode.subsys_list);
	ss->root = &rootnode;
	css = ss->create(ss, dummytop);
	/* We don't handle early failures gracefully */
	BUG_ON(IS_ERR(css));
	init_cgroup_css(css, ss, dummytop);

	/*
	 * Update the init_css_set to contain a subsys
	 * pointer to this state - since the subsystem is
	 * newly registered, all tasks and hence the
	 * init_css_set is in the subsystem's top cgroup.
	 */
	init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];

	need_forkexit_callback |= ss->fork || ss->exit;

	/*
	 * At system boot, before all subsystems have been
	 * registered, no tasks have been forked, so we don't
	 * need to invoke fork callbacks here.
	 */
	BUG_ON(!list_empty(&init_task.tasks));

	mutex_init(&ss->hierarchy_mutex);
	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
	ss->active = 1;

	/*
	 * this function shouldn't be used with modular subsystems, since they
	 * need to register a subsys_id, among other things
	 */
	BUG_ON(ss->module);
}
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
/*
 * cgroup_load_subsys: load a modular subsystem at runtime.
 * @ss: the subsystem to load
 *
 * This function should be called in a modular subsystem's initcall.  If
 * the subsystem is built as a module, it will be assigned a new subsys_id
 * and set up for use.  Fork/exit callbacks are not supported for modular
 * subsystems (they would need extra synchronization).  Returns 0 on
 * success or a negative errno.
 */
int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
{
	int i;
	struct cgroup_subsys_state *css;

	/* check name and function validity */
	if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
	    ss->create == NULL || ss->destroy == NULL)
		return -EINVAL;

	/*
	 * We don't support callbacks in modular subsystems. This check is
	 * before the ss->module check for consistency; a subsystem that could
	 * be a module should still have no callbacks even if the user isn't
	 * compiling it as one.
	 */
	if (ss->fork || ss->exit)
		return -EINVAL;

	/*
	 * an optionally modular subsystem is built-in: we want to do nothing,
	 * since cgroup_init_subsys will have already taken care of it.
	 */
	if (ss->module == NULL) {
		/* a few sanity checks */
		BUG_ON(ss->subsys_id >= CGROUP_BUILTIN_SUBSYS_COUNT);
		BUG_ON(subsys[ss->subsys_id] != ss);
		return 0;
	}

	/*
	 * need to register a subsys id before anything else - for example,
	 * init_cgroup_css needs it.
	 */
	mutex_lock(&cgroup_mutex);
	/* find the first empty slot in the array */
	for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
		if (subsys[i] == NULL)
			break;
	}
	if (i == CGROUP_SUBSYS_COUNT) {
		/* maximum number of subsystems already registered! */
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	/* assign ourselves the subsys_id */
	ss->subsys_id = i;
	subsys[i] = ss;

	/*
	 * no ss->create seems to need anything important in the ss struct, so
	 * this can happen first (i.e. before the rootnode attachment).
	 */
	css = ss->create(ss, dummytop);
	if (IS_ERR(css)) {
		/* failure case - need to deassign the subsys[] slot. */
		subsys[i] = NULL;
		mutex_unlock(&cgroup_mutex);
		return PTR_ERR(css);
	}

	list_add(&ss->sibling, &rootnode.subsys_list);
	ss->root = &rootnode;

	/* our new subsystem will be attached to the dummy hierarchy. */
	init_cgroup_css(css, ss, dummytop);
	/* init_idr must be after init_cgroup_css because it sets css->id. */
	if (ss->use_id) {
		int ret = cgroup_init_idr(ss, css);
		if (ret) {
			dummytop->subsys[ss->subsys_id] = NULL;
			ss->destroy(ss, dummytop);
			subsys[i] = NULL;
			mutex_unlock(&cgroup_mutex);
			return ret;
		}
	}

	/*
	 * Now we need to entangle the css into the existing css_sets. unlike
	 * in cgroup_init_subsys, there are now multiple css_sets, so each one
	 * will need a new pointer to it; done by iterating the css_set_table.
	 * furthermore, modifying the existing css_sets will corrupt the hash
	 * table state, so each changed css_set will need its hash recomputed.
	 * this is all done under the css_set_lock.
	 */
	write_lock(&css_set_lock);
	for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
		struct css_set *cg;
		struct hlist_node *node, *tmp;
		struct hlist_head *bucket = &css_set_table[i], *new_bucket;

		hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) {
			/* skip entries that we already rehashed */
			if (cg->subsys[ss->subsys_id])
				continue;
			/* remove existing entry */
			hlist_del(&cg->hlist);
			/* set new value */
			cg->subsys[ss->subsys_id] = css;
			/* recompute hash and restore entry */
			new_bucket = css_set_hash(cg->subsys);
			hlist_add_head(&cg->hlist, new_bucket);
		}
	}
	write_unlock(&css_set_lock);

	mutex_init(&ss->hierarchy_mutex);
	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
	ss->active = 1;

	/* success! */
	mutex_unlock(&cgroup_mutex);
	return 0;
}
4253EXPORT_SYMBOL_GPL(cgroup_load_subsys);
4254
4255
4256
4257
4258
4259
4260
4261
4262
/*
 * cgroup_unload_subsys: unload a modular subsystem
 * @ss: the subsystem to unload
 *
 * This function should be called in a modular subsystem's exitcall.  When
 * this function is invoked, the refcount on the subsystem's module will
 * be 0, so the subsystem will not be attached to any hierarchy.
 */
void cgroup_unload_subsys(struct cgroup_subsys *ss)
{
	struct cg_cgroup_link *link;
	struct hlist_head *hhead;

	BUG_ON(ss->module == NULL);

	/*
	 * we shouldn't be called if the subsystem is in use, and the use of
	 * try_module_get in parse_cgroupfs_options should ensure that it
	 * doesn't start being used while we're killing it off.
	 */
	BUG_ON(ss->root != &rootnode);

	mutex_lock(&cgroup_mutex);
	/* deassign the subsys_id */
	BUG_ON(ss->subsys_id < CGROUP_BUILTIN_SUBSYS_COUNT);
	subsys[ss->subsys_id] = NULL;

	/* remove subsystem from rootnode's list of subsystems */
	list_del_init(&ss->sibling);

	/*
	 * disentangle the css from all css_sets attached to the dummytop. as
	 * in loading, we need to pay our respects to the hashtable gods.
	 */
	write_lock(&css_set_lock);
	list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) {
		struct css_set *cg = link->cg;

		hlist_del(&cg->hlist);
		BUG_ON(!cg->subsys[ss->subsys_id]);
		cg->subsys[ss->subsys_id] = NULL;
		hhead = css_set_hash(cg->subsys);
		hlist_add_head(&cg->hlist, hhead);
	}
	write_unlock(&css_set_lock);

	/*
	 * remove subsystem's css from the dummytop and free it - need to free
	 * before marking as null because ss->destroy needs the cgrp->subsys
	 * pointer to find their state.
	 */
	ss->destroy(ss, dummytop);
	dummytop->subsys[ss->subsys_id] = NULL;

	mutex_unlock(&cgroup_mutex);
}
4312EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
4313
4314
4315
4316
4317
4318
4319
/**
 * cgroup_init_early - cgroup initialization at system boot
 *
 * Initialize cgroups at system boot, and initialize any
 * subsystems that request early init.
 */
int __init cgroup_init_early(void)
{
	int i;
	atomic_set(&init_css_set.refcount, 1);
	INIT_LIST_HEAD(&init_css_set.cg_links);
	INIT_LIST_HEAD(&init_css_set.tasks);
	INIT_HLIST_NODE(&init_css_set.hlist);
	css_set_count = 1;
	init_cgroup_root(&rootnode);
	root_count = 1;
	init_task.cgroups = &init_css_set;

	/* link init_css_set to the dummy hierarchy's top cgroup */
	init_css_set_link.cg = &init_css_set;
	init_css_set_link.cgrp = dummytop;
	list_add(&init_css_set_link.cgrp_link_list,
		 &rootnode.top_cgroup.css_sets);
	list_add(&init_css_set_link.cg_link_list,
		 &init_css_set.cg_links);

	for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&css_set_table[i]);

	/* at bootup time, we don't worry about modular subsystems */
	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];

		BUG_ON(!ss->name);
		BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
		BUG_ON(!ss->create);
		BUG_ON(!ss->destroy);
		if (ss->subsys_id != i) {
			printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
			       ss->name, ss->subsys_id);
			BUG();
		}

		if (ss->early_init)
			cgroup_init_subsys(ss);
	}
	return 0;
}
4361
4362
4363
4364
4365
4366
4367
/**
 * cgroup_init - cgroup initialization
 *
 * Register cgroup filesystem and /proc file, and initialize
 * any subsystems that didn't request early init.
 */
int __init cgroup_init(void)
{
	int err;
	int i;
	struct hlist_head *hhead;

	err = bdi_init(&cgroup_backing_dev_info);
	if (err)
		return err;

	/* at bootup time, we don't worry about modular subsystems */
	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		if (!ss->early_init)
			cgroup_init_subsys(ss);
		if (ss->use_id)
			cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
	}

	/* Add init_css_set to the hash table */
	hhead = css_set_hash(init_css_set.subsys);
	hlist_add_head(&init_css_set.hlist, hhead);
	BUG_ON(!init_root_id(&rootnode));

	cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
	if (!cgroup_kobj) {
		err = -ENOMEM;
		goto out;
	}

	err = register_filesystem(&cgroup_fs_type);
	if (err < 0) {
		kobject_put(cgroup_kobj);
		goto out;
	}

	proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);

out:
	if (err)
		bdi_destroy(&cgroup_backing_dev_info);

	return err;
}
4412
4413
4414
4415
4416
4417
4418
4419
4420
4421
4422
4423
4424
4425
/*
 * /proc/<pid>/cgroup: for each active hierarchy, print
 * "<id>:<subsys list>[,name=<name>]:<path of task's cgroup>".
 */
static int proc_cgroup_show(struct seq_file *m, void *v)
{
	struct pid *pid;
	struct task_struct *tsk;
	char *buf;
	int retval;
	struct cgroupfs_root *root;

	retval = -ENOMEM;
	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!buf)
		goto out;

	retval = -ESRCH;
	pid = m->private;
	tsk = get_pid_task(pid, PIDTYPE_PID);
	if (!tsk)
		goto out_free;

	retval = 0;

	mutex_lock(&cgroup_mutex);

	for_each_active_root(root) {
		struct cgroup_subsys *ss;
		struct cgroup *cgrp;
		int count = 0;

		seq_printf(m, "%d:", root->hierarchy_id);
		for_each_subsys(root, ss)
			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
		/* a named hierarchy may have no subsystems bound */
		if (strlen(root->name))
			seq_printf(m, "%sname=%s", count ? "," : "",
				   root->name);
		seq_putc(m, ':');
		cgrp = task_cgroup_from_root(tsk, root);
		retval = cgroup_path(cgrp, buf, PAGE_SIZE);
		if (retval < 0)
			goto out_unlock;
		seq_puts(m, buf);
		seq_putc(m, '\n');
	}

out_unlock:
	mutex_unlock(&cgroup_mutex);
	put_task_struct(tsk);
out_free:
	kfree(buf);
out:
	return retval;
}
4477
/* .open for /proc/<pid>/cgroup; passes the pid to proc_cgroup_show(). */
static int cgroup_open(struct inode *inode, struct file *file)
{
	struct pid *pid = PROC_I(inode)->pid;
	return single_open(file, proc_cgroup_show, pid);
}
4483
/* file_operations for /proc/<pid>/cgroup (single-shot seq_file). */
const struct file_operations proc_cgroup_operations = {
	.open		= cgroup_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
4490
4491
/* Display information about each subsystem and each hierarchy */
static int proc_cgroupstats_show(struct seq_file *m, void *v)
{
	int i;

	seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
	/*
	 * ideally we don't want subsystems moving around while we do this.
	 * cgroup_mutex is also necessary to guarantee an atomic snapshot of
	 * subsys/hierarchy state.
	 */
	mutex_lock(&cgroup_mutex);
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		/* modular subsystem slot may be empty */
		if (ss == NULL)
			continue;
		seq_printf(m, "%s\t%d\t%d\t%d\n",
			   ss->name, ss->root->hierarchy_id,
			   ss->root->number_of_cgroups, !ss->disabled);
	}
	mutex_unlock(&cgroup_mutex);
	return 0;
}
4514
/* .open for /proc/cgroups. */
static int cgroupstats_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_cgroupstats_show, NULL);
}
4519
/* file_operations for /proc/cgroups (single-shot seq_file). */
static const struct file_operations proc_cgroupstats_operations = {
	.open = cgroupstats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
4526
4527
4528
4529
4530
4531
4532
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
4545
4546
4547
4548
4549
/**
 * cgroup_fork - attach newly forked task to its parents cgroup.
 * @child: pointer to task_struct of forking parent process.
 *
 * A task is associated with the init_css_set until cgroup_post_fork()
 * attaches it to the parent's css_set.
 */
void cgroup_fork(struct task_struct *child)
{
	/*
	 * We don't need to task_lock() current because current->cgroups
	 * can't be changed concurrently here - the parent obviously hasn't
	 * exited and called cgroup_exit(), and we are synchronized against
	 * cgroup migration through threadgroup_change_begin().
	 * NOTE(review): the synchronization claim above is an assumption
	 * from the callers' context - confirm against copy_process().
	 */
	child->cgroups = current->cgroups;
	get_css_set(child->cgroups);
	INIT_LIST_HEAD(&child->cg_list);
}
4562
4563
4564
4565
4566
4567
4568
4569
4570
/**
 * cgroup_fork_callbacks - run fork callbacks
 * @child: the new task
 *
 * Called on a new task very soon before adding it to the
 * tasklist. No need to take any locks since no-one can
 * be operating on this task.
 */
void cgroup_fork_callbacks(struct task_struct *child)
{
	if (need_forkexit_callback) {
		int i;
		/*
		 * forkexit callbacks are only supported for builtin
		 * subsystems, and the builtin section of the subsys array is
		 * immutable, so we don't need to lock the subsys array here.
		 */
		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss->fork)
				ss->fork(ss, child);
		}
	}
}
4587
4588
4589
4590
4591
4592
4593
4594
4595
4596
/**
 * cgroup_post_fork - called on a new task after adding it to the task list
 * @child: the task in question
 *
 * Adds the task to the list running through its css_set if necessary.
 * Has to be after the task is visible on the task list in case we race
 * with the first call to cgroup_iter_start() - to guarantee that the
 * new task ends up on its list.
 */
void cgroup_post_fork(struct task_struct *child)
{
	if (use_task_css_set_links) {
		write_lock(&css_set_lock);
		if (list_empty(&child->cg_list)) {
			/*
			 * It's safe to use child->cgroups without task_lock()
			 * here because we are protected through
			 * threadgroup_change_begin() against concurrent
			 * css_set change in cgroup_task_migrate(). Also
			 * the task can't exit at that point until
			 * wake_up_new_task() is called, so we are protected
			 * against cgroup_exit() setting child->cgroup to
			 * init_css_set.
			 */
			list_add(&child->cg_list, &child->cgroups->tasks);
		}
		write_unlock(&css_set_lock);
	}
}
4617
4618
4619
4620
4621
4622
4623
4624
4625
4626
4627
4628
4629
4630
4631
4632
4633
4634
4635
4636
4637
4638
4639
4640
4641
4642
4643
4644
4645
4646
4647
4648
4649
4650
4651
/**
 * cgroup_exit - detach cgroup from exiting task
 * @tsk: pointer to task_struct of exiting process
 * @run_callbacks: run exit callbacks?
 *
 * Reassigns the exiting task to init_css_set, runs subsystem exit
 * callbacks if requested, and drops the task's css_set reference.
 */
void cgroup_exit(struct task_struct *tsk, int run_callbacks)
{
	struct css_set *cg;
	int i;

	/*
	 * Unlink from the css_set task list if necessary.
	 * Optimistically check cg_list before taking
	 * css_set_lock
	 */
	if (!list_empty(&tsk->cg_list)) {
		write_lock(&css_set_lock);
		if (!list_empty(&tsk->cg_list))
			list_del_init(&tsk->cg_list);
		write_unlock(&css_set_lock);
	}

	/* Reassign the task to the init_css_set. */
	task_lock(tsk);
	cg = tsk->cgroups;
	tsk->cgroups = &init_css_set;

	if (run_callbacks && need_forkexit_callback) {
		/*
		 * modular subsystems can't use callbacks, so no need to lock
		 * the subsys array
		 */
		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss->exit) {
				struct cgroup *old_cgrp =
					rcu_dereference_raw(cg->subsys[i])->cgroup;
				struct cgroup *cgrp = task_cgroup(tsk, i);
				ss->exit(ss, cgrp, old_cgrp, tsk);
			}
		}
	}
	task_unlock(tsk);

	if (cg)
		put_css_set_taskexit(cg);
}
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
4704
4705
4706
4707
4708int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
4709{
4710 int ret;
4711 struct cgroup *target;
4712
4713 if (cgrp == dummytop)
4714 return 1;
4715
4716 target = task_cgroup_from_root(task, cgrp->root);
4717 while (cgrp != target && cgrp!= cgrp->top_cgroup)
4718 cgrp = cgrp->parent;
4719 ret = (cgrp == target);
4720 return ret;
4721}
4722
/*
 * If @cgrp has become releasable (notify-on-release set, no tasks, no
 * children, no outstanding css refs), queue it for the release agent
 * workqueue unless it is already queued or being removed.
 */
static void check_for_release(struct cgroup *cgrp)
{
	/* All of these checks rely on RCU to keep the cgroup
	 * structure alive */
	if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
	    && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
		/*
		 * Control Group is currently removeable. If it's not
		 * already queued for a userspace notification, queue
		 * it now
		 */
		int need_schedule_work = 0;
		raw_spin_lock(&release_list_lock);
		if (!cgroup_is_removed(cgrp) &&
		    list_empty(&cgrp->release_list)) {
			list_add(&cgrp->release_list, &release_list);
			need_schedule_work = 1;
		}
		raw_spin_unlock(&release_list_lock);
		if (need_schedule_work)
			schedule_work(&release_agent_work);
	}
}
4744
4745
/* Caller must verify that the css is not for root cgroup */
void __css_put(struct cgroup_subsys_state *css, int count)
{
	struct cgroup *cgrp = css->cgroup;
	int val;
	rcu_read_lock();
	val = atomic_sub_return(count, &css->refcnt);
	/*
	 * val == 1 means only the base reference remains: the cgroup
	 * may now be releasable, and a waiting rmdir can proceed.
	 */
	if (val == 1) {
		if (notify_on_release(cgrp)) {
			set_bit(CGRP_RELEASABLE, &cgrp->flags);
			check_for_release(cgrp);
		}
		cgroup_wakeup_rmdir_waiter(cgrp);
	}
	rcu_read_unlock();
	WARN_ON_ONCE(val < 1);
}
4763
4764
4765
4766
4767
4768
4769
4770
4771
4772
4773
4774
4775
4776
4777
4778
4779
4780
4781
4782
4783
4784
4785
4786
/*
 * Workqueue handler: for each cgroup queued on release_list, invoke the
 * hierarchy's release_agent binary (usermode helper) with the cgroup's
 * path as its single argument.  cgroup_mutex is dropped around the
 * usermode-helper call so the agent can itself operate on cgroups.
 */
static void cgroup_release_agent(struct work_struct *work)
{
	BUG_ON(work != &release_agent_work);
	mutex_lock(&cgroup_mutex);
	raw_spin_lock(&release_list_lock);
	while (!list_empty(&release_list)) {
		char *argv[3], *envp[3];
		int i;
		char *pathbuf = NULL, *agentbuf = NULL;
		struct cgroup *cgrp = list_entry(release_list.next,
						    struct cgroup,
						    release_list);
		list_del_init(&cgrp->release_list);
		raw_spin_unlock(&release_list_lock);
		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!pathbuf)
			goto continue_free;
		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
			goto continue_free;
		agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
		if (!agentbuf)
			goto continue_free;

		i = 0;
		argv[i++] = agentbuf;
		argv[i++] = pathbuf;
		argv[i] = NULL;

		i = 0;
		/* minimal command environment */
		envp[i++] = "HOME=/";
		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
		envp[i] = NULL;

		/* Drop the lock while we invoke the usermode helper,
		 * since the exec could involve hitting disk and hence
		 * be a slow process */
		mutex_unlock(&cgroup_mutex);
		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
		mutex_lock(&cgroup_mutex);
 continue_free:
		kfree(pathbuf);
		kfree(agentbuf);
		raw_spin_lock(&release_list_lock);
	}
	raw_spin_unlock(&release_list_lock);
	mutex_unlock(&cgroup_mutex);
}
4835
4836static int __init cgroup_disable(char *str)
4837{
4838 int i;
4839 char *token;
4840
4841 while ((token = strsep(&str, ",")) != NULL) {
4842 if (!*token)
4843 continue;
4844
4845
4846
4847
4848 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4849 struct cgroup_subsys *ss = subsys[i];
4850
4851 if (!strcmp(token, ss->name)) {
4852 ss->disabled = 1;
4853 printk(KERN_INFO "Disabling %s control group"
4854 " subsystem\n", ss->name);
4855 break;
4856 }
4857 }
4858 }
4859 return 1;
4860}
4861__setup("cgroup_disable=", cgroup_disable);
4862
4863
4864
4865
4866
4867
4868
4869
4870unsigned short css_id(struct cgroup_subsys_state *css)
4871{
4872 struct css_id *cssid;
4873
4874
4875
4876
4877
4878
4879 cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt));
4880
4881 if (cssid)
4882 return cssid->id;
4883 return 0;
4884}
4885EXPORT_SYMBOL_GPL(css_id);
4886
4887unsigned short css_depth(struct cgroup_subsys_state *css)
4888{
4889 struct css_id *cssid;
4890
4891 cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt));
4892
4893 if (cssid)
4894 return cssid->depth;
4895 return 0;
4896}
4897EXPORT_SYMBOL_GPL(css_depth);
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907
4908
4909
4910
4911
4912bool css_is_ancestor(struct cgroup_subsys_state *child,
4913 const struct cgroup_subsys_state *root)
4914{
4915 struct css_id *child_id;
4916 struct css_id *root_id;
4917 bool ret = true;
4918
4919 rcu_read_lock();
4920 child_id = rcu_dereference(child->id);
4921 root_id = rcu_dereference(root->id);
4922 if (!child_id
4923 || !root_id
4924 || (child_id->depth < root_id->depth)
4925 || (child_id->stack[root_id->depth] != root_id->id))
4926 ret = false;
4927 rcu_read_unlock();
4928 return ret;
4929}
4930
/*
 * free_css_id - release the css_id attached to @css.
 *
 * Clears both directions of the css<->id mapping first, so concurrent
 * RCU readers observe NULL rather than a stale pointer, then removes the
 * id from the subsystem's idr and frees the struct after a grace period.
 */
void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
{
	struct css_id *id = css->id;

	/* css may never have been given an id */
	if (!id)
		return;

	BUG_ON(!ss->use_id);

	/* unlink before the id becomes invalid */
	rcu_assign_pointer(id->css, NULL);
	rcu_assign_pointer(css->id, NULL);
	write_lock(&ss->id_lock);
	idr_remove(&ss->idr, id->id);
	write_unlock(&ss->id_lock);
	/* defer the actual free until readers are done */
	kfree_rcu(id, rcu_head);
}
EXPORT_SYMBOL_GPL(free_css_id);
4948
4949
4950
4951
4952
4953
/*
 * get_new_cssid - allocate a fresh css_id for subsystem @ss at @depth.
 *
 * The returned id has ->id and ->depth filled in and room for a
 * (depth + 1)-entry ancestor stack; the caller fills ->stack[] and
 * ->css.  Returns ERR_PTR(-ENOMEM) / ERR_PTR(-ENOSPC) on failure.
 */
static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
{
	struct css_id *newid;
	int myid, error, size;

	BUG_ON(!ss->use_id);

	/* flexible stack[]: one slot per ancestor plus this css itself */
	size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
	newid = kzalloc(size, GFP_KERNEL);
	if (!newid)
		return ERR_PTR(-ENOMEM);

	/* preload idr memory so the locked allocation below can't sleep */
	if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) {
		error = -ENOMEM;
		goto err_out;
	}
	write_lock(&ss->id_lock);
	/* ids start at 1; css_id() uses 0 to mean "no id" */
	error = idr_get_new_above(&ss->idr, newid, 1, &myid);
	write_unlock(&ss->id_lock);

	/* nonzero return means no free slot was available */
	if (error) {
		error = -ENOSPC;
		goto err_out;
	}
	/* ids must fit in an unsigned short (see CSS_ID_MAX) */
	if (myid > CSS_ID_MAX)
		goto remove_idr;

	newid->id = myid;
	newid->depth = depth;
	return newid;
remove_idr:
	error = -ENOSPC;
	write_lock(&ss->id_lock);
	idr_remove(&ss->idr, myid);
	write_unlock(&ss->id_lock);
err_out:
	kfree(newid);
	return ERR_PTR(error);

}
4996
4997static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
4998 struct cgroup_subsys_state *rootcss)
4999{
5000 struct css_id *newid;
5001
5002 rwlock_init(&ss->id_lock);
5003 idr_init(&ss->idr);
5004
5005 newid = get_new_cssid(ss, 0);
5006 if (IS_ERR(newid))
5007 return PTR_ERR(newid);
5008
5009 newid->stack[0] = newid->id;
5010 newid->css = rootcss;
5011 rootcss->id = newid;
5012 return 0;
5013}
5014
/*
 * alloc_css_id - give @child's css (for subsystem @ss) a css_id one
 * level deeper than its parent's, inheriting the ancestor id stack.
 *
 * Returns 0 on success or a -errno from get_new_cssid().
 */
static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
			struct cgroup *child)
{
	int subsys_id, i, depth = 0;
	struct cgroup_subsys_state *parent_css, *child_css;
	struct css_id *child_id, *parent_id;

	subsys_id = ss->subsys_id;
	parent_css = parent->subsys[subsys_id];
	child_css = child->subsys[subsys_id];
	parent_id = parent_css->id;
	depth = parent_id->depth + 1;

	child_id = get_new_cssid(ss, depth);
	if (IS_ERR(child_id))
		return PTR_ERR(child_id);

	/* copy the parent's full ancestor stack, then append ourselves */
	for (i = 0; i < depth; i++)
		child_id->stack[i] = parent_id->stack[i];
	child_id->stack[depth] = child_id->id;
	/*
	 * Publish the id with rcu_assign_pointer() so lockless readers
	 * (css_id(), css_is_ancestor()) see a fully initialized struct.
	 * child_id->css is presumably set later once the css is live —
	 * TODO confirm against the css creation path.
	 */
	rcu_assign_pointer(child_css->id, child_id);

	return 0;
}
5043
5044
5045
5046
5047
5048
5049
5050
5051
5052struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
5053{
5054 struct css_id *cssid = NULL;
5055
5056 BUG_ON(!ss->use_id);
5057 cssid = idr_find(&ss->idr, id);
5058
5059 if (unlikely(!cssid))
5060 return NULL;
5061
5062 return rcu_dereference(cssid->css);
5063}
5064EXPORT_SYMBOL_GPL(css_lookup);
5065
5066
5067
5068
5069
5070
5071
5072
5073
5074
5075
/*
 * css_get_next - scan @ss's idr for the next live css under @root,
 * starting at id @id.
 *
 * A candidate qualifies when @root's id appears at @root's depth in the
 * candidate's ancestor stack (i.e. it is @root or a descendant).  On
 * success the matching id is stored in *@foundid.  Returns NULL when no
 * further css is found or @root has no id.
 */
struct cgroup_subsys_state *
css_get_next(struct cgroup_subsys *ss, int id,
	     struct cgroup_subsys_state *root, int *foundid)
{
	struct cgroup_subsys_state *ret = NULL;
	struct css_id *tmp;
	int tmpid;
	int rootid = css_id(root);
	int depth = css_depth(root);

	/* root without an id cannot have id-indexed descendants */
	if (!rootid)
		return NULL;

	BUG_ON(!ss->use_id);

	tmpid = id;
	while (1) {
		/*
		 * idr_get_next() returns the first entry at or above
		 * tmpid, updating tmpid to the id actually found.
		 */
		read_lock(&ss->id_lock);
		tmp = idr_get_next(&ss->idr, &tmpid);
		read_unlock(&ss->id_lock);

		if (!tmp)
			break;
		/* descendant check via the ancestor id stack */
		if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
			ret = rcu_dereference(tmp->css);
			if (ret) {
				*foundid = tmpid;
				break;
			}
		}
		/* not a match (or css already gone): continue the scan */
		tmpid = tmpid + 1;
	}
	return ret;
}
5115
5116
5117
5118
5119struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
5120{
5121 struct cgroup *cgrp;
5122 struct inode *inode;
5123 struct cgroup_subsys_state *css;
5124
5125 inode = f->f_dentry->d_inode;
5126
5127 if (inode->i_op != &cgroup_dir_inode_operations)
5128 return ERR_PTR(-EBADF);
5129
5130 if (id < 0 || id >= CGROUP_SUBSYS_COUNT)
5131 return ERR_PTR(-EINVAL);
5132
5133
5134 cgrp = __d_cgrp(f->f_dentry);
5135 css = cgrp->subsys[id];
5136 return css ? css : ERR_PTR(-ENOENT);
5137}
5138
5139#ifdef CONFIG_CGROUP_DEBUG
5140static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
5141 struct cgroup *cont)
5142{
5143 struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
5144
5145 if (!css)
5146 return ERR_PTR(-ENOMEM);
5147
5148 return css;
5149}
5150
5151static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
5152{
5153 kfree(cont->subsys[debug_subsys_id]);
5154}
5155
5156static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
5157{
5158 return atomic_read(&cont->count);
5159}
5160
5161static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft)
5162{
5163 return cgroup_task_count(cont);
5164}
5165
5166static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
5167{
5168 return (u64)(unsigned long)current->cgroups;
5169}
5170
5171static u64 current_css_set_refcount_read(struct cgroup *cont,
5172 struct cftype *cft)
5173{
5174 u64 count;
5175
5176 rcu_read_lock();
5177 count = atomic_read(¤t->cgroups->refcount);
5178 rcu_read_unlock();
5179 return count;
5180}
5181
5182static int current_css_set_cg_links_read(struct cgroup *cont,
5183 struct cftype *cft,
5184 struct seq_file *seq)
5185{
5186 struct cg_cgroup_link *link;
5187 struct css_set *cg;
5188
5189 read_lock(&css_set_lock);
5190 rcu_read_lock();
5191 cg = rcu_dereference(current->cgroups);
5192 list_for_each_entry(link, &cg->cg_links, cg_link_list) {
5193 struct cgroup *c = link->cgrp;
5194 const char *name;
5195
5196 if (c->dentry)
5197 name = c->dentry->d_name.name;
5198 else
5199 name = "?";
5200 seq_printf(seq, "Root %d group %s\n",
5201 c->root->hierarchy_id, name);
5202 }
5203 rcu_read_unlock();
5204 read_unlock(&css_set_lock);
5205 return 0;
5206}
5207
5208#define MAX_TASKS_SHOWN_PER_CSS 25
5209static int cgroup_css_links_read(struct cgroup *cont,
5210 struct cftype *cft,
5211 struct seq_file *seq)
5212{
5213 struct cg_cgroup_link *link;
5214
5215 read_lock(&css_set_lock);
5216 list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
5217 struct css_set *cg = link->cg;
5218 struct task_struct *task;
5219 int count = 0;
5220 seq_printf(seq, "css_set %p\n", cg);
5221 list_for_each_entry(task, &cg->tasks, cg_list) {
5222 if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
5223 seq_puts(seq, " ...\n");
5224 break;
5225 } else {
5226 seq_printf(seq, " task %d\n",
5227 task_pid_vnr(task));
5228 }
5229 }
5230 }
5231 read_unlock(&css_set_lock);
5232 return 0;
5233}
5234
5235static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
5236{
5237 return test_bit(CGRP_RELEASABLE, &cgrp->flags);
5238}
5239
/* Control files exported by the debug subsystem. */
static struct cftype debug_files[] =  {
	{
		.name = "cgroup_refcount",
		.read_u64 = cgroup_refcount_read,
	},
	{
		.name = "taskcount",
		.read_u64 = debug_taskcount_read,
	},

	{
		.name = "current_css_set",
		.read_u64 = current_css_set_read,
	},

	{
		.name = "current_css_set_refcount",
		.read_u64 = current_css_set_refcount_read,
	},

	{
		.name = "current_css_set_cg_links",
		.read_seq_string = current_css_set_cg_links_read,
	},

	{
		.name = "cgroup_css_links",
		.read_seq_string = cgroup_css_links_read,
	},

	{
		.name = "releasable",
		.read_u64 = releasable_read,
	},
};
5275
5276static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
5277{
5278 return cgroup_add_files(cont, ss, debug_files,
5279 ARRAY_SIZE(debug_files));
5280}
5281
/* Subsystem descriptor for the "debug" controller (CONFIG_CGROUP_DEBUG). */
struct cgroup_subsys debug_subsys = {
	.name = "debug",
	.create = debug_create,
	.destroy = debug_destroy,
	.populate = debug_populate,
	.subsys_id = debug_subsys_id,
};
5289#endif
5290