1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include <linux/cgroup.h>
30#include <linux/cred.h>
31#include <linux/ctype.h>
32#include <linux/errno.h>
33#include <linux/fs.h>
34#include <linux/init_task.h>
35#include <linux/kernel.h>
36#include <linux/list.h>
37#include <linux/mm.h>
38#include <linux/mutex.h>
39#include <linux/mount.h>
40#include <linux/pagemap.h>
41#include <linux/proc_fs.h>
42#include <linux/rcupdate.h>
43#include <linux/sched.h>
44#include <linux/backing-dev.h>
45#include <linux/seq_file.h>
46#include <linux/slab.h>
47#include <linux/magic.h>
48#include <linux/spinlock.h>
49#include <linux/string.h>
50#include <linux/sort.h>
51#include <linux/kmod.h>
52#include <linux/module.h>
53#include <linux/delayacct.h>
54#include <linux/cgroupstats.h>
55#include <linux/hash.h>
56#include <linux/namei.h>
57#include <linux/pid_namespace.h>
58#include <linux/idr.h>
59#include <linux/vmalloc.h>
60#include <linux/eventfd.h>
61#include <linux/poll.h>
62#include <linux/flex_array.h>
63#include <linux/kthread.h>
64
65#include <linux/atomic.h>
66
67
/*
 * Bias recognised by css_unbias_refcnt(): a negative raw refcount has this
 * value subtracted from it to recover the real count.
 */
#define CSS_DEACT_BIAS INT_MIN
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
/*
 * cgroup_mutex is the lock taken by cgroup_lock()/cgroup_unlock(); it is
 * asserted held by rebind_subsystems(), parse_cgroupfs_options() and
 * task_cgroup_from_root().
 *
 * cgroup_root_mutex is taken after cgroup_mutex on the paths that take both
 * (mount, remount).  cgroup_show_options() takes it alone while reading a
 * root's subsystem list, flags, release agent path and name.
 */
static DEFINE_MUTEX(cgroup_mutex);
static DEFINE_MUTEX(cgroup_root_mutex);
88
89
90
91
92
93
94
/*
 * Table of subsystem pointers, indexed by subsystem id.  SUBSYS(foo) expands
 * to "&foo_subsys,", and <linux/cgroup_subsys.h> is included inside the
 * initializer to emit the entries (presumably one SUBSYS() line per
 * subsystem - confirm against that header).  Slots can be NULL: users in
 * this file either skip NULL entries or BUG_ON() them.
 */
#define SUBSYS(_x) &_x ## _subsys,
static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
#include <linux/cgroup_subsys.h>
};
99
/* Size of cgroupfs_root.name, including the terminating NUL. */
#define MAX_CGROUP_ROOT_NAMELEN 64

/*
 * Per-hierarchy root object.  One is allocated for each new mount by
 * cgroup_root_from_opts() and stored in super_block->s_fs_info; the static
 * "rootnode" is an additional instance that is never mounted.
 */
struct cgroupfs_root {
	/* Superblock of the mount; set by cgroup_set_super(). */
	struct super_block *sb;

	/*
	 * Bitmask of subsystems for this hierarchy (bit i <=> subsys[i]).
	 * Filled from the mount options, then overwritten by
	 * rebind_subsystems().
	 */
	unsigned long subsys_bits;

	/* Unique id allocated from hierarchy_ida by init_root_id(). */
	int hierarchy_id;

	/* Bitmask of subsystems actually bound; set by rebind_subsystems(). */
	unsigned long actual_subsys_bits;

	/* Subsystems bound to this root, linked via cgroup_subsys.sibling. */
	struct list_head subsys_list;

	/* The top-level cgroup of this hierarchy, embedded in the root. */
	struct cgroup top_cgroup;

	/* Number of cgroups in the hierarchy; initialised to 1 (top_cgroup). */
	int number_of_cgroups;

	/* Entry in the global "roots" list. */
	struct list_head root_list;

	/* Cgroups of this hierarchy, linked via cgroup.allcg_node. */
	struct list_head allcg_list;

	/* Bitmask of root flag bit numbers (ROOT_NOPREFIX). */
	unsigned long flags;

	/* Path from the "release_agent=" option; empty string if unset. */
	char release_agent_path[PATH_MAX];

	/* Name from the "name=" option; empty string if unset. */
	char name[MAX_CGROUP_ROOT_NAMELEN];
};
146
147
148
149
150
151
/*
 * The default root.  A subsystem whose ->root points here is not attached to
 * any mounted hierarchy: rebind_subsystems() refuses to bind a subsystem
 * whose root is anything else, and hands unbound subsystems back to it.
 */
static struct cgroupfs_root rootnode;
153
154
155
156
/*
 * Per-file bookkeeping.  A cfent is the d_fsdata of a control-file dentry
 * (see __d_cfe()) and is linked on the owning cgroup's ->files list.
 */
struct cfent {
	struct list_head node;	/* entry in cgroup->files */
	struct dentry *dentry;	/* dentry of the file */
	struct cftype *type;	/* file type descriptor, see __d_cft() */
};
162
163
164
165
166
167#define CSS_ID_MAX (65535)
168struct css_id {
169
170
171
172
173
174
175
176 struct cgroup_subsys_state __rcu *css;
177
178
179
180 unsigned short id;
181
182
183
184 unsigned short depth;
185
186
187
188 struct rcu_head rcu_head;
189
190
191
192 unsigned short stack[0];
193};
194
195
196
197
/*
 * State for one event registered on a cgroup control file.
 *
 * NOTE(review): the code that uses this structure is outside this part of
 * the file; the field notes are inferred from types and names only.
 */
struct cgroup_event {
	/* Cgroup the event belongs to. */
	struct cgroup *cgrp;

	/* Control file the event is attached to. */
	struct cftype *cft;

	/* eventfd context associated with the event. */
	struct eventfd_ctx *eventfd;

	/*
	 * List linkage (presumably on cgroup->event_list, which
	 * init_cgroup_housekeeping() initialises - confirm).
	 */
	struct list_head list;

	/*
	 * Poll-table / wait-queue plumbing and a work item named "remove";
	 * how they are wired together is not visible here.
	 */
	poll_table pt;
	wait_queue_head_t *wqh;
	wait_queue_t wait;
	struct work_struct remove;
};
224
225
226
/*
 * List of mounted hierarchy roots (linked via cgroupfs_root.root_list) and
 * the number of entries on it; cgroup_mount() adds to both.
 */
static LIST_HEAD(roots);
static int root_count;

/*
 * Allocator state for cgroupfs_root.hierarchy_id, see init_root_id().
 * next_hierarchy_id is the first id tried for the next allocation;
 * hierarchy_id_lock is held around every use of the ida.
 */
static DEFINE_IDA(hierarchy_ida);
static int next_hierarchy_id;
static DEFINE_SPINLOCK(hierarchy_id_lock);

/* Top cgroup of the default root; holds the state of unbound subsystems. */
#define dummytop (&rootnode.top_cgroup)
236
237
238
239
240
241
/*
 * NOTE(review): not referenced in this part of the file.  Presumably set
 * when some subsystem registers fork/exit callbacks - confirm at the users.
 */
static int need_forkexit_callback __read_mostly;
243
244#ifdef CONFIG_PROVE_LOCKING
245int cgroup_lock_is_held(void)
246{
247 return lockdep_is_held(&cgroup_mutex);
248}
249#else
250int cgroup_lock_is_held(void)
251{
252 return mutex_is_locked(&cgroup_mutex);
253}
254#endif
255
256EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
257
258static int css_unbias_refcnt(int refcnt)
259{
260 return refcnt >= 0 ? refcnt : refcnt - CSS_DEACT_BIAS;
261}
262
263
264static int css_refcnt(struct cgroup_subsys_state *css)
265{
266 int v = atomic_read(&css->refcnt);
267
268 return css_unbias_refcnt(v);
269}
270
271
272inline int cgroup_is_removed(const struct cgroup *cgrp)
273{
274 return test_bit(CGRP_REMOVED, &cgrp->flags);
275}
276
277
/*
 * Bit numbers for cgroupfs_root.flags (and cgroup_sb_opts.flags).
 * ROOT_NOPREFIX is set by the "noprefix" mount option.
 */
enum {
	ROOT_NOPREFIX,
};
281
282static int cgroup_is_releasable(const struct cgroup *cgrp)
283{
284 const int bits =
285 (1 << CGRP_RELEASABLE) |
286 (1 << CGRP_NOTIFY_ON_RELEASE);
287 return (cgrp->flags & bits) == bits;
288}
289
290static int notify_on_release(const struct cgroup *cgrp)
291{
292 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
293}
294
295static int clone_children(const struct cgroup *cgrp)
296{
297 return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
298}
299
300
301
302
303
/*
 * for_each_subsys - iterate over the subsystems bound to a hierarchy
 * @_root: pointer expression yielding the struct cgroupfs_root
 * @_ss: struct cgroup_subsys * used as the loop cursor
 *
 * @_root is parenthesised so that any pointer expression (not only a plain
 * identifier or member chain) binds correctly to the "->".
 */
#define for_each_subsys(_root, _ss) \
list_for_each_entry(_ss, &(_root)->subsys_list, sibling)

/*
 * for_each_active_root - iterate over every root on the global "roots" list
 * @_root: struct cgroupfs_root * used as the loop cursor
 */
#define for_each_active_root(_root) \
list_for_each_entry(_root, &roots, root_list)
310
311static inline struct cgroup *__d_cgrp(struct dentry *dentry)
312{
313 return dentry->d_fsdata;
314}
315
316static inline struct cfent *__d_cfe(struct dentry *dentry)
317{
318 return dentry->d_fsdata;
319}
320
321static inline struct cftype *__d_cft(struct dentry *dentry)
322{
323 return __d_cfe(dentry)->type;
324}
325
326
327
/*
 * Release-agent machinery.  The definitions of cgroup_release_agent() and
 * check_for_release() are not in this part of the file; __put_css_set()
 * calls check_for_release() when a notify_on_release cgroup loses its last
 * css_set link.  release_list is presumably the queue consumed by
 * release_agent_work, protected by release_list_lock - confirm.
 */
static LIST_HEAD(release_list);
static DEFINE_RAW_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
333
334
/*
 * Link object joining one css_set to one cgroup.  Each link sits on two
 * lists at once (see link_css_set()).
 */
struct cg_cgroup_link {
	/*
	 * Entry in cgrp->css_sets, the list of links for a given cgroup.
	 * Also used to chain not-yet-used links on a temporary list
	 * (allocate_cg_links()/free_cg_links()).
	 */
	struct list_head cgrp_link_list;
	struct cgroup *cgrp;

	/*
	 * Entry in cg->cg_links, the list of links for a given css_set.
	 * New links are appended at the tail.
	 */
	struct list_head cg_link_list;
	struct css_set *cg;
};
349
350
351
352
353
354
355
356
/*
 * The default css_set and a link object for it.  task_cgroup_from_root()
 * treats a task whose ->cgroups is &init_css_set as living in the top
 * cgroup of whichever root is being queried.
 */
static struct css_set init_css_set;
static struct cg_cgroup_link init_css_set_link;

/* Forward declaration; the definition is not in this part of the file. */
static int cgroup_init_idr(struct cgroup_subsys *ss,
			   struct cgroup_subsys_state *css);
362
363
364
365
/*
 * css_set_lock is read-held for css_set lookups and link walks, and
 * write-held while css_sets or cg_cgroup_links are added or removed.
 * css_set_count is the number of css_sets in the hash table; it is changed
 * only under the write lock in this part of the file.
 */
static DEFINE_RWLOCK(css_set_lock);
static int css_set_count;

/*
 * Hash table of all css_sets, keyed on their subsys[] pointer array (see
 * css_set_hash()).  2^CSS_SET_HASH_BITS buckets.
 */
#define CSS_SET_HASH_BITS 7
#define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS)
static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
377
378static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
379{
380 int i;
381 int index;
382 unsigned long tmp = 0UL;
383
384 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
385 tmp += (unsigned long)css[i];
386 tmp = (tmp >> 16) ^ tmp;
387
388 index = hash_long(tmp, CSS_SET_HASH_BITS);
389
390 return &css_set_table[index];
391}
392
393
394
395
396
/*
 * NOTE(review): not referenced in this part of the file.  Presumably records
 * whether tasks are being linked onto css_set->tasks - confirm at the users.
 */
static int use_task_css_set_links __read_mostly;
398
399static void __put_css_set(struct css_set *cg, int taskexit)
400{
401 struct cg_cgroup_link *link;
402 struct cg_cgroup_link *saved_link;
403
404
405
406
407
408 if (atomic_add_unless(&cg->refcount, -1, 1))
409 return;
410 write_lock(&css_set_lock);
411 if (!atomic_dec_and_test(&cg->refcount)) {
412 write_unlock(&css_set_lock);
413 return;
414 }
415
416
417 hlist_del(&cg->hlist);
418 css_set_count--;
419
420 list_for_each_entry_safe(link, saved_link, &cg->cg_links,
421 cg_link_list) {
422 struct cgroup *cgrp = link->cgrp;
423 list_del(&link->cg_link_list);
424 list_del(&link->cgrp_link_list);
425 if (atomic_dec_and_test(&cgrp->count) &&
426 notify_on_release(cgrp)) {
427 if (taskexit)
428 set_bit(CGRP_RELEASABLE, &cgrp->flags);
429 check_for_release(cgrp);
430 }
431
432 kfree(link);
433 }
434
435 write_unlock(&css_set_lock);
436 kfree_rcu(cg, rcu_head);
437}
438
439
440
441
442static inline void get_css_set(struct css_set *cg)
443{
444 atomic_inc(&cg->refcount);
445}
446
/* Drop a reference on @cg; not the task-exit variant. */
static inline void put_css_set(struct css_set *cg)
{
	const int taskexit = 0;

	__put_css_set(cg, taskexit);
}
451
/* Drop a reference on @cg on behalf of an exiting task (taskexit == 1). */
static inline void put_css_set_taskexit(struct css_set *cg)
{
	const int taskexit = 1;

	__put_css_set(cg, taskexit);
}
456
457
458
459
460
461
462
463
464
465
466
467static bool compare_css_sets(struct css_set *cg,
468 struct css_set *old_cg,
469 struct cgroup *new_cgrp,
470 struct cgroup_subsys_state *template[])
471{
472 struct list_head *l1, *l2;
473
474 if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
475
476 return false;
477 }
478
479
480
481
482
483
484
485
486
487
488 l1 = &cg->cg_links;
489 l2 = &old_cg->cg_links;
490 while (1) {
491 struct cg_cgroup_link *cgl1, *cgl2;
492 struct cgroup *cg1, *cg2;
493
494 l1 = l1->next;
495 l2 = l2->next;
496
497 if (l1 == &cg->cg_links) {
498 BUG_ON(l2 != &old_cg->cg_links);
499 break;
500 } else {
501 BUG_ON(l2 == &old_cg->cg_links);
502 }
503
504 cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
505 cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
506 cg1 = cgl1->cgrp;
507 cg2 = cgl2->cgrp;
508
509 BUG_ON(cg1->root != cg2->root);
510
511
512
513
514
515
516
517
518 if (cg1->root == new_cgrp->root) {
519 if (cg1 != new_cgrp)
520 return false;
521 } else {
522 if (cg1 != cg2)
523 return false;
524 }
525 }
526 return true;
527}
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542static struct css_set *find_existing_css_set(
543 struct css_set *oldcg,
544 struct cgroup *cgrp,
545 struct cgroup_subsys_state *template[])
546{
547 int i;
548 struct cgroupfs_root *root = cgrp->root;
549 struct hlist_head *hhead;
550 struct hlist_node *node;
551 struct css_set *cg;
552
553
554
555
556
557
558 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
559 if (root->subsys_bits & (1UL << i)) {
560
561
562
563 template[i] = cgrp->subsys[i];
564 } else {
565
566
567 template[i] = oldcg->subsys[i];
568 }
569 }
570
571 hhead = css_set_hash(template);
572 hlist_for_each_entry(cg, node, hhead, hlist) {
573 if (!compare_css_sets(cg, oldcg, cgrp, template))
574 continue;
575
576
577 return cg;
578 }
579
580
581 return NULL;
582}
583
584static void free_cg_links(struct list_head *tmp)
585{
586 struct cg_cgroup_link *link;
587 struct cg_cgroup_link *saved_link;
588
589 list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
590 list_del(&link->cgrp_link_list);
591 kfree(link);
592 }
593}
594
595
596
597
598
599
600static int allocate_cg_links(int count, struct list_head *tmp)
601{
602 struct cg_cgroup_link *link;
603 int i;
604 INIT_LIST_HEAD(tmp);
605 for (i = 0; i < count; i++) {
606 link = kmalloc(sizeof(*link), GFP_KERNEL);
607 if (!link) {
608 free_cg_links(tmp);
609 return -ENOMEM;
610 }
611 list_add(&link->cgrp_link_list, tmp);
612 }
613 return 0;
614}
615
616
617
618
619
620
621
622static void link_css_set(struct list_head *tmp_cg_links,
623 struct css_set *cg, struct cgroup *cgrp)
624{
625 struct cg_cgroup_link *link;
626
627 BUG_ON(list_empty(tmp_cg_links));
628 link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
629 cgrp_link_list);
630 link->cg = cg;
631 link->cgrp = cgrp;
632 atomic_inc(&cgrp->count);
633 list_move(&link->cgrp_link_list, &cgrp->css_sets);
634
635
636
637
638 list_add_tail(&link->cg_link_list, &cg->cg_links);
639}
640
641
642
643
644
645
646
647
648static struct css_set *find_css_set(
649 struct css_set *oldcg, struct cgroup *cgrp)
650{
651 struct css_set *res;
652 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
653
654 struct list_head tmp_cg_links;
655
656 struct hlist_head *hhead;
657 struct cg_cgroup_link *link;
658
659
660
661 read_lock(&css_set_lock);
662 res = find_existing_css_set(oldcg, cgrp, template);
663 if (res)
664 get_css_set(res);
665 read_unlock(&css_set_lock);
666
667 if (res)
668 return res;
669
670 res = kmalloc(sizeof(*res), GFP_KERNEL);
671 if (!res)
672 return NULL;
673
674
675 if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
676 kfree(res);
677 return NULL;
678 }
679
680 atomic_set(&res->refcount, 1);
681 INIT_LIST_HEAD(&res->cg_links);
682 INIT_LIST_HEAD(&res->tasks);
683 INIT_HLIST_NODE(&res->hlist);
684
685
686
687 memcpy(res->subsys, template, sizeof(res->subsys));
688
689 write_lock(&css_set_lock);
690
691 list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
692 struct cgroup *c = link->cgrp;
693 if (c->root == cgrp->root)
694 c = cgrp;
695 link_css_set(&tmp_cg_links, res, c);
696 }
697
698 BUG_ON(!list_empty(&tmp_cg_links));
699
700 css_set_count++;
701
702
703 hhead = css_set_hash(res->subsys);
704 hlist_add_head(&res->hlist, hhead);
705
706 write_unlock(&css_set_lock);
707
708 return res;
709}
710
711
712
713
714
715static struct cgroup *task_cgroup_from_root(struct task_struct *task,
716 struct cgroupfs_root *root)
717{
718 struct css_set *css;
719 struct cgroup *res = NULL;
720
721 BUG_ON(!mutex_is_locked(&cgroup_mutex));
722 read_lock(&css_set_lock);
723
724
725
726
727
728 css = task->cgroups;
729 if (css == &init_css_set) {
730 res = &root->top_cgroup;
731 } else {
732 struct cg_cgroup_link *link;
733 list_for_each_entry(link, &css->cg_links, cg_link_list) {
734 struct cgroup *c = link->cgrp;
735 if (c->root == root) {
736 res = c;
737 break;
738 }
739 }
740 }
741 read_unlock(&css_set_lock);
742 BUG_ON(!res);
743 return res;
744}
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800void cgroup_lock(void)
801{
802 mutex_lock(&cgroup_mutex);
803}
804EXPORT_SYMBOL_GPL(cgroup_lock);
805
806
807
808
809
810
811void cgroup_unlock(void)
812{
813 mutex_unlock(&cgroup_mutex);
814}
815EXPORT_SYMBOL_GPL(cgroup_unlock);
816
817
818
819
820
821
822
823
/*
 * Forward declarations for functions and operation tables whose definitions
 * are not in this part of the file.
 */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp);
static const struct inode_operations cgroup_dir_inode_operations;
static const struct file_operations proc_cgroupstats_operations;

/*
 * Backing device info assigned to every cgroupfs inode's mapping by
 * cgroup_new_inode(); declares that no accounting or writeback applies.
 */
static struct backing_dev_info cgroup_backing_dev_info = {
	.name = "cgroup",
	.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
};

static int alloc_css_id(struct cgroup_subsys *ss,
			struct cgroup *parent, struct cgroup *child);
838
839static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
840{
841 struct inode *inode = new_inode(sb);
842
843 if (inode) {
844 inode->i_ino = get_next_ino();
845 inode->i_mode = mode;
846 inode->i_uid = current_fsuid();
847 inode->i_gid = current_fsgid();
848 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
849 inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
850 }
851 return inode;
852}
853
854
855
856
857
858static int cgroup_call_pre_destroy(struct cgroup *cgrp)
859{
860 struct cgroup_subsys *ss;
861 int ret = 0;
862
863 for_each_subsys(cgrp->root, ss) {
864 if (!ss->pre_destroy)
865 continue;
866
867 ret = ss->pre_destroy(cgrp);
868 if (ret) {
869
870 WARN_ON_ONCE(!ss->__DEPRECATED_clear_css_refs);
871 break;
872 }
873 }
874
875 return ret;
876}
877
878static void cgroup_diput(struct dentry *dentry, struct inode *inode)
879{
880
881 if (S_ISDIR(inode->i_mode)) {
882 struct cgroup *cgrp = dentry->d_fsdata;
883 struct cgroup_subsys *ss;
884 BUG_ON(!(cgroup_is_removed(cgrp)));
885
886
887
888
889
890
891 synchronize_rcu();
892
893 mutex_lock(&cgroup_mutex);
894
895
896
897 for_each_subsys(cgrp->root, ss)
898 ss->destroy(cgrp);
899
900 cgrp->root->number_of_cgroups--;
901 mutex_unlock(&cgroup_mutex);
902
903
904
905
906
907 deactivate_super(cgrp->root->sb);
908
909
910
911
912
913 BUG_ON(!list_empty(&cgrp->pidlists));
914
915 kfree_rcu(cgrp, rcu_head);
916 } else {
917 struct cfent *cfe = __d_cfe(dentry);
918 struct cgroup *cgrp = dentry->d_parent->d_fsdata;
919
920 WARN_ONCE(!list_empty(&cfe->node) &&
921 cgrp != &cgrp->root->top_cgroup,
922 "cfe still linked for %s\n", cfe->type->name);
923 kfree(cfe);
924 }
925 iput(inode);
926}
927
/*
 * ->d_delete() for cgroupfs dentries: unconditionally returns 1, whatever
 * dentry is passed in.
 */
static int cgroup_delete(const struct dentry *d)
{
	const int always_delete = 1;

	return always_delete;
}
932
933static void remove_dir(struct dentry *d)
934{
935 struct dentry *parent = dget(d->d_parent);
936
937 d_delete(d);
938 simple_rmdir(parent->d_inode, d);
939 dput(parent);
940}
941
942static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
943{
944 struct cfent *cfe;
945
946 lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex);
947 lockdep_assert_held(&cgroup_mutex);
948
949 list_for_each_entry(cfe, &cgrp->files, node) {
950 struct dentry *d = cfe->dentry;
951
952 if (cft && cfe->type != cft)
953 continue;
954
955 dget(d);
956 d_delete(d);
957 simple_unlink(cgrp->dentry->d_inode, d);
958 list_del_init(&cfe->node);
959 dput(d);
960
961 return 0;
962 }
963 return -ENOENT;
964}
965
966static void cgroup_clear_directory(struct dentry *dir)
967{
968 struct cgroup *cgrp = __d_cgrp(dir);
969
970 while (!list_empty(&cgrp->files))
971 cgroup_rm_file(cgrp, NULL);
972}
973
974
975
976
977static void cgroup_d_remove_dir(struct dentry *dentry)
978{
979 struct dentry *parent;
980
981 cgroup_clear_directory(dentry);
982
983 parent = dentry->d_parent;
984 spin_lock(&parent->d_lock);
985 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
986 list_del_init(&dentry->d_u.d_child);
987 spin_unlock(&dentry->d_lock);
988 spin_unlock(&parent->d_lock);
989 remove_dir(dentry);
990}
991
992
993
994
995
996
997
998
999
1000static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
1001
1002static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
1003{
1004 if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
1005 wake_up_all(&cgroup_rmdir_waitq);
1006}
1007
/*
 * cgroup_exclude_rmdir - take a reference on @css
 *
 * Pairs with cgroup_release_and_wakeup_rmdir(), which drops the reference
 * again.
 */
void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
{
	struct cgroup_subsys_state *pinned = css;

	css_get(pinned);
}
1012
1013void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
1014{
1015 cgroup_wakeup_rmdir_waiter(css->cgroup);
1016 css_put(css);
1017}
1018
1019
1020
1021
1022
1023
/*
 * rebind_subsystems - change the set of subsystems bound to @root
 * @root: hierarchy to modify
 * @final_bits: bitmask of subsystems that should be bound afterwards
 *
 * Caller must hold cgroup_mutex and cgroup_root_mutex (BUG otherwise).
 *
 * Returns -EBUSY, without changing anything, if a subsystem to be added is
 * currently bound to a root other than rootnode, or if the hierarchy has any
 * cgroup besides its top cgroup.  Returns 0 on success.
 *
 * Module references: the loop below assumes one reference per subsystem in
 * @final_bits was taken beforehand (parse_cgroupfs_options() does this for
 * modular subsystems).  It keeps that reference for newly added
 * subsystems, drops the hierarchy's reference for removed ones, and drops
 * the extra one for subsystems that were already bound.
 */
static int rebind_subsystems(struct cgroupfs_root *root,
			      unsigned long final_bits)
{
	unsigned long added_bits, removed_bits;
	struct cgroup *cgrp = &root->top_cgroup;
	int i;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));
	BUG_ON(!mutex_is_locked(&cgroup_root_mutex));

	removed_bits = root->actual_subsys_bits & ~final_bits;
	added_bits = final_bits & ~root->actual_subsys_bits;

	/* First pass: verify every subsystem to be added is currently free. */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		unsigned long bit = 1UL << i;
		struct cgroup_subsys *ss = subsys[i];
		if (!(bit & added_bits))
			continue;

		/* A bit in added_bits for an empty subsys[] slot is a bug. */
		BUG_ON(ss == NULL);
		if (ss->root != &rootnode) {
			/* Subsystem is bound to another hierarchy. */
			return -EBUSY;
		}
	}

	/*
	 * Rebinding is only allowed while the hierarchy consists of nothing
	 * but its top cgroup.
	 */
	if (root->number_of_cgroups > 1)
		return -EBUSY;

	/* Second pass: no failure is possible from here on. */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		unsigned long bit = 1UL << i;
		if (bit & added_bits) {
			/* Bind: move the css from dummytop to this root. */
			BUG_ON(ss == NULL);
			BUG_ON(cgrp->subsys[i]);
			BUG_ON(!dummytop->subsys[i]);
			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
			cgrp->subsys[i] = dummytop->subsys[i];
			cgrp->subsys[i]->cgroup = cgrp;
			list_move(&ss->sibling, &root->subsys_list);
			ss->root = root;
			if (ss->bind)
				ss->bind(cgrp);
			/* The module reference taken earlier is kept. */
		} else if (bit & removed_bits) {
			/* Unbind: hand the css back to dummytop/rootnode. */
			BUG_ON(ss == NULL);
			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
			if (ss->bind)
				ss->bind(dummytop);
			dummytop->subsys[i]->cgroup = dummytop;
			cgrp->subsys[i] = NULL;
			subsys[i]->root = &rootnode;
			list_move(&ss->sibling, &rootnode.subsys_list);
			/* Subsystem is free again: drop the module reference. */
			module_put(ss->module);
		} else if (bit & final_bits) {
			/* Already bound and staying bound. */
			BUG_ON(ss == NULL);
			BUG_ON(!cgrp->subsys[i]);
			/*
			 * A second module reference was taken for this
			 * request; drop it, the original one must remain.
			 */
			module_put(ss->module);
#ifdef CONFIG_MODULE_UNLOAD
			BUG_ON(ss->module && !module_refcount(ss->module));
#endif
		} else {
			/* Not bound before, not bound after. */
			BUG_ON(cgrp->subsys[i]);
		}
	}
	root->subsys_bits = root->actual_subsys_bits = final_bits;
	synchronize_rcu();

	return 0;
}
1113
1114static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
1115{
1116 struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
1117 struct cgroup_subsys *ss;
1118
1119 mutex_lock(&cgroup_root_mutex);
1120 for_each_subsys(root, ss)
1121 seq_printf(seq, ",%s", ss->name);
1122 if (test_bit(ROOT_NOPREFIX, &root->flags))
1123 seq_puts(seq, ",noprefix");
1124 if (strlen(root->release_agent_path))
1125 seq_printf(seq, ",release_agent=%s", root->release_agent_path);
1126 if (clone_children(&root->top_cgroup))
1127 seq_puts(seq, ",clone_children");
1128 if (strlen(root->name))
1129 seq_printf(seq, ",name=%s", root->name);
1130 mutex_unlock(&cgroup_root_mutex);
1131 return 0;
1132}
1133
/*
 * Parsed mount options, filled in by parse_cgroupfs_options() and passed as
 * the opaque data pointer to cgroup_test_super()/cgroup_set_super().
 */
struct cgroup_sb_opts {
	unsigned long subsys_bits;	/* requested subsystems, bit i <=> subsys[i] */
	unsigned long flags;		/* root flag bits (ROOT_NOPREFIX) */
	char *release_agent;		/* kstrndup()ed "release_agent=" value or NULL */
	bool clone_children;		/* "clone_children" was given */
	char *name;			/* kstrndup()ed "name=" value or NULL */

	bool none;			/* "none" was given: no subsystems wanted */

	/* Root preallocated by cgroup_mount() for a possibly-new superblock. */
	struct cgroupfs_root *new_root;

};
1146
1147
1148
1149
1150
1151
1152
1153static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1154{
1155 char *token, *o = data;
1156 bool all_ss = false, one_ss = false;
1157 unsigned long mask = (unsigned long)-1;
1158 int i;
1159 bool module_pin_failed = false;
1160
1161 BUG_ON(!mutex_is_locked(&cgroup_mutex));
1162
1163#ifdef CONFIG_CPUSETS
1164 mask = ~(1UL << cpuset_subsys_id);
1165#endif
1166
1167 memset(opts, 0, sizeof(*opts));
1168
1169 while ((token = strsep(&o, ",")) != NULL) {
1170 if (!*token)
1171 return -EINVAL;
1172 if (!strcmp(token, "none")) {
1173
1174 opts->none = true;
1175 continue;
1176 }
1177 if (!strcmp(token, "all")) {
1178
1179 if (one_ss)
1180 return -EINVAL;
1181 all_ss = true;
1182 continue;
1183 }
1184 if (!strcmp(token, "noprefix")) {
1185 set_bit(ROOT_NOPREFIX, &opts->flags);
1186 continue;
1187 }
1188 if (!strcmp(token, "clone_children")) {
1189 opts->clone_children = true;
1190 continue;
1191 }
1192 if (!strncmp(token, "release_agent=", 14)) {
1193
1194 if (opts->release_agent)
1195 return -EINVAL;
1196 opts->release_agent =
1197 kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
1198 if (!opts->release_agent)
1199 return -ENOMEM;
1200 continue;
1201 }
1202 if (!strncmp(token, "name=", 5)) {
1203 const char *name = token + 5;
1204
1205 if (!strlen(name))
1206 return -EINVAL;
1207
1208 for (i = 0; i < strlen(name); i++) {
1209 char c = name[i];
1210 if (isalnum(c))
1211 continue;
1212 if ((c == '.') || (c == '-') || (c == '_'))
1213 continue;
1214 return -EINVAL;
1215 }
1216
1217 if (opts->name)
1218 return -EINVAL;
1219 opts->name = kstrndup(name,
1220 MAX_CGROUP_ROOT_NAMELEN - 1,
1221 GFP_KERNEL);
1222 if (!opts->name)
1223 return -ENOMEM;
1224
1225 continue;
1226 }
1227
1228 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1229 struct cgroup_subsys *ss = subsys[i];
1230 if (ss == NULL)
1231 continue;
1232 if (strcmp(token, ss->name))
1233 continue;
1234 if (ss->disabled)
1235 continue;
1236
1237
1238 if (all_ss)
1239 return -EINVAL;
1240 set_bit(i, &opts->subsys_bits);
1241 one_ss = true;
1242
1243 break;
1244 }
1245 if (i == CGROUP_SUBSYS_COUNT)
1246 return -ENOENT;
1247 }
1248
1249
1250
1251
1252
1253
1254 if (all_ss || (!one_ss && !opts->none && !opts->name)) {
1255 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1256 struct cgroup_subsys *ss = subsys[i];
1257 if (ss == NULL)
1258 continue;
1259 if (ss->disabled)
1260 continue;
1261 set_bit(i, &opts->subsys_bits);
1262 }
1263 }
1264
1265
1266
1267
1268
1269
1270
1271
1272 if (test_bit(ROOT_NOPREFIX, &opts->flags) &&
1273 (opts->subsys_bits & mask))
1274 return -EINVAL;
1275
1276
1277
1278 if (opts->subsys_bits && opts->none)
1279 return -EINVAL;
1280
1281
1282
1283
1284
1285 if (!opts->subsys_bits && !opts->name)
1286 return -EINVAL;
1287
1288
1289
1290
1291
1292
1293
1294 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
1295 unsigned long bit = 1UL << i;
1296
1297 if (!(bit & opts->subsys_bits))
1298 continue;
1299 if (!try_module_get(subsys[i]->module)) {
1300 module_pin_failed = true;
1301 break;
1302 }
1303 }
1304 if (module_pin_failed) {
1305
1306
1307
1308
1309
1310 for (i--; i >= CGROUP_BUILTIN_SUBSYS_COUNT; i--) {
1311
1312 unsigned long bit = 1UL << i;
1313
1314 if (!(bit & opts->subsys_bits))
1315 continue;
1316 module_put(subsys[i]->module);
1317 }
1318 return -ENOENT;
1319 }
1320
1321 return 0;
1322}
1323
1324static void drop_parsed_module_refcounts(unsigned long subsys_bits)
1325{
1326 int i;
1327 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
1328 unsigned long bit = 1UL << i;
1329
1330 if (!(bit & subsys_bits))
1331 continue;
1332 module_put(subsys[i]->module);
1333 }
1334}
1335
1336static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1337{
1338 int ret = 0;
1339 struct cgroupfs_root *root = sb->s_fs_info;
1340 struct cgroup *cgrp = &root->top_cgroup;
1341 struct cgroup_sb_opts opts;
1342
1343 mutex_lock(&cgrp->dentry->d_inode->i_mutex);
1344 mutex_lock(&cgroup_mutex);
1345 mutex_lock(&cgroup_root_mutex);
1346
1347
1348 ret = parse_cgroupfs_options(data, &opts);
1349 if (ret)
1350 goto out_unlock;
1351
1352
1353 if (opts.subsys_bits != root->actual_subsys_bits || opts.release_agent)
1354 pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n",
1355 task_tgid_nr(current), current->comm);
1356
1357
1358 if (opts.flags != root->flags ||
1359 (opts.name && strcmp(opts.name, root->name))) {
1360 ret = -EINVAL;
1361 drop_parsed_module_refcounts(opts.subsys_bits);
1362 goto out_unlock;
1363 }
1364
1365 ret = rebind_subsystems(root, opts.subsys_bits);
1366 if (ret) {
1367 drop_parsed_module_refcounts(opts.subsys_bits);
1368 goto out_unlock;
1369 }
1370
1371
1372 cgroup_clear_directory(cgrp->dentry);
1373 cgroup_populate_dir(cgrp);
1374
1375 if (opts.release_agent)
1376 strcpy(root->release_agent_path, opts.release_agent);
1377 out_unlock:
1378 kfree(opts.release_agent);
1379 kfree(opts.name);
1380 mutex_unlock(&cgroup_root_mutex);
1381 mutex_unlock(&cgroup_mutex);
1382 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
1383 return ret;
1384}
1385
/* Superblock operations installed on every cgroupfs sb by cgroup_set_super(). */
static const struct super_operations cgroup_ops = {
	.statfs = simple_statfs,
	.drop_inode = generic_delete_inode,
	.show_options = cgroup_show_options,
	.remount_fs = cgroup_remount,
};
1392
1393static void init_cgroup_housekeeping(struct cgroup *cgrp)
1394{
1395 INIT_LIST_HEAD(&cgrp->sibling);
1396 INIT_LIST_HEAD(&cgrp->children);
1397 INIT_LIST_HEAD(&cgrp->files);
1398 INIT_LIST_HEAD(&cgrp->css_sets);
1399 INIT_LIST_HEAD(&cgrp->release_list);
1400 INIT_LIST_HEAD(&cgrp->pidlists);
1401 mutex_init(&cgrp->pidlist_mutex);
1402 INIT_LIST_HEAD(&cgrp->event_list);
1403 spin_lock_init(&cgrp->event_list_lock);
1404}
1405
1406static void init_cgroup_root(struct cgroupfs_root *root)
1407{
1408 struct cgroup *cgrp = &root->top_cgroup;
1409
1410 INIT_LIST_HEAD(&root->subsys_list);
1411 INIT_LIST_HEAD(&root->root_list);
1412 INIT_LIST_HEAD(&root->allcg_list);
1413 root->number_of_cgroups = 1;
1414 cgrp->root = root;
1415 cgrp->top_cgroup = cgrp;
1416 list_add_tail(&cgrp->allcg_node, &root->allcg_list);
1417 init_cgroup_housekeeping(cgrp);
1418}
1419
1420static bool init_root_id(struct cgroupfs_root *root)
1421{
1422 int ret = 0;
1423
1424 do {
1425 if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL))
1426 return false;
1427 spin_lock(&hierarchy_id_lock);
1428
1429 ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
1430 &root->hierarchy_id);
1431 if (ret == -ENOSPC)
1432
1433 ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id);
1434 if (!ret) {
1435 next_hierarchy_id = root->hierarchy_id + 1;
1436 } else if (ret != -EAGAIN) {
1437
1438 BUG_ON(ret);
1439 }
1440 spin_unlock(&hierarchy_id_lock);
1441 } while (ret);
1442 return true;
1443}
1444
1445static int cgroup_test_super(struct super_block *sb, void *data)
1446{
1447 struct cgroup_sb_opts *opts = data;
1448 struct cgroupfs_root *root = sb->s_fs_info;
1449
1450
1451 if (opts->name && strcmp(opts->name, root->name))
1452 return 0;
1453
1454
1455
1456
1457
1458 if ((opts->subsys_bits || opts->none)
1459 && (opts->subsys_bits != root->subsys_bits))
1460 return 0;
1461
1462 return 1;
1463}
1464
1465static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
1466{
1467 struct cgroupfs_root *root;
1468
1469 if (!opts->subsys_bits && !opts->none)
1470 return NULL;
1471
1472 root = kzalloc(sizeof(*root), GFP_KERNEL);
1473 if (!root)
1474 return ERR_PTR(-ENOMEM);
1475
1476 if (!init_root_id(root)) {
1477 kfree(root);
1478 return ERR_PTR(-ENOMEM);
1479 }
1480 init_cgroup_root(root);
1481
1482 root->subsys_bits = opts->subsys_bits;
1483 root->flags = opts->flags;
1484 if (opts->release_agent)
1485 strcpy(root->release_agent_path, opts->release_agent);
1486 if (opts->name)
1487 strcpy(root->name, opts->name);
1488 if (opts->clone_children)
1489 set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags);
1490 return root;
1491}
1492
1493static void cgroup_drop_root(struct cgroupfs_root *root)
1494{
1495 if (!root)
1496 return;
1497
1498 BUG_ON(!root->hierarchy_id);
1499 spin_lock(&hierarchy_id_lock);
1500 ida_remove(&hierarchy_ida, root->hierarchy_id);
1501 spin_unlock(&hierarchy_id_lock);
1502 kfree(root);
1503}
1504
1505static int cgroup_set_super(struct super_block *sb, void *data)
1506{
1507 int ret;
1508 struct cgroup_sb_opts *opts = data;
1509
1510
1511 if (!opts->new_root)
1512 return -EINVAL;
1513
1514 BUG_ON(!opts->subsys_bits && !opts->none);
1515
1516 ret = set_anon_super(sb, NULL);
1517 if (ret)
1518 return ret;
1519
1520 sb->s_fs_info = opts->new_root;
1521 opts->new_root->sb = sb;
1522
1523 sb->s_blocksize = PAGE_CACHE_SIZE;
1524 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1525 sb->s_magic = CGROUP_SUPER_MAGIC;
1526 sb->s_op = &cgroup_ops;
1527
1528 return 0;
1529}
1530
1531static int cgroup_get_rootdir(struct super_block *sb)
1532{
1533 static const struct dentry_operations cgroup_dops = {
1534 .d_iput = cgroup_diput,
1535 .d_delete = cgroup_delete,
1536 };
1537
1538 struct inode *inode =
1539 cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
1540
1541 if (!inode)
1542 return -ENOMEM;
1543
1544 inode->i_fop = &simple_dir_operations;
1545 inode->i_op = &cgroup_dir_inode_operations;
1546
1547 inc_nlink(inode);
1548 sb->s_root = d_make_root(inode);
1549 if (!sb->s_root)
1550 return -ENOMEM;
1551
1552 sb->s_d_op = &cgroup_dops;
1553 return 0;
1554}
1555
/*
 * cgroup_mount - ->mount() for cgroupfs
 * @fs_type: the cgroup filesystem type
 * @flags: mount flags (not used here)
 * @unused_dev_name: ignored
 * @data: option string, parsed with parse_cgroupfs_options()
 *
 * Finds an existing hierarchy matching the options or sets up a new one, and
 * returns a referenced root dentry, or an ERR_PTR() on failure.
 *
 * Resource handling on the way out:
 *  - opts.release_agent / opts.name are always freed here;
 *  - module references pinned by the parser are dropped on every failure
 *    after parsing and when an existing hierarchy is reused; for a new
 *    hierarchy they are handed to rebind_subsystems();
 *  - the preallocated root is dropped explicitly if sget() fails or if an
 *    existing superblock was returned.  Once cgroup_set_super() has attached
 *    it to a new superblock it is not dropped here;
 *    deactivate_locked_super() is called instead.
 */
static struct dentry *cgroup_mount(struct file_system_type *fs_type,
			 int flags, const char *unused_dev_name,
			 void *data)
{
	struct cgroup_sb_opts opts;
	struct cgroupfs_root *root;
	int ret = 0;
	struct super_block *sb;
	struct cgroupfs_root *new_root;
	struct inode *inode;

	/* First work out which subsystems and options are requested. */
	mutex_lock(&cgroup_mutex);
	ret = parse_cgroupfs_options(data, &opts);
	mutex_unlock(&cgroup_mutex);
	if (ret)
		goto out_err;

	/*
	 * Allocate a root up front in case a new superblock is needed.
	 * new_root is NULL for a name-only request, which can only match an
	 * existing hierarchy.
	 */
	new_root = cgroup_root_from_opts(&opts);
	if (IS_ERR(new_root)) {
		ret = PTR_ERR(new_root);
		goto drop_modules;
	}
	opts.new_root = new_root;

	/* Find a matching superblock or create a new one. */
	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, 0, &opts);
	if (IS_ERR(sb)) {
		ret = PTR_ERR(sb);
		cgroup_drop_root(opts.new_root);
		goto drop_modules;
	}

	root = sb->s_fs_info;
	BUG_ON(!root);
	if (root == opts.new_root) {
		/* sget() created a new superblock around our new root. */
		struct list_head tmp_cg_links;
		struct cgroup *root_cgrp = &root->top_cgroup;
		struct cgroupfs_root *existing_root;
		const struct cred *cred;
		int i;

		BUG_ON(sb->s_root != NULL);

		ret = cgroup_get_rootdir(sb);
		if (ret)
			goto drop_new_super;
		inode = sb->s_root->d_inode;

		mutex_lock(&inode->i_mutex);
		mutex_lock(&cgroup_mutex);
		mutex_lock(&cgroup_root_mutex);

		/* A named hierarchy must not clash with a mounted one. */
		ret = -EBUSY;
		if (strlen(root->name))
			for_each_active_root(existing_root)
				if (!strcmp(existing_root->name, root->name))
					goto unlock_drop;

		/*
		 * Every existing css_set needs a link to the new top cgroup.
		 * Allocate them all now, while failure can still be handled,
		 * i.e. before the subsystems are rebound.
		 */
		ret = allocate_cg_links(css_set_count, &tmp_cg_links);
		if (ret)
			goto unlock_drop;

		ret = rebind_subsystems(root, root->subsys_bits);
		if (ret == -EBUSY) {
			free_cg_links(&tmp_cg_links);
			goto unlock_drop;
		}

		/* -EBUSY is the only failure rebind_subsystems() can report. */
		BUG_ON(ret);

		list_add(&root->root_list, &roots);
		root_count++;

		sb->s_root->d_fsdata = root_cgrp;
		root->top_cgroup.dentry = sb->s_root;

		/* Link the new top cgroup into every existing css_set. */
		write_lock(&css_set_lock);
		for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
			struct hlist_head *hhead = &css_set_table[i];
			struct hlist_node *node;
			struct css_set *cg;

			hlist_for_each_entry(cg, node, hhead, hlist)
				link_css_set(&tmp_cg_links, cg, root_cgrp);
		}
		write_unlock(&css_set_lock);

		/* Release whatever links were not needed. */
		free_cg_links(&tmp_cg_links);

		BUG_ON(!list_empty(&root_cgrp->sibling));
		BUG_ON(!list_empty(&root_cgrp->children));
		BUG_ON(root->number_of_cgroups != 1);

		/* Create the top directory's files with init's credentials. */
		cred = override_creds(&init_cred);
		cgroup_populate_dir(root_cgrp);
		revert_creds(cred);
		mutex_unlock(&cgroup_root_mutex);
		mutex_unlock(&cgroup_mutex);
		mutex_unlock(&inode->i_mutex);
	} else {
		/*
		 * An existing superblock was reused: the preallocated root
		 * (possibly NULL) is not needed ...
		 */
		cgroup_drop_root(opts.new_root);
		/* ... and neither are the module references from parsing. */
		drop_parsed_module_refcounts(opts.subsys_bits);
	}

	kfree(opts.release_agent);
	kfree(opts.name);
	return dget(sb->s_root);

 unlock_drop:
	mutex_unlock(&cgroup_root_mutex);
	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&inode->i_mutex);
 drop_new_super:
	deactivate_locked_super(sb);
 drop_modules:
	drop_parsed_module_refcounts(opts.subsys_bits);
 out_err:
	kfree(opts.release_agent);
	kfree(opts.name);
	return ERR_PTR(ret);
}
1704
1705static void cgroup_kill_sb(struct super_block *sb) {
1706 struct cgroupfs_root *root = sb->s_fs_info;
1707 struct cgroup *cgrp = &root->top_cgroup;
1708 int ret;
1709 struct cg_cgroup_link *link;
1710 struct cg_cgroup_link *saved_link;
1711
1712 BUG_ON(!root);
1713
1714 BUG_ON(root->number_of_cgroups != 1);
1715 BUG_ON(!list_empty(&cgrp->children));
1716 BUG_ON(!list_empty(&cgrp->sibling));
1717
1718 mutex_lock(&cgroup_mutex);
1719 mutex_lock(&cgroup_root_mutex);
1720
1721
1722 ret = rebind_subsystems(root, 0);
1723
1724 BUG_ON(ret);
1725
1726
1727
1728
1729
1730 write_lock(&css_set_lock);
1731
1732 list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
1733 cgrp_link_list) {
1734 list_del(&link->cg_link_list);
1735 list_del(&link->cgrp_link_list);
1736 kfree(link);
1737 }
1738 write_unlock(&css_set_lock);
1739
1740 if (!list_empty(&root->root_list)) {
1741 list_del(&root->root_list);
1742 root_count--;
1743 }
1744
1745 mutex_unlock(&cgroup_root_mutex);
1746 mutex_unlock(&cgroup_mutex);
1747
1748 kill_litter_super(sb);
1749 cgroup_drop_root(root);
1750}
1751
1752static struct file_system_type cgroup_fs_type = {
1753 .name = "cgroup",
1754 .mount = cgroup_mount,
1755 .kill_sb = cgroup_kill_sb,
1756};
1757
1758static struct kobject *cgroup_kobj;
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1771{
1772 char *start;
1773 struct dentry *dentry = rcu_dereference_check(cgrp->dentry,
1774 cgroup_lock_is_held());
1775
1776 if (!dentry || cgrp == dummytop) {
1777
1778
1779
1780
1781 strcpy(buf, "/");
1782 return 0;
1783 }
1784
1785 start = buf + buflen;
1786
1787 *--start = '\0';
1788 for (;;) {
1789 int len = dentry->d_name.len;
1790
1791 if ((start -= len) < buf)
1792 return -ENAMETOOLONG;
1793 memcpy(start, dentry->d_name.name, len);
1794 cgrp = cgrp->parent;
1795 if (!cgrp)
1796 break;
1797
1798 dentry = rcu_dereference_check(cgrp->dentry,
1799 cgroup_lock_is_held());
1800 if (!cgrp->parent)
1801 continue;
1802 if (--start < buf)
1803 return -ENAMETOOLONG;
1804 *start = '/';
1805 }
1806 memmove(buf, start, buf + buflen - start);
1807 return 0;
1808}
1809EXPORT_SYMBOL_GPL(cgroup_path);
1810
1811
1812
1813
/*
 * One migration entry: a task, the cgroup recorded for it when the entry
 * was built, and the css_set it is to be switched to.
 */
struct task_and_cgroup {
	struct task_struct	*task;	/* task being migrated */
	struct cgroup		*cgrp;	/* cgroup the task is moving away from */
	struct css_set		*cg;	/* css_set handed to cgroup_task_migrate() */
};
1819
1820struct cgroup_taskset {
1821 struct task_and_cgroup single;
1822 struct flex_array *tc_array;
1823 int tc_array_len;
1824 int idx;
1825 struct cgroup *cur_cgrp;
1826};
1827
1828
1829
1830
1831
1832
1833
1834struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
1835{
1836 if (tset->tc_array) {
1837 tset->idx = 0;
1838 return cgroup_taskset_next(tset);
1839 } else {
1840 tset->cur_cgrp = tset->single.cgrp;
1841 return tset->single.task;
1842 }
1843}
1844EXPORT_SYMBOL_GPL(cgroup_taskset_first);
1845
1846
1847
1848
1849
1850
1851
1852
1853struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
1854{
1855 struct task_and_cgroup *tc;
1856
1857 if (!tset->tc_array || tset->idx >= tset->tc_array_len)
1858 return NULL;
1859
1860 tc = flex_array_get(tset->tc_array, tset->idx++);
1861 tset->cur_cgrp = tc->cgrp;
1862 return tc->task;
1863}
1864EXPORT_SYMBOL_GPL(cgroup_taskset_next);
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset)
1875{
1876 return tset->cur_cgrp;
1877}
1878EXPORT_SYMBOL_GPL(cgroup_taskset_cur_cgroup);
1879
1880
1881
1882
1883
1884int cgroup_taskset_size(struct cgroup_taskset *tset)
1885{
1886 return tset->tc_array ? tset->tc_array_len : 1;
1887}
1888EXPORT_SYMBOL_GPL(cgroup_taskset_size);
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
1899 struct task_struct *tsk, struct css_set *newcg)
1900{
1901 struct css_set *oldcg;
1902
1903
1904
1905
1906
1907
1908 WARN_ON_ONCE(tsk->flags & PF_EXITING);
1909 oldcg = tsk->cgroups;
1910
1911 task_lock(tsk);
1912 rcu_assign_pointer(tsk->cgroups, newcg);
1913 task_unlock(tsk);
1914
1915
1916 write_lock(&css_set_lock);
1917 if (!list_empty(&tsk->cg_list))
1918 list_move(&tsk->cg_list, &newcg->tasks);
1919 write_unlock(&css_set_lock);
1920
1921
1922
1923
1924
1925
1926 set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
1927 put_css_set(oldcg);
1928}
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1939{
1940 int retval = 0;
1941 struct cgroup_subsys *ss, *failed_ss = NULL;
1942 struct cgroup *oldcgrp;
1943 struct cgroupfs_root *root = cgrp->root;
1944 struct cgroup_taskset tset = { };
1945 struct css_set *newcg;
1946
1947
1948 if (tsk->flags & PF_EXITING)
1949 return -ESRCH;
1950
1951
1952 oldcgrp = task_cgroup_from_root(tsk, root);
1953 if (cgrp == oldcgrp)
1954 return 0;
1955
1956 tset.single.task = tsk;
1957 tset.single.cgrp = oldcgrp;
1958
1959 for_each_subsys(root, ss) {
1960 if (ss->can_attach) {
1961 retval = ss->can_attach(cgrp, &tset);
1962 if (retval) {
1963
1964
1965
1966
1967
1968
1969 failed_ss = ss;
1970 goto out;
1971 }
1972 }
1973 }
1974
1975 newcg = find_css_set(tsk->cgroups, cgrp);
1976 if (!newcg) {
1977 retval = -ENOMEM;
1978 goto out;
1979 }
1980
1981 cgroup_task_migrate(cgrp, oldcgrp, tsk, newcg);
1982
1983 for_each_subsys(root, ss) {
1984 if (ss->attach)
1985 ss->attach(cgrp, &tset);
1986 }
1987
1988 synchronize_rcu();
1989
1990
1991
1992
1993
1994 cgroup_wakeup_rmdir_waiter(cgrp);
1995out:
1996 if (retval) {
1997 for_each_subsys(root, ss) {
1998 if (ss == failed_ss)
1999
2000
2001
2002
2003
2004
2005 break;
2006 if (ss->cancel_attach)
2007 ss->cancel_attach(cgrp, &tset);
2008 }
2009 }
2010 return retval;
2011}
2012
2013
2014
2015
2016
2017
2018int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
2019{
2020 struct cgroupfs_root *root;
2021 int retval = 0;
2022
2023 cgroup_lock();
2024 for_each_active_root(root) {
2025 struct cgroup *from_cg = task_cgroup_from_root(from, root);
2026
2027 retval = cgroup_attach_task(from_cg, tsk);
2028 if (retval)
2029 break;
2030 }
2031 cgroup_unlock();
2032
2033 return retval;
2034}
2035EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2046{
2047 int retval, i, group_size;
2048 struct cgroup_subsys *ss, *failed_ss = NULL;
2049
2050 struct cgroupfs_root *root = cgrp->root;
2051
2052 struct task_struct *tsk;
2053 struct task_and_cgroup *tc;
2054 struct flex_array *group;
2055 struct cgroup_taskset tset = { };
2056
2057
2058
2059
2060
2061
2062
2063
2064 group_size = get_nr_threads(leader);
2065
2066 group = flex_array_alloc(sizeof(*tc), group_size, GFP_KERNEL);
2067 if (!group)
2068 return -ENOMEM;
2069
2070 retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL);
2071 if (retval)
2072 goto out_free_group_list;
2073
2074 tsk = leader;
2075 i = 0;
2076
2077
2078
2079
2080
2081 rcu_read_lock();
2082 do {
2083 struct task_and_cgroup ent;
2084
2085
2086 if (tsk->flags & PF_EXITING)
2087 continue;
2088
2089
2090 BUG_ON(i >= group_size);
2091 ent.task = tsk;
2092 ent.cgrp = task_cgroup_from_root(tsk, root);
2093
2094 if (ent.cgrp == cgrp)
2095 continue;
2096
2097
2098
2099
2100 retval = flex_array_put(group, i, &ent, GFP_ATOMIC);
2101 BUG_ON(retval != 0);
2102 i++;
2103 } while_each_thread(leader, tsk);
2104 rcu_read_unlock();
2105
2106 group_size = i;
2107 tset.tc_array = group;
2108 tset.tc_array_len = group_size;
2109
2110
2111 retval = 0;
2112 if (!group_size)
2113 goto out_free_group_list;
2114
2115
2116
2117
2118 for_each_subsys(root, ss) {
2119 if (ss->can_attach) {
2120 retval = ss->can_attach(cgrp, &tset);
2121 if (retval) {
2122 failed_ss = ss;
2123 goto out_cancel_attach;
2124 }
2125 }
2126 }
2127
2128
2129
2130
2131
2132 for (i = 0; i < group_size; i++) {
2133 tc = flex_array_get(group, i);
2134 tc->cg = find_css_set(tc->task->cgroups, cgrp);
2135 if (!tc->cg) {
2136 retval = -ENOMEM;
2137 goto out_put_css_set_refs;
2138 }
2139 }
2140
2141
2142
2143
2144
2145
2146 for (i = 0; i < group_size; i++) {
2147 tc = flex_array_get(group, i);
2148 cgroup_task_migrate(cgrp, tc->cgrp, tc->task, tc->cg);
2149 }
2150
2151
2152
2153
2154
2155 for_each_subsys(root, ss) {
2156 if (ss->attach)
2157 ss->attach(cgrp, &tset);
2158 }
2159
2160
2161
2162
2163 synchronize_rcu();
2164 cgroup_wakeup_rmdir_waiter(cgrp);
2165 retval = 0;
2166out_put_css_set_refs:
2167 if (retval) {
2168 for (i = 0; i < group_size; i++) {
2169 tc = flex_array_get(group, i);
2170 if (!tc->cg)
2171 break;
2172 put_css_set(tc->cg);
2173 }
2174 }
2175out_cancel_attach:
2176 if (retval) {
2177 for_each_subsys(root, ss) {
2178 if (ss == failed_ss)
2179 break;
2180 if (ss->cancel_attach)
2181 ss->cancel_attach(cgrp, &tset);
2182 }
2183 }
2184out_free_group_list:
2185 flex_array_free(group);
2186 return retval;
2187}
2188
2189
2190
2191
2192
2193
2194static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
2195{
2196 struct task_struct *tsk;
2197 const struct cred *cred = current_cred(), *tcred;
2198 int ret;
2199
2200 if (!cgroup_lock_live_group(cgrp))
2201 return -ENODEV;
2202
2203retry_find_task:
2204 rcu_read_lock();
2205 if (pid) {
2206 tsk = find_task_by_vpid(pid);
2207 if (!tsk) {
2208 rcu_read_unlock();
2209 ret= -ESRCH;
2210 goto out_unlock_cgroup;
2211 }
2212
2213
2214
2215
2216 tcred = __task_cred(tsk);
2217 if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
2218 !uid_eq(cred->euid, tcred->uid) &&
2219 !uid_eq(cred->euid, tcred->suid)) {
2220 rcu_read_unlock();
2221 ret = -EACCES;
2222 goto out_unlock_cgroup;
2223 }
2224 } else
2225 tsk = current;
2226
2227 if (threadgroup)
2228 tsk = tsk->group_leader;
2229
2230
2231
2232
2233
2234
2235 if (tsk == kthreadd_task || (tsk->flags & PF_THREAD_BOUND)) {
2236 ret = -EINVAL;
2237 rcu_read_unlock();
2238 goto out_unlock_cgroup;
2239 }
2240
2241 get_task_struct(tsk);
2242 rcu_read_unlock();
2243
2244 threadgroup_lock(tsk);
2245 if (threadgroup) {
2246 if (!thread_group_leader(tsk)) {
2247
2248
2249
2250
2251
2252
2253
2254 threadgroup_unlock(tsk);
2255 put_task_struct(tsk);
2256 goto retry_find_task;
2257 }
2258 ret = cgroup_attach_proc(cgrp, tsk);
2259 } else
2260 ret = cgroup_attach_task(cgrp, tsk);
2261 threadgroup_unlock(tsk);
2262
2263 put_task_struct(tsk);
2264out_unlock_cgroup:
2265 cgroup_unlock();
2266 return ret;
2267}
2268
2269static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
2270{
2271 return attach_task_by_pid(cgrp, pid, false);
2272}
2273
2274static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
2275{
2276 return attach_task_by_pid(cgrp, tgid, true);
2277}
2278
2279
2280
2281
2282
2283
2284
2285
2286bool cgroup_lock_live_group(struct cgroup *cgrp)
2287{
2288 mutex_lock(&cgroup_mutex);
2289 if (cgroup_is_removed(cgrp)) {
2290 mutex_unlock(&cgroup_mutex);
2291 return false;
2292 }
2293 return true;
2294}
2295EXPORT_SYMBOL_GPL(cgroup_lock_live_group);
2296
2297static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
2298 const char *buffer)
2299{
2300 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
2301 if (strlen(buffer) >= PATH_MAX)
2302 return -EINVAL;
2303 if (!cgroup_lock_live_group(cgrp))
2304 return -ENODEV;
2305 mutex_lock(&cgroup_root_mutex);
2306 strcpy(cgrp->root->release_agent_path, buffer);
2307 mutex_unlock(&cgroup_root_mutex);
2308 cgroup_unlock();
2309 return 0;
2310}
2311
2312static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
2313 struct seq_file *seq)
2314{
2315 if (!cgroup_lock_live_group(cgrp))
2316 return -ENODEV;
2317 seq_puts(seq, cgrp->root->release_agent_path);
2318 seq_putc(seq, '\n');
2319 cgroup_unlock();
2320 return 0;
2321}
2322
2323
2324#define CGROUP_LOCAL_BUFFER_SIZE 64
2325
2326static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
2327 struct file *file,
2328 const char __user *userbuf,
2329 size_t nbytes, loff_t *unused_ppos)
2330{
2331 char buffer[CGROUP_LOCAL_BUFFER_SIZE];
2332 int retval = 0;
2333 char *end;
2334
2335 if (!nbytes)
2336 return -EINVAL;
2337 if (nbytes >= sizeof(buffer))
2338 return -E2BIG;
2339 if (copy_from_user(buffer, userbuf, nbytes))
2340 return -EFAULT;
2341
2342 buffer[nbytes] = 0;
2343 if (cft->write_u64) {
2344 u64 val = simple_strtoull(strstrip(buffer), &end, 0);
2345 if (*end)
2346 return -EINVAL;
2347 retval = cft->write_u64(cgrp, cft, val);
2348 } else {
2349 s64 val = simple_strtoll(strstrip(buffer), &end, 0);
2350 if (*end)
2351 return -EINVAL;
2352 retval = cft->write_s64(cgrp, cft, val);
2353 }
2354 if (!retval)
2355 retval = nbytes;
2356 return retval;
2357}
2358
2359static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
2360 struct file *file,
2361 const char __user *userbuf,
2362 size_t nbytes, loff_t *unused_ppos)
2363{
2364 char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
2365 int retval = 0;
2366 size_t max_bytes = cft->max_write_len;
2367 char *buffer = local_buffer;
2368
2369 if (!max_bytes)
2370 max_bytes = sizeof(local_buffer) - 1;
2371 if (nbytes >= max_bytes)
2372 return -E2BIG;
2373
2374 if (nbytes >= sizeof(local_buffer)) {
2375 buffer = kmalloc(nbytes + 1, GFP_KERNEL);
2376 if (buffer == NULL)
2377 return -ENOMEM;
2378 }
2379 if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
2380 retval = -EFAULT;
2381 goto out;
2382 }
2383
2384 buffer[nbytes] = 0;
2385 retval = cft->write_string(cgrp, cft, strstrip(buffer));
2386 if (!retval)
2387 retval = nbytes;
2388out:
2389 if (buffer != local_buffer)
2390 kfree(buffer);
2391 return retval;
2392}
2393
2394static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
2395 size_t nbytes, loff_t *ppos)
2396{
2397 struct cftype *cft = __d_cft(file->f_dentry);
2398 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2399
2400 if (cgroup_is_removed(cgrp))
2401 return -ENODEV;
2402 if (cft->write)
2403 return cft->write(cgrp, cft, file, buf, nbytes, ppos);
2404 if (cft->write_u64 || cft->write_s64)
2405 return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
2406 if (cft->write_string)
2407 return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
2408 if (cft->trigger) {
2409 int ret = cft->trigger(cgrp, (unsigned int)cft->private);
2410 return ret ? ret : nbytes;
2411 }
2412 return -EINVAL;
2413}
2414
2415static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
2416 struct file *file,
2417 char __user *buf, size_t nbytes,
2418 loff_t *ppos)
2419{
2420 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2421 u64 val = cft->read_u64(cgrp, cft);
2422 int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
2423
2424 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2425}
2426
2427static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
2428 struct file *file,
2429 char __user *buf, size_t nbytes,
2430 loff_t *ppos)
2431{
2432 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2433 s64 val = cft->read_s64(cgrp, cft);
2434 int len = sprintf(tmp, "%lld\n", (long long) val);
2435
2436 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2437}
2438
2439static ssize_t cgroup_file_read(struct file *file, char __user *buf,
2440 size_t nbytes, loff_t *ppos)
2441{
2442 struct cftype *cft = __d_cft(file->f_dentry);
2443 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2444
2445 if (cgroup_is_removed(cgrp))
2446 return -ENODEV;
2447
2448 if (cft->read)
2449 return cft->read(cgrp, cft, file, buf, nbytes, ppos);
2450 if (cft->read_u64)
2451 return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
2452 if (cft->read_s64)
2453 return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
2454 return -EINVAL;
2455}
2456
2457
2458
2459
2460
2461
/*
 * Per-open state for control files served through seq_file: the cftype
 * whose read_map/read_seq_string is called and the cgroup passed to it.
 */
struct cgroup_seqfile_state {
	struct cftype	*cft;		/* file type providing the show handler */
	struct cgroup	*cgroup;	/* cgroup the file belongs to */
};
2466
2467static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
2468{
2469 struct seq_file *sf = cb->state;
2470 return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
2471}
2472
2473static int cgroup_seqfile_show(struct seq_file *m, void *arg)
2474{
2475 struct cgroup_seqfile_state *state = m->private;
2476 struct cftype *cft = state->cft;
2477 if (cft->read_map) {
2478 struct cgroup_map_cb cb = {
2479 .fill = cgroup_map_add,
2480 .state = m,
2481 };
2482 return cft->read_map(state->cgroup, cft, &cb);
2483 }
2484 return cft->read_seq_string(state->cgroup, cft, m);
2485}
2486
2487static int cgroup_seqfile_release(struct inode *inode, struct file *file)
2488{
2489 struct seq_file *seq = file->private_data;
2490 kfree(seq->private);
2491 return single_release(inode, file);
2492}
2493
2494static const struct file_operations cgroup_seqfile_operations = {
2495 .read = seq_read,
2496 .write = cgroup_file_write,
2497 .llseek = seq_lseek,
2498 .release = cgroup_seqfile_release,
2499};
2500
2501static int cgroup_file_open(struct inode *inode, struct file *file)
2502{
2503 int err;
2504 struct cftype *cft;
2505
2506 err = generic_file_open(inode, file);
2507 if (err)
2508 return err;
2509 cft = __d_cft(file->f_dentry);
2510
2511 if (cft->read_map || cft->read_seq_string) {
2512 struct cgroup_seqfile_state *state =
2513 kzalloc(sizeof(*state), GFP_USER);
2514 if (!state)
2515 return -ENOMEM;
2516 state->cft = cft;
2517 state->cgroup = __d_cgrp(file->f_dentry->d_parent);
2518 file->f_op = &cgroup_seqfile_operations;
2519 err = single_open(file, cgroup_seqfile_show, state);
2520 if (err < 0)
2521 kfree(state);
2522 } else if (cft->open)
2523 err = cft->open(inode, file);
2524 else
2525 err = 0;
2526
2527 return err;
2528}
2529
2530static int cgroup_file_release(struct inode *inode, struct file *file)
2531{
2532 struct cftype *cft = __d_cft(file->f_dentry);
2533 if (cft->release)
2534 return cft->release(inode, file);
2535 return 0;
2536}
2537
2538
2539
2540
2541static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
2542 struct inode *new_dir, struct dentry *new_dentry)
2543{
2544 if (!S_ISDIR(old_dentry->d_inode->i_mode))
2545 return -ENOTDIR;
2546 if (new_dentry->d_inode)
2547 return -EEXIST;
2548 if (old_dir != new_dir)
2549 return -EIO;
2550 return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
2551}
2552
2553static const struct file_operations cgroup_file_operations = {
2554 .read = cgroup_file_read,
2555 .write = cgroup_file_write,
2556 .llseek = generic_file_llseek,
2557 .open = cgroup_file_open,
2558 .release = cgroup_file_release,
2559};
2560
2561static const struct inode_operations cgroup_dir_inode_operations = {
2562 .lookup = cgroup_lookup,
2563 .mkdir = cgroup_mkdir,
2564 .rmdir = cgroup_rmdir,
2565 .rename = cgroup_rename,
2566};
2567
2568static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
2569{
2570 if (dentry->d_name.len > NAME_MAX)
2571 return ERR_PTR(-ENAMETOOLONG);
2572 d_add(dentry, NULL);
2573 return NULL;
2574}
2575
2576
2577
2578
2579static inline struct cftype *__file_cft(struct file *file)
2580{
2581 if (file->f_dentry->d_inode->i_fop != &cgroup_file_operations)
2582 return ERR_PTR(-EINVAL);
2583 return __d_cft(file->f_dentry);
2584}
2585
2586static int cgroup_create_file(struct dentry *dentry, umode_t mode,
2587 struct super_block *sb)
2588{
2589 struct inode *inode;
2590
2591 if (!dentry)
2592 return -ENOENT;
2593 if (dentry->d_inode)
2594 return -EEXIST;
2595
2596 inode = cgroup_new_inode(mode, sb);
2597 if (!inode)
2598 return -ENOMEM;
2599
2600 if (S_ISDIR(mode)) {
2601 inode->i_op = &cgroup_dir_inode_operations;
2602 inode->i_fop = &simple_dir_operations;
2603
2604
2605 inc_nlink(inode);
2606
2607
2608
2609 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
2610 } else if (S_ISREG(mode)) {
2611 inode->i_size = 0;
2612 inode->i_fop = &cgroup_file_operations;
2613 }
2614 d_instantiate(dentry, inode);
2615 dget(dentry);
2616 return 0;
2617}
2618
2619
2620
2621
2622
2623
2624
2625
2626static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
2627 umode_t mode)
2628{
2629 struct dentry *parent;
2630 int error = 0;
2631
2632 parent = cgrp->parent->dentry;
2633 error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
2634 if (!error) {
2635 dentry->d_fsdata = cgrp;
2636 inc_nlink(parent->d_inode);
2637 rcu_assign_pointer(cgrp->dentry, dentry);
2638 dget(dentry);
2639 }
2640 dput(dentry);
2641
2642 return error;
2643}
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654static umode_t cgroup_file_mode(const struct cftype *cft)
2655{
2656 umode_t mode = 0;
2657
2658 if (cft->mode)
2659 return cft->mode;
2660
2661 if (cft->read || cft->read_u64 || cft->read_s64 ||
2662 cft->read_map || cft->read_seq_string)
2663 mode |= S_IRUGO;
2664
2665 if (cft->write || cft->write_u64 || cft->write_s64 ||
2666 cft->write_string || cft->trigger)
2667 mode |= S_IWUSR;
2668
2669 return mode;
2670}
2671
2672static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
2673 const struct cftype *cft)
2674{
2675 struct dentry *dir = cgrp->dentry;
2676 struct cgroup *parent = __d_cgrp(dir);
2677 struct dentry *dentry;
2678 struct cfent *cfe;
2679 int error;
2680 umode_t mode;
2681 char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
2682
2683
2684 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
2685 return 0;
2686 if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
2687 return 0;
2688
2689 if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
2690 strcpy(name, subsys->name);
2691 strcat(name, ".");
2692 }
2693 strcat(name, cft->name);
2694
2695 BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
2696
2697 cfe = kzalloc(sizeof(*cfe), GFP_KERNEL);
2698 if (!cfe)
2699 return -ENOMEM;
2700
2701 dentry = lookup_one_len(name, dir, strlen(name));
2702 if (IS_ERR(dentry)) {
2703 error = PTR_ERR(dentry);
2704 goto out;
2705 }
2706
2707 mode = cgroup_file_mode(cft);
2708 error = cgroup_create_file(dentry, mode | S_IFREG, cgrp->root->sb);
2709 if (!error) {
2710 cfe->type = (void *)cft;
2711 cfe->dentry = dentry;
2712 dentry->d_fsdata = cfe;
2713 list_add_tail(&cfe->node, &parent->files);
2714 cfe = NULL;
2715 }
2716 dput(dentry);
2717out:
2718 kfree(cfe);
2719 return error;
2720}
2721
2722static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
2723 const struct cftype cfts[], bool is_add)
2724{
2725 const struct cftype *cft;
2726 int err, ret = 0;
2727
2728 for (cft = cfts; cft->name[0] != '\0'; cft++) {
2729 if (is_add)
2730 err = cgroup_add_file(cgrp, subsys, cft);
2731 else
2732 err = cgroup_rm_file(cgrp, cft);
2733 if (err) {
2734 pr_warning("cgroup_addrm_files: failed to %s %s, err=%d\n",
2735 is_add ? "add" : "remove", cft->name, err);
2736 ret = err;
2737 }
2738 }
2739 return ret;
2740}
2741
2742static DEFINE_MUTEX(cgroup_cft_mutex);
2743
2744static void cgroup_cfts_prepare(void)
2745 __acquires(&cgroup_cft_mutex) __acquires(&cgroup_mutex)
2746{
2747
2748
2749
2750
2751
2752
2753
2754 mutex_lock(&cgroup_cft_mutex);
2755 mutex_lock(&cgroup_mutex);
2756}
2757
2758static void cgroup_cfts_commit(struct cgroup_subsys *ss,
2759 const struct cftype *cfts, bool is_add)
2760 __releases(&cgroup_mutex) __releases(&cgroup_cft_mutex)
2761{
2762 LIST_HEAD(pending);
2763 struct cgroup *cgrp, *n;
2764
2765
2766 if (cfts && ss->root != &rootnode) {
2767 list_for_each_entry(cgrp, &ss->root->allcg_list, allcg_node) {
2768 dget(cgrp->dentry);
2769 list_add_tail(&cgrp->cft_q_node, &pending);
2770 }
2771 }
2772
2773 mutex_unlock(&cgroup_mutex);
2774
2775
2776
2777
2778
2779 list_for_each_entry_safe(cgrp, n, &pending, cft_q_node) {
2780 struct inode *inode = cgrp->dentry->d_inode;
2781
2782 mutex_lock(&inode->i_mutex);
2783 mutex_lock(&cgroup_mutex);
2784 if (!cgroup_is_removed(cgrp))
2785 cgroup_addrm_files(cgrp, ss, cfts, is_add);
2786 mutex_unlock(&cgroup_mutex);
2787 mutex_unlock(&inode->i_mutex);
2788
2789 list_del_init(&cgrp->cft_q_node);
2790 dput(cgrp->dentry);
2791 }
2792
2793 mutex_unlock(&cgroup_cft_mutex);
2794}
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts)
2811{
2812 struct cftype_set *set;
2813
2814 set = kzalloc(sizeof(*set), GFP_KERNEL);
2815 if (!set)
2816 return -ENOMEM;
2817
2818 cgroup_cfts_prepare();
2819 set->cfts = cfts;
2820 list_add_tail(&set->node, &ss->cftsets);
2821 cgroup_cfts_commit(ss, cfts, true);
2822
2823 return 0;
2824}
2825EXPORT_SYMBOL_GPL(cgroup_add_cftypes);
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts)
2841{
2842 struct cftype_set *set;
2843
2844 cgroup_cfts_prepare();
2845
2846 list_for_each_entry(set, &ss->cftsets, node) {
2847 if (set->cfts == cfts) {
2848 list_del_init(&set->node);
2849 cgroup_cfts_commit(ss, cfts, false);
2850 return 0;
2851 }
2852 }
2853
2854 cgroup_cfts_commit(ss, NULL, false);
2855 return -ENOENT;
2856}
2857
2858
2859
2860
2861
2862
2863
2864int cgroup_task_count(const struct cgroup *cgrp)
2865{
2866 int count = 0;
2867 struct cg_cgroup_link *link;
2868
2869 read_lock(&css_set_lock);
2870 list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
2871 count += atomic_read(&link->cg->refcount);
2872 }
2873 read_unlock(&css_set_lock);
2874 return count;
2875}
2876
2877
2878
2879
2880
2881static void cgroup_advance_iter(struct cgroup *cgrp,
2882 struct cgroup_iter *it)
2883{
2884 struct list_head *l = it->cg_link;
2885 struct cg_cgroup_link *link;
2886 struct css_set *cg;
2887
2888
2889 do {
2890 l = l->next;
2891 if (l == &cgrp->css_sets) {
2892 it->cg_link = NULL;
2893 return;
2894 }
2895 link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
2896 cg = link->cg;
2897 } while (list_empty(&cg->tasks));
2898 it->cg_link = l;
2899 it->task = cg->tasks.next;
2900}
2901
2902
2903
2904
2905
2906
2907
2908static void cgroup_enable_task_cg_lists(void)
2909{
2910 struct task_struct *p, *g;
2911 write_lock(&css_set_lock);
2912 use_task_css_set_links = 1;
2913
2914
2915
2916
2917
2918
2919
2920 read_lock(&tasklist_lock);
2921 do_each_thread(g, p) {
2922 task_lock(p);
2923
2924
2925
2926
2927
2928 if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
2929 list_add(&p->cg_list, &p->cgroups->tasks);
2930 task_unlock(p);
2931 } while_each_thread(g, p);
2932 read_unlock(&tasklist_lock);
2933 write_unlock(&css_set_lock);
2934}
2935
2936void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
2937 __acquires(css_set_lock)
2938{
2939
2940
2941
2942
2943
2944 if (!use_task_css_set_links)
2945 cgroup_enable_task_cg_lists();
2946
2947 read_lock(&css_set_lock);
2948 it->cg_link = &cgrp->css_sets;
2949 cgroup_advance_iter(cgrp, it);
2950}
2951
2952struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
2953 struct cgroup_iter *it)
2954{
2955 struct task_struct *res;
2956 struct list_head *l = it->task;
2957 struct cg_cgroup_link *link;
2958
2959
2960 if (!it->cg_link)
2961 return NULL;
2962 res = list_entry(l, struct task_struct, cg_list);
2963
2964 l = l->next;
2965 link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
2966 if (l == &link->cg->tasks) {
2967
2968
2969 cgroup_advance_iter(cgrp, it);
2970 } else {
2971 it->task = l;
2972 }
2973 return res;
2974}
2975
2976void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
2977 __releases(css_set_lock)
2978{
2979 read_unlock(&css_set_lock);
2980}
2981
2982static inline int started_after_time(struct task_struct *t1,
2983 struct timespec *time,
2984 struct task_struct *t2)
2985{
2986 int start_diff = timespec_compare(&t1->start_time, time);
2987 if (start_diff > 0) {
2988 return 1;
2989 } else if (start_diff < 0) {
2990 return 0;
2991 } else {
2992
2993
2994
2995
2996
2997
2998
2999
3000 return t1 > t2;
3001 }
3002}
3003
3004
3005
3006
3007
3008
3009static inline int started_after(void *p1, void *p2)
3010{
3011 struct task_struct *t1 = p1;
3012 struct task_struct *t2 = p2;
3013 return started_after_time(t1, &t2->start_time, t2);
3014}
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043int cgroup_scan_tasks(struct cgroup_scanner *scan)
3044{
3045 int retval, i;
3046 struct cgroup_iter it;
3047 struct task_struct *p, *dropped;
3048
3049 struct task_struct *latest_task = NULL;
3050 struct ptr_heap tmp_heap;
3051 struct ptr_heap *heap;
3052 struct timespec latest_time = { 0, 0 };
3053
3054 if (scan->heap) {
3055
3056 heap = scan->heap;
3057 heap->gt = &started_after;
3058 } else {
3059
3060 heap = &tmp_heap;
3061 retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
3062 if (retval)
3063
3064 return retval;
3065 }
3066
3067 again:
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080 heap->size = 0;
3081 cgroup_iter_start(scan->cg, &it);
3082 while ((p = cgroup_iter_next(scan->cg, &it))) {
3083
3084
3085
3086
3087 if (scan->test_task && !scan->test_task(p, scan))
3088 continue;
3089
3090
3091
3092
3093 if (!started_after_time(p, &latest_time, latest_task))
3094 continue;
3095 dropped = heap_insert(heap, p);
3096 if (dropped == NULL) {
3097
3098
3099
3100
3101 get_task_struct(p);
3102 } else if (dropped != p) {
3103
3104
3105
3106
3107 get_task_struct(p);
3108 put_task_struct(dropped);
3109 }
3110
3111
3112
3113
3114 }
3115 cgroup_iter_end(scan->cg, &it);
3116
3117 if (heap->size) {
3118 for (i = 0; i < heap->size; i++) {
3119 struct task_struct *q = heap->ptrs[i];
3120 if (i == 0) {
3121 latest_time = q->start_time;
3122 latest_task = q;
3123 }
3124
3125 scan->process_task(q, scan);
3126 put_task_struct(q);
3127 }
3128
3129
3130
3131
3132
3133
3134
3135 goto again;
3136 }
3137 if (heap == &tmp_heap)
3138 heap_free(&tmp_heap);
3139 return 0;
3140}
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
/* Which kind of pid listing a cgroup_pidlist holds (see its key.type). */
enum cgroup_filetype {
	CGROUP_FILE_PROCS = 0,
	CGROUP_FILE_TASKS = 1,
};
3157
3158
3159
3160
3161
3162
3163
3164struct cgroup_pidlist {
3165
3166
3167
3168
3169 struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
3170
3171 pid_t *list;
3172
3173 int length;
3174
3175 int use_count;
3176
3177 struct list_head links;
3178
3179 struct cgroup *owner;
3180
3181 struct rw_semaphore mutex;
3182};
3183
3184
3185
3186
3187
3188
3189#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
3190static void *pidlist_allocate(int count)
3191{
3192 if (PIDLIST_TOO_LARGE(count))
3193 return vmalloc(count * sizeof(pid_t));
3194 else
3195 return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
3196}
/* Free a pid array with the deallocator matching how it was allocated. */
static void pidlist_free(void *p)
{
	if (!is_vmalloc_addr(p)) {
		kfree(p);
		return;
	}

	vfree(p);
}
3204static void *pidlist_resize(void *p, int newcount)
3205{
3206 void *newlist;
3207
3208 if (is_vmalloc_addr(p)) {
3209 newlist = vmalloc(newcount * sizeof(pid_t));
3210 if (!newlist)
3211 return NULL;
3212 memcpy(newlist, p, newcount * sizeof(pid_t));
3213 vfree(p);
3214 } else {
3215 newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL);
3216 }
3217 return newlist;
3218}
3219
3220
3221
3222
3223
3224
3225
3226
3227#define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new))
3228static int pidlist_uniq(pid_t **p, int length)
3229{
3230 int src, dest = 1;
3231 pid_t *list = *p;
3232 pid_t *newlist;
3233
3234
3235
3236
3237
3238 if (length == 0 || length == 1)
3239 return length;
3240
3241 for (src = 1; src < length; src++) {
3242
3243 while (list[src] == list[src-1]) {
3244 src++;
3245 if (src == length)
3246 goto after;
3247 }
3248
3249 list[dest] = list[src];
3250 dest++;
3251 }
3252after:
3253
3254
3255
3256
3257
3258 if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
3259 newlist = pidlist_resize(list, dest);
3260 if (newlist)
3261 *p = newlist;
3262 }
3263 return dest;
3264}
3265
/*
 * sort() comparison callback for pid_t elements: negative, zero or positive
 * when *a is below, equal to or above *b.
 */
static int cmppid(const void *a, const void *b)
{
	const pid_t *lhs = a;
	const pid_t *rhs = b;

	return *lhs - *rhs;
}
3270
3271
3272
3273
3274
3275
3276
3277static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
3278 enum cgroup_filetype type)
3279{
3280 struct cgroup_pidlist *l;
3281
3282 struct pid_namespace *ns = current->nsproxy->pid_ns;
3283
3284
3285
3286
3287
3288
3289
3290 mutex_lock(&cgrp->pidlist_mutex);
3291 list_for_each_entry(l, &cgrp->pidlists, links) {
3292 if (l->key.type == type && l->key.ns == ns) {
3293
3294 down_write(&l->mutex);
3295 mutex_unlock(&cgrp->pidlist_mutex);
3296 return l;
3297 }
3298 }
3299
3300 l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
3301 if (!l) {
3302 mutex_unlock(&cgrp->pidlist_mutex);
3303 return l;
3304 }
3305 init_rwsem(&l->mutex);
3306 down_write(&l->mutex);
3307 l->key.type = type;
3308 l->key.ns = get_pid_ns(ns);
3309 l->use_count = 0;
3310 l->list = NULL;
3311 l->owner = cgrp;
3312 list_add(&l->links, &cgrp->pidlists);
3313 mutex_unlock(&cgrp->pidlist_mutex);
3314 return l;
3315}
3316
3317
3318
3319
3320static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
3321 struct cgroup_pidlist **lp)
3322{
3323 pid_t *array;
3324 int length;
3325 int pid, n = 0;
3326 struct cgroup_iter it;
3327 struct task_struct *tsk;
3328 struct cgroup_pidlist *l;
3329
3330
3331
3332
3333
3334
3335
3336 length = cgroup_task_count(cgrp);
3337 array = pidlist_allocate(length);
3338 if (!array)
3339 return -ENOMEM;
3340
3341 cgroup_iter_start(cgrp, &it);
3342 while ((tsk = cgroup_iter_next(cgrp, &it))) {
3343 if (unlikely(n == length))
3344 break;
3345
3346 if (type == CGROUP_FILE_PROCS)
3347 pid = task_tgid_vnr(tsk);
3348 else
3349 pid = task_pid_vnr(tsk);
3350 if (pid > 0)
3351 array[n++] = pid;
3352 }
3353 cgroup_iter_end(cgrp, &it);
3354 length = n;
3355
3356 sort(array, length, sizeof(pid_t), cmppid, NULL);
3357 if (type == CGROUP_FILE_PROCS)
3358 length = pidlist_uniq(&array, length);
3359 l = cgroup_pidlist_find(cgrp, type);
3360 if (!l) {
3361 pidlist_free(array);
3362 return -ENOMEM;
3363 }
3364
3365 pidlist_free(l->list);
3366 l->list = array;
3367 l->length = length;
3368 l->use_count++;
3369 up_write(&l->mutex);
3370 *lp = l;
3371 return 0;
3372}
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
3384{
3385 int ret = -EINVAL;
3386 struct cgroup *cgrp;
3387 struct cgroup_iter it;
3388 struct task_struct *tsk;
3389
3390
3391
3392
3393
3394 if (dentry->d_sb->s_op != &cgroup_ops ||
3395 !S_ISDIR(dentry->d_inode->i_mode))
3396 goto err;
3397
3398 ret = 0;
3399 cgrp = dentry->d_fsdata;
3400
3401 cgroup_iter_start(cgrp, &it);
3402 while ((tsk = cgroup_iter_next(cgrp, &it))) {
3403 switch (tsk->state) {
3404 case TASK_RUNNING:
3405 stats->nr_running++;
3406 break;
3407 case TASK_INTERRUPTIBLE:
3408 stats->nr_sleeping++;
3409 break;
3410 case TASK_UNINTERRUPTIBLE:
3411 stats->nr_uninterruptible++;
3412 break;
3413 case TASK_STOPPED:
3414 stats->nr_stopped++;
3415 break;
3416 default:
3417 if (delayacct_is_task_waiting_on_io(tsk))
3418 stats->nr_io_wait++;
3419 break;
3420 }
3421 }
3422 cgroup_iter_end(cgrp, &it);
3423
3424err:
3425 return ret;
3426}
3427
3428
3429
3430
3431
3432
3433
3434
3435static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
3436{
3437
3438
3439
3440
3441
3442
3443 struct cgroup_pidlist *l = s->private;
3444 int index = 0, pid = *pos;
3445 int *iter;
3446
3447 down_read(&l->mutex);
3448 if (pid) {
3449 int end = l->length;
3450
3451 while (index < end) {
3452 int mid = (index + end) / 2;
3453 if (l->list[mid] == pid) {
3454 index = mid;
3455 break;
3456 } else if (l->list[mid] <= pid)
3457 index = mid + 1;
3458 else
3459 end = mid;
3460 }
3461 }
3462
3463 if (index >= l->length)
3464 return NULL;
3465
3466 iter = l->list + index;
3467 *pos = *iter;
3468 return iter;
3469}
3470
3471static void cgroup_pidlist_stop(struct seq_file *s, void *v)
3472{
3473 struct cgroup_pidlist *l = s->private;
3474 up_read(&l->mutex);
3475}
3476
3477static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
3478{
3479 struct cgroup_pidlist *l = s->private;
3480 pid_t *p = v;
3481 pid_t *end = l->list + l->length;
3482
3483
3484
3485
3486 p++;
3487 if (p >= end) {
3488 return NULL;
3489 } else {
3490 *pos = *p;
3491 return p;
3492 }
3493}
3494
/* seq_file ->show: print the pid at the current slot, one per line. */
static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
	int *pid = v;

	return seq_printf(s, "%d\n", *pid);
}
3499
3500
3501
3502
3503
3504static const struct seq_operations cgroup_pidlist_seq_operations = {
3505 .start = cgroup_pidlist_start,
3506 .stop = cgroup_pidlist_stop,
3507 .next = cgroup_pidlist_next,
3508 .show = cgroup_pidlist_show,
3509};
3510
3511static void cgroup_release_pid_array(struct cgroup_pidlist *l)
3512{
3513
3514
3515
3516
3517
3518
3519 mutex_lock(&l->owner->pidlist_mutex);
3520 down_write(&l->mutex);
3521 BUG_ON(!l->use_count);
3522 if (!--l->use_count) {
3523
3524 list_del(&l->links);
3525 mutex_unlock(&l->owner->pidlist_mutex);
3526 pidlist_free(l->list);
3527 put_pid_ns(l->key.ns);
3528 up_write(&l->mutex);
3529 kfree(l);
3530 return;
3531 }
3532 mutex_unlock(&l->owner->pidlist_mutex);
3533 up_write(&l->mutex);
3534}
3535
3536static int cgroup_pidlist_release(struct inode *inode, struct file *file)
3537{
3538 struct cgroup_pidlist *l;
3539 if (!(file->f_mode & FMODE_READ))
3540 return 0;
3541
3542
3543
3544
3545 l = ((struct seq_file *)file->private_data)->private;
3546 cgroup_release_pid_array(l);
3547 return seq_release(inode, file);
3548}
3549
3550static const struct file_operations cgroup_pidlist_operations = {
3551 .read = seq_read,
3552 .llseek = seq_lseek,
3553 .write = cgroup_file_write,
3554 .release = cgroup_pidlist_release,
3555};
3556
3557
3558
3559
3560
3561
3562
3563static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
3564{
3565 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
3566 struct cgroup_pidlist *l;
3567 int retval;
3568
3569
3570 if (!(file->f_mode & FMODE_READ))
3571 return 0;
3572
3573
3574 retval = pidlist_array_load(cgrp, type, &l);
3575 if (retval)
3576 return retval;
3577
3578 file->f_op = &cgroup_pidlist_operations;
3579
3580 retval = seq_open(file, &cgroup_pidlist_seq_operations);
3581 if (retval) {
3582 cgroup_release_pid_array(l);
3583 return retval;
3584 }
3585 ((struct seq_file *)file->private_data)->private = l;
3586 return 0;
3587}
3588static int cgroup_tasks_open(struct inode *unused, struct file *file)
3589{
3590 return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
3591}
3592static int cgroup_procs_open(struct inode *unused, struct file *file)
3593{
3594 return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
3595}
3596
3597static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
3598 struct cftype *cft)
3599{
3600 return notify_on_release(cgrp);
3601}
3602
3603static int cgroup_write_notify_on_release(struct cgroup *cgrp,
3604 struct cftype *cft,
3605 u64 val)
3606{
3607 clear_bit(CGRP_RELEASABLE, &cgrp->flags);
3608 if (val)
3609 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3610 else
3611 clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3612 return 0;
3613}
3614
3615
3616
3617
3618
3619
3620static void cgroup_event_remove(struct work_struct *work)
3621{
3622 struct cgroup_event *event = container_of(work, struct cgroup_event,
3623 remove);
3624 struct cgroup *cgrp = event->cgrp;
3625
3626 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
3627
3628 eventfd_ctx_put(event->eventfd);
3629 kfree(event);
3630 dput(cgrp->dentry);
3631}
3632
3633
3634
3635
3636
3637
3638static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
3639 int sync, void *key)
3640{
3641 struct cgroup_event *event = container_of(wait,
3642 struct cgroup_event, wait);
3643 struct cgroup *cgrp = event->cgrp;
3644 unsigned long flags = (unsigned long)key;
3645
3646 if (flags & POLLHUP) {
3647 __remove_wait_queue(event->wqh, &event->wait);
3648 spin_lock(&cgrp->event_list_lock);
3649 list_del(&event->list);
3650 spin_unlock(&cgrp->event_list_lock);
3651
3652
3653
3654
3655 schedule_work(&event->remove);
3656 }
3657
3658 return 0;
3659}
3660
3661static void cgroup_event_ptable_queue_proc(struct file *file,
3662 wait_queue_head_t *wqh, poll_table *pt)
3663{
3664 struct cgroup_event *event = container_of(pt,
3665 struct cgroup_event, pt);
3666
3667 event->wqh = wqh;
3668 add_wait_queue(wqh, &event->wait);
3669}
3670
3671
3672
3673
3674
3675
3676
3677static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
3678 const char *buffer)
3679{
3680 struct cgroup_event *event = NULL;
3681 unsigned int efd, cfd;
3682 struct file *efile = NULL;
3683 struct file *cfile = NULL;
3684 char *endp;
3685 int ret;
3686
3687 efd = simple_strtoul(buffer, &endp, 10);
3688 if (*endp != ' ')
3689 return -EINVAL;
3690 buffer = endp + 1;
3691
3692 cfd = simple_strtoul(buffer, &endp, 10);
3693 if ((*endp != ' ') && (*endp != '\0'))
3694 return -EINVAL;
3695 buffer = endp + 1;
3696
3697 event = kzalloc(sizeof(*event), GFP_KERNEL);
3698 if (!event)
3699 return -ENOMEM;
3700 event->cgrp = cgrp;
3701 INIT_LIST_HEAD(&event->list);
3702 init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
3703 init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
3704 INIT_WORK(&event->remove, cgroup_event_remove);
3705
3706 efile = eventfd_fget(efd);
3707 if (IS_ERR(efile)) {
3708 ret = PTR_ERR(efile);
3709 goto fail;
3710 }
3711
3712 event->eventfd = eventfd_ctx_fileget(efile);
3713 if (IS_ERR(event->eventfd)) {
3714 ret = PTR_ERR(event->eventfd);
3715 goto fail;
3716 }
3717
3718 cfile = fget(cfd);
3719 if (!cfile) {
3720 ret = -EBADF;
3721 goto fail;
3722 }
3723
3724
3725
3726 ret = inode_permission(cfile->f_path.dentry->d_inode, MAY_READ);
3727 if (ret < 0)
3728 goto fail;
3729
3730 event->cft = __file_cft(cfile);
3731 if (IS_ERR(event->cft)) {
3732 ret = PTR_ERR(event->cft);
3733 goto fail;
3734 }
3735
3736 if (!event->cft->register_event || !event->cft->unregister_event) {
3737 ret = -EINVAL;
3738 goto fail;
3739 }
3740
3741 ret = event->cft->register_event(cgrp, event->cft,
3742 event->eventfd, buffer);
3743 if (ret)
3744 goto fail;
3745
3746 if (efile->f_op->poll(efile, &event->pt) & POLLHUP) {
3747 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
3748 ret = 0;
3749 goto fail;
3750 }
3751
3752
3753
3754
3755
3756
3757 dget(cgrp->dentry);
3758
3759 spin_lock(&cgrp->event_list_lock);
3760 list_add(&event->list, &cgrp->event_list);
3761 spin_unlock(&cgrp->event_list_lock);
3762
3763 fput(cfile);
3764 fput(efile);
3765
3766 return 0;
3767
3768fail:
3769 if (cfile)
3770 fput(cfile);
3771
3772 if (event && event->eventfd && !IS_ERR(event->eventfd))
3773 eventfd_ctx_put(event->eventfd);
3774
3775 if (!IS_ERR_OR_NULL(efile))
3776 fput(efile);
3777
3778 kfree(event);
3779
3780 return ret;
3781}
3782
3783static u64 cgroup_clone_children_read(struct cgroup *cgrp,
3784 struct cftype *cft)
3785{
3786 return clone_children(cgrp);
3787}
3788
3789static int cgroup_clone_children_write(struct cgroup *cgrp,
3790 struct cftype *cft,
3791 u64 val)
3792{
3793 if (val)
3794 set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3795 else
3796 clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3797 return 0;
3798}
3799
3800
3801
3802
3803
3804#define CGROUP_FILE_GENERIC_PREFIX "cgroup."
3805static struct cftype files[] = {
3806 {
3807 .name = "tasks",
3808 .open = cgroup_tasks_open,
3809 .write_u64 = cgroup_tasks_write,
3810 .release = cgroup_pidlist_release,
3811 .mode = S_IRUGO | S_IWUSR,
3812 },
3813 {
3814 .name = CGROUP_FILE_GENERIC_PREFIX "procs",
3815 .open = cgroup_procs_open,
3816 .write_u64 = cgroup_procs_write,
3817 .release = cgroup_pidlist_release,
3818 .mode = S_IRUGO | S_IWUSR,
3819 },
3820 {
3821 .name = "notify_on_release",
3822 .read_u64 = cgroup_read_notify_on_release,
3823 .write_u64 = cgroup_write_notify_on_release,
3824 },
3825 {
3826 .name = CGROUP_FILE_GENERIC_PREFIX "event_control",
3827 .write_string = cgroup_write_event_control,
3828 .mode = S_IWUGO,
3829 },
3830 {
3831 .name = "cgroup.clone_children",
3832 .read_u64 = cgroup_clone_children_read,
3833 .write_u64 = cgroup_clone_children_write,
3834 },
3835 {
3836 .name = "release_agent",
3837 .flags = CFTYPE_ONLY_ON_ROOT,
3838 .read_seq_string = cgroup_release_agent_show,
3839 .write_string = cgroup_release_agent_write,
3840 .max_write_len = PATH_MAX,
3841 },
3842 { }
3843};
3844
3845static int cgroup_populate_dir(struct cgroup *cgrp)
3846{
3847 int err;
3848 struct cgroup_subsys *ss;
3849
3850 err = cgroup_addrm_files(cgrp, NULL, files, true);
3851 if (err < 0)
3852 return err;
3853
3854
3855 for_each_subsys(cgrp->root, ss) {
3856 struct cftype_set *set;
3857
3858 list_for_each_entry(set, &ss->cftsets, node)
3859 cgroup_addrm_files(cgrp, ss, set->cfts, true);
3860 }
3861
3862
3863 for_each_subsys(cgrp->root, ss) {
3864 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
3865
3866
3867
3868
3869
3870 if (css->id)
3871 rcu_assign_pointer(css->id->css, css);
3872 }
3873
3874 return 0;
3875}
3876
3877static void css_dput_fn(struct work_struct *work)
3878{
3879 struct cgroup_subsys_state *css =
3880 container_of(work, struct cgroup_subsys_state, dput_work);
3881 struct dentry *dentry = css->cgroup->dentry;
3882 struct super_block *sb = dentry->d_sb;
3883
3884 atomic_inc(&sb->s_active);
3885 dput(dentry);
3886 deactivate_super(sb);
3887}
3888
3889static void init_cgroup_css(struct cgroup_subsys_state *css,
3890 struct cgroup_subsys *ss,
3891 struct cgroup *cgrp)
3892{
3893 css->cgroup = cgrp;
3894 atomic_set(&css->refcnt, 1);
3895 css->flags = 0;
3896 css->id = NULL;
3897 if (cgrp == dummytop)
3898 set_bit(CSS_ROOT, &css->flags);
3899 BUG_ON(cgrp->subsys[ss->subsys_id]);
3900 cgrp->subsys[ss->subsys_id] = css;
3901
3902
3903
3904
3905
3906
3907
3908 INIT_WORK(&css->dput_work, css_dput_fn);
3909 if (ss->__DEPRECATED_clear_css_refs)
3910 set_bit(CSS_CLEAR_CSS_REFS, &css->flags);
3911}
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
3922 umode_t mode)
3923{
3924 struct cgroup *cgrp;
3925 struct cgroupfs_root *root = parent->root;
3926 int err = 0;
3927 struct cgroup_subsys *ss;
3928 struct super_block *sb = root->sb;
3929
3930 cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
3931 if (!cgrp)
3932 return -ENOMEM;
3933
3934
3935
3936
3937
3938
3939 atomic_inc(&sb->s_active);
3940
3941 mutex_lock(&cgroup_mutex);
3942
3943 init_cgroup_housekeeping(cgrp);
3944
3945 cgrp->parent = parent;
3946 cgrp->root = parent->root;
3947 cgrp->top_cgroup = parent->top_cgroup;
3948
3949 if (notify_on_release(parent))
3950 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3951
3952 if (clone_children(parent))
3953 set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3954
3955 for_each_subsys(root, ss) {
3956 struct cgroup_subsys_state *css = ss->create(cgrp);
3957
3958 if (IS_ERR(css)) {
3959 err = PTR_ERR(css);
3960 goto err_destroy;
3961 }
3962 init_cgroup_css(css, ss, cgrp);
3963 if (ss->use_id) {
3964 err = alloc_css_id(ss, parent, cgrp);
3965 if (err)
3966 goto err_destroy;
3967 }
3968
3969 if (clone_children(parent) && ss->post_clone)
3970 ss->post_clone(cgrp);
3971 }
3972
3973 list_add(&cgrp->sibling, &cgrp->parent->children);
3974 root->number_of_cgroups++;
3975
3976 err = cgroup_create_dir(cgrp, dentry, mode);
3977 if (err < 0)
3978 goto err_remove;
3979
3980
3981 for_each_subsys(root, ss)
3982 if (!ss->__DEPRECATED_clear_css_refs)
3983 dget(dentry);
3984
3985
3986 BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
3987
3988 list_add_tail(&cgrp->allcg_node, &root->allcg_list);
3989
3990 err = cgroup_populate_dir(cgrp);
3991
3992
3993 mutex_unlock(&cgroup_mutex);
3994 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
3995
3996 return 0;
3997
3998 err_remove:
3999
4000 list_del(&cgrp->sibling);
4001 root->number_of_cgroups--;
4002
4003 err_destroy:
4004
4005 for_each_subsys(root, ss) {
4006 if (cgrp->subsys[ss->subsys_id])
4007 ss->destroy(cgrp);
4008 }
4009
4010 mutex_unlock(&cgroup_mutex);
4011
4012
4013 deactivate_super(sb);
4014
4015 kfree(cgrp);
4016 return err;
4017}
4018
4019static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
4020{
4021 struct cgroup *c_parent = dentry->d_parent->d_fsdata;
4022
4023
4024 return cgroup_create(c_parent, dentry, mode | S_IFDIR);
4025}
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036static int cgroup_has_css_refs(struct cgroup *cgrp)
4037{
4038 int i;
4039
4040
4041
4042
4043
4044
4045 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4046 struct cgroup_subsys *ss = subsys[i];
4047 struct cgroup_subsys_state *css;
4048
4049
4050 if (ss == NULL || ss->root != cgrp->root)
4051 continue;
4052
4053 css = cgrp->subsys[ss->subsys_id];
4054
4055
4056
4057
4058
4059
4060
4061
4062 if (css && css_refcnt(css) > 1)
4063 return 1;
4064 }
4065 return 0;
4066}
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090static int cgroup_clear_css_refs(struct cgroup *cgrp)
4091{
4092 struct cgroup_subsys *ss;
4093 unsigned long flags;
4094 bool failed = false;
4095
4096 local_irq_save(flags);
4097
4098
4099
4100
4101
4102
4103 for_each_subsys(cgrp->root, ss) {
4104 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
4105
4106 WARN_ON(atomic_read(&css->refcnt) < 0);
4107 atomic_add(CSS_DEACT_BIAS, &css->refcnt);
4108
4109 if (ss->__DEPRECATED_clear_css_refs)
4110 failed |= css_refcnt(css) != 1;
4111 }
4112
4113
4114
4115
4116
4117
4118 for_each_subsys(cgrp->root, ss) {
4119 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
4120
4121 if (!failed) {
4122 set_bit(CSS_REMOVED, &css->flags);
4123 css_put(css);
4124 } else {
4125 atomic_sub(CSS_DEACT_BIAS, &css->refcnt);
4126 }
4127 }
4128
4129 local_irq_restore(flags);
4130 return !failed;
4131}
4132
4133static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
4134{
4135 struct cgroup *cgrp = dentry->d_fsdata;
4136 struct dentry *d;
4137 struct cgroup *parent;
4138 DEFINE_WAIT(wait);
4139 struct cgroup_event *event, *tmp;
4140 int ret;
4141
4142
4143again:
4144 mutex_lock(&cgroup_mutex);
4145 if (atomic_read(&cgrp->count) != 0) {
4146 mutex_unlock(&cgroup_mutex);
4147 return -EBUSY;
4148 }
4149 if (!list_empty(&cgrp->children)) {
4150 mutex_unlock(&cgroup_mutex);
4151 return -EBUSY;
4152 }
4153 mutex_unlock(&cgroup_mutex);
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164 set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
4165
4166
4167
4168
4169
4170 ret = cgroup_call_pre_destroy(cgrp);
4171 if (ret) {
4172 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
4173 return ret;
4174 }
4175
4176 mutex_lock(&cgroup_mutex);
4177 parent = cgrp->parent;
4178 if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
4179 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
4180 mutex_unlock(&cgroup_mutex);
4181 return -EBUSY;
4182 }
4183 prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
4184 if (!cgroup_clear_css_refs(cgrp)) {
4185 mutex_unlock(&cgroup_mutex);
4186
4187
4188
4189
4190 if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
4191 schedule();
4192 finish_wait(&cgroup_rmdir_waitq, &wait);
4193 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
4194 if (signal_pending(current))
4195 return -EINTR;
4196 goto again;
4197 }
4198
4199 finish_wait(&cgroup_rmdir_waitq, &wait);
4200 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
4201
4202 raw_spin_lock(&release_list_lock);
4203 set_bit(CGRP_REMOVED, &cgrp->flags);
4204 if (!list_empty(&cgrp->release_list))
4205 list_del_init(&cgrp->release_list);
4206 raw_spin_unlock(&release_list_lock);
4207
4208
4209 list_del_init(&cgrp->sibling);
4210
4211 list_del_init(&cgrp->allcg_node);
4212
4213 d = dget(cgrp->dentry);
4214
4215 cgroup_d_remove_dir(d);
4216 dput(d);
4217
4218 set_bit(CGRP_RELEASABLE, &parent->flags);
4219 check_for_release(parent);
4220
4221
4222
4223
4224
4225
4226 spin_lock(&cgrp->event_list_lock);
4227 list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
4228 list_del(&event->list);
4229 remove_wait_queue(event->wqh, &event->wait);
4230 eventfd_signal(event->eventfd, 1);
4231 schedule_work(&event->remove);
4232 }
4233 spin_unlock(&cgrp->event_list_lock);
4234
4235 mutex_unlock(&cgroup_mutex);
4236 return 0;
4237}
4238
4239static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss)
4240{
4241 INIT_LIST_HEAD(&ss->cftsets);
4242
4243
4244
4245
4246
4247 if (ss->base_cftypes) {
4248 ss->base_cftset.cfts = ss->base_cftypes;
4249 list_add_tail(&ss->base_cftset.node, &ss->cftsets);
4250 }
4251}
4252
4253static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
4254{
4255 struct cgroup_subsys_state *css;
4256
4257 printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
4258
4259
4260 cgroup_init_cftsets(ss);
4261
4262
4263 list_add(&ss->sibling, &rootnode.subsys_list);
4264 ss->root = &rootnode;
4265 css = ss->create(dummytop);
4266
4267 BUG_ON(IS_ERR(css));
4268 init_cgroup_css(css, ss, dummytop);
4269
4270
4271
4272
4273
4274 init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
4275
4276 need_forkexit_callback |= ss->fork || ss->exit;
4277
4278
4279
4280
4281 BUG_ON(!list_empty(&init_task.tasks));
4282
4283 ss->active = 1;
4284
4285
4286
4287 BUG_ON(ss->module);
4288}
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
4300{
4301 int i;
4302 struct cgroup_subsys_state *css;
4303
4304
4305 if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
4306 ss->create == NULL || ss->destroy == NULL)
4307 return -EINVAL;
4308
4309
4310
4311
4312
4313
4314
4315 if (ss->fork || ss->exit)
4316 return -EINVAL;
4317
4318
4319
4320
4321
4322 if (ss->module == NULL) {
4323
4324 BUG_ON(ss->subsys_id >= CGROUP_BUILTIN_SUBSYS_COUNT);
4325 BUG_ON(subsys[ss->subsys_id] != ss);
4326 return 0;
4327 }
4328
4329
4330 cgroup_init_cftsets(ss);
4331
4332
4333
4334
4335
4336 mutex_lock(&cgroup_mutex);
4337
4338 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
4339 if (subsys[i] == NULL)
4340 break;
4341 }
4342 if (i == CGROUP_SUBSYS_COUNT) {
4343
4344 mutex_unlock(&cgroup_mutex);
4345 return -EBUSY;
4346 }
4347
4348 ss->subsys_id = i;
4349 subsys[i] = ss;
4350
4351
4352
4353
4354
4355 css = ss->create(dummytop);
4356 if (IS_ERR(css)) {
4357
4358 subsys[i] = NULL;
4359 mutex_unlock(&cgroup_mutex);
4360 return PTR_ERR(css);
4361 }
4362
4363 list_add(&ss->sibling, &rootnode.subsys_list);
4364 ss->root = &rootnode;
4365
4366
4367 init_cgroup_css(css, ss, dummytop);
4368
4369 if (ss->use_id) {
4370 int ret = cgroup_init_idr(ss, css);
4371 if (ret) {
4372 dummytop->subsys[ss->subsys_id] = NULL;
4373 ss->destroy(dummytop);
4374 subsys[i] = NULL;
4375 mutex_unlock(&cgroup_mutex);
4376 return ret;
4377 }
4378 }
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388 write_lock(&css_set_lock);
4389 for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
4390 struct css_set *cg;
4391 struct hlist_node *node, *tmp;
4392 struct hlist_head *bucket = &css_set_table[i], *new_bucket;
4393
4394 hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) {
4395
4396 if (cg->subsys[ss->subsys_id])
4397 continue;
4398
4399 hlist_del(&cg->hlist);
4400
4401 cg->subsys[ss->subsys_id] = css;
4402
4403 new_bucket = css_set_hash(cg->subsys);
4404 hlist_add_head(&cg->hlist, new_bucket);
4405 }
4406 }
4407 write_unlock(&css_set_lock);
4408
4409 ss->active = 1;
4410
4411
4412 mutex_unlock(&cgroup_mutex);
4413 return 0;
4414}
4415EXPORT_SYMBOL_GPL(cgroup_load_subsys);
4416
4417
4418
4419
4420
4421
4422
4423
4424
4425void cgroup_unload_subsys(struct cgroup_subsys *ss)
4426{
4427 struct cg_cgroup_link *link;
4428 struct hlist_head *hhead;
4429
4430 BUG_ON(ss->module == NULL);
4431
4432
4433
4434
4435
4436
4437 BUG_ON(ss->root != &rootnode);
4438
4439 mutex_lock(&cgroup_mutex);
4440
4441 BUG_ON(ss->subsys_id < CGROUP_BUILTIN_SUBSYS_COUNT);
4442 subsys[ss->subsys_id] = NULL;
4443
4444
4445 list_del_init(&ss->sibling);
4446
4447
4448
4449
4450
4451 write_lock(&css_set_lock);
4452 list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) {
4453 struct css_set *cg = link->cg;
4454
4455 hlist_del(&cg->hlist);
4456 BUG_ON(!cg->subsys[ss->subsys_id]);
4457 cg->subsys[ss->subsys_id] = NULL;
4458 hhead = css_set_hash(cg->subsys);
4459 hlist_add_head(&cg->hlist, hhead);
4460 }
4461 write_unlock(&css_set_lock);
4462
4463
4464
4465
4466
4467
4468
4469 ss->destroy(dummytop);
4470 dummytop->subsys[ss->subsys_id] = NULL;
4471
4472 mutex_unlock(&cgroup_mutex);
4473}
4474EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
4475
4476
4477
4478
4479
4480
4481
4482int __init cgroup_init_early(void)
4483{
4484 int i;
4485 atomic_set(&init_css_set.refcount, 1);
4486 INIT_LIST_HEAD(&init_css_set.cg_links);
4487 INIT_LIST_HEAD(&init_css_set.tasks);
4488 INIT_HLIST_NODE(&init_css_set.hlist);
4489 css_set_count = 1;
4490 init_cgroup_root(&rootnode);
4491 root_count = 1;
4492 init_task.cgroups = &init_css_set;
4493
4494 init_css_set_link.cg = &init_css_set;
4495 init_css_set_link.cgrp = dummytop;
4496 list_add(&init_css_set_link.cgrp_link_list,
4497 &rootnode.top_cgroup.css_sets);
4498 list_add(&init_css_set_link.cg_link_list,
4499 &init_css_set.cg_links);
4500
4501 for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
4502 INIT_HLIST_HEAD(&css_set_table[i]);
4503
4504
4505 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4506 struct cgroup_subsys *ss = subsys[i];
4507
4508 BUG_ON(!ss->name);
4509 BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
4510 BUG_ON(!ss->create);
4511 BUG_ON(!ss->destroy);
4512 if (ss->subsys_id != i) {
4513 printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
4514 ss->name, ss->subsys_id);
4515 BUG();
4516 }
4517
4518 if (ss->early_init)
4519 cgroup_init_subsys(ss);
4520 }
4521 return 0;
4522}
4523
4524
4525
4526
4527
4528
4529
4530int __init cgroup_init(void)
4531{
4532 int err;
4533 int i;
4534 struct hlist_head *hhead;
4535
4536 err = bdi_init(&cgroup_backing_dev_info);
4537 if (err)
4538 return err;
4539
4540
4541 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4542 struct cgroup_subsys *ss = subsys[i];
4543 if (!ss->early_init)
4544 cgroup_init_subsys(ss);
4545 if (ss->use_id)
4546 cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
4547 }
4548
4549
4550 hhead = css_set_hash(init_css_set.subsys);
4551 hlist_add_head(&init_css_set.hlist, hhead);
4552 BUG_ON(!init_root_id(&rootnode));
4553
4554 cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
4555 if (!cgroup_kobj) {
4556 err = -ENOMEM;
4557 goto out;
4558 }
4559
4560 err = register_filesystem(&cgroup_fs_type);
4561 if (err < 0) {
4562 kobject_put(cgroup_kobj);
4563 goto out;
4564 }
4565
4566 proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);
4567
4568out:
4569 if (err)
4570 bdi_destroy(&cgroup_backing_dev_info);
4571
4572 return err;
4573}
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588static int proc_cgroup_show(struct seq_file *m, void *v)
4589{
4590 struct pid *pid;
4591 struct task_struct *tsk;
4592 char *buf;
4593 int retval;
4594 struct cgroupfs_root *root;
4595
4596 retval = -ENOMEM;
4597 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
4598 if (!buf)
4599 goto out;
4600
4601 retval = -ESRCH;
4602 pid = m->private;
4603 tsk = get_pid_task(pid, PIDTYPE_PID);
4604 if (!tsk)
4605 goto out_free;
4606
4607 retval = 0;
4608
4609 mutex_lock(&cgroup_mutex);
4610
4611 for_each_active_root(root) {
4612 struct cgroup_subsys *ss;
4613 struct cgroup *cgrp;
4614 int count = 0;
4615
4616 seq_printf(m, "%d:", root->hierarchy_id);
4617 for_each_subsys(root, ss)
4618 seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
4619 if (strlen(root->name))
4620 seq_printf(m, "%sname=%s", count ? "," : "",
4621 root->name);
4622 seq_putc(m, ':');
4623 cgrp = task_cgroup_from_root(tsk, root);
4624 retval = cgroup_path(cgrp, buf, PAGE_SIZE);
4625 if (retval < 0)
4626 goto out_unlock;
4627 seq_puts(m, buf);
4628 seq_putc(m, '\n');
4629 }
4630
4631out_unlock:
4632 mutex_unlock(&cgroup_mutex);
4633 put_task_struct(tsk);
4634out_free:
4635 kfree(buf);
4636out:
4637 return retval;
4638}
4639
4640static int cgroup_open(struct inode *inode, struct file *file)
4641{
4642 struct pid *pid = PROC_I(inode)->pid;
4643 return single_open(file, proc_cgroup_show, pid);
4644}
4645
4646const struct file_operations proc_cgroup_operations = {
4647 .open = cgroup_open,
4648 .read = seq_read,
4649 .llseek = seq_lseek,
4650 .release = single_release,
4651};
4652
4653
4654static int proc_cgroupstats_show(struct seq_file *m, void *v)
4655{
4656 int i;
4657
4658 seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
4659
4660
4661
4662
4663
4664 mutex_lock(&cgroup_mutex);
4665 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4666 struct cgroup_subsys *ss = subsys[i];
4667 if (ss == NULL)
4668 continue;
4669 seq_printf(m, "%s\t%d\t%d\t%d\n",
4670 ss->name, ss->root->hierarchy_id,
4671 ss->root->number_of_cgroups, !ss->disabled);
4672 }
4673 mutex_unlock(&cgroup_mutex);
4674 return 0;
4675}
4676
4677static int cgroupstats_open(struct inode *inode, struct file *file)
4678{
4679 return single_open(file, proc_cgroupstats_show, NULL);
4680}
4681
4682static const struct file_operations proc_cgroupstats_operations = {
4683 .open = cgroupstats_open,
4684 .read = seq_read,
4685 .llseek = seq_lseek,
4686 .release = single_release,
4687};
4688
4689
4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
4704
4705void cgroup_fork(struct task_struct *child)
4706{
4707 task_lock(current);
4708 child->cgroups = current->cgroups;
4709 get_css_set(child->cgroups);
4710 task_unlock(current);
4711 INIT_LIST_HEAD(&child->cg_list);
4712}
4713
4714
4715
4716
4717
4718
4719
4720
4721
4722void cgroup_fork_callbacks(struct task_struct *child)
4723{
4724 if (need_forkexit_callback) {
4725 int i;
4726
4727
4728
4729
4730
4731 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4732 struct cgroup_subsys *ss = subsys[i];
4733 if (ss->fork)
4734 ss->fork(child);
4735 }
4736 }
4737}
4738
4739
4740
4741
4742
4743
4744
4745
4746
4747
4748void cgroup_post_fork(struct task_struct *child)
4749{
4750
4751
4752
4753
4754
4755
4756
4757
4758
4759
4760
4761 if (use_task_css_set_links) {
4762 write_lock(&css_set_lock);
4763 task_lock(child);
4764 if (list_empty(&child->cg_list))
4765 list_add(&child->cg_list, &child->cgroups->tasks);
4766 task_unlock(child);
4767 write_unlock(&css_set_lock);
4768 }
4769}
4770
4771
4772
4773
4774
4775
4776
4777
4778
4779
4780
4781
4782
4783
4784
4785
4786
4787
4788
4789
4790
4791
4792
4793
4794
4795
4796
4797
4798
4799
4800
4801
4802
4803
4804
/*
 * cgroup_exit - detach an exiting task from its css_set
 * @tsk: the task being torn down
 * @run_callbacks: non-zero to invoke the subsystems' ->exit() hooks
 *
 * Removes @tsk from its css_set's task list if it is on one, repoints
 * tsk->cgroups at init_css_set, optionally runs the builtin subsystems'
 * exit callbacks, and finally hands the previous css_set to
 * put_css_set_taskexit().
 */
void cgroup_exit(struct task_struct *tsk, int run_callbacks)
{
	struct css_set *cg;
	int i;

	/*
	 * Cheap unlocked test first; the list state is re-checked under
	 * css_set_lock before the node is actually unlinked.
	 */
	if (!list_empty(&tsk->cg_list)) {
		write_lock(&css_set_lock);
		if (!list_empty(&tsk->cg_list))
			list_del_init(&tsk->cg_list);
		write_unlock(&css_set_lock);
	}

	/* Swap the task over to init_css_set, remembering the old set. */
	task_lock(tsk);
	cg = tsk->cgroups;
	tsk->cgroups = &init_css_set;

	if (run_callbacks && need_forkexit_callback) {
		/* Only builtin subsystems are visited here. */
		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss->exit) {
				/*
				 * old_cgrp comes from the css_set the task
				 * just left; cgrp is looked up after
				 * tsk->cgroups was switched above.
				 */
				struct cgroup *old_cgrp =
					rcu_dereference_raw(cg->subsys[i])->cgroup;
				struct cgroup *cgrp = task_cgroup(tsk, i);
				ss->exit(cgrp, old_cgrp, tsk);
			}
		}
	}
	task_unlock(tsk);

	/*
	 * NOTE(review): cg has already been dereferenced in the callback
	 * loop above, so this NULL test only matters when no callbacks ran.
	 */
	if (cg)
		put_css_set_taskexit(cg);
}
4847
4848
4849
4850
4851
4852
4853
4854
4855
4856
4857
4858
4859
4860
4861int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
4862{
4863 int ret;
4864 struct cgroup *target;
4865
4866 if (cgrp == dummytop)
4867 return 1;
4868
4869 target = task_cgroup_from_root(task, cgrp->root);
4870 while (cgrp != target && cgrp!= cgrp->top_cgroup)
4871 cgrp = cgrp->parent;
4872 ret = (cgrp == target);
4873 return ret;
4874}
4875
4876static void check_for_release(struct cgroup *cgrp)
4877{
4878
4879
4880 if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
4881 && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
4882
4883
4884
4885 int need_schedule_work = 0;
4886 raw_spin_lock(&release_list_lock);
4887 if (!cgroup_is_removed(cgrp) &&
4888 list_empty(&cgrp->release_list)) {
4889 list_add(&cgrp->release_list, &release_list);
4890 need_schedule_work = 1;
4891 }
4892 raw_spin_unlock(&release_list_lock);
4893 if (need_schedule_work)
4894 schedule_work(&release_agent_work);
4895 }
4896}
4897
4898
4899bool __css_tryget(struct cgroup_subsys_state *css)
4900{
4901 do {
4902 int v = css_refcnt(css);
4903
4904 if (atomic_cmpxchg(&css->refcnt, v, v + 1) == v)
4905 return true;
4906 cpu_relax();
4907 } while (!test_bit(CSS_REMOVED, &css->flags));
4908
4909 return false;
4910}
4911EXPORT_SYMBOL_GPL(__css_tryget);
4912
4913
4914void __css_put(struct cgroup_subsys_state *css)
4915{
4916 struct cgroup *cgrp = css->cgroup;
4917 int v;
4918
4919 rcu_read_lock();
4920 v = css_unbias_refcnt(atomic_dec_return(&css->refcnt));
4921
4922 switch (v) {
4923 case 1:
4924 if (notify_on_release(cgrp)) {
4925 set_bit(CGRP_RELEASABLE, &cgrp->flags);
4926 check_for_release(cgrp);
4927 }
4928 cgroup_wakeup_rmdir_waiter(cgrp);
4929 break;
4930 case 0:
4931 if (!test_bit(CSS_CLEAR_CSS_REFS, &css->flags))
4932 schedule_work(&css->dput_work);
4933 break;
4934 }
4935 rcu_read_unlock();
4936}
4937EXPORT_SYMBOL_GPL(__css_put);
4938
4939
4940
4941
4942
4943
4944
4945
4946
4947
4948
4949
4950
4951
4952
4953
4954
4955
4956
4957
4958
4959
4960
4961
/*
 * Work handler that drains release_list and runs the release agent for
 * each queued cgroup.
 *
 * For every entry the handler builds
 *   argv = { <root's release_agent_path>, <cgroup path>, NULL }
 * with a fixed minimal environment and invokes it through
 * call_usermodehelper() with UMH_WAIT_EXEC.  An entry whose buffers
 * cannot be allocated, or whose path cannot be built, is skipped; it
 * has already been taken off the list at that point.
 *
 * Locking: cgroup_mutex is held except around the usermode helper call;
 * release_list_lock is held only while the list itself is examined or
 * modified.
 */
static void cgroup_release_agent(struct work_struct *work)
{
	/* This handler only serves the single static work item. */
	BUG_ON(work != &release_agent_work);
	mutex_lock(&cgroup_mutex);
	raw_spin_lock(&release_list_lock);
	while (!list_empty(&release_list)) {
		char *argv[3], *envp[3];
		int i;
		char *pathbuf = NULL, *agentbuf = NULL;
		struct cgroup *cgrp = list_entry(release_list.next,
						    struct cgroup,
						    release_list);
		/* Dequeue first, then drop the spinlock before allocating. */
		list_del_init(&cgrp->release_list);
		raw_spin_unlock(&release_list_lock);
		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!pathbuf)
			goto continue_free;
		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
			goto continue_free;
		/* Private copy of the agent path for use as argv[0]. */
		agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
		if (!agentbuf)
			goto continue_free;

		i = 0;
		argv[i++] = agentbuf;
		argv[i++] = pathbuf;
		argv[i] = NULL;

		i = 0;
		/* Minimal fixed environment for the helper. */
		envp[i++] = "HOME=/";
		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
		envp[i] = NULL;

		/*
		 * cgroup_mutex is released for the duration of the helper
		 * call and retaken afterwards.
		 */
		mutex_unlock(&cgroup_mutex);
		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
		mutex_lock(&cgroup_mutex);
 continue_free:
		/* Either buffer may still be NULL on the error paths. */
		kfree(pathbuf);
		kfree(agentbuf);
		/* Retake the spinlock for the next list_empty() test. */
		raw_spin_lock(&release_list_lock);
	}
	raw_spin_unlock(&release_list_lock);
	mutex_unlock(&cgroup_mutex);
}
5010
5011static int __init cgroup_disable(char *str)
5012{
5013 int i;
5014 char *token;
5015
5016 while ((token = strsep(&str, ",")) != NULL) {
5017 if (!*token)
5018 continue;
5019
5020
5021
5022
5023 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
5024 struct cgroup_subsys *ss = subsys[i];
5025
5026 if (!strcmp(token, ss->name)) {
5027 ss->disabled = 1;
5028 printk(KERN_INFO "Disabling %s control group"
5029 " subsystem\n", ss->name);
5030 break;
5031 }
5032 }
5033 }
5034 return 1;
5035}
5036__setup("cgroup_disable=", cgroup_disable);
5037
5038
5039
5040
5041
5042
5043
5044
5045unsigned short css_id(struct cgroup_subsys_state *css)
5046{
5047 struct css_id *cssid;
5048
5049
5050
5051
5052
5053
5054 cssid = rcu_dereference_check(css->id, css_refcnt(css));
5055
5056 if (cssid)
5057 return cssid->id;
5058 return 0;
5059}
5060EXPORT_SYMBOL_GPL(css_id);
5061
5062unsigned short css_depth(struct cgroup_subsys_state *css)
5063{
5064 struct css_id *cssid;
5065
5066 cssid = rcu_dereference_check(css->id, css_refcnt(css));
5067
5068 if (cssid)
5069 return cssid->depth;
5070 return 0;
5071}
5072EXPORT_SYMBOL_GPL(css_depth);
5073
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087bool css_is_ancestor(struct cgroup_subsys_state *child,
5088 const struct cgroup_subsys_state *root)
5089{
5090 struct css_id *child_id;
5091 struct css_id *root_id;
5092
5093 child_id = rcu_dereference(child->id);
5094 if (!child_id)
5095 return false;
5096 root_id = rcu_dereference(root->id);
5097 if (!root_id)
5098 return false;
5099 if (child_id->depth < root_id->depth)
5100 return false;
5101 if (child_id->stack[root_id->depth] != root_id->id)
5102 return false;
5103 return true;
5104}
5105
/*
 * free_css_id - detach and free the css_id attached to @css
 * @ss: the subsystem that owns the id space
 * @css: the css whose id is being released
 *
 * Does nothing when @css has no css_id.  Otherwise the links between
 * the css and its css_id are cleared in both directions, the numeric id
 * is removed from the subsystem's idr under ss->id_lock, and the css_id
 * itself is freed via kfree_rcu().
 */
void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
{
	struct css_id *id = css->id;

	if (!id)
		return;

	/* A css_id can only exist for a subsystem that uses ids. */
	BUG_ON(!ss->use_id);

	/* Unpublish both pointers before the id is taken out of the idr. */
	rcu_assign_pointer(id->css, NULL);
	rcu_assign_pointer(css->id, NULL);
	spin_lock(&ss->id_lock);
	idr_remove(&ss->idr, id->id);
	spin_unlock(&ss->id_lock);
	/* Freeing is deferred through RCU rather than done immediately. */
	kfree_rcu(id, rcu_head);
}
EXPORT_SYMBOL_GPL(free_css_id);
5123
5124
5125
5126
5127
5128
5129static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
5130{
5131 struct css_id *newid;
5132 int myid, error, size;
5133
5134 BUG_ON(!ss->use_id);
5135
5136 size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
5137 newid = kzalloc(size, GFP_KERNEL);
5138 if (!newid)
5139 return ERR_PTR(-ENOMEM);
5140
5141 if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) {
5142 error = -ENOMEM;
5143 goto err_out;
5144 }
5145 spin_lock(&ss->id_lock);
5146
5147 error = idr_get_new_above(&ss->idr, newid, 1, &myid);
5148 spin_unlock(&ss->id_lock);
5149
5150
5151 if (error) {
5152 error = -ENOSPC;
5153 goto err_out;
5154 }
5155 if (myid > CSS_ID_MAX)
5156 goto remove_idr;
5157
5158 newid->id = myid;
5159 newid->depth = depth;
5160 return newid;
5161remove_idr:
5162 error = -ENOSPC;
5163 spin_lock(&ss->id_lock);
5164 idr_remove(&ss->idr, myid);
5165 spin_unlock(&ss->id_lock);
5166err_out:
5167 kfree(newid);
5168 return ERR_PTR(error);
5169
5170}
5171
5172static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
5173 struct cgroup_subsys_state *rootcss)
5174{
5175 struct css_id *newid;
5176
5177 spin_lock_init(&ss->id_lock);
5178 idr_init(&ss->idr);
5179
5180 newid = get_new_cssid(ss, 0);
5181 if (IS_ERR(newid))
5182 return PTR_ERR(newid);
5183
5184 newid->stack[0] = newid->id;
5185 newid->css = rootcss;
5186 rootcss->id = newid;
5187 return 0;
5188}
5189
5190static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
5191 struct cgroup *child)
5192{
5193 int subsys_id, i, depth = 0;
5194 struct cgroup_subsys_state *parent_css, *child_css;
5195 struct css_id *child_id, *parent_id;
5196
5197 subsys_id = ss->subsys_id;
5198 parent_css = parent->subsys[subsys_id];
5199 child_css = child->subsys[subsys_id];
5200 parent_id = parent_css->id;
5201 depth = parent_id->depth + 1;
5202
5203 child_id = get_new_cssid(ss, depth);
5204 if (IS_ERR(child_id))
5205 return PTR_ERR(child_id);
5206
5207 for (i = 0; i < depth; i++)
5208 child_id->stack[i] = parent_id->stack[i];
5209 child_id->stack[depth] = child_id->id;
5210
5211
5212
5213
5214 rcu_assign_pointer(child_css->id, child_id);
5215
5216 return 0;
5217}
5218
5219
5220
5221
5222
5223
5224
5225
5226
5227struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
5228{
5229 struct css_id *cssid = NULL;
5230
5231 BUG_ON(!ss->use_id);
5232 cssid = idr_find(&ss->idr, id);
5233
5234 if (unlikely(!cssid))
5235 return NULL;
5236
5237 return rcu_dereference(cssid->css);
5238}
5239EXPORT_SYMBOL_GPL(css_lookup);
5240
5241
5242
5243
5244
5245
5246
5247
5248
5249
5250
5251struct cgroup_subsys_state *
5252css_get_next(struct cgroup_subsys *ss, int id,
5253 struct cgroup_subsys_state *root, int *foundid)
5254{
5255 struct cgroup_subsys_state *ret = NULL;
5256 struct css_id *tmp;
5257 int tmpid;
5258 int rootid = css_id(root);
5259 int depth = css_depth(root);
5260
5261 if (!rootid)
5262 return NULL;
5263
5264 BUG_ON(!ss->use_id);
5265 WARN_ON_ONCE(!rcu_read_lock_held());
5266
5267
5268 tmpid = id;
5269 while (1) {
5270
5271
5272
5273
5274 tmp = idr_get_next(&ss->idr, &tmpid);
5275 if (!tmp)
5276 break;
5277 if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
5278 ret = rcu_dereference(tmp->css);
5279 if (ret) {
5280 *foundid = tmpid;
5281 break;
5282 }
5283 }
5284
5285 tmpid = tmpid + 1;
5286 }
5287 return ret;
5288}
5289
5290
5291
5292
5293struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
5294{
5295 struct cgroup *cgrp;
5296 struct inode *inode;
5297 struct cgroup_subsys_state *css;
5298
5299 inode = f->f_dentry->d_inode;
5300
5301 if (inode->i_op != &cgroup_dir_inode_operations)
5302 return ERR_PTR(-EBADF);
5303
5304 if (id < 0 || id >= CGROUP_SUBSYS_COUNT)
5305 return ERR_PTR(-EINVAL);
5306
5307
5308 cgrp = __d_cgrp(f->f_dentry);
5309 css = cgrp->subsys[id];
5310 return css ? css : ERR_PTR(-ENOENT);
5311}
5312
5313#ifdef CONFIG_CGROUP_DEBUG
5314static struct cgroup_subsys_state *debug_create(struct cgroup *cont)
5315{
5316 struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
5317
5318 if (!css)
5319 return ERR_PTR(-ENOMEM);
5320
5321 return css;
5322}
5323
/*
 * ->destroy() hook of the debug subsystem: frees the state stored in
 * the cgroup's debug subsystem slot.
 */
static void debug_destroy(struct cgroup *cont)
{
	kfree(cont->subsys[debug_subsys_id]);
}
5328
/* "cgroup_refcount" file: reports the current value of cont->count. */
static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
{
	return atomic_read(&cont->count);
}
5333
/* "taskcount" file: reports the result of cgroup_task_count() for @cont. */
static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft)
{
	return cgroup_task_count(cont);
}
5338
/*
 * "current_css_set" file: reports the address of the reading task's
 * css_set as an integer.  @cont is not consulted.
 */
static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
{
	return (u64)(unsigned long)current->cgroups;
}
5343
5344static u64 current_css_set_refcount_read(struct cgroup *cont,
5345 struct cftype *cft)
5346{
5347 u64 count;
5348
5349 rcu_read_lock();
5350 count = atomic_read(¤t->cgroups->refcount);
5351 rcu_read_unlock();
5352 return count;
5353}
5354
5355static int current_css_set_cg_links_read(struct cgroup *cont,
5356 struct cftype *cft,
5357 struct seq_file *seq)
5358{
5359 struct cg_cgroup_link *link;
5360 struct css_set *cg;
5361
5362 read_lock(&css_set_lock);
5363 rcu_read_lock();
5364 cg = rcu_dereference(current->cgroups);
5365 list_for_each_entry(link, &cg->cg_links, cg_link_list) {
5366 struct cgroup *c = link->cgrp;
5367 const char *name;
5368
5369 if (c->dentry)
5370 name = c->dentry->d_name.name;
5371 else
5372 name = "?";
5373 seq_printf(seq, "Root %d group %s\n",
5374 c->root->hierarchy_id, name);
5375 }
5376 rcu_read_unlock();
5377 read_unlock(&css_set_lock);
5378 return 0;
5379}
5380
5381#define MAX_TASKS_SHOWN_PER_CSS 25
5382static int cgroup_css_links_read(struct cgroup *cont,
5383 struct cftype *cft,
5384 struct seq_file *seq)
5385{
5386 struct cg_cgroup_link *link;
5387
5388 read_lock(&css_set_lock);
5389 list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
5390 struct css_set *cg = link->cg;
5391 struct task_struct *task;
5392 int count = 0;
5393 seq_printf(seq, "css_set %p\n", cg);
5394 list_for_each_entry(task, &cg->tasks, cg_list) {
5395 if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
5396 seq_puts(seq, " ...\n");
5397 break;
5398 } else {
5399 seq_printf(seq, " task %d\n",
5400 task_pid_vnr(task));
5401 }
5402 }
5403 }
5404 read_unlock(&css_set_lock);
5405 return 0;
5406}
5407
/* "releasable" file: reports whether CGRP_RELEASABLE is set on @cgrp. */
static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
{
	return test_bit(CGRP_RELEASABLE, &cgrp->flags);
}
5412
/*
 * Control files exposed by the debug subsystem.  Each entry maps a file
 * name to the read handler defined above; the empty entry terminates
 * the table.
 */
static struct cftype debug_files[] = {
	{
		.name = "cgroup_refcount",
		.read_u64 = cgroup_refcount_read,
	},
	{
		.name = "taskcount",
		.read_u64 = debug_taskcount_read,
	},

	{
		.name = "current_css_set",
		.read_u64 = current_css_set_read,
	},

	{
		.name = "current_css_set_refcount",
		.read_u64 = current_css_set_refcount_read,
	},

	{
		.name = "current_css_set_cg_links",
		.read_seq_string = current_css_set_cg_links_read,
	},

	{
		.name = "cgroup_css_links",
		.read_seq_string = cgroup_css_links_read,
	},

	{
		.name = "releasable",
		.read_u64 = releasable_read,
	},

	{ }	/* terminate */
};
5450
/*
 * The "debug" cgroup subsystem (only built with CONFIG_CGROUP_DEBUG):
 * ties the create/destroy hooks to the debug_files control-file table.
 */
struct cgroup_subsys debug_subsys = {
	.name = "debug",
	.create = debug_create,
	.destroy = debug_destroy,
	.subsys_id = debug_subsys_id,
	.base_cftypes = debug_files,
};
5458#endif
5459