1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include <linux/cgroup.h>
30#include <linux/ctype.h>
31#include <linux/errno.h>
32#include <linux/fs.h>
33#include <linux/kernel.h>
34#include <linux/list.h>
35#include <linux/mm.h>
36#include <linux/mutex.h>
37#include <linux/mount.h>
38#include <linux/pagemap.h>
39#include <linux/proc_fs.h>
40#include <linux/rcupdate.h>
41#include <linux/sched.h>
42#include <linux/backing-dev.h>
43#include <linux/seq_file.h>
44#include <linux/slab.h>
45#include <linux/magic.h>
46#include <linux/spinlock.h>
47#include <linux/string.h>
48#include <linux/sort.h>
49#include <linux/kmod.h>
50#include <linux/module.h>
51#include <linux/delayacct.h>
52#include <linux/cgroupstats.h>
53#include <linux/hash.h>
54#include <linux/namei.h>
55#include <linux/pid_namespace.h>
56#include <linux/idr.h>
57#include <linux/vmalloc.h>
58#include <linux/eventfd.h>
59#include <linux/poll.h>
60
61#include <asm/atomic.h>
62
/* Guards modifications to cgroup hierarchy state throughout this file. */
static DEFINE_MUTEX(cgroup_mutex);
64
65
66
67
68
69
70
/*
 * Generate an array of cgroup subsystem pointers from the declarations
 * in cgroup_subsys.h. Entries for modular subsystems may be NULL until
 * the module registers itself (callers below check for NULL).
 */
#define SUBSYS(_x) &_x ## _subsys,
static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
#include <linux/cgroup_subsys.h>
};
75
#define MAX_CGROUP_ROOT_NAMELEN 64

/*
 * A cgroupfs_root represents the root of a cgroup hierarchy, and may be
 * associated with a superblock to form an active hierarchy.
 */
struct cgroupfs_root {
	struct super_block *sb;

	/*
	 * The bitmask of subsystems intended to be attached to this
	 * hierarchy
	 */
	unsigned long subsys_bits;

	/* Unique id for this hierarchy. */
	int hierarchy_id;

	/* The bitmask of subsystems currently attached to this hierarchy */
	unsigned long actual_subsys_bits;

	/* A list running through the attached subsystems */
	struct list_head subsys_list;

	/* The root cgroup for this hierarchy */
	struct cgroup top_cgroup;

	/* Tracks how many cgroups are currently defined in hierarchy.*/
	int number_of_cgroups;

	/* A list running through the active hierarchies */
	struct list_head root_list;

	/* Hierarchy-specific flags (e.g. ROOT_NOPREFIX) */
	unsigned long flags;

	/* The path to use for release notifications. */
	char release_agent_path[PATH_MAX];

	/* The name for this hierarchy - may be empty */
	char name[MAX_CGROUP_ROOT_NAMELEN];
};
119
120
121
122
123
124
/*
 * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
 * subsystems that are otherwise unattached - it never has more than a
 * single cgroup (see dummytop below).
 */
static struct cgroupfs_root rootnode;
126
127
128
129
130
#define CSS_ID_MAX	(65535)

/*
 * Per-subsystem-state ID. Lookup is expected to be RCU-protected (the
 * id itself is freed through rcu_head below).
 */
struct css_id {
	/*
	 * The css to which this ID points. Marked __rcu, so readers must
	 * dereference it under rcu_read_lock(); presumably cleared to
	 * NULL when the css goes away — confirm against the id teardown
	 * code (not visible in this chunk).
	 */
	struct cgroup_subsys_state __rcu *css;
	/*
	 * ID of this css.
	 */
	unsigned short id;
	/*
	 * Depth in hierarchy which this ID belongs to.
	 */
	unsigned short depth;
	/*
	 * ID is freed by RCU. (and lookup routine is RCU safe.)
	 */
	struct rcu_head rcu_head;
	/*
	 * Trailing flexible array; NOTE(review): looks like the id chain
	 * of ancestors, length (depth+1) — verify against the alloc path.
	 */
	unsigned short stack[0];
};
158
159
160
161
/*
 * cgroup_event represents events which userspace want to receive.
 */
struct cgroup_event {
	/*
	 * Cgroup which the event belongs to.
	 */
	struct cgroup *cgrp;
	/*
	 * Control file which the event associated.
	 */
	struct cftype *cft;
	/*
	 * eventfd to signal userspace about the event.
	 */
	struct eventfd_ctx *eventfd;
	/*
	 * Each of these stored in a list by the cgroup.
	 */
	struct list_head list;
	/*
	 * All fields below needed to unregister event when
	 * userspace closes eventfd.
	 */
	poll_table pt;
	wait_queue_head_t *wqh;
	wait_queue_t wait;
	struct work_struct remove;
};
188
189
190
/* The list of hierarchy roots */
static LIST_HEAD(roots);
static int root_count;

/* Allocation of unique hierarchy ids; all three protected by hierarchy_id_lock */
static DEFINE_IDA(hierarchy_ida);
static int next_hierarchy_id;
static DEFINE_SPINLOCK(hierarchy_id_lock);

/* dummytop is a shorthand for the dummy hierarchy's top cgroup */
#define dummytop (&rootnode.top_cgroup)

/*
 * NOTE(review): presumably set when some subsystem registers fork/exit
 * callbacks, to skip that work otherwise — the fork/exit paths that
 * read this are outside this chunk; confirm there.
 */
static int need_forkexit_callback __read_mostly;
207
#ifdef CONFIG_PROVE_LOCKING
/* With lockdep enabled we can report whether *this* task holds the mutex */
int cgroup_lock_is_held(void)
{
	return lockdep_is_held(&cgroup_mutex);
}
#else
/* Fallback: can only report that *some* task holds cgroup_mutex */
int cgroup_lock_is_held(void)
{
	return mutex_is_locked(&cgroup_mutex);
}
#endif

EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
221
222
/* Return nonzero if the cgroup has been flagged as removed (rmdir'd). */
inline int cgroup_is_removed(const struct cgroup *cgrp)
{
	return test_bit(CGRP_REMOVED, &cgrp->flags);
}
227
228
/* bits in struct cgroupfs_root flags field */
enum {
	ROOT_NOPREFIX,	/* mounted subsystems have no named prefix */
};
232
233static int cgroup_is_releasable(const struct cgroup *cgrp)
234{
235 const int bits =
236 (1 << CGRP_RELEASABLE) |
237 (1 << CGRP_NOTIFY_ON_RELEASE);
238 return (cgrp->flags & bits) == bits;
239}
240
/* Return nonzero if release notification is enabled for this cgroup. */
static int notify_on_release(const struct cgroup *cgrp)
{
	return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
}
245
/* Return nonzero if the "clone_children" mount option is set on this cgroup. */
static int clone_children(const struct cgroup *cgrp)
{
	return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
}
250
251
252
253
254
/*
 * for_each_subsys() allows you to iterate on each subsystem attached to
 * an active hierarchy
 */
#define for_each_subsys(_root, _ss) \
list_for_each_entry(_ss, &_root->subsys_list, sibling)

/* for_each_active_root() allows you to iterate across the active hierarchies */
#define for_each_active_root(_root) \
list_for_each_entry(_root, &roots, root_list)

/* the list of cgroups eligible for automatic release. Protected by
 * release_list_lock */
static LIST_HEAD(release_list);
static DEFINE_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
269
270
/* Link structure for associating css_set objects with cgroups */
struct cg_cgroup_link {
	/*
	 * List running through cg_cgroup_links associated with a
	 * cgroup, anchored on cgroup->css_sets
	 */
	struct list_head cgrp_link_list;
	struct cgroup *cgrp;
	/*
	 * List running through cg_cgroup_links pointing at a
	 * single css_set object, anchored on css_set->cg_links
	 */
	struct list_head cg_link_list;
	struct css_set *cg;
};
285
286
287
288
289
290
291
292
/*
 * The default css_set - used by init and its children prior to any
 * hierarchies being mounted (see task_cgroup_from_root()). Never
 * freed; init_css_set_link ties it to the dummy hierarchy.
 */
static struct css_set init_css_set;
static struct cg_cgroup_link init_css_set_link;

static int cgroup_init_idr(struct cgroup_subsys *ss,
			   struct cgroup_subsys_state *css);

/*
 * css_set_lock protects the list of css_set objects and css_set_count,
 * and the hash table below.
 */
static DEFINE_RWLOCK(css_set_lock);
static int css_set_count;

/*
 * hash table for cgroup groups. This improves the performance to find
 * an existing css_set.
 */
#define CSS_SET_HASH_BITS	7
#define CSS_SET_TABLE_SIZE	(1 << CSS_SET_HASH_BITS)
static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
313
314static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
315{
316 int i;
317 int index;
318 unsigned long tmp = 0UL;
319
320 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
321 tmp += (unsigned long)css[i];
322 tmp = (tmp >> 16) ^ tmp;
323
324 index = hash_long(tmp, CSS_SET_HASH_BITS);
325
326 return &css_set_table[index];
327}
328
329static void free_css_set_rcu(struct rcu_head *obj)
330{
331 struct css_set *cg = container_of(obj, struct css_set, rcu_head);
332 kfree(cg);
333}
334
335
336
337
338
/*
 * NOTE(review): presumably enables maintaining the per-css_set task
 * lists only once they're first needed, saving overhead when cgroups
 * are unused — the readers/writers are outside this chunk; confirm.
 */
static int use_task_css_set_links __read_mostly;
340
/*
 * Drop a reference on @cg; on the final put, unlink it, release cgroup
 * refcounts, and free it via RCU. @taskexit marks the cgroups
 * releasable before checking for release.
 */
static void __put_css_set(struct css_set *cg, int taskexit)
{
	struct cg_cgroup_link *link;
	struct cg_cgroup_link *saved_link;

	/*
	 * Ensure that the refcount doesn't hit zero while any readers
	 * can see it. Similar to atomic_dec_and_lock(), but for an
	 * rwlock
	 */
	if (atomic_add_unless(&cg->refcount, -1, 1))
		return;
	write_lock(&css_set_lock);
	if (!atomic_dec_and_test(&cg->refcount)) {
		write_unlock(&css_set_lock);
		return;
	}

	/* This css_set is dead. Unlink it and release cgroup refcounts */
	hlist_del(&cg->hlist);
	css_set_count--;

	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
				 cg_link_list) {
		struct cgroup *cgrp = link->cgrp;
		list_del(&link->cg_link_list);
		list_del(&link->cgrp_link_list);
		if (atomic_dec_and_test(&cgrp->count) &&
		    notify_on_release(cgrp)) {
			if (taskexit)
				set_bit(CGRP_RELEASABLE, &cgrp->flags);
			check_for_release(cgrp);
		}

		kfree(link);
	}

	write_unlock(&css_set_lock);
	call_rcu(&cg->rcu_head, free_css_set_rcu);
}
380
381
382
383
/* Take a reference on a css_set. */
static inline void get_css_set(struct css_set *cg)
{
	atomic_inc(&cg->refcount);
}
388
/* Drop a reference; the css_set is torn down when the last ref goes. */
static inline void put_css_set(struct css_set *cg)
{
	__put_css_set(cg, 0);
}
393
/* As put_css_set(), but also marks the cgroups releasable (task exit path). */
static inline void put_css_set_taskexit(struct css_set *cg)
{
	__put_css_set(cg, 1);
}
398
399
400
401
402
403
404
405
406
407
408
/*
 * compare_css_sets - helper function for find_existing_css_set().
 * @cg: candidate css_set being tested
 * @old_cg: existing css_set for a task
 * @new_cgrp: cgroup that's being entered by the task
 * @template: desired set of css pointers in css_set (pre-calculated)
 *
 * Returns true if "cg" matches "old_cg" except for the hierarchy
 * which "new_cgrp" belongs to, for which it should match "new_cgrp".
 */
static bool compare_css_sets(struct css_set *cg,
			     struct css_set *old_cg,
			     struct cgroup *new_cgrp,
			     struct cgroup_subsys_state *template[])
{
	struct list_head *l1, *l2;

	if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
		/* Not all subsystems matched */
		return false;
	}

	/*
	 * Compare cgroup pointers in order to distinguish between
	 * different cgroups in hierarchies with no subsystems. We
	 * could get by with just this check alone (and skip the
	 * memcmp above) but on most setups the memcmp check will
	 * avoid the need for this more expensive check on almost all
	 * candidates.
	 */
	l1 = &cg->cg_links;
	l2 = &old_cg->cg_links;
	while (1) {
		struct cg_cgroup_link *cgl1, *cgl2;
		struct cgroup *cg1, *cg2;

		l1 = l1->next;
		l2 = l2->next;
		/* See if we reached the end - both lists are equal length. */
		if (l1 == &cg->cg_links) {
			BUG_ON(l2 != &old_cg->cg_links);
			break;
		} else {
			BUG_ON(l2 == &old_cg->cg_links);
		}
		/* Locate the cgroups associated with these links. */
		cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
		cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
		cg1 = cgl1->cgrp;
		cg2 = cgl2->cgrp;
		/* Hierarchies should be linked in the same order. */
		BUG_ON(cg1->root != cg2->root);

		/*
		 * If this hierarchy is the hierarchy of the cgroup
		 * that's changing, then we need to check that this
		 * css_set points to the new cgroup; if it's any other
		 * hierarchy, then this css_set should point to the
		 * same cgroup as the old css_set.
		 */
		if (cg1->root == new_cgrp->root) {
			if (cg1 != new_cgrp)
				return false;
		} else {
			if (cg1 != cg2)
				return false;
		}
	}
	return true;
}
470
471
472
473
474
475
476
477
478
479
480
481
482
483
/*
 * find_existing_css_set() is a helper for find_css_set(), and checks
 * to see whether an existing css_set is suitable.
 *
 * oldcg: the css_set that we're using before the cgroup transition
 * cgrp: the cgroup that we're moving into
 * template: location in which to build the desired set of subsystem
 * state objects for the new css_set
 */
static struct css_set *find_existing_css_set(
	struct css_set *oldcg,
	struct cgroup *cgrp,
	struct cgroup_subsys_state *template[])
{
	int i;
	struct cgroupfs_root *root = cgrp->root;
	struct hlist_head *hhead;
	struct hlist_node *node;
	struct css_set *cg;

	/*
	 * Build the set of subsystem state objects that we want to see
	 * in the new css_set.
	 */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		if (root->subsys_bits & (1UL << i)) {
			/* Subsystem is in this hierarchy. So we want
			 * the subsystem state from the new
			 * cgroup */
			template[i] = cgrp->subsys[i];
		} else {
			/* Subsystem is not in this hierarchy, so we
			 * don't want to change the subsystem state */
			template[i] = oldcg->subsys[i];
		}
	}

	hhead = css_set_hash(template);
	hlist_for_each_entry(cg, node, hhead, hlist) {
		if (!compare_css_sets(cg, oldcg, cgrp, template))
			continue;

		/* This css_set matches what we need */
		return cg;
	}

	/* No existing css_set matched */
	return NULL;
}
525
526static void free_cg_links(struct list_head *tmp)
527{
528 struct cg_cgroup_link *link;
529 struct cg_cgroup_link *saved_link;
530
531 list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
532 list_del(&link->cgrp_link_list);
533 kfree(link);
534 }
535}
536
537
538
539
540
541
542static int allocate_cg_links(int count, struct list_head *tmp)
543{
544 struct cg_cgroup_link *link;
545 int i;
546 INIT_LIST_HEAD(tmp);
547 for (i = 0; i < count; i++) {
548 link = kmalloc(sizeof(*link), GFP_KERNEL);
549 if (!link) {
550 free_cg_links(tmp);
551 return -ENOMEM;
552 }
553 list_add(&link->cgrp_link_list, tmp);
554 }
555 return 0;
556}
557
558
559
560
561
562
563
/**
 * link_css_set - a helper function to link a css_set to a cgroup
 * @tmp_cg_links: cg_cgroup_link objects allocated by allocate_cg_links()
 * @cg: the css_set to be linked
 * @cgrp: the destination cgroup
 */
static void link_css_set(struct list_head *tmp_cg_links,
			 struct css_set *cg, struct cgroup *cgrp)
{
	struct cg_cgroup_link *link;

	BUG_ON(list_empty(tmp_cg_links));
	link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
				cgrp_link_list);
	link->cg = cg;
	link->cgrp = cgrp;
	atomic_inc(&cgrp->count);
	list_move(&link->cgrp_link_list, &cgrp->css_sets);
	/*
	 * Always add links to the tail of the list so that the list
	 * is sorted by order of hierarchy creation
	 */
	list_add_tail(&link->cg_link_list, &cg->cg_links);
}
582
583
584
585
586
587
588
589
/*
 * find_css_set() takes an existing css_set and a cgroup object, and
 * returns a css_set object that's equivalent to the old one, but with
 * the given cgroup substituted into the appropriate hierarchy.
 * Returns a referenced css_set, or NULL on allocation failure.
 * Must be called with cgroup_mutex held.
 */
static struct css_set *find_css_set(
	struct css_set *oldcg, struct cgroup *cgrp)
{
	struct css_set *res;
	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];

	struct list_head tmp_cg_links;

	struct hlist_head *hhead;
	struct cg_cgroup_link *link;

	/* First see if we already have a css_set that matches
	 * the desired set */
	read_lock(&css_set_lock);
	res = find_existing_css_set(oldcg, cgrp, template);
	if (res)
		get_css_set(res);
	read_unlock(&css_set_lock);

	if (res)
		return res;

	res = kmalloc(sizeof(*res), GFP_KERNEL);
	if (!res)
		return NULL;

	/* Allocate all the cg_cgroup_link objects that we'll need */
	if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
		kfree(res);
		return NULL;
	}

	atomic_set(&res->refcount, 1);
	INIT_LIST_HEAD(&res->cg_links);
	INIT_LIST_HEAD(&res->tasks);
	INIT_HLIST_NODE(&res->hlist);

	/* Copy the set of subsystem state objects generated in
	 * find_existing_css_set() */
	memcpy(res->subsys, template, sizeof(res->subsys));

	write_lock(&css_set_lock);
	/* Add reference counts and links from the new css_set. */
	list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
		struct cgroup *c = link->cgrp;
		/* substitute the new cgroup for its hierarchy */
		if (c->root == cgrp->root)
			c = cgrp;
		link_css_set(&tmp_cg_links, res, c);
	}

	/* exactly one link per active hierarchy should have been used */
	BUG_ON(!list_empty(&tmp_cg_links));

	css_set_count++;

	/* Add this css_set to the hash table */
	hhead = css_set_hash(res->subsys);
	hlist_add_head(&res->hlist, hhead);

	write_unlock(&css_set_lock);

	return res;
}
652
653
654
655
656
/*
 * Return the cgroup for "task" from the given hierarchy. Must be
 * called with cgroup_mutex held.
 */
static struct cgroup *task_cgroup_from_root(struct task_struct *task,
					    struct cgroupfs_root *root)
{
	struct css_set *css;
	struct cgroup *res = NULL;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));
	read_lock(&css_set_lock);
	/*
	 * Since we hold cgroup_mutex the task can't change css_set
	 * under us; init_css_set is handled specially because it is
	 * not linked into any hierarchy's link lists.
	 */
	css = task->cgroups;
	if (css == &init_css_set) {
		res = &root->top_cgroup;
	} else {
		struct cg_cgroup_link *link;
		list_for_each_entry(link, &css->cg_links, cg_link_list) {
			struct cgroup *c = link->cgrp;
			if (c->root == root) {
				res = c;
				break;
			}
		}
	}
	read_unlock(&css_set_lock);
	/* every task belongs to some cgroup in every active hierarchy */
	BUG_ON(!res);
	return res;
}
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
/**
 * cgroup_lock - lock out any changes to cgroup structures
 */
void cgroup_lock(void)
{
	mutex_lock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_lock);
747
748
749
750
751
752
/**
 * cgroup_unlock - release lock on cgroup changes
 *
 * Undo the lock taken in a previous cgroup_lock() call.
 */
void cgroup_unlock(void)
{
	mutex_unlock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_unlock);
758
759
760
761
762
763
764
765
/* Forward declarations for the fs plumbing defined later in this file. */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp);
static const struct inode_operations cgroup_dir_inode_operations;
static const struct file_operations proc_cgroupstats_operations;

/* cgroup files are exempt from writeback accounting */
static struct backing_dev_info cgroup_backing_dev_info = {
	.name		= "cgroup",
	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
};

static int alloc_css_id(struct cgroup_subsys *ss,
			struct cgroup *parent, struct cgroup *child);
779
780static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
781{
782 struct inode *inode = new_inode(sb);
783
784 if (inode) {
785 inode->i_ino = get_next_ino();
786 inode->i_mode = mode;
787 inode->i_uid = current_fsuid();
788 inode->i_gid = current_fsgid();
789 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
790 inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
791 }
792 return inode;
793}
794
795
796
797
798
799static int cgroup_call_pre_destroy(struct cgroup *cgrp)
800{
801 struct cgroup_subsys *ss;
802 int ret = 0;
803
804 for_each_subsys(cgrp->root, ss)
805 if (ss->pre_destroy) {
806 ret = ss->pre_destroy(ss, cgrp);
807 if (ret)
808 break;
809 }
810
811 return ret;
812}
813
814static void free_cgroup_rcu(struct rcu_head *obj)
815{
816 struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head);
817
818 kfree(cgrp);
819}
820
/* Final dentry release: tear down the cgroup behind a directory dentry. */
static void cgroup_diput(struct dentry *dentry, struct inode *inode)
{
	/* is dentry a directory ? if so, kfree() associated cgroup */
	if (S_ISDIR(inode->i_mode)) {
		struct cgroup *cgrp = dentry->d_fsdata;
		struct cgroup_subsys *ss;
		BUG_ON(!(cgroup_is_removed(cgrp)));
		/*
		 * It's possible for external users to be holding css
		 * reference counts on a cgroup; css_put() needs to
		 * be able to access the cgroup after decrementing
		 * the reference count in order to know if it needs to
		 * queue the cgroup to be handled by the release
		 * agent
		 */
		synchronize_rcu();

		mutex_lock(&cgroup_mutex);
		/*
		 * Release the subsystem state objects.
		 */
		for_each_subsys(cgrp->root, ss)
			ss->destroy(ss, cgrp);

		cgrp->root->number_of_cgroups--;
		mutex_unlock(&cgroup_mutex);

		/*
		 * Drop the active superblock reference that we took when we
		 * created the cgroup
		 */
		deactivate_super(cgrp->root->sb);

		/*
		 * if we're getting rid of the cgroup, refcount should ensure
		 * that there are no pidlists left.
		 */
		BUG_ON(!list_empty(&cgrp->pidlists));

		call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
	}
	iput(inode);
}
862
863static void remove_dir(struct dentry *d)
864{
865 struct dentry *parent = dget(d->d_parent);
866
867 d_delete(d);
868 simple_rmdir(parent->d_inode, d);
869 dput(parent);
870}
871
/*
 * Unlink every (non-directory) child of a cgroup directory. The
 * dcache_lock is dropped and retaken around each unlink, so the child
 * list is re-read from the head on every iteration.
 */
static void cgroup_clear_directory(struct dentry *dentry)
{
	struct list_head *node;

	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
	spin_lock(&dcache_lock);
	node = dentry->d_subdirs.next;
	while (node != &dentry->d_subdirs) {
		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
		list_del_init(node);
		if (d->d_inode) {
			/* This should never be called on a cgroup
			 * directory with child cgroups */
			BUG_ON(d->d_inode->i_mode & S_IFDIR);
			d = dget_locked(d);
			spin_unlock(&dcache_lock);
			d_delete(d);
			simple_unlink(dentry->d_inode, d);
			dput(d);
			spin_lock(&dcache_lock);
		}
		node = dentry->d_subdirs.next;
	}
	spin_unlock(&dcache_lock);
}
897
898
899
900
/*
 * Clear out a cgroup directory's contents, then remove the directory
 * itself. NOTE : the dentry must have been dget()'ed.
 */
static void cgroup_d_remove_dir(struct dentry *dentry)
{
	cgroup_clear_directory(dentry);

	spin_lock(&dcache_lock);
	list_del_init(&dentry->d_u.d_child);
	spin_unlock(&dcache_lock);
	remove_dir(dentry);
}
910
911
912
913
914
915
916
917
918
/*
 * Queue for tasks waiting to retry rmdir() of a cgroup. The
 * CGRP_WAIT_ON_RMDIR bit on the cgroup flags word is the handshake.
 */
DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);

static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
{
	/* test_and_clear avoids a needless wakeup when no one is waiting */
	if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
		wake_up_all(&cgroup_rmdir_waitq);
}
926
/* Hold off rmdir of the css's cgroup by pinning the css refcount. */
void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
{
	css_get(css);
}
931
/* Undo cgroup_exclude_rmdir() and kick any rmdir() waiter to retry. */
void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
{
	cgroup_wakeup_rmdir_waiter(css->cgroup);
	css_put(css);
}
937
938
939
940
941
942
/*
 * Call with cgroup_mutex held. Drops reference counts on modules,
 * including any duplicate ones that parse_cgroupfs_options took. If
 * this function returns an error, no reference counts are touched.
 */
static int rebind_subsystems(struct cgroupfs_root *root,
			      unsigned long final_bits)
{
	unsigned long added_bits, removed_bits;
	struct cgroup *cgrp = &root->top_cgroup;
	int i;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));

	removed_bits = root->actual_subsys_bits & ~final_bits;
	added_bits = final_bits & ~root->actual_subsys_bits;
	/* Check that any added subsystems are currently free */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		unsigned long bit = 1UL << i;
		struct cgroup_subsys *ss = subsys[i];
		if (!(bit & added_bits))
			continue;
		/*
		 * Nobody should tell us to do a subsys that doesn't exist:
		 * parse_cgroupfs_options should catch that case and refcounts
		 * ensure that subsystems won't disappear once selected.
		 */
		BUG_ON(ss == NULL);
		if (ss->root != &rootnode) {
			/* Subsystem isn't free */
			return -EBUSY;
		}
	}

	/*
	 * Currently we don't handle adding/removing subsystems when
	 * any child cgroups exist. This is theoretically supportable
	 * but involves complex error handling, so it's being left until
	 * later.
	 */
	if (root->number_of_cgroups > 1)
		return -EBUSY;

	/* Process each subsystem */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		unsigned long bit = 1UL << i;
		if (bit & added_bits) {
			/* We're binding this subsystem to this hierarchy */
			BUG_ON(ss == NULL);
			BUG_ON(cgrp->subsys[i]);
			BUG_ON(!dummytop->subsys[i]);
			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
			mutex_lock(&ss->hierarchy_mutex);
			cgrp->subsys[i] = dummytop->subsys[i];
			cgrp->subsys[i]->cgroup = cgrp;
			list_move(&ss->sibling, &root->subsys_list);
			ss->root = root;
			if (ss->bind)
				ss->bind(ss, cgrp);
			mutex_unlock(&ss->hierarchy_mutex);
			/* refcount was already taken, and we're keeping it */
		} else if (bit & removed_bits) {
			/* We're removing this subsystem */
			BUG_ON(ss == NULL);
			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
			mutex_lock(&ss->hierarchy_mutex);
			if (ss->bind)
				ss->bind(ss, dummytop);
			dummytop->subsys[i]->cgroup = dummytop;
			cgrp->subsys[i] = NULL;
			subsys[i]->root = &rootnode;
			list_move(&ss->sibling, &rootnode.subsys_list);
			mutex_unlock(&ss->hierarchy_mutex);
			/* subsystem is now free - drop reference on module */
			module_put(ss->module);
		} else if (bit & final_bits) {
			/* Subsystem state should already exist */
			BUG_ON(ss == NULL);
			BUG_ON(!cgrp->subsys[i]);
			/*
			 * a refcount was taken, but we already had one, so
			 * drop the extra reference.
			 */
			module_put(ss->module);
#ifdef CONFIG_MODULE_UNLOAD
			BUG_ON(ss->module && !module_refcount(ss->module));
#endif
		} else {
			/* Subsystem state shouldn't exist */
			BUG_ON(cgrp->subsys[i]);
		}
	}
	root->subsys_bits = root->actual_subsys_bits = final_bits;
	synchronize_rcu();

	return 0;
}
1035
/* Emit this hierarchy's active mount options for /proc/mounts. */
static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
{
	struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;
	struct cgroup_subsys *ss;

	/* cgroup_mutex keeps subsys_list and the option state stable */
	mutex_lock(&cgroup_mutex);
	for_each_subsys(root, ss)
		seq_printf(seq, ",%s", ss->name);
	if (test_bit(ROOT_NOPREFIX, &root->flags))
		seq_puts(seq, ",noprefix");
	if (strlen(root->release_agent_path))
		seq_printf(seq, ",release_agent=%s", root->release_agent_path);
	if (clone_children(&root->top_cgroup))
		seq_puts(seq, ",clone_children");
	if (strlen(root->name))
		seq_printf(seq, ",name=%s", root->name);
	mutex_unlock(&cgroup_mutex);
	return 0;
}
1055
/* Parsed mount options, filled in by parse_cgroupfs_options(). */
struct cgroup_sb_opts {
	unsigned long subsys_bits;
	unsigned long flags;
	char *release_agent;	/* kstrndup'd; caller frees */
	bool clone_children;
	char *name;		/* kstrndup'd; caller frees */
	/* User explicitly requested an empty subsystem set */
	bool none;

	/* filled in by cgroup_mount() before calling sget() */
	struct cgroupfs_root *new_root;

};
1068
1069
1070
1071
1072
1073
1074
/*
 * Convert a hierarchy specifier into a bitmask of subsystems and
 * flags. Call with cgroup_mutex held to protect the subsys[] array.
 * This function takes refcounts on subsystems to be used, unless it
 * returns error, in which case no refcounts are taken.
 */
static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
{
	char *token, *o = data;
	bool all_ss = false, one_ss = false;
	unsigned long mask = (unsigned long)-1;
	int i;
	bool module_pin_failed = false;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));

#ifdef CONFIG_CPUSETS
	/* "noprefix" is only allowed together with cpuset (see below) */
	mask = ~(1UL << cpuset_subsys_id);
#endif

	memset(opts, 0, sizeof(*opts));

	while ((token = strsep(&o, ",")) != NULL) {
		if (!*token)
			return -EINVAL;
		if (!strcmp(token, "none")) {
			/* Explicitly have no subsystems */
			opts->none = true;
			continue;
		}
		if (!strcmp(token, "all")) {
			/* Mutually exclusive option 'all' + subsystem name */
			if (one_ss)
				return -EINVAL;
			all_ss = true;
			continue;
		}
		if (!strcmp(token, "noprefix")) {
			set_bit(ROOT_NOPREFIX, &opts->flags);
			continue;
		}
		if (!strcmp(token, "clone_children")) {
			opts->clone_children = true;
			continue;
		}
		if (!strncmp(token, "release_agent=", 14)) {
			/* Specifying two release agents is forbidden */
			if (opts->release_agent)
				return -EINVAL;
			opts->release_agent =
				kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
			if (!opts->release_agent)
				return -ENOMEM;
			continue;
		}
		if (!strncmp(token, "name=", 5)) {
			const char *name = token + 5;
			/* Can't specify an empty name */
			if (!strlen(name))
				return -EINVAL;
			/* Must match [\w.-]+ */
			for (i = 0; i < strlen(name); i++) {
				char c = name[i];
				if (isalnum(c))
					continue;
				if ((c == '.') || (c == '-') || (c == '_'))
					continue;
				return -EINVAL;
			}
			/* Specifying two names is forbidden */
			if (opts->name)
				return -EINVAL;
			opts->name = kstrndup(name,
					      MAX_CGROUP_ROOT_NAMELEN - 1,
					      GFP_KERNEL);
			if (!opts->name)
				return -ENOMEM;

			continue;
		}

		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss == NULL)
				continue;
			if (strcmp(token, ss->name))
				continue;
			if (ss->disabled)
				continue;

			/* Mutually exclusive option 'all' + subsystem name */
			if (all_ss)
				return -EINVAL;
			set_bit(i, &opts->subsys_bits);
			one_ss = true;

			break;
		}
		if (i == CGROUP_SUBSYS_COUNT)
			return -ENOENT;
	}

	/*
	 * If the 'all' option was specified select all the subsystems,
	 * otherwise if 'none', 'name=' and a subsystem name options
	 * were not specified, let's default to 'all'
	 */
	if (all_ss || (!all_ss && !one_ss && !opts->none)) {
		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss == NULL)
				continue;
			if (ss->disabled)
				continue;
			set_bit(i, &opts->subsys_bits);
		}
	}

	/*
	 * Option noprefix was introduced just for backward compatibility
	 * with the old cpuset, so we allow noprefix only if mounting just
	 * the cpuset subsystem.
	 */
	if (test_bit(ROOT_NOPREFIX, &opts->flags) &&
	    (opts->subsys_bits & mask))
		return -EINVAL;

	/* Can't ask for more subsystems and an empty subsystem set */
	if (opts->subsys_bits && opts->none)
		return -EINVAL;

	/*
	 * We either have to specify by name or by subsystems. (So all
	 * empty hierarchies must have a name).
	 */
	if (!opts->subsys_bits && !opts->name)
		return -EINVAL;

	/*
	 * Grab references on all the modules we'll need, so the
	 * subsystems can't be unloaded in parallel while we're
	 * operating. Built-in subsystems need no pinning.
	 */
	for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
		unsigned long bit = 1UL << i;

		if (!(bit & opts->subsys_bits))
			continue;
		if (!try_module_get(subsys[i]->module)) {
			module_pin_failed = true;
			break;
		}
	}
	if (module_pin_failed) {
		/*
		 * oops, one of the modules was going away. this means that we
		 * raced with a module_delete call, and to the user this is
		 * essentially a "subsystem doesn't exist" case.
		 */
		for (i--; i >= CGROUP_BUILTIN_SUBSYS_COUNT; i--) {
			/* drop refcounts only on the ones we took */
			unsigned long bit = 1UL << i;

			if (!(bit & opts->subsys_bits))
				continue;
			module_put(subsys[i]->module);
		}
		return -ENOENT;
	}

	return 0;
}
1245
1246static void drop_parsed_module_refcounts(unsigned long subsys_bits)
1247{
1248 int i;
1249 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
1250 unsigned long bit = 1UL << i;
1251
1252 if (!(bit & subsys_bits))
1253 continue;
1254 module_put(subsys[i]->module);
1255 }
1256}
1257
/* Remount handler: allows rebinding subsystems on an existing hierarchy. */
static int cgroup_remount(struct super_block *sb, int *flags, char *data)
{
	int ret = 0;
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	struct cgroup_sb_opts opts;

	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
	mutex_lock(&cgroup_mutex);

	/* See what subsystems are wanted */
	ret = parse_cgroupfs_options(data, &opts);
	if (ret)
		goto out_unlock;

	/* Don't allow flags or name to change at remount */
	if (opts.flags != root->flags ||
	    (opts.name && strcmp(opts.name, root->name))) {
		ret = -EINVAL;
		drop_parsed_module_refcounts(opts.subsys_bits);
		goto out_unlock;
	}

	ret = rebind_subsystems(root, opts.subsys_bits);
	if (ret) {
		drop_parsed_module_refcounts(opts.subsys_bits);
		goto out_unlock;
	}

	/* (re)populate subsystem files */
	cgroup_populate_dir(cgrp);

	if (opts.release_agent)
		strcpy(root->release_agent_path, opts.release_agent);
 out_unlock:
	kfree(opts.release_agent);
	kfree(opts.name);
	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
	return ret;
}
1299
/* super_operations for a mounted cgroup hierarchy */
static const struct super_operations cgroup_ops = {
	.statfs = simple_statfs,
	.drop_inode = generic_delete_inode,
	.show_options = cgroup_show_options,
	.remount_fs = cgroup_remount,
};
1306
/* Initialise the list heads and locks embedded in a fresh cgroup. */
static void init_cgroup_housekeeping(struct cgroup *cgrp)
{
	INIT_LIST_HEAD(&cgrp->sibling);
	INIT_LIST_HEAD(&cgrp->children);
	INIT_LIST_HEAD(&cgrp->css_sets);
	INIT_LIST_HEAD(&cgrp->release_list);
	INIT_LIST_HEAD(&cgrp->pidlists);
	mutex_init(&cgrp->pidlist_mutex);
	INIT_LIST_HEAD(&cgrp->event_list);
	spin_lock_init(&cgrp->event_list_lock);
}
1318
/* Initialise a cgroupfs_root and its embedded top cgroup. */
static void init_cgroup_root(struct cgroupfs_root *root)
{
	struct cgroup *cgrp = &root->top_cgroup;
	INIT_LIST_HEAD(&root->subsys_list);
	INIT_LIST_HEAD(&root->root_list);
	/* the top cgroup always exists */
	root->number_of_cgroups = 1;
	cgrp->root = root;
	cgrp->top_cgroup = cgrp;
	init_cgroup_housekeeping(cgrp);
}
1329
/*
 * Allocate a unique hierarchy_id for @root from hierarchy_ida,
 * retrying on -EAGAIN. Returns false only when ida_pre_get() cannot
 * get memory.
 */
static bool init_root_id(struct cgroupfs_root *root)
{
	int ret = 0;

	do {
		if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL))
			return false;
		spin_lock(&hierarchy_id_lock);
		/* Try to allocate the next unused ID */
		ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
					&root->hierarchy_id);
		if (ret == -ENOSPC)
			/* Try again starting from 0 */
			ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id);
		if (!ret) {
			next_hierarchy_id = root->hierarchy_id + 1;
		} else if (ret != -EAGAIN) {
			/* Can only get here if the 31-bit IDR is full ... */
			BUG_ON(ret);
		}
		spin_unlock(&hierarchy_id_lock);
	} while (ret);
	return true;
}
1354
/* sget() comparison callback: does @sb's root match the requested opts? */
static int cgroup_test_super(struct super_block *sb, void *data)
{
	struct cgroup_sb_opts *opts = data;
	struct cgroupfs_root *root = sb->s_fs_info;

	/* If we asked for a name then it must match */
	if (opts->name && strcmp(opts->name, root->name))
		return 0;

	/*
	 * If we asked for subsystems (or explicitly for no
	 * subsystems) then they must match
	 */
	if ((opts->subsys_bits || opts->none)
	    && (opts->subsys_bits != root->subsys_bits))
		return 0;

	return 1;
}
1374
1375static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
1376{
1377 struct cgroupfs_root *root;
1378
1379 if (!opts->subsys_bits && !opts->none)
1380 return NULL;
1381
1382 root = kzalloc(sizeof(*root), GFP_KERNEL);
1383 if (!root)
1384 return ERR_PTR(-ENOMEM);
1385
1386 if (!init_root_id(root)) {
1387 kfree(root);
1388 return ERR_PTR(-ENOMEM);
1389 }
1390 init_cgroup_root(root);
1391
1392 root->subsys_bits = opts->subsys_bits;
1393 root->flags = opts->flags;
1394 if (opts->release_agent)
1395 strcpy(root->release_agent_path, opts->release_agent);
1396 if (opts->name)
1397 strcpy(root->name, opts->name);
1398 if (opts->clone_children)
1399 set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags);
1400 return root;
1401}
1402
1403static void cgroup_drop_root(struct cgroupfs_root *root)
1404{
1405 if (!root)
1406 return;
1407
1408 BUG_ON(!root->hierarchy_id);
1409 spin_lock(&hierarchy_id_lock);
1410 ida_remove(&hierarchy_ida, root->hierarchy_id);
1411 spin_unlock(&hierarchy_id_lock);
1412 kfree(root);
1413}
1414
/* sget() setup callback: bind opts->new_root to a freshly created sb. */
static int cgroup_set_super(struct super_block *sb, void *data)
{
	int ret;
	struct cgroup_sb_opts *opts = data;

	/* If we don't have a new root, we can't set up a new sb */
	if (!opts->new_root)
		return -EINVAL;

	BUG_ON(!opts->subsys_bits && !opts->none);

	ret = set_anon_super(sb, NULL);
	if (ret)
		return ret;

	sb->s_fs_info = opts->new_root;
	opts->new_root->sb = sb;

	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	sb->s_magic = CGROUP_SUPER_MAGIC;
	sb->s_op = &cgroup_ops;

	return 0;
}
1440
/* Create the root directory inode/dentry for a new cgroup superblock. */
static int cgroup_get_rootdir(struct super_block *sb)
{
	struct inode *inode =
		cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
	struct dentry *dentry;

	if (!inode)
		return -ENOMEM;

	inode->i_fop = &simple_dir_operations;
	inode->i_op = &cgroup_dir_inode_operations;
	/* directories start off with i_nlink == 2 (for "." entry) */
	inc_nlink(inode);
	dentry = d_alloc_root(inode);
	if (!dentry) {
		iput(inode);
		return -ENOMEM;
	}
	sb->s_root = dentry;
	return 0;
}
1462
/*
 * Mount entry point: find or create the hierarchy described by @data
 * and return its root dentry.
 */
static struct dentry *cgroup_mount(struct file_system_type *fs_type,
			 int flags, const char *unused_dev_name,
			 void *data)
{
	struct cgroup_sb_opts opts;
	struct cgroupfs_root *root;
	int ret = 0;
	struct super_block *sb;
	struct cgroupfs_root *new_root;

	/* First find the desired set of subsystems */
	mutex_lock(&cgroup_mutex);
	ret = parse_cgroupfs_options(data, &opts);
	mutex_unlock(&cgroup_mutex);
	if (ret)
		goto out_err;

	/*
	 * Allocate a new cgroup root. We may not need it if we're
	 * reusing an existing hierarchy.
	 */
	new_root = cgroup_root_from_opts(&opts);
	if (IS_ERR(new_root)) {
		ret = PTR_ERR(new_root);
		goto drop_modules;
	}
	opts.new_root = new_root;

	/* Locate an existing or new sb for this hierarchy */
	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
	if (IS_ERR(sb)) {
		ret = PTR_ERR(sb);
		cgroup_drop_root(opts.new_root);
		goto drop_modules;
	}

	root = sb->s_fs_info;
	BUG_ON(!root);
	if (root == opts.new_root) {
		/* We used the new root structure, so this is a new hierarchy */
		struct list_head tmp_cg_links;
		struct cgroup *root_cgrp = &root->top_cgroup;
		struct inode *inode;
		struct cgroupfs_root *existing_root;
		int i;

		BUG_ON(sb->s_root != NULL);

		ret = cgroup_get_rootdir(sb);
		if (ret)
			goto drop_new_super;
		inode = sb->s_root->d_inode;

		mutex_lock(&inode->i_mutex);
		mutex_lock(&cgroup_mutex);

		if (strlen(root->name)) {
			/* Check for name clashes with existing mounts */
			for_each_active_root(existing_root) {
				if (!strcmp(existing_root->name, root->name)) {
					ret = -EBUSY;
					mutex_unlock(&cgroup_mutex);
					mutex_unlock(&inode->i_mutex);
					goto drop_new_super;
				}
			}
		}

		/*
		 * We're accessing css_set_count without locking
		 * css_set_lock here, but that's OK - it can only be
		 * increased by someone holding cgroup_lock, and
		 * that's us. The worst that can happen is that we
		 * have some link structures left over
		 */
		ret = allocate_cg_links(css_set_count, &tmp_cg_links);
		if (ret) {
			mutex_unlock(&cgroup_mutex);
			mutex_unlock(&inode->i_mutex);
			goto drop_new_super;
		}

		ret = rebind_subsystems(root, root->subsys_bits);
		if (ret == -EBUSY) {
			mutex_unlock(&cgroup_mutex);
			mutex_unlock(&inode->i_mutex);
			free_cg_links(&tmp_cg_links);
			goto drop_new_super;
		}
		/*
		 * There must be no failure case after here, since
		 * rebinding takes care of subsystems' refcounts,
		 * which are explicitly dropped in the failure exit
		 * path.
		 */

		/* EBUSY should be the only error here */
		BUG_ON(ret);

		list_add(&root->root_list, &roots);
		root_count++;

		sb->s_root->d_fsdata = root_cgrp;
		root->top_cgroup.dentry = sb->s_root;

		/* Link the top cgroup in this hierarchy into all
		 * the css_set objects */
		write_lock(&css_set_lock);
		for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
			struct hlist_head *hhead = &css_set_table[i];
			struct hlist_node *node;
			struct css_set *cg;

			hlist_for_each_entry(cg, node, hhead, hlist)
				link_css_set(&tmp_cg_links, cg, root_cgrp);
		}
		write_unlock(&css_set_lock);

		free_cg_links(&tmp_cg_links);

		BUG_ON(!list_empty(&root_cgrp->sibling));
		BUG_ON(!list_empty(&root_cgrp->children));
		BUG_ON(root->number_of_cgroups != 1);

		cgroup_populate_dir(root_cgrp);
		mutex_unlock(&cgroup_mutex);
		mutex_unlock(&inode->i_mutex);
	} else {
		/*
		 * We re-used an existing hierarchy - the new root (if
		 * any) is not needed
		 */
		cgroup_drop_root(opts.new_root);
		/* no subsys rebinding, so refcounts don't change */
		drop_parsed_module_refcounts(opts.subsys_bits);
	}

	kfree(opts.release_agent);
	kfree(opts.name);
	return dget(sb->s_root);

 drop_new_super:
	deactivate_locked_super(sb);
 drop_modules:
	drop_parsed_module_refcounts(opts.subsys_bits);
 out_err:
	kfree(opts.release_agent);
	kfree(opts.name);
	return ERR_PTR(ret);
}
1612
/* Tear down a cgroup superblock on the final unmount. */
static void cgroup_kill_sb(struct super_block *sb) {
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	int ret;
	struct cg_cgroup_link *link;
	struct cg_cgroup_link *saved_link;

	BUG_ON(!root);

	BUG_ON(root->number_of_cgroups != 1);
	BUG_ON(!list_empty(&cgrp->children));
	BUG_ON(!list_empty(&cgrp->sibling));

	mutex_lock(&cgroup_mutex);

	/* Rebind all subsystems back to the default hierarchy */
	ret = rebind_subsystems(root, 0);
	/* Shouldn't be able to fail ... */
	BUG_ON(ret);

	/*
	 * Release all the links from css_sets to this hierarchy's
	 * root cgroup
	 */
	write_lock(&css_set_lock);

	list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
				 cgrp_link_list) {
		list_del(&link->cg_link_list);
		list_del(&link->cgrp_link_list);
		kfree(link);
	}
	write_unlock(&css_set_lock);

	if (!list_empty(&root->root_list)) {
		list_del(&root->root_list);
		root_count--;
	}

	mutex_unlock(&cgroup_mutex);

	kill_litter_super(sb);
	cgroup_drop_root(root);
}
1657
/* Filesystem type backing "mount -t cgroup"; teardown goes through
 * cgroup_kill_sb above. */
static struct file_system_type cgroup_fs_type = {
	.name = "cgroup",
	.mount = cgroup_mount,
	.kill_sb = cgroup_kill_sb,
};

/* kobject for the cgroup fs; registered elsewhere in this file —
 * presumably under /sys/fs (NOTE(review): registration not visible here). */
static struct kobject *cgroup_kobj;
1665
1666static inline struct cgroup *__d_cgrp(struct dentry *dentry)
1667{
1668 return dentry->d_fsdata;
1669}
1670
1671static inline struct cftype *__d_cft(struct dentry *dentry)
1672{
1673 return dentry->d_fsdata;
1674}
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
/*
 * cgroup_path - render the hierarchy-relative path of @cgrp into @buf.
 *
 * Returns 0 on success or -ENAMETOOLONG if @buflen is too small.  The
 * path is built backwards from the tail of the buffer, one dentry name
 * per ancestor, then slid to the front with memmove().  Caller must hold
 * either rcu_read_lock() or cgroup_mutex, matching the
 * rcu_dereference_check() conditions below.
 */
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
{
	char *start;
	struct dentry *dentry = rcu_dereference_check(cgrp->dentry,
						      rcu_read_lock_held() ||
						      cgroup_lock_is_held());

	if (!dentry || cgrp == dummytop) {
		/*
		 * Inactive subsystems have no dentry for their root
		 * cgroup; report them as "/".
		 */
		strcpy(buf, "/");
		return 0;
	}

	start = buf + buflen;

	/* build from the end of the buffer toward the front */
	*--start = '\0';
	for (;;) {
		int len = dentry->d_name.len;

		if ((start -= len) < buf)
			return -ENAMETOOLONG;
		memcpy(start, dentry->d_name.name, len);
		cgrp = cgrp->parent;
		if (!cgrp)
			break;

		dentry = rcu_dereference_check(cgrp->dentry,
					       rcu_read_lock_held() ||
					       cgroup_lock_is_held());
		/* the root contributes no name and no '/' separator */
		if (!cgrp->parent)
			continue;
		if (--start < buf)
			return -ENAMETOOLONG;
		*start = '/';
	}
	/* shift the assembled path to the start of the caller's buffer */
	memmove(buf, start, buf + buflen - start);
	return 0;
}
EXPORT_SYMBOL_GPL(cgroup_path);
1728
1729
1730
1731
1732
1733
1734
1735
1736
/*
 * cgroup_attach_task - move task @tsk into cgroup @cgrp within cgrp's
 * hierarchy.
 *
 * Returns 0 on success (including the no-op case where @tsk is already
 * in @cgrp), or a negative errno from a subsystem's ->can_attach(),
 * -ENOMEM if no css_set could be found/created, or -ESRCH if the task
 * is exiting.  On failure, ->cancel_attach() is invoked for every
 * subsystem whose ->can_attach() had already succeeded.
 */
int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
	int retval = 0;
	struct cgroup_subsys *ss, *failed_ss = NULL;
	struct cgroup *oldcgrp;
	struct css_set *cg;
	struct css_set *newcg;
	struct cgroupfs_root *root = cgrp->root;

	/* nothing to do if the task is already in the target cgroup */
	oldcgrp = task_cgroup_from_root(tsk, root);
	if (cgrp == oldcgrp)
		return 0;

	/* let every bound subsystem veto the move before any state changes */
	for_each_subsys(root, ss) {
		if (ss->can_attach) {
			retval = ss->can_attach(ss, cgrp, tsk, false);
			if (retval) {
				/*
				 * Remember the first subsystem that
				 * refused, so cancel_attach is only sent
				 * to the ones that already agreed.
				 */
				failed_ss = ss;
				goto out;
			}
		}
	}

	/* pin the task's current css_set while we derive the new one */
	task_lock(tsk);
	cg = tsk->cgroups;
	get_css_set(cg);
	task_unlock(tsk);

	/* find or allocate the css_set matching cg with @cgrp substituted */
	newcg = find_css_set(cg, cgrp);
	put_css_set(cg);	/* drop the temporary pin taken above */
	if (!newcg) {
		retval = -ENOMEM;
		goto out;
	}

	/* swap in the new css_set unless the task started exiting meanwhile */
	task_lock(tsk);
	if (tsk->flags & PF_EXITING) {
		task_unlock(tsk);
		put_css_set(newcg);
		retval = -ESRCH;
		goto out;
	}
	rcu_assign_pointer(tsk->cgroups, newcg);
	task_unlock(tsk);

	/* move the task onto the new set's task list, if it is on one */
	write_lock(&css_set_lock);
	if (!list_empty(&tsk->cg_list))
		list_move(&tsk->cg_list, &newcg->tasks);
	write_unlock(&css_set_lock);

	for_each_subsys(root, ss) {
		if (ss->attach)
			ss->attach(ss, cgrp, oldcgrp, tsk, false);
	}
	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
	/* wait for RCU readers of tsk->cgroups before dropping the old set */
	synchronize_rcu();
	put_css_set(cg);

	/*
	 * Wake anyone waiting in rmdir(); attaching may have made the
	 * cgroup busy/unbusy transiently.
	 */
	cgroup_wakeup_rmdir_waiter(cgrp);
out:
	if (retval) {
		for_each_subsys(root, ss) {
			if (ss == failed_ss)
				/*
				 * This subsystem (and those after it)
				 * never acked the attach — stop here.
				 */
				break;
			if (ss->cancel_attach)
				ss->cancel_attach(ss, cgrp, tsk, false);
		}
	}
	return retval;
}
1828
1829
1830
1831
1832
1833
/*
 * cgroup_attach_task_all - attach @tsk to the same cgroup as @from in
 * every active hierarchy.  Stops at (and returns) the first failure.
 */
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
{
	struct cgroupfs_root *root;
	int retval = 0;

	cgroup_lock();
	for_each_active_root(root) {
		struct cgroup *from_cg = task_cgroup_from_root(from, root);

		retval = cgroup_attach_task(from_cg, tsk);
		if (retval)
			break;
	}
	cgroup_unlock();

	return retval;
}
EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
1852
1853
1854
1855
1856
/*
 * Attach the task identified by @pid (in the caller's pid namespace) to
 * @cgrp; @pid == 0 means the current task.  Unprivileged callers may
 * only move tasks whose uid/suid matches their euid.  Caller holds
 * cgroup_mutex (via cgroup_lock_live_group).
 */
static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
{
	struct task_struct *tsk;
	const struct cred *cred = current_cred(), *tcred;
	int ret;

	if (pid) {
		rcu_read_lock();
		tsk = find_task_by_vpid(pid);
		if (!tsk || tsk->flags & PF_EXITING) {
			rcu_read_unlock();
			return -ESRCH;
		}

		/* euid 0 may move anyone; otherwise uids must match */
		tcred = __task_cred(tsk);
		if (cred->euid &&
		    cred->euid != tcred->uid &&
		    cred->euid != tcred->suid) {
			rcu_read_unlock();
			return -EACCES;
		}
		/* take a reference before leaving the RCU section */
		get_task_struct(tsk);
		rcu_read_unlock();
	} else {
		tsk = current;
		get_task_struct(tsk);
	}

	ret = cgroup_attach_task(cgrp, tsk);
	put_task_struct(tsk);
	return ret;
}
1889
1890static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
1891{
1892 int ret;
1893 if (!cgroup_lock_live_group(cgrp))
1894 return -ENODEV;
1895 ret = attach_task_by_pid(cgrp, pid);
1896 cgroup_unlock();
1897 return ret;
1898}
1899
1900
1901
1902
1903
1904
1905
1906
1907bool cgroup_lock_live_group(struct cgroup *cgrp)
1908{
1909 mutex_lock(&cgroup_mutex);
1910 if (cgroup_is_removed(cgrp)) {
1911 mutex_unlock(&cgroup_mutex);
1912 return false;
1913 }
1914 return true;
1915}
1916EXPORT_SYMBOL_GPL(cgroup_lock_live_group);
1917
/*
 * Write handler for "release_agent": store the helper path on the root.
 * The BUILD_BUG_ON plus the strlen check guarantee the strcpy below
 * cannot overflow release_agent_path.
 */
static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
				      const char *buffer)
{
	BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
	if (strlen(buffer) >= PATH_MAX)
		return -EINVAL;
	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;
	strcpy(cgrp->root->release_agent_path, buffer);
	cgroup_unlock();
	return 0;
}
1930
1931static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
1932 struct seq_file *seq)
1933{
1934 if (!cgroup_lock_live_group(cgrp))
1935 return -ENODEV;
1936 seq_puts(seq, cgrp->root->release_agent_path);
1937 seq_putc(seq, '\n');
1938 cgroup_unlock();
1939 return 0;
1940}
1941
1942
/* on-stack buffer size shared by the small read/write handlers below */
#define CGROUP_LOCAL_BUFFER_SIZE 64

/*
 * Common write path for u64/s64 control files: copy the user's text into
 * a small stack buffer, parse it, and hand the value to the cftype's
 * write_u64 or write_s64 callback.  Returns bytes consumed or -errno.
 */
static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
				struct file *file,
				const char __user *userbuf,
				size_t nbytes, loff_t *unused_ppos)
{
	char buffer[CGROUP_LOCAL_BUFFER_SIZE];
	int retval = 0;
	char *end;

	if (!nbytes)
		return -EINVAL;
	if (nbytes >= sizeof(buffer))
		return -E2BIG;
	if (copy_from_user(buffer, userbuf, nbytes))
		return -EFAULT;

	buffer[nbytes] = 0;	/* NUL-terminate before parsing */
	if (cft->write_u64) {
		u64 val = simple_strtoull(strstrip(buffer), &end, 0);
		if (*end)	/* trailing garbage after the number */
			return -EINVAL;
		retval = cft->write_u64(cgrp, cft, val);
	} else {
		/* caller guarantees write_s64 is set in this branch */
		s64 val = simple_strtoll(strstrip(buffer), &end, 0);
		if (*end)
			return -EINVAL;
		retval = cft->write_s64(cgrp, cft, val);
	}
	if (!retval)
		retval = nbytes;
	return retval;
}
1977
/*
 * Common write path for string control files.  Small writes use a stack
 * buffer; anything larger (up to cft->max_write_len, default 63 bytes)
 * is copied into a kmalloc'd buffer.  The stripped string is passed to
 * cft->write_string.  Returns bytes consumed or -errno.
 */
static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
				   struct file *file,
				   const char __user *userbuf,
				   size_t nbytes, loff_t *unused_ppos)
{
	char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
	int retval = 0;
	size_t max_bytes = cft->max_write_len;
	char *buffer = local_buffer;

	if (!max_bytes)
		max_bytes = sizeof(local_buffer) - 1;
	if (nbytes >= max_bytes)
		return -E2BIG;

	/* fall back to the heap when the write exceeds the stack buffer */
	if (nbytes >= sizeof(local_buffer)) {
		buffer = kmalloc(nbytes + 1, GFP_KERNEL);
		if (buffer == NULL)
			return -ENOMEM;
	}
	if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
		retval = -EFAULT;
		goto out;
	}

	buffer[nbytes] = 0;	/* NUL-terminate for strstrip */
	retval = cft->write_string(cgrp, cft, strstrip(buffer));
	if (!retval)
		retval = nbytes;
out:
	if (buffer != local_buffer)
		kfree(buffer);
	return retval;
}
2012
/*
 * VFS write entry point for cgroup control files: dispatch to whichever
 * handler the cftype provides, in priority order (raw write, numeric,
 * string, trigger).  -ENODEV if the cgroup was removed under us.
 */
static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
				 size_t nbytes, loff_t *ppos)
{
	struct cftype *cft = __d_cft(file->f_dentry);
	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);

	if (cgroup_is_removed(cgrp))
		return -ENODEV;
	if (cft->write)
		return cft->write(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->write_u64 || cft->write_s64)
		return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->write_string)
		return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->trigger) {
		/* trigger files ignore the data; any write fires the event */
		int ret = cft->trigger(cgrp, (unsigned int)cft->private);
		return ret ? ret : nbytes;
	}
	return -EINVAL;
}
2033
2034static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
2035 struct file *file,
2036 char __user *buf, size_t nbytes,
2037 loff_t *ppos)
2038{
2039 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2040 u64 val = cft->read_u64(cgrp, cft);
2041 int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
2042
2043 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2044}
2045
2046static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
2047 struct file *file,
2048 char __user *buf, size_t nbytes,
2049 loff_t *ppos)
2050{
2051 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2052 s64 val = cft->read_s64(cgrp, cft);
2053 int len = sprintf(tmp, "%lld\n", (long long) val);
2054
2055 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2056}
2057
/*
 * VFS read entry point for cgroup control files: dispatch to the
 * cftype's raw read, read_u64 or read_s64 handler, in that order.
 * (read_map/read_seq_string files were rerouted to the seq_file ops
 * at open time and never reach here.)
 */
static ssize_t cgroup_file_read(struct file *file, char __user *buf,
				size_t nbytes, loff_t *ppos)
{
	struct cftype *cft = __d_cft(file->f_dentry);
	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);

	if (cgroup_is_removed(cgrp))
		return -ENODEV;

	if (cft->read)
		return cft->read(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->read_u64)
		return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->read_s64)
		return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
	return -EINVAL;
}
2075
2076
2077
2078
2079
2080
/* Per-open state for seq_file-backed control files: which cftype and
 * which cgroup this file instance shows. */
struct cgroup_seqfile_state {
	struct cftype *cft;
	struct cgroup *cgroup;
};
2085
2086static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
2087{
2088 struct seq_file *sf = cb->state;
2089 return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
2090}
2091
2092static int cgroup_seqfile_show(struct seq_file *m, void *arg)
2093{
2094 struct cgroup_seqfile_state *state = m->private;
2095 struct cftype *cft = state->cft;
2096 if (cft->read_map) {
2097 struct cgroup_map_cb cb = {
2098 .fill = cgroup_map_add,
2099 .state = m,
2100 };
2101 return cft->read_map(state->cgroup, cft, &cb);
2102 }
2103 return cft->read_seq_string(state->cgroup, cft, m);
2104}
2105
2106static int cgroup_seqfile_release(struct inode *inode, struct file *file)
2107{
2108 struct seq_file *seq = file->private_data;
2109 kfree(seq->private);
2110 return single_release(inode, file);
2111}
2112
/* file_operations installed at open time for read_map/read_seq_string
 * files; writes still go through the generic cgroup write dispatcher. */
static const struct file_operations cgroup_seqfile_operations = {
	.read = seq_read,
	.write = cgroup_file_write,
	.llseek = seq_lseek,
	.release = cgroup_seqfile_release,
};
2119
/*
 * VFS open for cgroup control files.  seq_file-style cftypes get their
 * f_op swapped to cgroup_seqfile_operations with a freshly allocated
 * cgroup_seqfile_state; other cftypes may supply their own ->open.
 */
static int cgroup_file_open(struct inode *inode, struct file *file)
{
	int err;
	struct cftype *cft;

	err = generic_file_open(inode, file);
	if (err)
		return err;
	cft = __d_cft(file->f_dentry);

	if (cft->read_map || cft->read_seq_string) {
		struct cgroup_seqfile_state *state =
			kzalloc(sizeof(*state), GFP_USER);
		if (!state)
			return -ENOMEM;
		state->cft = cft;
		state->cgroup = __d_cgrp(file->f_dentry->d_parent);
		/* reroute all further fops through the seq_file variants */
		file->f_op = &cgroup_seqfile_operations;
		err = single_open(file, cgroup_seqfile_show, state);
		if (err < 0)
			kfree(state);
	} else if (cft->open)
		err = cft->open(inode, file);
	else
		err = 0;

	return err;
}
2148
2149static int cgroup_file_release(struct inode *inode, struct file *file)
2150{
2151 struct cftype *cft = __d_cft(file->f_dentry);
2152 if (cft->release)
2153 return cft->release(inode, file);
2154 return 0;
2155}
2156
2157
2158
2159
/*
 * Rename a cgroup: only directories may be renamed, only to a name that
 * does not already exist, and only within the same parent directory.
 * The check order fixes which errno wins when several apply.
 */
static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
			 struct inode *new_dir, struct dentry *new_dentry)
{
	if (!S_ISDIR(old_dentry->d_inode->i_mode))
		return -ENOTDIR;
	if (new_dentry->d_inode)
		return -EEXIST;
	if (old_dir != new_dir)
		return -EIO;
	return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
}
2171
/* Default file_operations for cgroup control files (non-seq_file path). */
static const struct file_operations cgroup_file_operations = {
	.read = cgroup_file_read,
	.write = cgroup_file_write,
	.llseek = generic_file_llseek,
	.open = cgroup_file_open,
	.release = cgroup_file_release,
};

/* inode_operations for cgroup directories; mkdir/rmdir create and
 * destroy child cgroups, rename is restricted by cgroup_rename above. */
static const struct inode_operations cgroup_dir_inode_operations = {
	.lookup = simple_lookup,
	.mkdir = cgroup_mkdir,
	.rmdir = cgroup_rmdir,
	.rename = cgroup_rename,
};
2186
2187
2188
2189
2190static inline struct cftype *__file_cft(struct file *file)
2191{
2192 if (file->f_dentry->d_inode->i_fop != &cgroup_file_operations)
2193 return ERR_PTR(-EINVAL);
2194 return __d_cft(file->f_dentry);
2195}
2196
/*
 * Allocate an inode for @dentry and wire up the appropriate operations:
 * directories get the cgroup dir ops (and are returned with i_mutex
 * held, nested as I_MUTEX_CHILD), regular files get the control-file
 * fops.  Takes an extra dentry reference on success.
 */
static int cgroup_create_file(struct dentry *dentry, mode_t mode,
			      struct super_block *sb)
{
	static const struct dentry_operations cgroup_dops = {
		.d_iput = cgroup_diput,
	};

	struct inode *inode;

	if (!dentry)
		return -ENOENT;
	if (dentry->d_inode)
		return -EEXIST;

	inode = cgroup_new_inode(mode, sb);
	if (!inode)
		return -ENOMEM;

	if (S_ISDIR(mode)) {
		inode->i_op = &cgroup_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;

		/* directories start with link count 2 ("." included) */
		inc_nlink(inode);

		/* caller expects the new directory locked */
		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
	} else if (S_ISREG(mode)) {
		inode->i_size = 0;
		inode->i_fop = &cgroup_file_operations;
	}
	dentry->d_op = &cgroup_dops;
	d_instantiate(dentry, inode);
	dget(dentry);	/* extra ref: pinned until cgroup teardown */
	return 0;
}
2234
2235
2236
2237
2238
2239
2240
2241
/*
 * Create the directory for @cgrp at @dentry under its parent cgroup's
 * directory.  On success the dentry is published in cgrp->dentry (via
 * rcu_assign_pointer, for lockless cgroup_path readers) and the parent
 * directory's link count is bumped.
 */
static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
			     mode_t mode)
{
	struct dentry *parent;
	int error = 0;

	parent = cgrp->parent->dentry;
	error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
	if (!error) {
		dentry->d_fsdata = cgrp;
		inc_nlink(parent->d_inode);
		rcu_assign_pointer(cgrp->dentry, dentry);
		dget(dentry);
	}
	dput(dentry);	/* drop the lookup reference the caller passed in */

	return error;
}
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270static mode_t cgroup_file_mode(const struct cftype *cft)
2271{
2272 mode_t mode = 0;
2273
2274 if (cft->mode)
2275 return cft->mode;
2276
2277 if (cft->read || cft->read_u64 || cft->read_s64 ||
2278 cft->read_map || cft->read_seq_string)
2279 mode |= S_IRUGO;
2280
2281 if (cft->write || cft->write_u64 || cft->write_s64 ||
2282 cft->write_string || cft->trigger)
2283 mode |= S_IWUSR;
2284
2285 return mode;
2286}
2287
/*
 * cgroup_add_file - create one control file for @cft in @cgrp's
 * directory.  Unless the root was mounted with noprefix, the file name
 * is "<subsys>.<cft->name>".  Caller must hold the directory's i_mutex.
 */
int cgroup_add_file(struct cgroup *cgrp,
		    struct cgroup_subsys *subsys,
		    const struct cftype *cft)
{
	struct dentry *dir = cgrp->dentry;
	struct dentry *dentry;
	int error;
	mode_t mode;

	/* sized for "<subsys>." + name + NUL */
	char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
	if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
		strcpy(name, subsys->name);
		strcat(name, ".");
	}
	strcat(name, cft->name);
	BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
	dentry = lookup_one_len(name, dir, strlen(name));
	if (!IS_ERR(dentry)) {
		mode = cgroup_file_mode(cft);
		error = cgroup_create_file(dentry, mode | S_IFREG,
					   cgrp->root->sb);
		if (!error)
			/* d_fsdata lets __d_cft recover the cftype later */
			dentry->d_fsdata = (void *)cft;
		dput(dentry);
	} else
		error = PTR_ERR(dentry);
	return error;
}
EXPORT_SYMBOL_GPL(cgroup_add_file);
2317
2318int cgroup_add_files(struct cgroup *cgrp,
2319 struct cgroup_subsys *subsys,
2320 const struct cftype cft[],
2321 int count)
2322{
2323 int i, err;
2324 for (i = 0; i < count; i++) {
2325 err = cgroup_add_file(cgrp, subsys, &cft[i]);
2326 if (err)
2327 return err;
2328 }
2329 return 0;
2330}
2331EXPORT_SYMBOL_GPL(cgroup_add_files);
2332
2333
2334
2335
2336
2337
2338
/*
 * cgroup_task_count - estimate the number of tasks in @cgrp by summing
 * the refcounts of every css_set linked to it.  NOTE(review): css_set
 * refcounts can include non-task references, so this may overcount —
 * presumably acceptable to the callers; verify before relying on
 * exactness.
 */
int cgroup_task_count(const struct cgroup *cgrp)
{
	int count = 0;
	struct cg_cgroup_link *link;

	read_lock(&css_set_lock);
	list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
		count += atomic_read(&link->cg->refcount);
	}
	read_unlock(&css_set_lock);
	return count;
}
2351
2352
2353
2354
2355
/*
 * Advance @it to the next css_set of @cgrp that has at least one task,
 * positioning it->task at that set's first task.  Sets it->cg_link to
 * NULL when the css_set list is exhausted.  Caller holds css_set_lock.
 */
static void cgroup_advance_iter(struct cgroup *cgrp,
				struct cgroup_iter *it)
{
	struct list_head *l = it->cg_link;
	struct cg_cgroup_link *link;
	struct css_set *cg;

	/* skip css_sets whose task list is empty */
	do {
		l = l->next;
		if (l == &cgrp->css_sets) {
			it->cg_link = NULL;	/* end of iteration */
			return;
		}
		link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
		cg = link->cg;
	} while (list_empty(&cg->tasks));
	it->cg_link = l;
	it->task = cg->tasks.next;
}
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
/*
 * One-time switch that links every existing task onto its css_set's
 * task list, enabling per-cgroup task iteration from then on
 * (use_task_css_set_links).  Done lazily so the common boot path never
 * pays for it.
 */
static void cgroup_enable_task_cg_lists(void)
{
	struct task_struct *p, *g;
	write_lock(&css_set_lock);
	use_task_css_set_links = 1;
	do_each_thread(g, p) {
		task_lock(p);

		/* skip tasks that are exiting or were already linked */
		if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
			list_add(&p->cg_list, &p->cgroups->tasks);
		task_unlock(p);
	} while_each_thread(g, p);
	write_unlock(&css_set_lock);
}
2404
/*
 * Begin iterating the tasks of @cgrp.  Returns with css_set_lock held
 * for reading; the matching cgroup_iter_end() drops it, so no blocking
 * between start and end.
 */
void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
{
	/* lazily enable task->cg_list linkage the first time anyone
	 * iterates */
	if (!use_task_css_set_links)
		cgroup_enable_task_cg_lists();

	read_lock(&css_set_lock);
	it->cg_link = &cgrp->css_sets;
	cgroup_advance_iter(cgrp, it);
}
2419
/*
 * Return the current task of the iteration and advance @it, moving to
 * the next non-empty css_set when the current set's task list is
 * exhausted.  Returns NULL when iteration is complete.
 */
struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
				     struct cgroup_iter *it)
{
	struct task_struct *res;
	struct list_head *l = it->task;
	struct cg_cgroup_link *link;

	/* cg_link == NULL means cgroup_advance_iter hit the end */
	if (!it->cg_link)
		return NULL;
	res = list_entry(l, struct task_struct, cg_list);

	l = l->next;
	link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
	if (l == &link->cg->tasks) {
		/* ran off this css_set's task list; find the next set */
		cgroup_advance_iter(cgrp, it);
	} else {
		it->task = l;
	}
	return res;
}
2443
/* Finish a task iteration, releasing the lock cgroup_iter_start took. */
void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
{
	read_unlock(&css_set_lock);
}
2448
2449static inline int started_after_time(struct task_struct *t1,
2450 struct timespec *time,
2451 struct task_struct *t2)
2452{
2453 int start_diff = timespec_compare(&t1->start_time, time);
2454 if (start_diff > 0) {
2455 return 1;
2456 } else if (start_diff < 0) {
2457 return 0;
2458 } else {
2459
2460
2461
2462
2463
2464
2465
2466
2467 return t1 > t2;
2468 }
2469}
2470
2471
2472
2473
2474
2475
2476static inline int started_after(void *p1, void *p2)
2477{
2478 struct task_struct *t1 = p1;
2479 struct task_struct *t2 = p2;
2480 return started_after_time(t1, &t2->start_time, t2);
2481}
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
/*
 * cgroup_scan_tasks - run scan->process_task over the tasks of scan->cg
 * that pass scan->test_task, without holding css_set_lock while
 * processing.
 *
 * Tasks are collected into a pointer heap ordered by start time, then
 * processed outside the iteration lock; the scan repeats from the
 * oldest unprocessed start time until a full pass collects nothing, so
 * tasks that existed for the whole scan are each processed exactly once
 * (newly forked tasks may be missed, per the heap-batching design).
 * Uses scan->heap if supplied, otherwise a temporary one-page heap.
 */
int cgroup_scan_tasks(struct cgroup_scanner *scan)
{
	int retval, i;
	struct cgroup_iter it;
	struct task_struct *p, *dropped;

	/* oldest (time, task) pair already fully processed */
	struct task_struct *latest_task = NULL;
	struct ptr_heap tmp_heap;
	struct ptr_heap *heap;
	struct timespec latest_time = { 0, 0 };

	if (scan->heap) {
		/* caller-provided heap; just install our comparator */
		heap = scan->heap;
		heap->gt = &started_after;
	} else {
		heap = &tmp_heap;
		retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
		if (retval)
			/* heap allocation failed; nothing to clean up */
			return retval;
	}

 again:
	/*
	 * Gather one heap-full of candidate tasks that (a) pass the
	 * filter and (b) started after the point we have already
	 * processed up to.
	 */
	heap->size = 0;
	cgroup_iter_start(scan->cg, &it);
	while ((p = cgroup_iter_next(scan->cg, &it))) {
		if (scan->test_task && !scan->test_task(p, scan))
			continue;
		/* already handled in a previous pass */
		if (!started_after_time(p, &latest_time, latest_task))
			continue;
		dropped = heap_insert(heap, p);
		if (dropped == NULL) {
			/* inserted into a non-full heap; pin the task */
			get_task_struct(p);
		} else if (dropped != p) {
			/* p displaced another task; swap the pins */
			get_task_struct(p);
			put_task_struct(dropped);
		}
		/* dropped == p: heap full of earlier starters; skip p */
	}
	cgroup_iter_end(scan->cg, &it);

	if (heap->size) {
		for (i = 0; i < heap->size; i++) {
			struct task_struct *q = heap->ptrs[i];
			if (i == 0) {
				/* heap root = oldest in this batch; new
				 * resume point for the next pass */
				latest_time = q->start_time;
				latest_task = q;
			}

			scan->process_task(q, scan);
			put_task_struct(q);
		}
		/* the heap may have overflowed — rescan for stragglers */
		goto again;
	}
	if (heap == &tmp_heap)
		heap_free(&tmp_heap);
	return 0;
}
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
2625static void *pidlist_allocate(int count)
2626{
2627 if (PIDLIST_TOO_LARGE(count))
2628 return vmalloc(count * sizeof(pid_t));
2629 else
2630 return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
2631}
/* Free a pid array from pidlist_allocate(), whichever allocator it
 * came from. */
static void pidlist_free(void *p)
{
	if (!is_vmalloc_addr(p)) {
		kfree(p);
		return;
	}
	vfree(p);
}
/*
 * Resize a pid array allocated by pidlist_allocate().  The vmalloc
 * branch copies only @newcount entries, so this is correct only for
 * shrinking — NOTE(review): the sole caller here (pidlist_uniq) does
 * shrink, but confirm before reusing for growth.
 */
static void *pidlist_resize(void *p, int newcount)
{
	void *newlist;

	if (is_vmalloc_addr(p)) {
		/* no vrealloc: allocate, copy, free by hand */
		newlist = vmalloc(newcount * sizeof(pid_t));
		if (!newlist)
			return NULL;
		memcpy(newlist, p, newcount * sizeof(pid_t));
		vfree(p);
	} else {
		newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL);
	}
	return newlist;
}
2654
2655
2656
2657
2658
2659
2660
2661
/* only bother shrinking the array when at least a page would be saved */
#define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new))
/*
 * Strip duplicates from a sorted pid array in place and return the new
 * length.  May shrink the allocation (updating *p) when that saves a
 * page or more; failure to shrink is harmless and ignored.
 */
static int pidlist_uniq(pid_t **p, int length)
{
	int src, dest = 1;
	pid_t *list = *p;
	pid_t *newlist;

	/* 0- or 1-element arrays cannot contain duplicates */
	if (length == 0 || length == 1)
		return length;

	for (src = 1; src < length; src++) {
		/* skip over a run of repeated values */
		while (list[src] == list[src-1]) {
			src++;
			if (src == length)
				goto after;
		}
		/* compact the next distinct value down to dest */
		list[dest] = list[src];
		dest++;
	}
after:
	if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
		newlist = pidlist_resize(list, dest);
		if (newlist)
			*p = newlist;
		/* on failure keep the oversized array — still valid */
	}
	return dest;
}
2700
/* sort() comparator for pid arrays: ascending numeric order. */
static int cmppid(const void *a, const void *b)
{
	pid_t pa = *(const pid_t *)a;
	pid_t pb = *(const pid_t *)b;

	return pa - pb;
}
2705
2706
2707
2708
2709
2710
2711
/*
 * Find or create the pidlist of @cgrp for (@type, current pid
 * namespace).  Returns the list with its rwsem write-locked (so the
 * caller can fill it), or NULL on allocation failure.  Lookup and
 * insertion are serialized by cgrp->pidlist_mutex.
 */
static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
						  enum cgroup_filetype type)
{
	struct cgroup_pidlist *l;

	/* pidlists are keyed by type and by the opener's pid namespace */
	struct pid_namespace *ns = current->nsproxy->pid_ns;

	mutex_lock(&cgrp->pidlist_mutex);
	list_for_each_entry(l, &cgrp->pidlists, links) {
		if (l->key.type == type && l->key.ns == ns) {
			/* found an existing list; lock it before
			 * releasing the lookup mutex */
			down_write(&l->mutex);
			mutex_unlock(&cgrp->pidlist_mutex);
			return l;
		}
	}
	/* no existing entry; create one, pre-locked for the caller */
	l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
	if (!l) {
		mutex_unlock(&cgrp->pidlist_mutex);
		return l;
	}
	init_rwsem(&l->mutex);
	down_write(&l->mutex);
	l->key.type = type;
	l->key.ns = get_pid_ns(ns);	/* hold the namespace alive */
	l->use_count = 0;
	l->list = NULL;
	l->owner = cgrp;
	list_add(&l->links, &cgrp->pidlists);
	mutex_unlock(&cgrp->pidlist_mutex);
	return l;
}
2751
2752
2753
2754
/*
 * Build the sorted pid array backing a "tasks" or "cgroup.procs" read.
 * Collects one pid (or tgid for PROCS) per task in @cgrp, sorts it,
 * de-duplicates tgids, and installs it in the cgroup's matching pidlist
 * with its use count bumped.  Returns 0 and sets *lp, or -ENOMEM.
 */
static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
			      struct cgroup_pidlist **lp)
{
	pid_t *array;
	int length;
	int pid, n = 0;
	struct cgroup_iter it;
	struct task_struct *tsk;
	struct cgroup_pidlist *l;

	/*
	 * Size the array from the current task count.  Tasks may fork
	 * after this point, which is why the loop below guards against
	 * overrunning the array.
	 */
	length = cgroup_task_count(cgrp);
	array = pidlist_allocate(length);
	if (!array)
		return -ENOMEM;

	cgroup_iter_start(cgrp, &it);
	while ((tsk = cgroup_iter_next(cgrp, &it))) {
		if (unlikely(n == length))
			break;
		/* PROCS lists thread-group leaders; TASKS lists threads */
		if (type == CGROUP_FILE_PROCS)
			pid = task_tgid_vnr(tsk);
		else
			pid = task_pid_vnr(tsk);
		if (pid > 0)	/* skip tasks invisible in our ns */
			array[n++] = pid;
	}
	cgroup_iter_end(cgrp, &it);
	length = n;

	sort(array, length, sizeof(pid_t), cmppid, NULL);
	if (type == CGROUP_FILE_PROCS)
		/* threads of one process share a tgid; drop duplicates */
		length = pidlist_uniq(&array, length);
	l = cgroup_pidlist_find(cgrp, type);
	if (!l) {
		pidlist_free(array);
		return -ENOMEM;
	}
	/* replace any stale array from a previous open */
	pidlist_free(l->list);
	l->list = array;
	l->length = length;
	l->use_count++;
	up_write(&l->mutex);
	*lp = l;
	return 0;
}
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
/*
 * cgroupstats_build - tally the scheduler states of every task in the
 * cgroup identified by @dentry into @stats.  Returns -EINVAL when the
 * dentry is not a cgroup directory.
 */
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
{
	int ret = -EINVAL;
	struct cgroup *cgrp;
	struct cgroup_iter it;
	struct task_struct *tsk;

	/* accept only directories that belong to a cgroup superblock */
	if (dentry->d_sb->s_op != &cgroup_ops ||
	    !S_ISDIR(dentry->d_inode->i_mode))
		goto err;

	ret = 0;
	cgrp = dentry->d_fsdata;

	cgroup_iter_start(cgrp, &it);
	while ((tsk = cgroup_iter_next(cgrp, &it))) {
		switch (tsk->state) {
		case TASK_RUNNING:
			stats->nr_running++;
			break;
		case TASK_INTERRUPTIBLE:
			stats->nr_sleeping++;
			break;
		case TASK_UNINTERRUPTIBLE:
			stats->nr_uninterruptible++;
			break;
		case TASK_STOPPED:
			stats->nr_stopped++;
			break;
		default:
			/* remaining states: count only I/O waiters */
			if (delayacct_is_task_waiting_on_io(tsk))
				stats->nr_io_wait++;
			break;
		}
	}
	cgroup_iter_end(cgrp, &it);

err:
	return ret;
}
2862
2863
2864
2865
2866
2867
2868
2869
/*
 * seq_file ->start for pidlist files.  *pos holds the last pid shown
 * (not an index), so that resuming is stable even if the array changed
 * between reads: binary-search for that pid, or the next greater one.
 * Takes the pidlist rwsem for reading; ->stop releases it.
 */
static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
{
	struct cgroup_pidlist *l = s->private;
	int index = 0, pid = *pos;
	int *iter;

	down_read(&l->mutex);
	if (pid) {
		int end = l->length;

		/* find pid, or the first entry greater than it */
		while (index < end) {
			int mid = (index + end) / 2;
			if (l->list[mid] == pid) {
				index = mid;
				break;
			} else if (l->list[mid] <= pid)
				index = mid + 1;
			else
				end = mid;
		}
	}
	/* past the end of the array — iteration is over */
	if (index >= l->length)
		return NULL;
	/* hand back a pointer into the array; update *pos to the pid */
	iter = l->list + index;
	*pos = *iter;
	return iter;
}
2905
2906static void cgroup_pidlist_stop(struct seq_file *s, void *v)
2907{
2908 struct cgroup_pidlist *l = s->private;
2909 up_read(&l->mutex);
2910}
2911
2912static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
2913{
2914 struct cgroup_pidlist *l = s->private;
2915 pid_t *p = v;
2916 pid_t *end = l->list + l->length;
2917
2918
2919
2920
2921 p++;
2922 if (p >= end) {
2923 return NULL;
2924 } else {
2925 *pos = *p;
2926 return p;
2927 }
2928}
2929
/* seq_file ->show: one pid per line. */
static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
	int pid = *(int *)v;

	return seq_printf(s, "%d\n", pid);
}
2934
2935
2936
2937
2938
/* seq_file iterator for "tasks" / "cgroup.procs"; walks the pid array
 * built by pidlist_array_load under the pidlist's read lock. */
static const struct seq_operations cgroup_pidlist_seq_operations = {
	.start = cgroup_pidlist_start,
	.stop = cgroup_pidlist_stop,
	.next = cgroup_pidlist_next,
	.show = cgroup_pidlist_show,
};
2945
/*
 * Drop one reference on a pidlist; the last dropper unhooks it from the
 * owning cgroup, frees the array and the pid-namespace reference, and
 * frees the pidlist itself.  Lock order: owner's pidlist_mutex first,
 * then the list's own rwsem, mirroring cgroup_pidlist_find.
 */
static void cgroup_release_pid_array(struct cgroup_pidlist *l)
{
	mutex_lock(&l->owner->pidlist_mutex);
	down_write(&l->mutex);
	BUG_ON(!l->use_count);
	if (!--l->use_count) {
		/* last user: tear the whole pidlist down */
		list_del(&l->links);
		mutex_unlock(&l->owner->pidlist_mutex);
		pidlist_free(l->list);
		put_pid_ns(l->key.ns);
		up_write(&l->mutex);
		kfree(l);
		return;
	}
	mutex_unlock(&l->owner->pidlist_mutex);
	up_write(&l->mutex);
}
2970
/*
 * VFS release for pidlist files: write-only opens never took a pidlist
 * reference (see cgroup_pidlist_open), so only readers release one.
 */
static int cgroup_pidlist_release(struct inode *inode, struct file *file)
{
	struct cgroup_pidlist *l;
	if (!(file->f_mode & FMODE_READ))
		return 0;

	/* the seq_file's private field carries our pidlist reference */
	l = ((struct seq_file *)file->private_data)->private;
	cgroup_release_pid_array(l);
	return seq_release(inode, file);
}
2984
/* file_operations installed at open time for pidlist files; writes
 * still route through the generic cgroup write dispatcher. */
static const struct file_operations cgroup_pidlist_operations = {
	.read = seq_read,
	.llseek = seq_lseek,
	.write = cgroup_file_write,
	.release = cgroup_pidlist_release,
};
2991
2992
2993
2994
2995
2996
2997
/*
 * Common open path for "tasks" and "cgroup.procs".  Read opens build
 * (or reuse) the pid array and attach it to a seq_file; write-only
 * opens do nothing here and never hold a pidlist reference.
 */
static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
{
	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
	struct cgroup_pidlist *l;
	int retval;

	if (!(file->f_mode & FMODE_READ))
		return 0;

	/* snapshot the pids; takes a use_count reference on l */
	retval = pidlist_array_load(cgrp, type, &l);
	if (retval)
		return retval;
	/* switch to the seq_file-based fops for the rest of this open */
	file->f_op = &cgroup_pidlist_operations;

	retval = seq_open(file, &cgroup_pidlist_seq_operations);
	if (retval) {
		cgroup_release_pid_array(l);
		return retval;
	}
	((struct seq_file *)file->private_data)->private = l;
	return 0;
}
/* open handler for the per-thread "tasks" file */
static int cgroup_tasks_open(struct inode *unused, struct file *file)
{
	return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
}
/* open handler for the per-process "cgroup.procs" file */
static int cgroup_procs_open(struct inode *unused, struct file *file)
{
	return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
}
3031
/* read handler for "notify_on_release": 1 if the flag is set, else 0 */
static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
					 struct cftype *cft)
{
	return notify_on_release(cgrp);
}
3037
3038static int cgroup_write_notify_on_release(struct cgroup *cgrp,
3039 struct cftype *cft,
3040 u64 val)
3041{
3042 clear_bit(CGRP_RELEASABLE, &cgrp->flags);
3043 if (val)
3044 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3045 else
3046 clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3047 return 0;
3048}
3049
3050
3051
3052
3053
3054
/*
 * Workqueue callback that finishes tearing down a cgroup_event:
 * unregisters it from the cftype, drops the eventfd context and the
 * cgroup dentry reference taken at registration, and frees the event.
 * Runs from process context so the unregister callback may sleep.
 */
static void cgroup_event_remove(struct work_struct *work)
{
	struct cgroup_event *event = container_of(work, struct cgroup_event,
						  remove);
	struct cgroup *cgrp = event->cgrp;

	event->cft->unregister_event(cgrp, event->cft, event->eventfd);

	eventfd_ctx_put(event->eventfd);
	kfree(event);
	dput(cgrp->dentry);
}
3067
3068
3069
3070
3071
3072
/*
 * Wait-queue callback for a cgroup_event's eventfd.  On POLLHUP (the
 * eventfd was closed) it detaches from the wait queue, unlinks the
 * event from the cgroup's list, and defers the sleeping cleanup to a
 * workqueue since this runs in wakeup (atomic) context.
 */
static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
			     int sync, void *key)
{
	struct cgroup_event *event = container_of(wait,
						  struct cgroup_event, wait);
	struct cgroup *cgrp = event->cgrp;
	unsigned long flags = (unsigned long)key;

	if (flags & POLLHUP) {
		__remove_wait_queue(event->wqh, &event->wait);
		spin_lock(&cgrp->event_list_lock);
		list_del(&event->list);
		spin_unlock(&cgrp->event_list_lock);

		/* actual unregister/free must happen in process context */
		schedule_work(&event->remove);
	}

	return 0;
}
3095
/* poll_table queue callback: remember the eventfd's wait queue head and
 * hook our wait entry onto it so cgroup_event_wake sees POLLHUP. */
static void cgroup_event_ptable_queue_proc(struct file *file,
					   wait_queue_head_t *wqh, poll_table *pt)
{
	struct cgroup_event *event = container_of(pt,
						  struct cgroup_event, pt);

	event->wqh = wqh;
	add_wait_queue(wqh, &event->wait);
}
3105
3106
3107
3108
3109
3110
3111
/*
 * Write handler for "cgroup.event_control".  Input format:
 * "<event_fd> <control_fd> [args]".  Binds an eventfd to a control
 * file's register_event/unregister_event pair so userspace gets
 * notifications through the eventfd.  All failure paths funnel through
 * the "fail" label, which releases whatever was acquired so far.
 */
static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
				      const char *buffer)
{
	struct cgroup_event *event = NULL;
	unsigned int efd, cfd;
	struct file *efile = NULL;
	struct file *cfile = NULL;
	char *endp;
	int ret;

	/* parse "<event_fd> <control_fd>"; the rest goes to the cft */
	efd = simple_strtoul(buffer, &endp, 10);
	if (*endp != ' ')
		return -EINVAL;
	buffer = endp + 1;

	cfd = simple_strtoul(buffer, &endp, 10);
	if ((*endp != ' ') && (*endp != '\0'))
		return -EINVAL;
	buffer = endp + 1;

	event = kzalloc(sizeof(*event), GFP_KERNEL);
	if (!event)
		return -ENOMEM;
	event->cgrp = cgrp;
	INIT_LIST_HEAD(&event->list);
	init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
	init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
	INIT_WORK(&event->remove, cgroup_event_remove);

	/* efd must really be an eventfd */
	efile = eventfd_fget(efd);
	if (IS_ERR(efile)) {
		ret = PTR_ERR(efile);
		goto fail;
	}

	event->eventfd = eventfd_ctx_fileget(efile);
	if (IS_ERR(event->eventfd)) {
		ret = PTR_ERR(event->eventfd);
		goto fail;
	}

	cfile = fget(cfd);
	if (!cfile) {
		ret = -EBADF;
		goto fail;
	}

	/* the caller needs read permission on the control file */
	ret = file_permission(cfile, MAY_READ);
	if (ret < 0)
		goto fail;

	/* and cfd must be a cgroup control file */
	event->cft = __file_cft(cfile);
	if (IS_ERR(event->cft)) {
		ret = PTR_ERR(event->cft);
		goto fail;
	}

	if (!event->cft->register_event || !event->cft->unregister_event) {
		ret = -EINVAL;
		goto fail;
	}

	ret = event->cft->register_event(cgrp, event->cft,
			event->eventfd, buffer);
	if (ret)
		goto fail;

	/* eventfd already hung up — undo the registration, report ok */
	if (efile->f_op->poll(efile, &event->pt) & POLLHUP) {
		event->cft->unregister_event(cgrp, event->cft, event->eventfd);
		ret = 0;
		goto fail;
	}

	/*
	 * Pin the cgroup dentry; dropped again by cgroup_event_remove
	 * when the event is torn down.
	 */
	dget(cgrp->dentry);

	spin_lock(&cgrp->event_list_lock);
	list_add(&event->list, &cgrp->event_list);
	spin_unlock(&cgrp->event_list_lock);

	fput(cfile);
	fput(efile);

	return 0;

fail:
	/* release, in reverse order, everything acquired before the jump */
	if (cfile)
		fput(cfile);

	if (event && event->eventfd && !IS_ERR(event->eventfd))
		eventfd_ctx_put(event->eventfd);

	if (!IS_ERR_OR_NULL(efile))
		fput(efile);

	kfree(event);

	return ret;
}
3216
/* Read handler for "cgroup.clone_children": report the flag as 0 or 1. */
static u64 cgroup_clone_children_read(struct cgroup *cgrp,
				    struct cftype *cft)
{
	return clone_children(cgrp);
}
3222
3223static int cgroup_clone_children_write(struct cgroup *cgrp,
3224 struct cftype *cft,
3225 u64 val)
3226{
3227 if (val)
3228 set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3229 else
3230 clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3231 return 0;
3232}
3233
3234
3235
3236
3237
/* Prefix used for the core cgroup control files that carry one. */
#define CGROUP_FILE_GENERIC_PREFIX "cgroup."
/* Base control files created in every cgroup directory. */
static struct cftype files[] = {
	{
		.name = "tasks",
		.open = cgroup_tasks_open,
		.write_u64 = cgroup_tasks_write,
		.release = cgroup_pidlist_release,
		.mode = S_IRUGO | S_IWUSR,
	},
	{
		.name = CGROUP_FILE_GENERIC_PREFIX "procs",
		.open = cgroup_procs_open,
		/* NOTE(review): no write handler here — "cgroup.procs" is
		 * read-only in this version (mode below is S_IRUGO) */
		.release = cgroup_pidlist_release,
		.mode = S_IRUGO,
	},
	{
		.name = "notify_on_release",
		.read_u64 = cgroup_read_notify_on_release,
		.write_u64 = cgroup_write_notify_on_release,
	},
	{
		.name = CGROUP_FILE_GENERIC_PREFIX "event_control",
		.write_string = cgroup_write_event_control,
		.mode = S_IWUGO,
	},
	{
		.name = "cgroup.clone_children",
		.read_u64 = cgroup_clone_children_read,
		.write_u64 = cgroup_clone_children_write,
	},
};
3270
/* "release_agent" file — created only in the root cgroup of a hierarchy. */
static struct cftype cft_release_agent = {
	.name = "release_agent",
	.read_seq_string = cgroup_release_agent_show,
	.write_string = cgroup_release_agent_write,
	.max_write_len = PATH_MAX,
};
3277
/*
 * Populate a cgroup directory: clear stale entries, create the base
 * control files, the root-only release_agent file, and each bound
 * subsystem's own files, then publish each css in its css_id.
 * Returns 0 on success or the first negative error encountered.
 */
static int cgroup_populate_dir(struct cgroup *cgrp)
{
	int err;
	struct cgroup_subsys *ss;

	/* First clear out any existing files */
	cgroup_clear_directory(cgrp->dentry);

	err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
	if (err < 0)
		return err;

	/* release_agent exists only at the top of a hierarchy */
	if (cgrp == cgrp->top_cgroup) {
		if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0)
			return err;
	}

	for_each_subsys(cgrp->root, ss) {
		if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
			return err;
	}

	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];

		/*
		 * Update id->css pointer now that the directory (and thus
		 * the css) is visible; done here rather than at css
		 * creation so lookups only see fully set-up state.
		 */
		if (css->id)
			rcu_assign_pointer(css->id->css, css);
	}

	return 0;
}
3313
/*
 * Initialise a freshly created css and attach it to @cgrp's subsys
 * slot.  The refcount starts at 1 (the base reference dropped on
 * removal); the css of the dummy top cgroup is flagged CSS_ROOT.
 */
static void init_cgroup_css(struct cgroup_subsys_state *css,
			       struct cgroup_subsys *ss,
			       struct cgroup *cgrp)
{
	css->cgroup = cgrp;
	atomic_set(&css->refcnt, 1);
	css->flags = 0;
	css->id = NULL;
	if (cgrp == dummytop)
		set_bit(CSS_ROOT, &css->flags);
	/* the slot must not already be occupied */
	BUG_ON(cgrp->subsys[ss->subsys_id]);
	cgrp->subsys[ss->subsys_id] = css;
}
3327
3328static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
3329{
3330
3331 int i;
3332
3333
3334
3335
3336
3337 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3338 struct cgroup_subsys *ss = subsys[i];
3339 if (ss == NULL)
3340 continue;
3341 if (ss->root == root)
3342 mutex_lock(&ss->hierarchy_mutex);
3343 }
3344}
3345
3346static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
3347{
3348 int i;
3349
3350 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3351 struct cgroup_subsys *ss = subsys[i];
3352 if (ss == NULL)
3353 continue;
3354 if (ss->root == root)
3355 mutex_unlock(&ss->hierarchy_mutex);
3356 }
3357}
3358
3359
3360
3361
3362
3363
3364
3365
3366
/*
 * cgroup_create - create a cgroup
 * @parent: cgroup that will be parent of the new cgroup
 * @dentry: dentry of the new cgroup
 * @mode: mode to set on new inode
 *
 * Must be called with the mutex on the parent inode held.
 * Returns 0 on success or a negative errno; on failure all partially
 * created state (css's, list linkage, superblock ref) is rolled back.
 */
static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
			     mode_t mode)
{
	struct cgroup *cgrp;
	struct cgroupfs_root *root = parent->root;
	int err = 0;
	struct cgroup_subsys *ss;
	struct super_block *sb = root->sb;

	cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
	if (!cgrp)
		return -ENOMEM;

	/*
	 * Grab a reference on the superblock so the hierarchy doesn't
	 * get deactivated while we're alive; dropped on the error path
	 * (and, presumably, on cgroup destruction — that code is
	 * elsewhere in this file).
	 */
	atomic_inc(&sb->s_active);

	mutex_lock(&cgroup_mutex);

	init_cgroup_housekeeping(cgrp);

	cgrp->parent = parent;
	cgrp->root = parent->root;
	cgrp->top_cgroup = parent->top_cgroup;

	/* notify_on_release and clone_children are inherited from parent */
	if (notify_on_release(parent))
		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);

	if (clone_children(parent))
		set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);

	for_each_subsys(root, ss) {
		struct cgroup_subsys_state *css = ss->create(ss, cgrp);

		if (IS_ERR(css)) {
			err = PTR_ERR(css);
			goto err_destroy;
		}
		init_cgroup_css(css, ss, cgrp);
		if (ss->use_id) {
			err = alloc_css_id(ss, parent, cgrp);
			if (err)
				goto err_destroy;
		}
		/* At this point, css refcnt is 1 */
		if (clone_children(parent) && ss->post_clone)
			ss->post_clone(ss, cgrp);
	}

	cgroup_lock_hierarchy(root);
	list_add(&cgrp->sibling, &cgrp->parent->children);
	cgroup_unlock_hierarchy(root);
	root->number_of_cgroups++;

	err = cgroup_create_dir(cgrp, dentry, mode);
	if (err < 0)
		goto err_remove;

	/* The cgroup directory was pre-locked for us */
	BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));

	err = cgroup_populate_dir(cgrp);
	/* If err < 0, we have a half-filled directory - oh well ;) */

	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);

	return 0;

 err_remove:
	/* undo the sibling-list linkage done above */
	cgroup_lock_hierarchy(root);
	list_del(&cgrp->sibling);
	cgroup_unlock_hierarchy(root);
	root->number_of_cgroups--;

 err_destroy:
	/* destroy any css's that were successfully created */
	for_each_subsys(root, ss) {
		if (cgrp->subsys[ss->subsys_id])
			ss->destroy(ss, cgrp);
	}

	mutex_unlock(&cgroup_mutex);

	/* Release the reference count that we took on the superblock */
	deactivate_super(sb);

	kfree(cgrp);
	return err;
}
3461
3462static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
3463{
3464 struct cgroup *c_parent = dentry->d_parent->d_fsdata;
3465
3466
3467 return cgroup_create(c_parent, dentry, mode | S_IFDIR);
3468}
3469
3470static int cgroup_has_css_refs(struct cgroup *cgrp)
3471{
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481 int i;
3482
3483
3484
3485
3486
3487 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3488 struct cgroup_subsys *ss = subsys[i];
3489 struct cgroup_subsys_state *css;
3490
3491 if (ss == NULL || ss->root != cgrp->root)
3492 continue;
3493 css = cgrp->subsys[ss->subsys_id];
3494
3495
3496
3497
3498
3499
3500 if (css && (atomic_read(&css->refcnt) > 1))
3501 return 1;
3502 }
3503 return 0;
3504}
3505
3506
3507
3508
3509
3510
3511
/*
 * Atomically drop the base reference (1 -> 0) on every css in @cgrp,
 * as the commit point of cgroup removal.  Returns true if all css's
 * were cleared; returns false — restoring any refcounts already
 * zeroed — if some css still has external references.  Done with
 * interrupts disabled so the cmpxchg loop below isn't re-entered.
 */
static int cgroup_clear_css_refs(struct cgroup *cgrp)
{
	struct cgroup_subsys *ss;
	unsigned long flags;
	bool failed = false;
	local_irq_save(flags);
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		int refcnt;
		while (1) {
			/* We can only remove a CSS with a refcnt==1 */
			refcnt = atomic_read(&css->refcnt);
			if (refcnt > 1) {
				failed = true;
				goto done;
			}
			BUG_ON(!refcnt);
			/*
			 * Drop the base ref with cmpxchg rather than a plain
			 * store: a concurrent __css_get() may have bumped the
			 * count since we read it, in which case we retry.
			 */
			if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt)
				break;
			cpu_relax();
		}
	}
 done:
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		if (failed) {
			/*
			 * Restore base refcounts on css's that we already
			 * zeroed; ones we never reached still hold theirs.
			 */
			if (!atomic_read(&css->refcnt))
				atomic_set(&css->refcnt, 1);
		} else {
			/* Commit the fact that the CSS is removed */
			set_bit(CSS_REMOVED, &css->flags);
		}
	}
	local_irq_restore(flags);
	return !failed;
}
3558
/*
 * rmdir(2) entry point for cgroupfs.  A cgroup can only be removed
 * when it has no attached tasks, no children, and no external css
 * references; pre_destroy callbacks may sleep and drop references, so
 * the whole sequence may be retried (the "again" loop) using the
 * CGRP_WAIT_ON_RMDIR flag + cgroup_rmdir_waitq handshake.
 */
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
{
	struct cgroup *cgrp = dentry->d_fsdata;
	struct dentry *d;
	struct cgroup *parent;
	DEFINE_WAIT(wait);
	struct cgroup_event *event, *tmp;
	int ret;

	/* the vfs holds both inode->i_mutex already */
again:
	mutex_lock(&cgroup_mutex);
	if (atomic_read(&cgrp->count) != 0) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	if (!list_empty(&cgrp->children)) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	mutex_unlock(&cgroup_mutex);

	/*
	 * In general, subsystem has no css->refcnt after pre_destroy().
	 * But in racy cases, subsystem may have to get css->refcnt after
	 * pre_destroy() and it makes rmdir return with -EBUSY. This sometimes
	 * makes rmdir return -EBUSY too often. To avoid that, we use waitqueue
	 * for cgroup's rmdir. CGRP_WAIT_ON_RMDIR is for synchronizing rmdir
	 * and subsystem's reference count handling. Please see css_get/put
	 * and css_tryget() and cgroup_wakeup_rmdir_waiter() implementation.
	 */
	set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	/*
	 * Call pre_destroy handlers of subsys. Notify subsystems
	 * that rmdir() request comes.  May sleep, hence outside
	 * cgroup_mutex.
	 */
	ret = cgroup_call_pre_destroy(cgrp);
	if (ret) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		return ret;
	}

	mutex_lock(&cgroup_mutex);
	parent = cgrp->parent;
	/* re-check: pre_destroy dropped the mutex, state may have changed */
	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
	if (!cgroup_clear_css_refs(cgrp)) {
		mutex_unlock(&cgroup_mutex);
		/*
		 * Some css holders got a reference during pre_destroy;
		 * wait until they drop it (cgroup_wakeup_rmdir_waiter
		 * clears the flag), then retry from the top.
		 */
		if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
			schedule();
		finish_wait(&cgroup_rmdir_waitq, &wait);
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		if (signal_pending(current))
			return -EINTR;
		goto again;
	}
	/* NO css_tryget() can success after here. */
	finish_wait(&cgroup_rmdir_waitq, &wait);
	clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	spin_lock(&release_list_lock);
	set_bit(CGRP_REMOVED, &cgrp->flags);
	if (!list_empty(&cgrp->release_list))
		list_del_init(&cgrp->release_list);
	spin_unlock(&release_list_lock);

	cgroup_lock_hierarchy(cgrp->root);
	/* delete this cgroup from parent->children */
	list_del_init(&cgrp->sibling);
	cgroup_unlock_hierarchy(cgrp->root);

	spin_lock(&cgrp->dentry->d_lock);
	d = dget(cgrp->dentry);
	spin_unlock(&d->d_lock);

	cgroup_d_remove_dir(d);
	dput(d);

	set_bit(CGRP_RELEASABLE, &parent->flags);
	check_for_release(parent);

	/*
	 * Unregister events and notify userspace.
	 * Notify userspace about cgroup removing only after rmdir of cgroup
	 * directory to avoid race between userspace and kernelspace.
	 */
	spin_lock(&cgrp->event_list_lock);
	list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
		list_del(&event->list);
		remove_wait_queue(event->wqh, &event->wait);
		eventfd_signal(event->eventfd, 1);
		schedule_work(&event->remove);
	}
	spin_unlock(&cgrp->event_list_lock);

	mutex_unlock(&cgroup_mutex);
	return 0;
}
3666
/*
 * Boot-time initialisation of a built-in subsystem: bind it to the
 * dummy root, create its top-level css on dummytop, and wire that css
 * into init_css_set.  Must run before any tasks fork/exit callbacks
 * could fire.
 */
static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);

	/* Create the top cgroup state for this subsystem */
	list_add(&ss->sibling, &rootnode.subsys_list);
	ss->root = &rootnode;
	css = ss->create(ss, dummytop);
	/* We don't handle early failures gracefully */
	BUG_ON(IS_ERR(css));
	init_cgroup_css(css, ss, dummytop);

	/*
	 * Update the init_css_set to contain a subsys pointer to this
	 * newly registered css, since during boot every task uses
	 * init_css_set directly.
	 */
	init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];

	need_forkexit_callback |= ss->fork || ss->exit;

	/*
	 * At system boot, before sched_init() has been called, only the
	 * boot task exists, so it is safe to assume no tasks need the
	 * subsystem's fork callback retroactively.
	 */
	BUG_ON(!list_empty(&init_task.tasks));

	mutex_init(&ss->hierarchy_mutex);
	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
	ss->active = 1;

	/* this function shouldn't be used with modular subsystems, since they
	 * need to register a subsys_id, among other things */
	BUG_ON(ss->module);
}
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
/*
 * cgroup_load_subsys - load and register a modular subsystem at runtime
 * @ss: the subsystem to load
 *
 * For built-in subsystems this is a sanity-checking no-op (they were
 * initialised at boot).  For modular ones it allocates a subsys_id
 * slot, creates the root css on dummytop, and rehashes every existing
 * css_set to include the new subsystem's pointer.  Returns 0 on
 * success or a negative errno.
 */
int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
{
	int i;
	struct cgroup_subsys_state *css;

	/* check name and function validity */
	if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
	    ss->create == NULL || ss->destroy == NULL)
		return -EINVAL;

	/*
	 * Fork/exit callbacks are not supported for modular subsystems:
	 * there is no way to invoke them retroactively on all existing
	 * tasks, nor to guarantee ordering against module unload.
	 */
	if (ss->fork || ss->exit)
		return -EINVAL;

	/*
	 * An optionally modular subsystem is built-in: we want to do
	 * nothing, since it's already initialized from the boot-time
	 * cgroup_init_subsys() path.
	 */
	if (ss->module == NULL) {
		/* a few sanity checks */
		BUG_ON(ss->subsys_id >= CGROUP_BUILTIN_SUBSYS_COUNT);
		BUG_ON(subsys[ss->subsys_id] != ss);
		return 0;
	}

	/*
	 * Need to register a subsys id before anything else - for example,
	 * init_cgroup_css needs it.
	 */
	mutex_lock(&cgroup_mutex);
	/* find the first empty slot in the modular id range */
	for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
		if (subsys[i] == NULL)
			break;
	}
	if (i == CGROUP_SUBSYS_COUNT) {
		/* maximum number of subsystems already registered! */
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	/* assign ourselves the subsys_id */
	ss->subsys_id = i;
	subsys[i] = ss;

	/*
	 * No ss->create seems to need anything important in the ss struct, so
	 * this can happen first (i.e. before the rootnode attachment).
	 */
	css = ss->create(ss, dummytop);
	if (IS_ERR(css)) {
		/* failure case - need to deassign the subsys[] slot. */
		subsys[i] = NULL;
		mutex_unlock(&cgroup_mutex);
		return PTR_ERR(css);
	}

	list_add(&ss->sibling, &rootnode.subsys_list);
	ss->root = &rootnode;

	/* our new subsystem will be attached to the dummy hierarchy. */
	init_cgroup_css(css, ss, dummytop);
	/* init_idr must be after init_cgroup_css because it sets css->id. */
	if (ss->use_id) {
		int ret = cgroup_init_idr(ss, css);
		if (ret) {
			dummytop->subsys[ss->subsys_id] = NULL;
			ss->destroy(ss, dummytop);
			subsys[i] = NULL;
			mutex_unlock(&cgroup_mutex);
			return ret;
		}
	}

	/*
	 * Now we need to entangle the css into the existing css_sets.
	 * Unlike in cgroup_init_subsys, there are now multiple css_sets,
	 * so each needs its new subsystem pointer set and — because that
	 * changes the hash of its subsys array — to be moved to the
	 * bucket it now hashes to.  While this may look like walking
	 * every css_set twice, the hash table iteration visits each set
	 * exactly once (a set skipped as already-updated was simply
	 * rehashed into a bucket visited later).
	 */
	write_lock(&css_set_lock);
	for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
		struct css_set *cg;
		struct hlist_node *node, *tmp;
		struct hlist_head *bucket = &css_set_table[i], *new_bucket;

		hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) {
			/* skip entries that we already rehashed */
			if (cg->subsys[ss->subsys_id])
				continue;
			/* remove existing entry */
			hlist_del(&cg->hlist);
			/* set new value */
			cg->subsys[ss->subsys_id] = css;
			/* recompute hash and restore entry */
			new_bucket = css_set_hash(cg->subsys);
			hlist_add_head(&cg->hlist, new_bucket);
		}
	}
	write_unlock(&css_set_lock);

	mutex_init(&ss->hierarchy_mutex);
	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
	ss->active = 1;

	/* success! */
	mutex_unlock(&cgroup_mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(cgroup_load_subsys);
3828
3829
3830
3831
3832
3833
3834
3835
3836
/*
 * cgroup_unload_subsys - unload a modular subsystem at runtime
 * @ss: the subsystem to unload
 *
 * Reverses cgroup_load_subsys(): clears the subsys[] slot, strips the
 * subsystem's pointer out of every css_set (rehashing each), and
 * destroys the dummytop css.  Must only be called on a modular
 * subsystem currently attached to the dummy hierarchy.
 */
void cgroup_unload_subsys(struct cgroup_subsys *ss)
{
	struct cg_cgroup_link *link;
	struct hlist_head *hhead;

	BUG_ON(ss->module == NULL);

	/*
	 * We refuse to unload if the subsystem is in use on any
	 * hierarchy other than the dummy one — module refcounting
	 * elsewhere is expected to prevent that from happening.
	 */
	BUG_ON(ss->root != &rootnode);

	mutex_lock(&cgroup_mutex);
	/* deassign the subsys_id */
	BUG_ON(ss->subsys_id < CGROUP_BUILTIN_SUBSYS_COUNT);
	subsys[ss->subsys_id] = NULL;

	/* remove subsystem from rootnode's list of subsystems */
	list_del_init(&ss->sibling);

	/*
	 * Disentangle the css from all css_sets attached to the dummytop
	 * cgroup: since clearing the subsys pointer changes each set's
	 * hash, every set must be moved to its new bucket.
	 */
	write_lock(&css_set_lock);
	list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) {
		struct css_set *cg = link->cg;

		hlist_del(&cg->hlist);
		BUG_ON(!cg->subsys[ss->subsys_id]);
		cg->subsys[ss->subsys_id] = NULL;
		hhead = css_set_hash(cg->subsys);
		hlist_add_head(&cg->hlist, hhead);
	}
	write_unlock(&css_set_lock);

	/*
	 * Destroy the dummytop css and clear its slot, undoing the
	 * create/init done in cgroup_load_subsys().
	 */
	ss->destroy(ss, dummytop);
	dummytop->subsys[ss->subsys_id] = NULL;

	mutex_unlock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
3887
3888
3889
3890
3891
3892
3893
/*
 * cgroup_init_early - cgroup initialization at system boot
 *
 * Initialize cgroups at system boot, and initialize any
 * subsystems that request early init.  Runs before most kernel
 * facilities are up, so only early_init subsystems are touched.
 */
int __init cgroup_init_early(void)
{
	int i;
	atomic_set(&init_css_set.refcount, 1);
	INIT_LIST_HEAD(&init_css_set.cg_links);
	INIT_LIST_HEAD(&init_css_set.tasks);
	INIT_HLIST_NODE(&init_css_set.hlist);
	css_set_count = 1;
	init_cgroup_root(&rootnode);
	root_count = 1;
	init_task.cgroups = &init_css_set;

	/* link init_css_set to the dummy top cgroup */
	init_css_set_link.cg = &init_css_set;
	init_css_set_link.cgrp = dummytop;
	list_add(&init_css_set_link.cgrp_link_list,
		 &rootnode.top_cgroup.css_sets);
	list_add(&init_css_set_link.cg_link_list,
		 &init_css_set.cg_links);

	for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&css_set_table[i]);

	/* at bootup time, we don't worry about modular subsystems */
	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];

		/* built-in subsystems must be fully declared */
		BUG_ON(!ss->name);
		BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
		BUG_ON(!ss->create);
		BUG_ON(!ss->destroy);
		if (ss->subsys_id != i) {
			printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
			       ss->name, ss->subsys_id);
			BUG();
		}

		if (ss->early_init)
			cgroup_init_subsys(ss);
	}
	return 0;
}
3935
3936
3937
3938
3939
3940
3941
/*
 * cgroup_init - cgroup initialization
 *
 * Register cgroup filesystem and /proc file, and initialize
 * any subsystems that didn't request early init.
 */
int __init cgroup_init(void)
{
	int err;
	int i;
	struct hlist_head *hhead;

	err = bdi_init(&cgroup_backing_dev_info);
	if (err)
		return err;

	/* at bootup time, we don't worry about modular subsystems */
	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		if (!ss->early_init)
			cgroup_init_subsys(ss);
		if (ss->use_id)
			cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
	}

	/* Add init_css_set to the hash table */
	hhead = css_set_hash(init_css_set.subsys);
	hlist_add_head(&init_css_set.hlist, hhead);
	BUG_ON(!init_root_id(&rootnode));

	cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
	if (!cgroup_kobj) {
		err = -ENOMEM;
		goto out;
	}

	err = register_filesystem(&cgroup_fs_type);
	if (err < 0) {
		kobject_put(cgroup_kobj);
		goto out;
	}

	proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);

out:
	if (err)
		bdi_destroy(&cgroup_backing_dev_info);

	return err;
}
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
/*
 * seq_file show handler for /proc/<pid>/cgroup.  For each active
 * hierarchy prints "<id>:<subsys list>[,name=<name>]:<path>" for the
 * cgroup the task belongs to in that hierarchy.  m->private is the
 * task's struct pid (set up by cgroup_open()).
 */
static int proc_cgroup_show(struct seq_file *m, void *v)
{
	struct pid *pid;
	struct task_struct *tsk;
	char *buf;
	int retval;
	struct cgroupfs_root *root;

	retval = -ENOMEM;
	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!buf)
		goto out;

	retval = -ESRCH;
	pid = m->private;
	tsk = get_pid_task(pid, PIDTYPE_PID);
	if (!tsk)
		goto out_free;

	retval = 0;

	mutex_lock(&cgroup_mutex);

	for_each_active_root(root) {
		struct cgroup_subsys *ss;
		struct cgroup *cgrp;
		int count = 0;

		seq_printf(m, "%d:", root->hierarchy_id);
		for_each_subsys(root, ss)
			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
		/* named hierarchies get a "name=" field appended */
		if (strlen(root->name))
			seq_printf(m, "%sname=%s", count ? "," : "",
				   root->name);
		seq_putc(m, ':');
		cgrp = task_cgroup_from_root(tsk, root);
		retval = cgroup_path(cgrp, buf, PAGE_SIZE);
		if (retval < 0)
			goto out_unlock;
		seq_puts(m, buf);
		seq_putc(m, '\n');
	}

out_unlock:
	mutex_unlock(&cgroup_mutex);
	put_task_struct(tsk);
out_free:
	kfree(buf);
out:
	return retval;
}
4051
4052static int cgroup_open(struct inode *inode, struct file *file)
4053{
4054 struct pid *pid = PROC_I(inode)->pid;
4055 return single_open(file, proc_cgroup_show, pid);
4056}
4057
/* file_operations for /proc/<pid>/cgroup (seq_file based). */
const struct file_operations proc_cgroup_operations = {
	.open		= cgroup_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
4064
4065
/* Display information about each subsystem and each hierarchy */
static int proc_cgroupstats_show(struct seq_file *m, void *v)
{
	int i;

	seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
	/*
	 * Iterate all subsystem slots, including modular ones, under
	 * cgroup_mutex so a module can't unload (NULL-ing its slot or
	 * freeing its root) while we read it.
	 */
	mutex_lock(&cgroup_mutex);
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		if (ss == NULL)
			continue;
		seq_printf(m, "%s\t%d\t%d\t%d\n",
			   ss->name, ss->root->hierarchy_id,
			   ss->root->number_of_cgroups, !ss->disabled);
	}
	mutex_unlock(&cgroup_mutex);
	return 0;
}
4088
/* open handler for /proc/cgroups. */
static int cgroupstats_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_cgroupstats_show, NULL);
}
4093
/* file_operations for /proc/cgroups (seq_file based). */
static const struct file_operations proc_cgroupstats_operations = {
	.open = cgroupstats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
/*
 * cgroup_fork - attach a newly forked task to its parent's css_set
 * @child: pointer to task_struct of forking parent's new child
 *
 * The child shares its parent's css_set; a reference is taken on it.
 * task_lock() on the parent keeps current->cgroups stable while we
 * copy and ref it.  The child's cg_list linkage is deferred to
 * cgroup_post_fork().
 */
void cgroup_fork(struct task_struct *child)
{
	task_lock(current);
	child->cgroups = current->cgroups;
	get_css_set(child->cgroups);
	task_unlock(current);
	INIT_LIST_HEAD(&child->cg_list);
}
4125
4126
4127
4128
4129
4130
4131
4132
4133
/*
 * cgroup_fork_callbacks - run fork callbacks
 * @child: the new task
 *
 * Called on a new task very soon before adding it to the
 * tasklist.  No need to take any locks since no-one can
 * be operating on this task.
 */
void cgroup_fork_callbacks(struct task_struct *child)
{
	if (need_forkexit_callback) {
		int i;
		/*
		 * Only built-in subsystems are walked: modular ones are
		 * rejected at load time if they declare fork/exit hooks
		 * (see cgroup_load_subsys()).
		 */
		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss->fork)
				ss->fork(ss, child);
		}
	}
}
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
/*
 * cgroup_post_fork - finalize cgroup setup for the child task
 * @child: the task in question
 *
 * If the per-css_set task lists are in use (use_task_css_set_links is
 * set once any hierarchy is mounted), link the child onto its
 * css_set's task list under css_set_lock + the child's task_lock.
 */
void cgroup_post_fork(struct task_struct *child)
{
	if (use_task_css_set_links) {
		write_lock(&css_set_lock);
		task_lock(child);
		if (list_empty(&child->cg_list))
			list_add(&child->cg_list, &child->cgroups->tasks);
		task_unlock(child);
		write_unlock(&css_set_lock);
	}
}
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
/*
 * cgroup_exit - detach an exiting task from its cgroups
 * @tsk: the exiting task
 * @run_callbacks: whether to invoke subsystem exit callbacks
 *
 * Runs the (built-in-only) exit callbacks, unlinks the task from its
 * css_set's task list, then reassigns the task to init_css_set and
 * drops its css_set reference via put_css_set_taskexit().
 */
void cgroup_exit(struct task_struct *tsk, int run_callbacks)
{
	int i;
	struct css_set *cg;

	if (run_callbacks && need_forkexit_callback) {
		/*
		 * modular subsystems can't use exit callbacks, so only
		 * built-in subsystems need to be checked here.
		 */
		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss->exit)
				ss->exit(ss, tsk);
		}
	}

	/*
	 * Unlink from the css_set task list if linked.  The unlocked
	 * check is an optimisation; it is re-checked under css_set_lock
	 * before the actual list_del_init().
	 */
	if (!list_empty(&tsk->cg_list)) {
		write_lock(&css_set_lock);
		if (!list_empty(&tsk->cg_list))
			list_del_init(&tsk->cg_list);
		write_unlock(&css_set_lock);
	}

	/* Reassign the task to init_css_set. */
	task_lock(tsk);
	cg = tsk->cgroups;
	tsk->cgroups = &init_css_set;
	task_unlock(tsk);
	if (cg)
		put_css_set_taskexit(cg);
}
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
/*
 * cgroup_clone - clone the cgroup @tsk belongs to in @subsys's
 * hierarchy into a new child cgroup named @nodename and move @tsk
 * into it.
 *
 * Must be called with cgroup_mutex NOT held, since it takes it itself
 * (and drops/retakes it around the sleeping vfs_mkdir()).  If the
 * subsystem is still on the dummy (inactive) hierarchy there is
 * nothing to clone and 0 is returned.
 */
int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
							char *nodename)
{
	struct dentry *dentry;
	int ret = 0;
	struct cgroup *parent, *child;
	struct inode *inode;
	struct css_set *cg;
	struct cgroupfs_root *root;
	struct cgroup_subsys *ss;

	/* We shouldn't be called by an unregistered subsystem */
	BUG_ON(!subsys->active);

	/* First figure out what hierarchy and cgroup we're dealing
	 * with, and pin them so we can drop cgroup_mutex */
	mutex_lock(&cgroup_mutex);
 again:
	root = subsys->root;
	if (root == &rootnode) {
		mutex_unlock(&cgroup_mutex);
		return 0;
	}

	/* Pin the hierarchy */
	if (!atomic_inc_not_zero(&root->sb->s_active)) {
		/* We race with the final deactivate_super() */
		mutex_unlock(&cgroup_mutex);
		return 0;
	}

	/* Keep the cgroup alive */
	task_lock(tsk);
	parent = task_cgroup(tsk, subsys->subsys_id);
	cg = tsk->cgroups;
	get_css_set(cg);
	task_unlock(tsk);

	mutex_unlock(&cgroup_mutex);

	/* Now do the VFS work to create a cgroup */
	inode = parent->dentry->d_inode;

	/* Hold the parent directory mutex across this operation to
	 * stop anyone else deleting the new cgroup */
	mutex_lock(&inode->i_mutex);
	dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
	if (IS_ERR(dentry)) {
		printk(KERN_INFO
		       "cgroup: Couldn't allocate dentry for %s: %ld\n", nodename,
		       PTR_ERR(dentry));
		ret = PTR_ERR(dentry);
		goto out_release;
	}

	/* Create the cgroup directory, which also creates the cgroup */
	ret = vfs_mkdir(inode, dentry, 0755);
	child = __d_cgrp(dentry);
	dput(dentry);
	if (ret) {
		printk(KERN_INFO
		       "Failed to create cgroup %s: %d\n", nodename,
		       ret);
		goto out_release;
	}

	/* The cgroup now exists. Retake cgroup_mutex and check
	 * that we're still in the same state that we thought we
	 * were. */
	mutex_lock(&cgroup_mutex);
	if ((root != subsys->root) ||
	    (parent != task_cgroup(tsk, subsys->subsys_id))) {
		/* Aargh, we raced ... */
		mutex_unlock(&inode->i_mutex);
		put_css_set(cg);

		deactivate_super(root->sb);
		/* The cgroup is still accessible in the VFS, but
		 * we're not going to try to rmdir() it at this
		 * point. */
		printk(KERN_INFO
		       "Race in cgroup_clone() - leaking cgroup %s\n",
		       nodename);
		goto again;
	}

	/* do any required auto-setup */
	for_each_subsys(root, ss) {
		if (ss->post_clone)
			ss->post_clone(ss, child);
	}

	/* All seems fine. Finish by moving the task into the new cgroup */
	ret = cgroup_attach_task(child, tsk);
	mutex_unlock(&cgroup_mutex);

 out_release:
	mutex_unlock(&inode->i_mutex);

	mutex_lock(&cgroup_mutex);
	put_css_set(cg);
	mutex_unlock(&cgroup_mutex);
	deactivate_super(root->sb);
	return ret;
}
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
4374{
4375 int ret;
4376 struct cgroup *target;
4377
4378 if (cgrp == dummytop)
4379 return 1;
4380
4381 target = task_cgroup_from_root(task, cgrp->root);
4382 while (cgrp != target && cgrp!= cgrp->top_cgroup)
4383 cgrp = cgrp->parent;
4384 ret = (cgrp == target);
4385 return ret;
4386}
4387
/*
 * If @cgrp looks empty and releasable (no refs, no children, no
 * external css references), queue it on release_list and kick the
 * release-agent work item.  All checks here are racy hints; the work
 * handler re-validates under the proper locks.
 */
static void check_for_release(struct cgroup *cgrp)
{
	/* All of these checks rely on RCU to keep the cgroup
	 * structure alive */
	if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
	    && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
		/* Control Group is currently removeable. If it's not
		 * already queued for a userspace notification, queue
		 * it now */
		int need_schedule_work = 0;
		spin_lock(&release_list_lock);
		if (!cgroup_is_removed(cgrp) &&
		    list_empty(&cgrp->release_list)) {
			list_add(&cgrp->release_list, &release_list);
			need_schedule_work = 1;
		}
		spin_unlock(&release_list_lock);
		if (need_schedule_work)
			schedule_work(&release_agent_work);
	}
}
4409
4410
/* Caller must verify that the css is not for root cgroup */
void __css_put(struct cgroup_subsys_state *css, int count)
{
	struct cgroup *cgrp = css->cgroup;
	int val;
	rcu_read_lock();
	val = atomic_sub_return(count, &css->refcnt);
	/*
	 * val == 1 means only the base reference remains: the last
	 * external user just went away, so the cgroup may now be
	 * releasable and any rmdir() waiter can retry.
	 */
	if (val == 1) {
		if (notify_on_release(cgrp)) {
			set_bit(CGRP_RELEASABLE, &cgrp->flags);
			check_for_release(cgrp);
		}
		cgroup_wakeup_rmdir_waiter(cgrp);
	}
	rcu_read_unlock();
	/* dropping below the base count indicates a refcount bug */
	WARN_ON_ONCE(val < 1);
}
EXPORT_SYMBOL_GPL(__css_put);
4428
4429
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449
4450
4451
/*
 * Work handler that drains release_list, invoking the hierarchy's
 * release_agent helper (via usermodehelper) with the path of each
 * notify-on-release cgroup that became empty.  cgroup_mutex is
 * dropped around the (sleeping) call_usermodehelper() call.
 */
static void cgroup_release_agent(struct work_struct *work)
{
	BUG_ON(work != &release_agent_work);
	mutex_lock(&cgroup_mutex);
	spin_lock(&release_list_lock);
	while (!list_empty(&release_list)) {
		char *argv[3], *envp[3];
		int i;
		char *pathbuf = NULL, *agentbuf = NULL;
		struct cgroup *cgrp = list_entry(release_list.next,
						 struct cgroup,
						 release_list);
		list_del_init(&cgrp->release_list);
		spin_unlock(&release_list_lock);
		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!pathbuf)
			goto continue_free;
		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
			goto continue_free;
		/* snapshot the agent path; it may be rewritten concurrently */
		agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
		if (!agentbuf)
			goto continue_free;

		i = 0;
		argv[i++] = agentbuf;
		argv[i++] = pathbuf;
		argv[i] = NULL;

		i = 0;
		/* minimal environment for the helper */
		envp[i++] = "HOME=/";
		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
		envp[i] = NULL;

		/* Drop the lock while we invoke the usermode helper,
		 * since the exec could involve hitting disk and hence
		 * be a slow process */
		mutex_unlock(&cgroup_mutex);
		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
		mutex_lock(&cgroup_mutex);
 continue_free:
		kfree(pathbuf);
		kfree(agentbuf);
		spin_lock(&release_list_lock);
	}
	spin_unlock(&release_list_lock);
	mutex_unlock(&cgroup_mutex);
}
4500
4501static int __init cgroup_disable(char *str)
4502{
4503 int i;
4504 char *token;
4505
4506 while ((token = strsep(&str, ",")) != NULL) {
4507 if (!*token)
4508 continue;
4509
4510
4511
4512
4513 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4514 struct cgroup_subsys *ss = subsys[i];
4515
4516 if (!strcmp(token, ss->name)) {
4517 ss->disabled = 1;
4518 printk(KERN_INFO "Disabling %s control group"
4519 " subsystem\n", ss->name);
4520 break;
4521 }
4522 }
4523 }
4524 return 1;
4525}
4526__setup("cgroup_disable=", cgroup_disable);
4527
4528
4529
4530
4531
4532
4533
4534
/*
 * css_id - return the numeric id of @css, or 0 if it has none.
 *
 * Safe to call either under rcu_read_lock() or while holding a
 * reference on the css (the rcu_dereference_check condition below
 * encodes exactly those two cases).
 */
unsigned short css_id(struct cgroup_subsys_state *css)
{
	struct css_id *cssid;

	/*
	 * This css_id() can return correct value when somone has refcnt
	 * on this or this is under rcu_read_lock(). Once css->id is allocated,
	 * it's unchanged until freed.
	 */
	cssid = rcu_dereference_check(css->id,
			rcu_read_lock_held() || atomic_read(&css->refcnt));

	if (cssid)
		return cssid->id;
	return 0;
}
EXPORT_SYMBOL_GPL(css_id);
4552
/*
 * css_depth - return the hierarchy depth recorded in @css's id, or 0
 * if it has no id.  Same safety requirements as css_id().
 */
unsigned short css_depth(struct cgroup_subsys_state *css)
{
	struct css_id *cssid;

	cssid = rcu_dereference_check(css->id,
			rcu_read_lock_held() || atomic_read(&css->refcnt));

	if (cssid)
		return cssid->depth;
	return 0;
}
EXPORT_SYMBOL_GPL(css_depth);
4565
4566
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579bool css_is_ancestor(struct cgroup_subsys_state *child,
4580 const struct cgroup_subsys_state *root)
4581{
4582 struct css_id *child_id;
4583 struct css_id *root_id;
4584 bool ret = true;
4585
4586 rcu_read_lock();
4587 child_id = rcu_dereference(child->id);
4588 root_id = rcu_dereference(root->id);
4589 if (!child_id
4590 || !root_id
4591 || (child_id->depth < root_id->depth)
4592 || (child_id->stack[root_id->depth] != root_id->id))
4593 ret = false;
4594 rcu_read_unlock();
4595 return ret;
4596}
4597
4598static void __free_css_id_cb(struct rcu_head *head)
4599{
4600 struct css_id *id;
4601
4602 id = container_of(head, struct css_id, rcu_head);
4603 kfree(id);
4604}
4605
/*
 * free_css_id - detach and free the css_id attached to @css.
 *
 * Clears both directions of the css <-> id linkage, removes the id
 * from the subsystem's idr under id_lock, and frees the css_id after
 * an RCU grace period (lockless readers may still hold it).
 */
void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
{
	struct css_id *id = css->id;
	/* When this is called before css_id initialization, id can be NULL */
	if (!id)
		return;

	BUG_ON(!ss->use_id);

	rcu_assign_pointer(id->css, NULL);
	rcu_assign_pointer(css->id, NULL);
	spin_lock(&ss->id_lock);
	idr_remove(&ss->idr, id->id);
	spin_unlock(&ss->id_lock);
	call_rcu(&id->rcu_head, __free_css_id_cb);
}
EXPORT_SYMBOL_GPL(free_css_id);
4623
4624
4625
4626
4627
4628
/*
 * Allocate a css_id with an unused numeric id (>= 1) at hierarchy
 * depth @depth.  The structure is sized to hold an ancestor-id stack
 * of depth+1 entries; ids above CSS_ID_MAX are rejected with -ENOSPC.
 * Returns the new css_id or an ERR_PTR.  Called under cgroup_mutex
 * (per the idr_pre_get/idr_get_new_above protocol with ss->id_lock).
 */
static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
{
	struct css_id *newid;
	int myid, error, size;

	BUG_ON(!ss->use_id);

	size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
	newid = kzalloc(size, GFP_KERNEL);
	if (!newid)
		return ERR_PTR(-ENOMEM);
	/* get id */
	if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) {
		error = -ENOMEM;
		goto err_out;
	}
	spin_lock(&ss->id_lock);
	/* Don't use 0. allocates an ID of 1-65535 */
	error = idr_get_new_above(&ss->idr, newid, 1, &myid);
	spin_unlock(&ss->id_lock);

	/* Returns error when there are no free spaces for new ID.*/
	if (error) {
		error = -ENOSPC;
		goto err_out;
	}
	if (myid > CSS_ID_MAX)
		goto remove_idr;

	newid->id = myid;
	newid->depth = depth;
	return newid;
remove_idr:
	error = -ENOSPC;
	spin_lock(&ss->id_lock);
	idr_remove(&ss->idr, myid);
	spin_unlock(&ss->id_lock);
err_out:
	kfree(newid);
	return ERR_PTR(error);

}
4671
/*
 * Initialise @ss's css-id machinery and allocate the id for the root
 * css (@rootcss) at depth 0.  Returns 0 or a negative errno.
 */
static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
					    struct cgroup_subsys_state *rootcss)
{
	struct css_id *newid;

	spin_lock_init(&ss->id_lock);
	idr_init(&ss->idr);

	newid = get_new_cssid(ss, 0);
	if (IS_ERR(newid))
		return PTR_ERR(newid);

	/* the root id is its own only ancestor */
	newid->stack[0] = newid->id;
	newid->css = rootcss;
	rootcss->id = newid;
	return 0;
}
4689
/*
 * Allocate a css_id for @child's css in @ss, one level deeper than
 * its parent, copying the parent's ancestor-id stack and appending
 * the child's own id.  Returns 0 or a negative errno.
 */
static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
			struct cgroup *child)
{
	int subsys_id, i, depth = 0;
	struct cgroup_subsys_state *parent_css, *child_css;
	struct css_id *child_id, *parent_id;

	subsys_id = ss->subsys_id;
	parent_css = parent->subsys[subsys_id];
	child_css = child->subsys[subsys_id];
	parent_id = parent_css->id;
	depth = parent_id->depth + 1;

	child_id = get_new_cssid(ss, depth);
	if (IS_ERR(child_id))
		return PTR_ERR(child_id);

	for (i = 0; i < depth; i++)
		child_id->stack[i] = parent_id->stack[i];
	child_id->stack[depth] = child_id->id;
	/*
	 * child_id->css pointer will be set after this cgroup is available
	 * see cgroup_populate_dir()
	 */
	rcu_assign_pointer(child_css->id, child_id);

	return 0;
}
4718
4719
4720
4721
4722
4723
4724
4725
4726
/*
 * css_lookup - look up a css by its numeric id within @ss
 *
 * Returns the css for @id, or NULL if the id is unused or its css has
 * not yet been published (id->css is set only once the cgroup is
 * fully visible).  Caller must be under rcu_read_lock() for the
 * returned pointer to remain valid.
 */
struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
{
	struct css_id *cssid = NULL;

	BUG_ON(!ss->use_id);
	cssid = idr_find(&ss->idr, id);

	if (unlikely(!cssid))
		return NULL;

	return rcu_dereference(cssid->css);
}
EXPORT_SYMBOL_GPL(css_lookup);
4740
4741
4742
4743
4744
4745
4746
4747
4748
4749
4750
/*
 * css_get_next - find the next live css with ID >= @id that lies inside
 * @root's subtree for subsystem @ss.  On success the matching ID is
 * stored in *@foundid.  Returns NULL when the scan runs off the end of
 * the idr.  Uses rcu_dereference(), so the caller is expected to be in an
 * RCU read-side section to keep the returned css from disappearing.
 */
struct cgroup_subsys_state *
css_get_next(struct cgroup_subsys *ss, int id,
	struct cgroup_subsys_state *root, int *foundid)
{
	struct cgroup_subsys_state *ret = NULL;
	struct css_id *tmp;
	int tmpid;
	int rootid = css_id(root);
	int depth = css_depth(root);

	/* rootid == 0 means @root has no valid css_id attached. */
	if (!rootid)
		return NULL;

	BUG_ON(!ss->use_id);

	tmpid = id;
	while (1) {
		/*
		 * Scan forward from tmpid; idr_get_next() returns the first
		 * populated entry at or after tmpid and updates tmpid to
		 * that entry's ID.  id_lock only guards the idr lookup
		 * itself, not the returned object.
		 */
		spin_lock(&ss->id_lock);
		tmp = idr_get_next(&ss->idr, &tmpid);
		spin_unlock(&ss->id_lock);

		if (!tmp)
			break;
		/*
		 * An entry is inside @root's subtree iff its ancestor stack
		 * records rootid at @root's depth.
		 */
		if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
			ret = rcu_dereference(tmp->css);
			if (ret) {
				*foundid = tmpid;
				break;
			}
		}
		/* css was dying or outside the subtree; keep scanning. */
		tmpid = tmpid + 1;
	}
	return ret;
}
4790
4791#ifdef CONFIG_CGROUP_DEBUG
4792static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
4793 struct cgroup *cont)
4794{
4795 struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
4796
4797 if (!css)
4798 return ERR_PTR(-ENOMEM);
4799
4800 return css;
4801}
4802
4803static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
4804{
4805 kfree(cont->subsys[debug_subsys_id]);
4806}
4807
4808static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
4809{
4810 return atomic_read(&cont->count);
4811}
4812
4813static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft)
4814{
4815 return cgroup_task_count(cont);
4816}
4817
4818static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
4819{
4820 return (u64)(unsigned long)current->cgroups;
4821}
4822
4823static u64 current_css_set_refcount_read(struct cgroup *cont,
4824 struct cftype *cft)
4825{
4826 u64 count;
4827
4828 rcu_read_lock();
4829 count = atomic_read(¤t->cgroups->refcount);
4830 rcu_read_unlock();
4831 return count;
4832}
4833
4834static int current_css_set_cg_links_read(struct cgroup *cont,
4835 struct cftype *cft,
4836 struct seq_file *seq)
4837{
4838 struct cg_cgroup_link *link;
4839 struct css_set *cg;
4840
4841 read_lock(&css_set_lock);
4842 rcu_read_lock();
4843 cg = rcu_dereference(current->cgroups);
4844 list_for_each_entry(link, &cg->cg_links, cg_link_list) {
4845 struct cgroup *c = link->cgrp;
4846 const char *name;
4847
4848 if (c->dentry)
4849 name = c->dentry->d_name.name;
4850 else
4851 name = "?";
4852 seq_printf(seq, "Root %d group %s\n",
4853 c->root->hierarchy_id, name);
4854 }
4855 rcu_read_unlock();
4856 read_unlock(&css_set_lock);
4857 return 0;
4858}
4859
4860#define MAX_TASKS_SHOWN_PER_CSS 25
4861static int cgroup_css_links_read(struct cgroup *cont,
4862 struct cftype *cft,
4863 struct seq_file *seq)
4864{
4865 struct cg_cgroup_link *link;
4866
4867 read_lock(&css_set_lock);
4868 list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
4869 struct css_set *cg = link->cg;
4870 struct task_struct *task<