1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include <linux/cgroup.h>
30#include <linux/ctype.h>
31#include <linux/errno.h>
32#include <linux/fs.h>
33#include <linux/kernel.h>
34#include <linux/list.h>
35#include <linux/mm.h>
36#include <linux/mutex.h>
37#include <linux/mount.h>
38#include <linux/pagemap.h>
39#include <linux/proc_fs.h>
40#include <linux/rcupdate.h>
41#include <linux/sched.h>
42#include <linux/backing-dev.h>
43#include <linux/seq_file.h>
44#include <linux/slab.h>
45#include <linux/magic.h>
46#include <linux/spinlock.h>
47#include <linux/string.h>
48#include <linux/sort.h>
49#include <linux/kmod.h>
50#include <linux/module.h>
51#include <linux/delayacct.h>
52#include <linux/cgroupstats.h>
53#include <linux/hash.h>
54#include <linux/namei.h>
55#include <linux/smp_lock.h>
56#include <linux/pid_namespace.h>
57#include <linux/idr.h>
58#include <linux/vmalloc.h>
59#include <linux/eventfd.h>
60#include <linux/poll.h>
61
62#include <asm/atomic.h>
63
64static DEFINE_MUTEX(cgroup_mutex);
65
66
67
68
69
70
71
72#define SUBSYS(_x) &_x ## _subsys,
73static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
74#include <linux/cgroup_subsys.h>
75};
76
77#define MAX_CGROUP_ROOT_NAMELEN 64
78
79
80
81
82
83
84struct cgroupfs_root {
85 struct super_block *sb;
86
87
88
89
90
91 unsigned long subsys_bits;
92
93
94 int hierarchy_id;
95
96
97 unsigned long actual_subsys_bits;
98
99
100 struct list_head subsys_list;
101
102
103 struct cgroup top_cgroup;
104
105
106 int number_of_cgroups;
107
108
109 struct list_head root_list;
110
111
112 unsigned long flags;
113
114
115 char release_agent_path[PATH_MAX];
116
117
118 char name[MAX_CGROUP_ROOT_NAMELEN];
119};
120
121
122
123
124
125
126static struct cgroupfs_root rootnode;
127
128
129
130
131
132#define CSS_ID_MAX (65535)
133struct css_id {
134
135
136
137
138
139
140
141 struct cgroup_subsys_state *css;
142
143
144
145 unsigned short id;
146
147
148
149 unsigned short depth;
150
151
152
153 struct rcu_head rcu_head;
154
155
156
157 unsigned short stack[0];
158};
159
160
161
162
163struct cgroup_event {
164
165
166
167 struct cgroup *cgrp;
168
169
170
171 struct cftype *cft;
172
173
174
175 struct eventfd_ctx *eventfd;
176
177
178
179 struct list_head list;
180
181
182
183
184 poll_table pt;
185 wait_queue_head_t *wqh;
186 wait_queue_t wait;
187 struct work_struct remove;
188};
189
190
191
192static LIST_HEAD(roots);
193static int root_count;
194
195static DEFINE_IDA(hierarchy_ida);
196static int next_hierarchy_id;
197static DEFINE_SPINLOCK(hierarchy_id_lock);
198
199
200#define dummytop (&rootnode.top_cgroup)
201
202
203
204
205
206
207static int need_forkexit_callback __read_mostly;
208
209#ifdef CONFIG_PROVE_LOCKING
210int cgroup_lock_is_held(void)
211{
212 return lockdep_is_held(&cgroup_mutex);
213}
214#else
215int cgroup_lock_is_held(void)
216{
217 return mutex_is_locked(&cgroup_mutex);
218}
219#endif
220
221EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
222
223
224inline int cgroup_is_removed(const struct cgroup *cgrp)
225{
226 return test_bit(CGRP_REMOVED, &cgrp->flags);
227}
228
229
/* Bit numbers used in cgroupfs_root.flags. */
enum {
	ROOT_NOPREFIX = 0,	/* set by the "noprefix" mount option */
};
233
234static int cgroup_is_releasable(const struct cgroup *cgrp)
235{
236 const int bits =
237 (1 << CGRP_RELEASABLE) |
238 (1 << CGRP_NOTIFY_ON_RELEASE);
239 return (cgrp->flags & bits) == bits;
240}
241
242static int notify_on_release(const struct cgroup *cgrp)
243{
244 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
245}
246
247
248
249
250
/*
 * Iterate @_ss over every subsystem on @_root's subsys_list (linked through
 * cgroup_subsys.sibling).  @_root is parenthesized so that any pointer
 * expression, not just a plain identifier or member chain, can be passed.
 */
#define for_each_subsys(_root, _ss) \
	list_for_each_entry(_ss, &(_root)->subsys_list, sibling)

/* Iterate @_root over every hierarchy root on the global "roots" list. */
#define for_each_active_root(_root) \
	list_for_each_entry(_root, &roots, root_list)
257
258
259
/*
 * Release-agent machinery: a global list with its spinlock, the work item
 * that runs cgroup_release_agent(), and the check_for_release() hook that
 * is called when a cgroup's css_set count drops to zero.
 */
static LIST_HEAD(release_list);
static DEFINE_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
265
266
267struct cg_cgroup_link {
268
269
270
271
272 struct list_head cgrp_link_list;
273 struct cgroup *cgrp;
274
275
276
277
278 struct list_head cg_link_list;
279 struct css_set *cg;
280};
281
282
283
284
285
286
287
288
289static struct css_set init_css_set;
290static struct cg_cgroup_link init_css_set_link;
291
292static int cgroup_init_idr(struct cgroup_subsys *ss,
293 struct cgroup_subsys_state *css);
294
295
296
297
298static DEFINE_RWLOCK(css_set_lock);
299static int css_set_count;
300
301
302
303
304
305
306#define CSS_SET_HASH_BITS 7
307#define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS)
308static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
309
310static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
311{
312 int i;
313 int index;
314 unsigned long tmp = 0UL;
315
316 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
317 tmp += (unsigned long)css[i];
318 tmp = (tmp >> 16) ^ tmp;
319
320 index = hash_long(tmp, CSS_SET_HASH_BITS);
321
322 return &css_set_table[index];
323}
324
325static void free_css_set_rcu(struct rcu_head *obj)
326{
327 struct css_set *cg = container_of(obj, struct css_set, rcu_head);
328 kfree(cg);
329}
330
331
332
333
334
335static int use_task_css_set_links __read_mostly;
336
337static void __put_css_set(struct css_set *cg, int taskexit)
338{
339 struct cg_cgroup_link *link;
340 struct cg_cgroup_link *saved_link;
341
342
343
344
345
346 if (atomic_add_unless(&cg->refcount, -1, 1))
347 return;
348 write_lock(&css_set_lock);
349 if (!atomic_dec_and_test(&cg->refcount)) {
350 write_unlock(&css_set_lock);
351 return;
352 }
353
354
355 hlist_del(&cg->hlist);
356 css_set_count--;
357
358 list_for_each_entry_safe(link, saved_link, &cg->cg_links,
359 cg_link_list) {
360 struct cgroup *cgrp = link->cgrp;
361 list_del(&link->cg_link_list);
362 list_del(&link->cgrp_link_list);
363 if (atomic_dec_and_test(&cgrp->count) &&
364 notify_on_release(cgrp)) {
365 if (taskexit)
366 set_bit(CGRP_RELEASABLE, &cgrp->flags);
367 check_for_release(cgrp);
368 }
369
370 kfree(link);
371 }
372
373 write_unlock(&css_set_lock);
374 call_rcu(&cg->rcu_head, free_css_set_rcu);
375}
376
377
378
379
380static inline void get_css_set(struct css_set *cg)
381{
382 atomic_inc(&cg->refcount);
383}
384
/* Drop a reference on @cg (the non-task-exit variant of __put_css_set()). */
static inline void put_css_set(struct css_set *cg)
{
	__put_css_set(cg, 0);
}
389
/*
 * Drop a reference on @cg with taskexit set, so that cgroups emptied by
 * this put may be flagged CGRP_RELEASABLE (see __put_css_set()).
 */
static inline void put_css_set_taskexit(struct css_set *cg)
{
	__put_css_set(cg, 1);
}
394
395
396
397
398
399
400
401
402
403
404
405static bool compare_css_sets(struct css_set *cg,
406 struct css_set *old_cg,
407 struct cgroup *new_cgrp,
408 struct cgroup_subsys_state *template[])
409{
410 struct list_head *l1, *l2;
411
412 if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
413
414 return false;
415 }
416
417
418
419
420
421
422
423
424
425
426 l1 = &cg->cg_links;
427 l2 = &old_cg->cg_links;
428 while (1) {
429 struct cg_cgroup_link *cgl1, *cgl2;
430 struct cgroup *cg1, *cg2;
431
432 l1 = l1->next;
433 l2 = l2->next;
434
435 if (l1 == &cg->cg_links) {
436 BUG_ON(l2 != &old_cg->cg_links);
437 break;
438 } else {
439 BUG_ON(l2 == &old_cg->cg_links);
440 }
441
442 cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
443 cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
444 cg1 = cgl1->cgrp;
445 cg2 = cgl2->cgrp;
446
447 BUG_ON(cg1->root != cg2->root);
448
449
450
451
452
453
454
455
456 if (cg1->root == new_cgrp->root) {
457 if (cg1 != new_cgrp)
458 return false;
459 } else {
460 if (cg1 != cg2)
461 return false;
462 }
463 }
464 return true;
465}
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480static struct css_set *find_existing_css_set(
481 struct css_set *oldcg,
482 struct cgroup *cgrp,
483 struct cgroup_subsys_state *template[])
484{
485 int i;
486 struct cgroupfs_root *root = cgrp->root;
487 struct hlist_head *hhead;
488 struct hlist_node *node;
489 struct css_set *cg;
490
491
492
493
494
495
496 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
497 if (root->subsys_bits & (1UL << i)) {
498
499
500
501 template[i] = cgrp->subsys[i];
502 } else {
503
504
505 template[i] = oldcg->subsys[i];
506 }
507 }
508
509 hhead = css_set_hash(template);
510 hlist_for_each_entry(cg, node, hhead, hlist) {
511 if (!compare_css_sets(cg, oldcg, cgrp, template))
512 continue;
513
514
515 return cg;
516 }
517
518
519 return NULL;
520}
521
522static void free_cg_links(struct list_head *tmp)
523{
524 struct cg_cgroup_link *link;
525 struct cg_cgroup_link *saved_link;
526
527 list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
528 list_del(&link->cgrp_link_list);
529 kfree(link);
530 }
531}
532
533
534
535
536
537
538static int allocate_cg_links(int count, struct list_head *tmp)
539{
540 struct cg_cgroup_link *link;
541 int i;
542 INIT_LIST_HEAD(tmp);
543 for (i = 0; i < count; i++) {
544 link = kmalloc(sizeof(*link), GFP_KERNEL);
545 if (!link) {
546 free_cg_links(tmp);
547 return -ENOMEM;
548 }
549 list_add(&link->cgrp_link_list, tmp);
550 }
551 return 0;
552}
553
554
555
556
557
558
559
560static void link_css_set(struct list_head *tmp_cg_links,
561 struct css_set *cg, struct cgroup *cgrp)
562{
563 struct cg_cgroup_link *link;
564
565 BUG_ON(list_empty(tmp_cg_links));
566 link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
567 cgrp_link_list);
568 link->cg = cg;
569 link->cgrp = cgrp;
570 atomic_inc(&cgrp->count);
571 list_move(&link->cgrp_link_list, &cgrp->css_sets);
572
573
574
575
576 list_add_tail(&link->cg_link_list, &cg->cg_links);
577}
578
579
580
581
582
583
584
585
586static struct css_set *find_css_set(
587 struct css_set *oldcg, struct cgroup *cgrp)
588{
589 struct css_set *res;
590 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
591
592 struct list_head tmp_cg_links;
593
594 struct hlist_head *hhead;
595 struct cg_cgroup_link *link;
596
597
598
599 read_lock(&css_set_lock);
600 res = find_existing_css_set(oldcg, cgrp, template);
601 if (res)
602 get_css_set(res);
603 read_unlock(&css_set_lock);
604
605 if (res)
606 return res;
607
608 res = kmalloc(sizeof(*res), GFP_KERNEL);
609 if (!res)
610 return NULL;
611
612
613 if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
614 kfree(res);
615 return NULL;
616 }
617
618 atomic_set(&res->refcount, 1);
619 INIT_LIST_HEAD(&res->cg_links);
620 INIT_LIST_HEAD(&res->tasks);
621 INIT_HLIST_NODE(&res->hlist);
622
623
624
625 memcpy(res->subsys, template, sizeof(res->subsys));
626
627 write_lock(&css_set_lock);
628
629 list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
630 struct cgroup *c = link->cgrp;
631 if (c->root == cgrp->root)
632 c = cgrp;
633 link_css_set(&tmp_cg_links, res, c);
634 }
635
636 BUG_ON(!list_empty(&tmp_cg_links));
637
638 css_set_count++;
639
640
641 hhead = css_set_hash(res->subsys);
642 hlist_add_head(&res->hlist, hhead);
643
644 write_unlock(&css_set_lock);
645
646 return res;
647}
648
649
650
651
652
653static struct cgroup *task_cgroup_from_root(struct task_struct *task,
654 struct cgroupfs_root *root)
655{
656 struct css_set *css;
657 struct cgroup *res = NULL;
658
659 BUG_ON(!mutex_is_locked(&cgroup_mutex));
660 read_lock(&css_set_lock);
661
662
663
664
665
666 css = task->cgroups;
667 if (css == &init_css_set) {
668 res = &root->top_cgroup;
669 } else {
670 struct cg_cgroup_link *link;
671 list_for_each_entry(link, &css->cg_links, cg_link_list) {
672 struct cgroup *c = link->cgrp;
673 if (c->root == root) {
674 res = c;
675 break;
676 }
677 }
678 }
679 read_unlock(&css_set_lock);
680 BUG_ON(!res);
681 return res;
682}
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738void cgroup_lock(void)
739{
740 mutex_lock(&cgroup_mutex);
741}
742EXPORT_SYMBOL_GPL(cgroup_lock);
743
744
745
746
747
748
749void cgroup_unlock(void)
750{
751 mutex_unlock(&cgroup_mutex);
752}
753EXPORT_SYMBOL_GPL(cgroup_unlock);
754
755
756
757
758
759
760
761
762static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
763static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
764static int cgroup_populate_dir(struct cgroup *cgrp);
765static const struct inode_operations cgroup_dir_inode_operations;
766static const struct file_operations proc_cgroupstats_operations;
767
768static struct backing_dev_info cgroup_backing_dev_info = {
769 .name = "cgroup",
770 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
771};
772
773static int alloc_css_id(struct cgroup_subsys *ss,
774 struct cgroup *parent, struct cgroup *child);
775
776static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
777{
778 struct inode *inode = new_inode(sb);
779
780 if (inode) {
781 inode->i_mode = mode;
782 inode->i_uid = current_fsuid();
783 inode->i_gid = current_fsgid();
784 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
785 inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
786 }
787 return inode;
788}
789
790
791
792
793
794static int cgroup_call_pre_destroy(struct cgroup *cgrp)
795{
796 struct cgroup_subsys *ss;
797 int ret = 0;
798
799 for_each_subsys(cgrp->root, ss)
800 if (ss->pre_destroy) {
801 ret = ss->pre_destroy(ss, cgrp);
802 if (ret)
803 break;
804 }
805
806 return ret;
807}
808
809static void free_cgroup_rcu(struct rcu_head *obj)
810{
811 struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head);
812
813 kfree(cgrp);
814}
815
816static void cgroup_diput(struct dentry *dentry, struct inode *inode)
817{
818
819 if (S_ISDIR(inode->i_mode)) {
820 struct cgroup *cgrp = dentry->d_fsdata;
821 struct cgroup_subsys *ss;
822 BUG_ON(!(cgroup_is_removed(cgrp)));
823
824
825
826
827
828
829 synchronize_rcu();
830
831 mutex_lock(&cgroup_mutex);
832
833
834
835 for_each_subsys(cgrp->root, ss)
836 ss->destroy(ss, cgrp);
837
838 cgrp->root->number_of_cgroups--;
839 mutex_unlock(&cgroup_mutex);
840
841
842
843
844
845 deactivate_super(cgrp->root->sb);
846
847
848
849
850
851 BUG_ON(!list_empty(&cgrp->pidlists));
852
853 call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
854 }
855 iput(inode);
856}
857
858static void remove_dir(struct dentry *d)
859{
860 struct dentry *parent = dget(d->d_parent);
861
862 d_delete(d);
863 simple_rmdir(parent->d_inode, d);
864 dput(parent);
865}
866
867static void cgroup_clear_directory(struct dentry *dentry)
868{
869 struct list_head *node;
870
871 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
872 spin_lock(&dcache_lock);
873 node = dentry->d_subdirs.next;
874 while (node != &dentry->d_subdirs) {
875 struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
876 list_del_init(node);
877 if (d->d_inode) {
878
879
880 BUG_ON(d->d_inode->i_mode & S_IFDIR);
881 d = dget_locked(d);
882 spin_unlock(&dcache_lock);
883 d_delete(d);
884 simple_unlink(dentry->d_inode, d);
885 dput(d);
886 spin_lock(&dcache_lock);
887 }
888 node = dentry->d_subdirs.next;
889 }
890 spin_unlock(&dcache_lock);
891}
892
893
894
895
896static void cgroup_d_remove_dir(struct dentry *dentry)
897{
898 cgroup_clear_directory(dentry);
899
900 spin_lock(&dcache_lock);
901 list_del_init(&dentry->d_u.d_child);
902 spin_unlock(&dcache_lock);
903 remove_dir(dentry);
904}
905
906
907
908
909
910
911
912
913
/*
 * Wait queue woken by cgroup_wakeup_rmdir_waiter() when a cgroup's
 * CGRP_WAIT_ON_RMDIR flag was set.
 */
DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
915
916static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
917{
918 if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
919 wake_up_all(&cgroup_rmdir_waitq);
920}
921
/*
 * Take a reference on @css.  Paired with
 * cgroup_release_and_wakeup_rmdir(), which drops it again.
 */
void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
{
	css_get(css);
}
926
927void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
928{
929 cgroup_wakeup_rmdir_waiter(css->cgroup);
930 css_put(css);
931}
932
933
934
935
936
937
938static int rebind_subsystems(struct cgroupfs_root *root,
939 unsigned long final_bits)
940{
941 unsigned long added_bits, removed_bits;
942 struct cgroup *cgrp = &root->top_cgroup;
943 int i;
944
945 BUG_ON(!mutex_is_locked(&cgroup_mutex));
946
947 removed_bits = root->actual_subsys_bits & ~final_bits;
948 added_bits = final_bits & ~root->actual_subsys_bits;
949
950 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
951 unsigned long bit = 1UL << i;
952 struct cgroup_subsys *ss = subsys[i];
953 if (!(bit & added_bits))
954 continue;
955
956
957
958
959
960 BUG_ON(ss == NULL);
961 if (ss->root != &rootnode) {
962
963 return -EBUSY;
964 }
965 }
966
967
968
969
970
971 if (root->number_of_cgroups > 1)
972 return -EBUSY;
973
974
975 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
976 struct cgroup_subsys *ss = subsys[i];
977 unsigned long bit = 1UL << i;
978 if (bit & added_bits) {
979
980 BUG_ON(ss == NULL);
981 BUG_ON(cgrp->subsys[i]);
982 BUG_ON(!dummytop->subsys[i]);
983 BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
984 mutex_lock(&ss->hierarchy_mutex);
985 cgrp->subsys[i] = dummytop->subsys[i];
986 cgrp->subsys[i]->cgroup = cgrp;
987 list_move(&ss->sibling, &root->subsys_list);
988 ss->root = root;
989 if (ss->bind)
990 ss->bind(ss, cgrp);
991 mutex_unlock(&ss->hierarchy_mutex);
992
993 } else if (bit & removed_bits) {
994
995 BUG_ON(ss == NULL);
996 BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
997 BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
998 mutex_lock(&ss->hierarchy_mutex);
999 if (ss->bind)
1000 ss->bind(ss, dummytop);
1001 dummytop->subsys[i]->cgroup = dummytop;
1002 cgrp->subsys[i] = NULL;
1003 subsys[i]->root = &rootnode;
1004 list_move(&ss->sibling, &rootnode.subsys_list);
1005 mutex_unlock(&ss->hierarchy_mutex);
1006
1007 module_put(ss->module);
1008 } else if (bit & final_bits) {
1009
1010 BUG_ON(ss == NULL);
1011 BUG_ON(!cgrp->subsys[i]);
1012
1013
1014
1015
1016 module_put(ss->module);
1017#ifdef CONFIG_MODULE_UNLOAD
1018 BUG_ON(ss->module && !module_refcount(ss->module));
1019#endif
1020 } else {
1021
1022 BUG_ON(cgrp->subsys[i]);
1023 }
1024 }
1025 root->subsys_bits = root->actual_subsys_bits = final_bits;
1026 synchronize_rcu();
1027
1028 return 0;
1029}
1030
1031static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
1032{
1033 struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;
1034 struct cgroup_subsys *ss;
1035
1036 mutex_lock(&cgroup_mutex);
1037 for_each_subsys(root, ss)
1038 seq_printf(seq, ",%s", ss->name);
1039 if (test_bit(ROOT_NOPREFIX, &root->flags))
1040 seq_puts(seq, ",noprefix");
1041 if (strlen(root->release_agent_path))
1042 seq_printf(seq, ",release_agent=%s", root->release_agent_path);
1043 if (strlen(root->name))
1044 seq_printf(seq, ",name=%s", root->name);
1045 mutex_unlock(&cgroup_mutex);
1046 return 0;
1047}
1048
1049struct cgroup_sb_opts {
1050 unsigned long subsys_bits;
1051 unsigned long flags;
1052 char *release_agent;
1053 char *name;
1054
1055 bool none;
1056
1057 struct cgroupfs_root *new_root;
1058
1059};
1060
1061
1062
1063
1064
1065
1066
1067static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1068{
1069 char *token, *o = data ?: "all";
1070 unsigned long mask = (unsigned long)-1;
1071 int i;
1072 bool module_pin_failed = false;
1073
1074 BUG_ON(!mutex_is_locked(&cgroup_mutex));
1075
1076#ifdef CONFIG_CPUSETS
1077 mask = ~(1UL << cpuset_subsys_id);
1078#endif
1079
1080 memset(opts, 0, sizeof(*opts));
1081
1082 while ((token = strsep(&o, ",")) != NULL) {
1083 if (!*token)
1084 return -EINVAL;
1085 if (!strcmp(token, "all")) {
1086
1087 opts->subsys_bits = 0;
1088 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1089 struct cgroup_subsys *ss = subsys[i];
1090 if (ss == NULL)
1091 continue;
1092 if (!ss->disabled)
1093 opts->subsys_bits |= 1ul << i;
1094 }
1095 } else if (!strcmp(token, "none")) {
1096
1097 opts->none = true;
1098 } else if (!strcmp(token, "noprefix")) {
1099 set_bit(ROOT_NOPREFIX, &opts->flags);
1100 } else if (!strncmp(token, "release_agent=", 14)) {
1101
1102 if (opts->release_agent)
1103 return -EINVAL;
1104 opts->release_agent =
1105 kstrndup(token + 14, PATH_MAX, GFP_KERNEL);
1106 if (!opts->release_agent)
1107 return -ENOMEM;
1108 } else if (!strncmp(token, "name=", 5)) {
1109 const char *name = token + 5;
1110
1111 if (!strlen(name))
1112 return -EINVAL;
1113
1114 for (i = 0; i < strlen(name); i++) {
1115 char c = name[i];
1116 if (isalnum(c))
1117 continue;
1118 if ((c == '.') || (c == '-') || (c == '_'))
1119 continue;
1120 return -EINVAL;
1121 }
1122
1123 if (opts->name)
1124 return -EINVAL;
1125 opts->name = kstrndup(name,
1126 MAX_CGROUP_ROOT_NAMELEN,
1127 GFP_KERNEL);
1128 if (!opts->name)
1129 return -ENOMEM;
1130 } else {
1131 struct cgroup_subsys *ss;
1132 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1133 ss = subsys[i];
1134 if (ss == NULL)
1135 continue;
1136 if (!strcmp(token, ss->name)) {
1137 if (!ss->disabled)
1138 set_bit(i, &opts->subsys_bits);
1139 break;
1140 }
1141 }
1142 if (i == CGROUP_SUBSYS_COUNT)
1143 return -ENOENT;
1144 }
1145 }
1146
1147
1148
1149
1150
1151
1152
1153
1154 if (test_bit(ROOT_NOPREFIX, &opts->flags) &&
1155 (opts->subsys_bits & mask))
1156 return -EINVAL;
1157
1158
1159
1160 if (opts->subsys_bits && opts->none)
1161 return -EINVAL;
1162
1163
1164
1165
1166
1167 if (!opts->subsys_bits && !opts->name)
1168 return -EINVAL;
1169
1170
1171
1172
1173
1174
1175
1176 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
1177 unsigned long bit = 1UL << i;
1178
1179 if (!(bit & opts->subsys_bits))
1180 continue;
1181 if (!try_module_get(subsys[i]->module)) {
1182 module_pin_failed = true;
1183 break;
1184 }
1185 }
1186 if (module_pin_failed) {
1187
1188
1189
1190
1191
1192 for (i--; i >= CGROUP_BUILTIN_SUBSYS_COUNT; i--) {
1193
1194 unsigned long bit = 1UL << i;
1195
1196 if (!(bit & opts->subsys_bits))
1197 continue;
1198 module_put(subsys[i]->module);
1199 }
1200 return -ENOENT;
1201 }
1202
1203 return 0;
1204}
1205
1206static void drop_parsed_module_refcounts(unsigned long subsys_bits)
1207{
1208 int i;
1209 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
1210 unsigned long bit = 1UL << i;
1211
1212 if (!(bit & subsys_bits))
1213 continue;
1214 module_put(subsys[i]->module);
1215 }
1216}
1217
1218static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1219{
1220 int ret = 0;
1221 struct cgroupfs_root *root = sb->s_fs_info;
1222 struct cgroup *cgrp = &root->top_cgroup;
1223 struct cgroup_sb_opts opts;
1224
1225 lock_kernel();
1226 mutex_lock(&cgrp->dentry->d_inode->i_mutex);
1227 mutex_lock(&cgroup_mutex);
1228
1229
1230 ret = parse_cgroupfs_options(data, &opts);
1231 if (ret)
1232 goto out_unlock;
1233
1234
1235 if (opts.flags != root->flags ||
1236 (opts.name && strcmp(opts.name, root->name))) {
1237 ret = -EINVAL;
1238 drop_parsed_module_refcounts(opts.subsys_bits);
1239 goto out_unlock;
1240 }
1241
1242 ret = rebind_subsystems(root, opts.subsys_bits);
1243 if (ret) {
1244 drop_parsed_module_refcounts(opts.subsys_bits);
1245 goto out_unlock;
1246 }
1247
1248
1249 cgroup_populate_dir(cgrp);
1250
1251 if (opts.release_agent)
1252 strcpy(root->release_agent_path, opts.release_agent);
1253 out_unlock:
1254 kfree(opts.release_agent);
1255 kfree(opts.name);
1256 mutex_unlock(&cgroup_mutex);
1257 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
1258 unlock_kernel();
1259 return ret;
1260}
1261
1262static const struct super_operations cgroup_ops = {
1263 .statfs = simple_statfs,
1264 .drop_inode = generic_delete_inode,
1265 .show_options = cgroup_show_options,
1266 .remount_fs = cgroup_remount,
1267};
1268
1269static void init_cgroup_housekeeping(struct cgroup *cgrp)
1270{
1271 INIT_LIST_HEAD(&cgrp->sibling);
1272 INIT_LIST_HEAD(&cgrp->children);
1273 INIT_LIST_HEAD(&cgrp->css_sets);
1274 INIT_LIST_HEAD(&cgrp->release_list);
1275 INIT_LIST_HEAD(&cgrp->pidlists);
1276 mutex_init(&cgrp->pidlist_mutex);
1277 INIT_LIST_HEAD(&cgrp->event_list);
1278 spin_lock_init(&cgrp->event_list_lock);
1279}
1280
1281static void init_cgroup_root(struct cgroupfs_root *root)
1282{
1283 struct cgroup *cgrp = &root->top_cgroup;
1284 INIT_LIST_HEAD(&root->subsys_list);
1285 INIT_LIST_HEAD(&root->root_list);
1286 root->number_of_cgroups = 1;
1287 cgrp->root = root;
1288 cgrp->top_cgroup = cgrp;
1289 init_cgroup_housekeeping(cgrp);
1290}
1291
1292static bool init_root_id(struct cgroupfs_root *root)
1293{
1294 int ret = 0;
1295
1296 do {
1297 if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL))
1298 return false;
1299 spin_lock(&hierarchy_id_lock);
1300
1301 ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
1302 &root->hierarchy_id);
1303 if (ret == -ENOSPC)
1304
1305 ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id);
1306 if (!ret) {
1307 next_hierarchy_id = root->hierarchy_id + 1;
1308 } else if (ret != -EAGAIN) {
1309
1310 BUG_ON(ret);
1311 }
1312 spin_unlock(&hierarchy_id_lock);
1313 } while (ret);
1314 return true;
1315}
1316
1317static int cgroup_test_super(struct super_block *sb, void *data)
1318{
1319 struct cgroup_sb_opts *opts = data;
1320 struct cgroupfs_root *root = sb->s_fs_info;
1321
1322
1323 if (opts->name && strcmp(opts->name, root->name))
1324 return 0;
1325
1326
1327
1328
1329
1330 if ((opts->subsys_bits || opts->none)
1331 && (opts->subsys_bits != root->subsys_bits))
1332 return 0;
1333
1334 return 1;
1335}
1336
1337static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
1338{
1339 struct cgroupfs_root *root;
1340
1341 if (!opts->subsys_bits && !opts->none)
1342 return NULL;
1343
1344 root = kzalloc(sizeof(*root), GFP_KERNEL);
1345 if (!root)
1346 return ERR_PTR(-ENOMEM);
1347
1348 if (!init_root_id(root)) {
1349 kfree(root);
1350 return ERR_PTR(-ENOMEM);
1351 }
1352 init_cgroup_root(root);
1353
1354 root->subsys_bits = opts->subsys_bits;
1355 root->flags = opts->flags;
1356 if (opts->release_agent)
1357 strcpy(root->release_agent_path, opts->release_agent);
1358 if (opts->name)
1359 strcpy(root->name, opts->name);
1360 return root;
1361}
1362
1363static void cgroup_drop_root(struct cgroupfs_root *root)
1364{
1365 if (!root)
1366 return;
1367
1368 BUG_ON(!root->hierarchy_id);
1369 spin_lock(&hierarchy_id_lock);
1370 ida_remove(&hierarchy_ida, root->hierarchy_id);
1371 spin_unlock(&hierarchy_id_lock);
1372 kfree(root);
1373}
1374
1375static int cgroup_set_super(struct super_block *sb, void *data)
1376{
1377 int ret;
1378 struct cgroup_sb_opts *opts = data;
1379
1380
1381 if (!opts->new_root)
1382 return -EINVAL;
1383
1384 BUG_ON(!opts->subsys_bits && !opts->none);
1385
1386 ret = set_anon_super(sb, NULL);
1387 if (ret)
1388 return ret;
1389
1390 sb->s_fs_info = opts->new_root;
1391 opts->new_root->sb = sb;
1392
1393 sb->s_blocksize = PAGE_CACHE_SIZE;
1394 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1395 sb->s_magic = CGROUP_SUPER_MAGIC;
1396 sb->s_op = &cgroup_ops;
1397
1398 return 0;
1399}
1400
1401static int cgroup_get_rootdir(struct super_block *sb)
1402{
1403 struct inode *inode =
1404 cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
1405 struct dentry *dentry;
1406
1407 if (!inode)
1408 return -ENOMEM;
1409
1410 inode->i_fop = &simple_dir_operations;
1411 inode->i_op = &cgroup_dir_inode_operations;
1412
1413 inc_nlink(inode);
1414 dentry = d_alloc_root(inode);
1415 if (!dentry) {
1416 iput(inode);
1417 return -ENOMEM;
1418 }
1419 sb->s_root = dentry;
1420 return 0;
1421}
1422
1423static int cgroup_get_sb(struct file_system_type *fs_type,
1424 int flags, const char *unused_dev_name,
1425 void *data, struct vfsmount *mnt)
1426{
1427 struct cgroup_sb_opts opts;
1428 struct cgroupfs_root *root;
1429 int ret = 0;
1430 struct super_block *sb;
1431 struct cgroupfs_root *new_root;
1432
1433
1434 mutex_lock(&cgroup_mutex);
1435 ret = parse_cgroupfs_options(data, &opts);
1436 mutex_unlock(&cgroup_mutex);
1437 if (ret)
1438 goto out_err;
1439
1440
1441
1442
1443
1444 new_root = cgroup_root_from_opts(&opts);
1445 if (IS_ERR(new_root)) {
1446 ret = PTR_ERR(new_root);
1447 goto drop_modules;
1448 }
1449 opts.new_root = new_root;
1450
1451
1452 sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
1453 if (IS_ERR(sb)) {
1454 ret = PTR_ERR(sb);
1455 cgroup_drop_root(opts.new_root);
1456 goto drop_modules;
1457 }
1458
1459 root = sb->s_fs_info;
1460 BUG_ON(!root);
1461 if (root == opts.new_root) {
1462
1463 struct list_head tmp_cg_links;
1464 struct cgroup *root_cgrp = &root->top_cgroup;
1465 struct inode *inode;
1466 struct cgroupfs_root *existing_root;
1467 int i;
1468
1469 BUG_ON(sb->s_root != NULL);
1470
1471 ret = cgroup_get_rootdir(sb);
1472 if (ret)
1473 goto drop_new_super;
1474 inode = sb->s_root->d_inode;
1475
1476 mutex_lock(&inode->i_mutex);
1477 mutex_lock(&cgroup_mutex);
1478
1479 if (strlen(root->name)) {
1480
1481 for_each_active_root(existing_root) {
1482 if (!strcmp(existing_root->name, root->name)) {
1483 ret = -EBUSY;
1484 mutex_unlock(&cgroup_mutex);
1485 mutex_unlock(&inode->i_mutex);
1486 goto drop_new_super;
1487 }
1488 }
1489 }
1490
1491
1492
1493
1494
1495
1496
1497
1498 ret = allocate_cg_links(css_set_count, &tmp_cg_links);
1499 if (ret) {
1500 mutex_unlock(&cgroup_mutex);
1501 mutex_unlock(&inode->i_mutex);
1502 goto drop_new_super;
1503 }
1504
1505 ret = rebind_subsystems(root, root->subsys_bits);
1506 if (ret == -EBUSY) {
1507 mutex_unlock(&cgroup_mutex);
1508 mutex_unlock(&inode->i_mutex);
1509 free_cg_links(&tmp_cg_links);
1510 goto drop_new_super;
1511 }
1512
1513
1514
1515
1516
1517
1518
1519 BUG_ON(ret);
1520
1521 list_add(&root->root_list, &roots);
1522 root_count++;
1523
1524 sb->s_root->d_fsdata = root_cgrp;
1525 root->top_cgroup.dentry = sb->s_root;
1526
1527
1528
1529 write_lock(&css_set_lock);
1530 for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
1531 struct hlist_head *hhead = &css_set_table[i];
1532 struct hlist_node *node;
1533 struct css_set *cg;
1534
1535 hlist_for_each_entry(cg, node, hhead, hlist)
1536 link_css_set(&tmp_cg_links, cg, root_cgrp);
1537 }
1538 write_unlock(&css_set_lock);
1539
1540 free_cg_links(&tmp_cg_links);
1541
1542 BUG_ON(!list_empty(&root_cgrp->sibling));
1543 BUG_ON(!list_empty(&root_cgrp->children));
1544 BUG_ON(root->number_of_cgroups != 1);
1545
1546 cgroup_populate_dir(root_cgrp);
1547 mutex_unlock(&cgroup_mutex);
1548 mutex_unlock(&inode->i_mutex);
1549 } else {
1550
1551
1552
1553
1554 cgroup_drop_root(opts.new_root);
1555
1556 drop_parsed_module_refcounts(opts.subsys_bits);
1557 }
1558
1559 simple_set_mnt(mnt, sb);
1560 kfree(opts.release_agent);
1561 kfree(opts.name);
1562 return 0;
1563
1564 drop_new_super:
1565 deactivate_locked_super(sb);
1566 drop_modules:
1567 drop_parsed_module_refcounts(opts.subsys_bits);
1568 out_err:
1569 kfree(opts.release_agent);
1570 kfree(opts.name);
1571
1572 return ret;
1573}
1574
1575static void cgroup_kill_sb(struct super_block *sb) {
1576 struct cgroupfs_root *root = sb->s_fs_info;
1577 struct cgroup *cgrp = &root->top_cgroup;
1578 int ret;
1579 struct cg_cgroup_link *link;
1580 struct cg_cgroup_link *saved_link;
1581
1582 BUG_ON(!root);
1583
1584 BUG_ON(root->number_of_cgroups != 1);
1585 BUG_ON(!list_empty(&cgrp->children));
1586 BUG_ON(!list_empty(&cgrp->sibling));
1587
1588 mutex_lock(&cgroup_mutex);
1589
1590
1591 ret = rebind_subsystems(root, 0);
1592
1593 BUG_ON(ret);
1594
1595
1596
1597
1598
1599 write_lock(&css_set_lock);
1600
1601 list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
1602 cgrp_link_list) {
1603 list_del(&link->cg_link_list);
1604 list_del(&link->cgrp_link_list);
1605 kfree(link);
1606 }
1607 write_unlock(&css_set_lock);
1608
1609 if (!list_empty(&root->root_list)) {
1610 list_del(&root->root_list);
1611 root_count--;
1612 }
1613
1614 mutex_unlock(&cgroup_mutex);
1615
1616 kill_litter_super(sb);
1617 cgroup_drop_root(root);
1618}
1619
1620static struct file_system_type cgroup_fs_type = {
1621 .name = "cgroup",
1622 .get_sb = cgroup_get_sb,
1623 .kill_sb = cgroup_kill_sb,
1624};
1625
1626static inline struct cgroup *__d_cgrp(struct dentry *dentry)
1627{
1628 return dentry->d_fsdata;
1629}
1630
1631static inline struct cftype *__d_cft(struct dentry *dentry)
1632{
1633 return dentry->d_fsdata;
1634}
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1647{
1648 char *start;
1649 struct dentry *dentry = rcu_dereference_check(cgrp->dentry,
1650 rcu_read_lock_held() ||
1651 cgroup_lock_is_held());
1652
1653 if (!dentry || cgrp == dummytop) {
1654
1655
1656
1657
1658 strcpy(buf, "/");
1659 return 0;
1660 }
1661
1662 start = buf + buflen;
1663
1664 *--start = '\0';
1665 for (;;) {
1666 int len = dentry->d_name.len;
1667
1668 if ((start -= len) < buf)
1669 return -ENAMETOOLONG;
1670 memcpy(start, dentry->d_name.name, len);
1671 cgrp = cgrp->parent;
1672 if (!cgrp)
1673 break;
1674
1675 dentry = rcu_dereference_check(cgrp->dentry,
1676 rcu_read_lock_held() ||
1677 cgroup_lock_is_held());
1678 if (!cgrp->parent)
1679 continue;
1680 if (--start < buf)
1681 return -ENAMETOOLONG;
1682 *start = '/';
1683 }
1684 memmove(buf, start, buf + buflen - start);
1685 return 0;
1686}
1687EXPORT_SYMBOL_GPL(cgroup_path);
1688
1689
1690
1691
1692
1693
1694
1695
1696
/**
 * cgroup_attach_task - move a task into a cgroup
 * @cgrp: destination cgroup
 * @tsk: task to be moved
 *
 * Returns 0 on success, including the case where @tsk is already in
 * @cgrp for this hierarchy.  On failure returns the nonzero value from a
 * subsystem's can_attach(), -ENOMEM when find_css_set() yields no
 * css_set, or -ESRCH when @tsk has PF_EXITING set.
 *
 * NOTE(review): the in-file caller runs this under cgroup_mutex (taken by
 * cgroup_lock_live_group()); confirm the same holds for other callers.
 */
int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
	int retval = 0;
	struct cgroup_subsys *ss, *failed_ss = NULL;
	struct cgroup *oldcgrp;
	struct css_set *cg;
	struct css_set *newcg;
	struct cgroupfs_root *root = cgrp->root;

	/* Nothing to do if the task is already in the target cgroup. */
	oldcgrp = task_cgroup_from_root(tsk, root);
	if (cgrp == oldcgrp)
		return 0;

	/* Give every subsystem of this hierarchy a chance to veto. */
	for_each_subsys(root, ss) {
		if (ss->can_attach) {
			retval = ss->can_attach(ss, cgrp, tsk, false);
			if (retval) {
				/*
				 * Remember which subsystem refused so the
				 * rollback at "out" stops before it.
				 */
				failed_ss = ss;
				goto out;
			}
		}
	}

	/* Pin the task's current css_set while it is used as a template. */
	task_lock(tsk);
	cg = tsk->cgroups;
	get_css_set(cg);
	task_unlock(tsk);

	/* Obtain the css_set for the task's new cgroup membership. */
	newcg = find_css_set(cg, cgrp);
	put_css_set(cg);
	if (!newcg) {
		retval = -ENOMEM;
		goto out;
	}

	/* Do not switch the css_set of a task that is already exiting. */
	task_lock(tsk);
	if (tsk->flags & PF_EXITING) {
		task_unlock(tsk);
		put_css_set(newcg);
		retval = -ESRCH;
		goto out;
	}
	rcu_assign_pointer(tsk->cgroups, newcg);
	task_unlock(tsk);

	/* If the task is on a css_set task list, move it to the new one. */
	write_lock(&css_set_lock);
	if (!list_empty(&tsk->cg_list)) {
		list_del(&tsk->cg_list);
		list_add(&tsk->cg_list, &newcg->tasks);
	}
	write_unlock(&css_set_lock);

	for_each_subsys(root, ss) {
		if (ss->attach)
			ss->attach(ss, cgrp, oldcgrp, tsk, false);
	}
	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
	/* Wait out RCU readers before dropping a reference on the old set. */
	synchronize_rcu();
	put_css_set(cg);

	cgroup_wakeup_rmdir_waiter(cgrp);
out:
	if (retval) {
		/*
		 * Roll back: call cancel_attach() on each subsystem up to,
		 * but not including, the one that failed can_attach().  When
		 * failed_ss is NULL (a later failure) all of them are
		 * cancelled.
		 */
		for_each_subsys(root, ss) {
			if (ss == failed_ss)
				break;
			if (ss->cancel_attach)
				ss->cancel_attach(ss, cgrp, tsk, false);
		}
	}
	return retval;
}
1790
1791
1792
1793
1794
1795static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
1796{
1797 struct task_struct *tsk;
1798 const struct cred *cred = current_cred(), *tcred;
1799 int ret;
1800
1801 if (pid) {
1802 rcu_read_lock();
1803 tsk = find_task_by_vpid(pid);
1804 if (!tsk || tsk->flags & PF_EXITING) {
1805 rcu_read_unlock();
1806 return -ESRCH;
1807 }
1808
1809 tcred = __task_cred(tsk);
1810 if (cred->euid &&
1811 cred->euid != tcred->uid &&
1812 cred->euid != tcred->suid) {
1813 rcu_read_unlock();
1814 return -EACCES;
1815 }
1816 get_task_struct(tsk);
1817 rcu_read_unlock();
1818 } else {
1819 tsk = current;
1820 get_task_struct(tsk);
1821 }
1822
1823 ret = cgroup_attach_task(cgrp, tsk);
1824 put_task_struct(tsk);
1825 return ret;
1826}
1827
1828static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
1829{
1830 int ret;
1831 if (!cgroup_lock_live_group(cgrp))
1832 return -ENODEV;
1833 ret = attach_task_by_pid(cgrp, pid);
1834 cgroup_unlock();
1835 return ret;
1836}
1837
1838
1839
1840
1841
1842
1843
1844
1845bool cgroup_lock_live_group(struct cgroup *cgrp)
1846{
1847 mutex_lock(&cgroup_mutex);
1848 if (cgroup_is_removed(cgrp)) {
1849 mutex_unlock(&cgroup_mutex);
1850 return false;
1851 }
1852 return true;
1853}
1854EXPORT_SYMBOL_GPL(cgroup_lock_live_group);
1855
1856static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
1857 const char *buffer)
1858{
1859 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
1860 if (!cgroup_lock_live_group(cgrp))
1861 return -ENODEV;
1862 strcpy(cgrp->root->release_agent_path, buffer);
1863 cgroup_unlock();
1864 return 0;
1865}
1866
1867static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
1868 struct seq_file *seq)
1869{
1870 if (!cgroup_lock_live_group(cgrp))
1871 return -ENODEV;
1872 seq_puts(seq, cgrp->root->release_agent_path);
1873 seq_putc(seq, '\n');
1874 cgroup_unlock();
1875 return 0;
1876}
1877
1878
/* Size in bytes of the on-stack buffers used by the file read/write helpers. */
#define CGROUP_LOCAL_BUFFER_SIZE 64
1880
1881static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
1882 struct file *file,
1883 const char __user *userbuf,
1884 size_t nbytes, loff_t *unused_ppos)
1885{
1886 char buffer[CGROUP_LOCAL_BUFFER_SIZE];
1887 int retval = 0;
1888 char *end;
1889
1890 if (!nbytes)
1891 return -EINVAL;
1892 if (nbytes >= sizeof(buffer))
1893 return -E2BIG;
1894 if (copy_from_user(buffer, userbuf, nbytes))
1895 return -EFAULT;
1896
1897 buffer[nbytes] = 0;
1898 if (cft->write_u64) {
1899 u64 val = simple_strtoull(strstrip(buffer), &end, 0);
1900 if (*end)
1901 return -EINVAL;
1902 retval = cft->write_u64(cgrp, cft, val);
1903 } else {
1904 s64 val = simple_strtoll(strstrip(buffer), &end, 0);
1905 if (*end)
1906 return -EINVAL;
1907 retval = cft->write_s64(cgrp, cft, val);
1908 }
1909 if (!retval)
1910 retval = nbytes;
1911 return retval;
1912}
1913
1914static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
1915 struct file *file,
1916 const char __user *userbuf,
1917 size_t nbytes, loff_t *unused_ppos)
1918{
1919 char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
1920 int retval = 0;
1921 size_t max_bytes = cft->max_write_len;
1922 char *buffer = local_buffer;
1923
1924 if (!max_bytes)
1925 max_bytes = sizeof(local_buffer) - 1;
1926 if (nbytes >= max_bytes)
1927 return -E2BIG;
1928
1929 if (nbytes >= sizeof(local_buffer)) {
1930 buffer = kmalloc(nbytes + 1, GFP_KERNEL);
1931 if (buffer == NULL)
1932 return -ENOMEM;
1933 }
1934 if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
1935 retval = -EFAULT;
1936 goto out;
1937 }
1938
1939 buffer[nbytes] = 0;
1940 retval = cft->write_string(cgrp, cft, strstrip(buffer));
1941 if (!retval)
1942 retval = nbytes;
1943out:
1944 if (buffer != local_buffer)
1945 kfree(buffer);
1946 return retval;
1947}
1948
1949static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
1950 size_t nbytes, loff_t *ppos)
1951{
1952 struct cftype *cft = __d_cft(file->f_dentry);
1953 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
1954
1955 if (cgroup_is_removed(cgrp))
1956 return -ENODEV;
1957 if (cft->write)
1958 return cft->write(cgrp, cft, file, buf, nbytes, ppos);
1959 if (cft->write_u64 || cft->write_s64)
1960 return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
1961 if (cft->write_string)
1962 return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
1963 if (cft->trigger) {
1964 int ret = cft->trigger(cgrp, (unsigned int)cft->private);
1965 return ret ? ret : nbytes;
1966 }
1967 return -EINVAL;
1968}
1969
1970static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
1971 struct file *file,
1972 char __user *buf, size_t nbytes,
1973 loff_t *ppos)
1974{
1975 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
1976 u64 val = cft->read_u64(cgrp, cft);
1977 int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
1978
1979 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
1980}
1981
1982static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
1983 struct file *file,
1984 char __user *buf, size_t nbytes,
1985 loff_t *ppos)
1986{
1987 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
1988 s64 val = cft->read_s64(cgrp, cft);
1989 int len = sprintf(tmp, "%lld\n", (long long) val);
1990
1991 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
1992}
1993
1994static ssize_t cgroup_file_read(struct file *file, char __user *buf,
1995 size_t nbytes, loff_t *ppos)
1996{
1997 struct cftype *cft = __d_cft(file->f_dentry);
1998 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
1999
2000 if (cgroup_is_removed(cgrp))
2001 return -ENODEV;
2002
2003 if (cft->read)
2004 return cft->read(cgrp, cft, file, buf, nbytes, ppos);
2005 if (cft->read_u64)
2006 return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
2007 if (cft->read_s64)
2008 return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
2009 return -EINVAL;
2010}
2011
2012
2013
2014
2015
2016
/*
 * Per-open state for control files served through seq_file: allocated in
 * cgroup_file_open(), stored as the seq_file's private pointer, and freed
 * in cgroup_seqfile_release().
 */
struct cgroup_seqfile_state {
	struct cftype *cft;	/* file type whose read_map/read_seq_string is called */
	struct cgroup *cgroup;	/* cgroup the opened file belongs to */
};
2021
2022static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
2023{
2024 struct seq_file *sf = cb->state;
2025 return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
2026}
2027
2028static int cgroup_seqfile_show(struct seq_file *m, void *arg)
2029{
2030 struct cgroup_seqfile_state *state = m->private;
2031 struct cftype *cft = state->cft;
2032 if (cft->read_map) {
2033 struct cgroup_map_cb cb = {
2034 .fill = cgroup_map_add,
2035 .state = m,
2036 };
2037 return cft->read_map(state->cgroup, cft, &cb);
2038 }
2039 return cft->read_seq_string(state->cgroup, cft, m);
2040}
2041
2042static int cgroup_seqfile_release(struct inode *inode, struct file *file)
2043{
2044 struct seq_file *seq = file->private_data;
2045 kfree(seq->private);
2046 return single_release(inode, file);
2047}
2048
2049static const struct file_operations cgroup_seqfile_operations = {
2050 .read = seq_read,
2051 .write = cgroup_file_write,
2052 .llseek = seq_lseek,
2053 .release = cgroup_seqfile_release,
2054};
2055
2056static int cgroup_file_open(struct inode *inode, struct file *file)
2057{
2058 int err;
2059 struct cftype *cft;
2060
2061 err = generic_file_open(inode, file);
2062 if (err)
2063 return err;
2064 cft = __d_cft(file->f_dentry);
2065
2066 if (cft->read_map || cft->read_seq_string) {
2067 struct cgroup_seqfile_state *state =
2068 kzalloc(sizeof(*state), GFP_USER);
2069 if (!state)
2070 return -ENOMEM;
2071 state->cft = cft;
2072 state->cgroup = __d_cgrp(file->f_dentry->d_parent);
2073 file->f_op = &cgroup_seqfile_operations;
2074 err = single_open(file, cgroup_seqfile_show, state);
2075 if (err < 0)
2076 kfree(state);
2077 } else if (cft->open)
2078 err = cft->open(inode, file);
2079 else
2080 err = 0;
2081
2082 return err;
2083}
2084
2085static int cgroup_file_release(struct inode *inode, struct file *file)
2086{
2087 struct cftype *cft = __d_cft(file->f_dentry);
2088 if (cft->release)
2089 return cft->release(inode, file);
2090 return 0;
2091}
2092
2093
2094
2095
2096static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
2097 struct inode *new_dir, struct dentry *new_dentry)
2098{
2099 if (!S_ISDIR(old_dentry->d_inode->i_mode))
2100 return -ENOTDIR;
2101 if (new_dentry->d_inode)
2102 return -EEXIST;
2103 if (old_dir != new_dir)
2104 return -EIO;
2105 return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
2106}
2107
2108static const struct file_operations cgroup_file_operations = {
2109 .read = cgroup_file_read,
2110 .write = cgroup_file_write,
2111 .llseek = generic_file_llseek,
2112 .open = cgroup_file_open,
2113 .release = cgroup_file_release,
2114};
2115
2116static const struct inode_operations cgroup_dir_inode_operations = {
2117 .lookup = simple_lookup,
2118 .mkdir = cgroup_mkdir,
2119 .rmdir = cgroup_rmdir,
2120 .rename = cgroup_rename,
2121};
2122
2123
2124
2125
2126static inline struct cftype *__file_cft(struct file *file)
2127{
2128 if (file->f_dentry->d_inode->i_fop != &cgroup_file_operations)
2129 return ERR_PTR(-EINVAL);
2130 return __d_cft(file->f_dentry);
2131}
2132
2133static int cgroup_create_file(struct dentry *dentry, mode_t mode,
2134 struct super_block *sb)
2135{
2136 static const struct dentry_operations cgroup_dops = {
2137 .d_iput = cgroup_diput,
2138 };
2139
2140 struct inode *inode;
2141
2142 if (!dentry)
2143 return -ENOENT;
2144 if (dentry->d_inode)
2145 return -EEXIST;
2146
2147 inode = cgroup_new_inode(mode, sb);
2148 if (!inode)
2149 return -ENOMEM;
2150
2151 if (S_ISDIR(mode)) {
2152 inode->i_op = &cgroup_dir_inode_operations;
2153 inode->i_fop = &simple_dir_operations;
2154
2155
2156 inc_nlink(inode);
2157
2158
2159
2160 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
2161 } else if (S_ISREG(mode)) {
2162 inode->i_size = 0;
2163 inode->i_fop = &cgroup_file_operations;
2164 }
2165 dentry->d_op = &cgroup_dops;
2166 d_instantiate(dentry, inode);
2167 dget(dentry);
2168 return 0;
2169}
2170
2171
2172
2173
2174
2175
2176
2177
2178static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
2179 mode_t mode)
2180{
2181 struct dentry *parent;
2182 int error = 0;
2183
2184 parent = cgrp->parent->dentry;
2185 error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
2186 if (!error) {
2187 dentry->d_fsdata = cgrp;
2188 inc_nlink(parent->d_inode);
2189 rcu_assign_pointer(cgrp->dentry, dentry);
2190 dget(dentry);
2191 }
2192 dput(dentry);
2193
2194 return error;
2195}
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206static mode_t cgroup_file_mode(const struct cftype *cft)
2207{
2208 mode_t mode = 0;
2209
2210 if (cft->mode)
2211 return cft->mode;
2212
2213 if (cft->read || cft->read_u64 || cft->read_s64 ||
2214 cft->read_map || cft->read_seq_string)
2215 mode |= S_IRUGO;
2216
2217 if (cft->write || cft->write_u64 || cft->write_s64 ||
2218 cft->write_string || cft->trigger)
2219 mode |= S_IWUSR;
2220
2221 return mode;
2222}
2223
2224int cgroup_add_file(struct cgroup *cgrp,
2225 struct cgroup_subsys *subsys,
2226 const struct cftype *cft)
2227{
2228 struct dentry *dir = cgrp->dentry;
2229 struct dentry *dentry;
2230 int error;
2231 mode_t mode;
2232
2233 char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
2234 if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
2235 strcpy(name, subsys->name);
2236 strcat(name, ".");
2237 }
2238 strcat(name, cft->name);
2239 BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
2240 dentry = lookup_one_len(name, dir, strlen(name));
2241 if (!IS_ERR(dentry)) {
2242 mode = cgroup_file_mode(cft);
2243 error = cgroup_create_file(dentry, mode | S_IFREG,
2244 cgrp->root->sb);
2245 if (!error)
2246 dentry->d_fsdata = (void *)cft;
2247 dput(dentry);
2248 } else
2249 error = PTR_ERR(dentry);
2250 return error;
2251}
2252EXPORT_SYMBOL_GPL(cgroup_add_file);
2253
2254int cgroup_add_files(struct cgroup *cgrp,
2255 struct cgroup_subsys *subsys,
2256 const struct cftype cft[],
2257 int count)
2258{
2259 int i, err;
2260 for (i = 0; i < count; i++) {
2261 err = cgroup_add_file(cgrp, subsys, &cft[i]);
2262 if (err)
2263 return err;
2264 }
2265 return 0;
2266}
2267EXPORT_SYMBOL_GPL(cgroup_add_files);
2268
2269
2270
2271
2272
2273
2274
2275int cgroup_task_count(const struct cgroup *cgrp)
2276{
2277 int count = 0;
2278 struct cg_cgroup_link *link;
2279
2280 read_lock(&css_set_lock);
2281 list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
2282 count += atomic_read(&link->cg->refcount);
2283 }
2284 read_unlock(&css_set_lock);
2285 return count;
2286}
2287
2288
2289
2290
2291
2292static void cgroup_advance_iter(struct cgroup *cgrp,
2293 struct cgroup_iter *it)
2294{
2295 struct list_head *l = it->cg_link;
2296 struct cg_cgroup_link *link;
2297 struct css_set *cg;
2298
2299
2300 do {
2301 l = l->next;
2302 if (l == &cgrp->css_sets) {
2303 it->cg_link = NULL;
2304 return;
2305 }
2306 link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
2307 cg = link->cg;
2308 } while (list_empty(&cg->tasks));
2309 it->cg_link = l;
2310 it->task = cg->tasks.next;
2311}
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322static void cgroup_enable_task_cg_lists(void)
2323{
2324 struct task_struct *p, *g;
2325 write_lock(&css_set_lock);
2326 use_task_css_set_links = 1;
2327 do_each_thread(g, p) {
2328 task_lock(p);
2329
2330
2331
2332
2333
2334 if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
2335 list_add(&p->cg_list, &p->cgroups->tasks);
2336 task_unlock(p);
2337 } while_each_thread(g, p);
2338 write_unlock(&css_set_lock);
2339}
2340
2341void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
2342{
2343
2344
2345
2346
2347
2348 if (!use_task_css_set_links)
2349 cgroup_enable_task_cg_lists();
2350
2351 read_lock(&css_set_lock);
2352 it->cg_link = &cgrp->css_sets;
2353 cgroup_advance_iter(cgrp, it);
2354}
2355
2356struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
2357 struct cgroup_iter *it)
2358{
2359 struct task_struct *res;
2360 struct list_head *l = it->task;
2361 struct cg_cgroup_link *link;
2362
2363
2364 if (!it->cg_link)
2365 return NULL;
2366 res = list_entry(l, struct task_struct, cg_list);
2367
2368 l = l->next;
2369 link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
2370 if (l == &link->cg->tasks) {
2371
2372
2373 cgroup_advance_iter(cgrp, it);
2374 } else {
2375 it->task = l;
2376 }
2377 return res;
2378}
2379
2380void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
2381{
2382 read_unlock(&css_set_lock);
2383}
2384
2385static inline int started_after_time(struct task_struct *t1,
2386 struct timespec *time,
2387 struct task_struct *t2)
2388{
2389 int start_diff = timespec_compare(&t1->start_time, time);
2390 if (start_diff > 0) {
2391 return 1;
2392 } else if (start_diff < 0) {
2393 return 0;
2394 } else {
2395
2396
2397
2398
2399
2400
2401
2402
2403 return t1 > t2;
2404 }
2405}
2406
2407
2408
2409
2410
2411
2412static inline int started_after(void *p1, void *p2)
2413{
2414 struct task_struct *t1 = p1;
2415 struct task_struct *t2 = p2;
2416 return started_after_time(t1, &t2->start_time, t2);
2417}
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
/**
 * cgroup_scan_tasks - run a callback on the tasks of a cgroup, in batches
 * @scan: scanner describing the cgroup (->cg), an optional filter
 *        (->test_task), the callback (->process_task) and an optional
 *        caller-provided heap (->heap)
 *
 * Tasks are collected into a pointer heap ordered by started_after() while
 * the cgroup iterator is active; the callback is invoked only after
 * cgroup_iter_end().  Passes repeat, each one considering only tasks that
 * order after the marker recorded by the previous pass, until a pass
 * collects no task.
 *
 * Returns 0 on success, or the heap_init() error when no heap was
 * supplied and a temporary one could not be set up.
 */
int cgroup_scan_tasks(struct cgroup_scanner *scan)
{
	int retval, i;
	struct cgroup_iter it;
	struct task_struct *p, *dropped;
	/* Marker for the next pass: heap->ptrs[0] of the previous pass. */
	struct task_struct *latest_task = NULL;
	struct ptr_heap tmp_heap;
	struct ptr_heap *heap;
	struct timespec latest_time = { 0, 0 };

	if (scan->heap) {
		/* Use the caller's heap, but install our own comparator. */
		heap = scan->heap;
		heap->gt = &started_after;
	} else {
		/* No heap supplied: set up a temporary one. */
		heap = &tmp_heap;
		retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
		if (retval)
			/* Cannot set up the heap. */
			return retval;
	}

 again:
	/* Each pass starts with an empty heap. */
	heap->size = 0;
	cgroup_iter_start(scan->cg, &it);
	while ((p = cgroup_iter_next(scan->cg, &it))) {
		/* Skip tasks rejected by the caller's filter, if any. */
		if (scan->test_task && !scan->test_task(p, scan))
			continue;
		/* Skip tasks that do not order after the previous marker. */
		if (!started_after_time(p, &latest_time, latest_task))
			continue;
		dropped = heap_insert(heap, p);
		if (dropped == NULL) {
			/*
			 * Nothing came back from the insert: p is now in the
			 * heap, so take a reference on it.
			 */
			get_task_struct(p);
		} else if (dropped != p) {
			/*
			 * A different task came back, so p was inserted in
			 * its place: reference p and release the other one.
			 */
			get_task_struct(p);
			put_task_struct(dropped);
		}
		/* Otherwise p itself came back and no reference changes. */
	}
	cgroup_iter_end(scan->cg, &it);

	if (heap->size) {
		for (i = 0; i < heap->size; i++) {
			struct task_struct *q = heap->ptrs[i];
			if (i == 0) {
				/* Record the marker used by the next pass. */
				latest_time = q->start_time;
				latest_task = q;
			}
			/* Invoke the caller's callback, then drop our ref. */
			scan->process_task(q, scan);
			put_task_struct(q);
		}
		/* Something was processed, so look for further tasks. */
		goto again;
	}
	if (heap == &tmp_heap)
		heap_free(&tmp_heap);
	return 0;
}
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
2561static void *pidlist_allocate(int count)
2562{
2563 if (PIDLIST_TOO_LARGE(count))
2564 return vmalloc(count * sizeof(pid_t));
2565 else
2566 return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
2567}
/* Free a pid array with the deallocator matching how it was allocated. */
static void pidlist_free(void *p)
{
	if (!is_vmalloc_addr(p)) {
		kfree(p);
		return;
	}
	vfree(p);
}
2575static void *pidlist_resize(void *p, int newcount)
2576{
2577 void *newlist;
2578
2579 if (is_vmalloc_addr(p)) {
2580 newlist = vmalloc(newcount * sizeof(pid_t));
2581 if (!newlist)
2582 return NULL;
2583 memcpy(newlist, p, newcount * sizeof(pid_t));
2584 vfree(p);
2585 } else {
2586 newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL);
2587 }
2588 return newlist;
2589}
2590
2591
2592
2593
2594
2595
2596
2597
2598#define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new))
2599static int pidlist_uniq(pid_t **p, int length)
2600{
2601 int src, dest = 1;
2602 pid_t *list = *p;
2603 pid_t *newlist;
2604
2605
2606
2607
2608
2609 if (length == 0 || length == 1)
2610 return length;
2611
2612 for (src = 1; src < length; src++) {
2613
2614 while (list[src] == list[src-1]) {
2615 src++;
2616 if (src == length)
2617 goto after;
2618 }
2619
2620 list[dest] = list[src];
2621 dest++;
2622 }
2623after:
2624
2625
2626
2627
2628
2629 if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
2630 newlist = pidlist_resize(list, dest);
2631 if (newlist)
2632 *p = newlist;
2633 }
2634 return dest;
2635}
2636
/* sort() comparator for pid_t values: negative, zero or positive as *a <, ==, > *b. */
static int cmppid(const void *a, const void *b)
{
	const pid_t lhs = *(const pid_t *)a;
	const pid_t rhs = *(const pid_t *)b;

	return lhs - rhs;
}
2641
2642
2643
2644
2645
2646
2647
2648static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
2649 enum cgroup_filetype type)
2650{
2651 struct cgroup_pidlist *l;
2652
2653 struct pid_namespace *ns = current->nsproxy->pid_ns;
2654
2655
2656
2657
2658
2659
2660
2661 mutex_lock(&cgrp->pidlist_mutex);
2662 list_for_each_entry(l, &cgrp->pidlists, links) {
2663 if (l->key.type == type && l->key.ns == ns) {
2664
2665 down_write(&l->mutex);
2666 mutex_unlock(&cgrp->pidlist_mutex);
2667 return l;
2668 }
2669 }
2670
2671 l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
2672 if (!l) {
2673 mutex_unlock(&cgrp->pidlist_mutex);
2674 return l;
2675 }
2676 init_rwsem(&l->mutex);
2677 down_write(&l->mutex);
2678 l->key.type = type;
2679 l->key.ns = get_pid_ns(ns);
2680 l->use_count = 0;
2681 l->list = NULL;
2682 l->owner = cgrp;
2683 list_add(&l->links, &cgrp->pidlists);
2684 mutex_unlock(&cgrp->pidlist_mutex);
2685 return l;
2686}
2687
2688
2689
2690
2691static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
2692 struct cgroup_pidlist **lp)
2693{
2694 pid_t *array;
2695 int length;
2696 int pid, n = 0;
2697 struct cgroup_iter it;
2698 struct task_struct *tsk;
2699 struct cgroup_pidlist *l;
2700
2701
2702
2703
2704
2705
2706
2707 length = cgroup_task_count(cgrp);
2708 array = pidlist_allocate(length);
2709 if (!array)
2710 return -ENOMEM;
2711
2712 cgroup_iter_start(cgrp, &it);
2713 while ((tsk = cgroup_iter_next(cgrp, &it))) {
2714 if (unlikely(n == length))
2715 break;
2716
2717 if (type == CGROUP_FILE_PROCS)
2718 pid = task_tgid_vnr(tsk);
2719 else
2720 pid = task_pid_vnr(tsk);
2721 if (pid > 0)
2722 array[n++] = pid;
2723 }
2724 cgroup_iter_end(cgrp, &it);
2725 length = n;
2726
2727 sort(array, length, sizeof(pid_t), cmppid, NULL);
2728 if (type == CGROUP_FILE_PROCS)
2729 length = pidlist_uniq(&array, length);
2730 l = cgroup_pidlist_find(cgrp, type);
2731 if (!l) {
2732 pidlist_free(array);
2733 return -ENOMEM;
2734 }
2735
2736 pidlist_free(l->list);
2737 l->list = array;
2738 l->length = length;
2739 l->use_count++;
2740 up_write(&l->mutex);
2741 *lp = l;
2742 return 0;
2743}
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
2755{
2756 int ret = -EINVAL;
2757 struct cgroup *cgrp;
2758 struct cgroup_iter it;
2759 struct task_struct *tsk;
2760
2761
2762
2763
2764
2765 if (dentry->d_sb->s_op != &cgroup_ops ||
2766 !S_ISDIR(dentry->d_inode->i_mode))
2767 goto err;
2768
2769 ret = 0;
2770 cgrp = dentry->d_fsdata;
2771
2772 cgroup_iter_start(cgrp, &it);
2773 while ((tsk = cgroup_iter_next(cgrp, &it))) {
2774 switch (tsk->state) {
2775 case TASK_RUNNING:
2776 stats->nr_running++;
2777 break;
2778 case TASK_INTERRUPTIBLE:
2779 stats->nr_sleeping++;
2780 break;
2781 case TASK_UNINTERRUPTIBLE:
2782 stats->nr_uninterruptible++;
2783 break;
2784 case TASK_STOPPED:
2785 stats->nr_stopped++;
2786 break;
2787 default:
2788 if (delayacct_is_task_waiting_on_io(tsk))
2789 stats->nr_io_wait++;
2790 break;
2791 }
2792 }
2793 cgroup_iter_end(cgrp, &it);
2794
2795err:
2796 return ret;
2797}
2798
2799
2800
2801
2802
2803
2804
2805
2806static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
2807{
2808
2809
2810
2811
2812
2813
2814 struct cgroup_pidlist *l = s->private;
2815 int index = 0, pid = *pos;
2816 int *iter;
2817
2818 down_read(&l->mutex);
2819 if (pid) {
2820 int end = l->length;
2821
2822 while (index < end) {
2823 int mid = (index + end) / 2;
2824 if (l->list[mid] == pid) {
2825 index = mid;
2826 break;
2827 } else if (l->list[mid] <= pid)
2828 index = mid + 1;
2829 else
2830 end = mid;
2831 }
2832 }
2833
2834 if (index >= l->length)
2835 return NULL;
2836
2837 iter = l->list + index;
2838 *pos = *iter;
2839 return iter;
2840}
2841
2842static void cgroup_pidlist_stop(struct seq_file *s, void *v)
2843{
2844 struct cgroup_pidlist *l = s->private;
2845 up_read(&l->mutex);
2846}
2847
2848static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
2849{
2850 struct cgroup_pidlist *l = s->private;
2851 pid_t *p = v;
2852 pid_t *end = l->list + l->length;
2853
2854
2855
2856
2857 p++;
2858 if (p >= end) {
2859 return NULL;
2860 } else {
2861 *pos = *p;
2862 return p;
2863 }
2864}
2865
/* seq_file show callback: print the pid that @v points to, one per line. */
static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
	int *entry = v;

	return seq_printf(s, "%d\n", *entry);
}
2870
2871
2872
2873
2874
2875static const struct seq_operations cgroup_pidlist_seq_operations = {
2876 .start = cgroup_pidlist_start,
2877 .stop = cgroup_pidlist_stop,
2878 .next = cgroup_pidlist_next,
2879 .show = cgroup_pidlist_show,
2880};
2881
2882static void cgroup_release_pid_array(struct cgroup_pidlist *l)
2883{
2884
2885
2886
2887
2888
2889
2890 mutex_lock(&l->owner->pidlist_mutex);
2891 down_write(&l->mutex);
2892 BUG_ON(!l->use_count);
2893 if (!--l->use_count) {
2894
2895 list_del(&l->links);
2896 mutex_unlock(&l->owner->pidlist_mutex);
2897 pidlist_free(l->list);
2898 put_pid_ns(l->key.ns);
2899 up_write(&l->mutex);
2900 kfree(l);
2901 return;
2902 }
2903 mutex_unlock(&l->owner->pidlist_mutex);
2904 up_write(&l->mutex);
2905}
2906
2907static int cgroup_pidlist_release(struct inode *inode, struct file *file)
2908{
2909 struct cgroup_pidlist *l;
2910 if (!(file->f_mode & FMODE_READ))
2911 return 0;
2912
2913
2914
2915
2916 l = ((struct seq_file *)file->private_data)->private;
2917 cgroup_release_pid_array(l);
2918 return seq_release(inode, file);
2919}
2920
2921static const struct file_operations cgroup_pidlist_operations = {
2922 .read = seq_read,
2923 .llseek = seq_lseek,
2924 .write = cgroup_file_write,
2925 .release = cgroup_pidlist_release,
2926};
2927
2928
2929
2930
2931
2932
2933
2934static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
2935{
2936 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2937 struct cgroup_pidlist *l;
2938 int retval;
2939
2940
2941 if (!(file->f_mode & FMODE_READ))
2942 return 0;
2943
2944
2945 retval = pidlist_array_load(cgrp, type, &l);
2946 if (retval)
2947 return retval;
2948
2949 file->f_op = &cgroup_pidlist_operations;
2950
2951 retval = seq_open(file, &cgroup_pidlist_seq_operations);
2952 if (retval) {
2953 cgroup_release_pid_array(l);
2954 return retval;
2955 }
2956 ((struct seq_file *)file->private_data)->private = l;
2957 return 0;
2958}
2959static int cgroup_tasks_open(struct inode *unused, struct file *file)
2960{
2961 return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
2962}
2963static int cgroup_procs_open(struct inode *unused, struct file *file)
2964{
2965 return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
2966}
2967
2968static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
2969 struct cftype *cft)
2970{
2971 return notify_on_release(cgrp);
2972}
2973
2974static int cgroup_write_notify_on_release(struct cgroup *cgrp,
2975 struct cftype *cft,
2976 u64 val)
2977{
2978 clear_bit(CGRP_RELEASABLE, &cgrp->flags);
2979 if (val)
2980 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
2981 else
2982 clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
2983 return 0;
2984}
2985
2986
2987
2988
2989
2990
2991static void cgroup_event_remove(struct work_struct *work)
2992{
2993 struct cgroup_event *event = container_of(work, struct cgroup_event,
2994 remove);
2995 struct cgroup *cgrp = event->cgrp;
2996
2997 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
2998
2999 eventfd_ctx_put(event->eventfd);
3000 kfree(event);
3001 dput(cgrp->dentry);
3002}
3003
3004
3005
3006
3007
3008
3009static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
3010 int sync, void *key)
3011{
3012 struct cgroup_event *event = container_of(wait,
3013 struct cgroup_event, wait);
3014 struct cgroup *cgrp = event->cgrp;
3015 unsigned long flags = (unsigned long)key;
3016
3017 if (flags & POLLHUP) {
3018 __remove_wait_queue(event->wqh, &event->wait);
3019 spin_lock(&cgrp->event_list_lock);
3020 list_del(&event->list);
3021 spin_unlock(&cgrp->event_list_lock);
3022
3023
3024
3025
3026 schedule_work(&event->remove);
3027 }
3028
3029 return 0;
3030}
3031
3032static void cgroup_event_ptable_queue_proc(struct file *file,
3033 wait_queue_head_t *wqh, poll_table *pt)
3034{
3035 struct cgroup_event *event = container_of(pt,
3036 struct cgroup_event, pt);
3037
3038 event->wqh = wqh;
3039 add_wait_queue(wqh, &event->wait);
3040}
3041
3042
3043
3044
3045
3046
3047
3048static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
3049 const char *buffer)
3050{
3051 struct cgroup_event *event = NULL;
3052 unsigned int efd, cfd;
3053 struct file *efile = NULL;
3054 struct file *cfile = NULL;
3055 char *endp;
3056 int ret;
3057
3058 efd = simple_strtoul(buffer, &endp, 10);
3059 if (*endp != ' ')
3060 return -EINVAL;
3061 buffer = endp + 1;
3062
3063 cfd = simple_strtoul(buffer, &endp, 10);
3064 if ((*endp != ' ') && (*endp != '\0'))
3065 return -EINVAL;
3066 buffer = endp + 1;
3067
3068 event = kzalloc(sizeof(*event), GFP_KERNEL);
3069 if (!event)
3070 return -ENOMEM;
3071 event->cgrp = cgrp;
3072 INIT_LIST_HEAD(&event->list);
3073 init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
3074 init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
3075 INIT_WORK(&event->remove, cgroup_event_remove);
3076
3077 efile = eventfd_fget(efd);
3078 if (IS_ERR(efile)) {
3079 ret = PTR_ERR(efile);
3080 goto fail;
3081 }
3082
3083 event->eventfd = eventfd_ctx_fileget(efile);
3084 if (IS_ERR(event->eventfd)) {
3085 ret = PTR_ERR(event->eventfd);
3086 goto fail;
3087 }
3088
3089 cfile = fget(cfd);
3090 if (!cfile) {
3091 ret = -EBADF;
3092 goto fail;
3093 }
3094
3095
3096 ret = file_permission(cfile, MAY_READ);
3097 if (ret < 0)
3098 goto fail;
3099
3100 event->cft = __file_cft(cfile);
3101 if (IS_ERR(event->cft)) {
3102 ret = PTR_ERR(event->cft);
3103 goto fail;
3104 }
3105
3106 if (!event->cft->register_event || !event->cft->unregister_event) {
3107 ret = -EINVAL;
3108 goto fail;
3109 }
3110
3111 ret = event->cft->register_event(cgrp, event->cft,
3112 event->eventfd, buffer);
3113 if (ret)
3114 goto fail;
3115
3116 if (efile->f_op->poll(efile, &event->pt) & POLLHUP) {
3117 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
3118 ret = 0;
3119 goto fail;
3120 }
3121
3122
3123
3124
3125
3126
3127 dget(cgrp->dentry);
3128
3129 spin_lock(&cgrp->event_list_lock);
3130 list_add(&event->list, &cgrp->event_list);
3131 spin_unlock(&cgrp->event_list_lock);
3132
3133 fput(cfile);
3134 fput(efile);
3135
3136 return 0;
3137
3138fail:
3139 if (cfile)
3140 fput(cfile);
3141
3142 if (event && event->eventfd && !IS_ERR(event->eventfd))
3143 eventfd_ctx_put(event->eventfd);
3144
3145 if (!IS_ERR_OR_NULL(efile))
3146 fput(efile);
3147
3148 kfree(event);
3149
3150 return ret;
3151}
3152
3153
3154
3155
3156
/* Name prefix of the core "cgroup.procs" and "cgroup.event_control" files. */
#define CGROUP_FILE_GENERIC_PREFIX "cgroup."
/*
 * Core control files.  cgroup_populate_dir() adds all of them to every
 * cgroup directory it populates.
 */
static struct cftype files[] = {
	{
		.name = "tasks",
		.open = cgroup_tasks_open,
		.write_u64 = cgroup_tasks_write,
		.release = cgroup_pidlist_release,
		.mode = S_IRUGO | S_IWUSR,
	},
	{
		.name = CGROUP_FILE_GENERIC_PREFIX "procs",
		.open = cgroup_procs_open,
		/* no write handler is provided; the mode below is read-only */
		.release = cgroup_pidlist_release,
		.mode = S_IRUGO,
	},
	{
		.name = "notify_on_release",
		.read_u64 = cgroup_read_notify_on_release,
		.write_u64 = cgroup_write_notify_on_release,
	},
	{
		/* write-only file handled by cgroup_write_event_control() */
		.name = CGROUP_FILE_GENERIC_PREFIX "event_control",
		.write_string = cgroup_write_event_control,
		.mode = S_IWUGO,
	},
};
3184
/*
 * The "release_agent" control file.  cgroup_populate_dir() adds it only to
 * the top cgroup of a hierarchy.  Writes are limited to PATH_MAX bytes.
 */
static struct cftype cft_release_agent = {
	.name = "release_agent",
	.read_seq_string = cgroup_release_agent_show,
	.write_string = cgroup_release_agent_write,
	.max_write_len = PATH_MAX,
};
3191
3192static int cgroup_populate_dir(struct cgroup *cgrp)
3193{
3194 int err;
3195 struct cgroup_subsys *ss;
3196
3197
3198 cgroup_clear_directory(cgrp->dentry);
3199
3200 err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
3201 if (err < 0)
3202 return err;
3203
3204 if (cgrp == cgrp->top_cgroup) {
3205 if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0)
3206 return err;
3207 }
3208
3209 for_each_subsys(cgrp->root, ss) {
3210 if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
3211 return err;
3212 }
3213
3214 for_each_subsys(cgrp->root, ss) {
3215 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
3216
3217
3218
3219
3220
3221 if (css->id)
3222 rcu_assign_pointer(css->id->css, css);
3223 }
3224
3225 return 0;
3226}
3227
3228static void init_cgroup_css(struct cgroup_subsys_state *css,
3229 struct cgroup_subsys *ss,
3230 struct cgroup *cgrp)
3231{
3232 css->cgroup = cgrp;
3233 atomic_set(&css->refcnt, 1);
3234 css->flags = 0;
3235 css->id = NULL;
3236 if (cgrp == dummytop)
3237 set_bit(CSS_ROOT, &css->flags);
3238 BUG_ON(cgrp->subsys[ss->subsys_id]);
3239 cgrp->subsys[ss->subsys_id] = css;
3240}
3241
3242static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
3243{
3244
3245 int i;
3246
3247
3248
3249
3250
3251 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3252 struct cgroup_subsys *ss = subsys[i];
3253 if (ss == NULL)
3254 continue;
3255 if (ss->root == root)
3256 mutex_lock(&ss->hierarchy_mutex);
3257 }
3258}
3259
3260static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
3261{
3262 int i;
3263
3264 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3265 struct cgroup_subsys *ss = subsys[i];
3266 if (ss == NULL)
3267 continue;
3268 if (ss->root == root)
3269 mutex_unlock(&ss->hierarchy_mutex);
3270 }
3271}
3272
3273
3274
3275
3276
3277
3278
3279
3280
/*
 * cgroup_create - create a new cgroup below @parent
 * @parent: cgroup that will contain the new cgroup
 * @dentry: dentry of the directory being created
 * @mode: mode handed through to cgroup_create_dir()
 *
 * Allocates the cgroup, lets every subsystem of the hierarchy create its
 * state, links the cgroup into @parent's children and creates/populates the
 * directory.
 *
 * Returns 0 on success or a negative errno.  On success the i_mutex of the
 * new directory inode is released here (it is asserted to be held after
 * cgroup_create_dir()).
 */
static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
			  mode_t mode)
{
	struct cgroup *cgrp;
	struct cgroupfs_root *root = parent->root;
	int err = 0;
	struct cgroup_subsys *ss;
	struct super_block *sb = root->sb;

	cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
	if (!cgrp)
		return -ENOMEM;

	/*
	 * Take an active reference on the superblock.  It is dropped with
	 * deactivate_super() on the failure path and kept on success.
	 */
	atomic_inc(&sb->s_active);

	mutex_lock(&cgroup_mutex);

	init_cgroup_housekeeping(cgrp);

	cgrp->parent = parent;
	cgrp->root = parent->root;
	cgrp->top_cgroup = parent->top_cgroup;

	/* notify_on_release is inherited from the parent */
	if (notify_on_release(parent))
		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);

	/* create the per-subsystem state, plus a css_id where requested */
	for_each_subsys(root, ss) {
		struct cgroup_subsys_state *css = ss->create(ss, cgrp);

		if (IS_ERR(css)) {
			err = PTR_ERR(css);
			goto err_destroy;
		}
		init_cgroup_css(css, ss, cgrp);
		if (ss->use_id) {
			err = alloc_css_id(ss, parent, cgrp);
			if (err)
				goto err_destroy;
		}
		/* At error, ->destroy() callback has to free assigned ID. */
	}

	/* the sibling list is modified under the hierarchy mutexes */
	cgroup_lock_hierarchy(root);
	list_add(&cgrp->sibling, &cgrp->parent->children);
	cgroup_unlock_hierarchy(root);
	root->number_of_cgroups++;

	err = cgroup_create_dir(cgrp, dentry, mode);
	if (err < 0)
		goto err_remove;

	/* The cgroup directory was pre-locked for us */
	BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));

	/*
	 * NOTE: the result is stored but not acted upon; a failure here still
	 * reports success and may leave a partially populated directory.
	 */
	err = cgroup_populate_dir(cgrp);

	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);

	return 0;

 err_remove:

	/* undo the sibling linkage and the cgroup count */
	cgroup_lock_hierarchy(root);
	list_del(&cgrp->sibling);
	cgroup_unlock_hierarchy(root);
	root->number_of_cgroups--;

 err_destroy:

	/* destroy only the subsystem states that were actually installed */
	for_each_subsys(root, ss) {
		if (cgrp->subsys[ss->subsys_id])
			ss->destroy(ss, cgrp);
	}

	mutex_unlock(&cgroup_mutex);

	/* Release the reference count that we took on the superblock */
	deactivate_super(sb);

	kfree(cgrp);
	return err;
}
3370
3371static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
3372{
3373 struct cgroup *c_parent = dentry->d_parent->d_fsdata;
3374
3375
3376 return cgroup_create(c_parent, dentry, mode | S_IFDIR);
3377}
3378
3379static int cgroup_has_css_refs(struct cgroup *cgrp)
3380{
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390 int i;
3391
3392
3393
3394
3395
3396 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3397 struct cgroup_subsys *ss = subsys[i];
3398 struct cgroup_subsys_state *css;
3399
3400 if (ss == NULL || ss->root != cgrp->root)
3401 continue;
3402 css = cgrp->subsys[ss->subsys_id];
3403
3404
3405
3406
3407
3408
3409 if (css && (atomic_read(&css->refcnt) > 1))
3410 return 1;
3411 }
3412 return 0;
3413}
3414
3415
3416
3417
3418
3419
3420
/*
 * cgroup_clear_css_refs - try to drop every css refcount of @cgrp to zero
 * @cgrp: cgroup whose subsystem states are being cleared
 *
 * For each subsystem of the hierarchy, atomically moves the css refcount
 * from 1 to 0.  If any css is found with a count above 1 the attempt is
 * abandoned and every count already zeroed is put back to 1.  On full
 * success every css is flagged CSS_REMOVED.  Runs with local interrupts
 * disabled.
 *
 * Returns nonzero when all counts were cleared, 0 otherwise.
 */
static int cgroup_clear_css_refs(struct cgroup *cgrp)
{
	struct cgroup_subsys *ss;
	unsigned long flags;
	bool failed = false;
	local_irq_save(flags);
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		int refcnt;
		while (1) {
			/* We can only remove a CSS with a refcnt==1 */
			refcnt = atomic_read(&css->refcnt);
			if (refcnt > 1) {
				failed = true;
				goto done;
			}
			/* a count of 0 here would mean it was already cleared */
			BUG_ON(!refcnt);
			/*
			 * Drop the refcnt to 0 only if it is still the value
			 * we just read; otherwise re-read and retry.
			 */
			if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt)
				break;
			cpu_relax();
		}
	}
 done:
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		if (failed) {
			/*
			 * Restore old refcnt if we previously managed
			 * to clear it from 1 to 0
			 */
			if (!atomic_read(&css->refcnt))
				atomic_set(&css->refcnt, 1);
		} else {
			/* Commit the fact that the CSS is removed */
			set_bit(CSS_REMOVED, &css->flags);
		}
	}
	local_irq_restore(flags);
	return !failed;
}
3467
/*
 * cgroup_rmdir - rmdir handler: remove the cgroup behind @dentry
 * @unused_dir: inode of the parent directory (not used)
 * @dentry: dentry of the cgroup directory being removed
 *
 * Returns 0 on success, -EBUSY if the cgroup still has a nonzero count or
 * children, -EINTR if a signal is pending after waiting for css references
 * to drain, or the error returned by cgroup_call_pre_destroy().
 */
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
{
	struct cgroup *cgrp = dentry->d_fsdata;
	struct dentry *d;
	struct cgroup *parent;
	DEFINE_WAIT(wait);
	struct cgroup_event *event, *tmp;
	int ret;

	/* the cgroup directory must be unused and childless */
again:
	mutex_lock(&cgroup_mutex);
	if (atomic_read(&cgrp->count) != 0) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	if (!list_empty(&cgrp->children)) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	mutex_unlock(&cgroup_mutex);

	/*
	 * Mark this cgroup as having a waiter in rmdir before calling the
	 * pre-destroy hooks, which run without cgroup_mutex held.  The flag
	 * is re-tested below before sleeping.
	 * NOTE(review): presumably cgroup_wakeup_rmdir_waiter() clears the
	 * flag; that helper is not visible here.
	 */
	set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	ret = cgroup_call_pre_destroy(cgrp);
	if (ret) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		return ret;
	}

	/* re-check under the mutex: state may have changed while unlocked */
	mutex_lock(&cgroup_mutex);
	parent = cgrp->parent;
	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
	if (!cgroup_clear_css_refs(cgrp)) {
		mutex_unlock(&cgroup_mutex);
		/*
		 * Some css still holds extra references.  Sleep only if the
		 * wait flag is still set, then retry from the top.
		 */
		if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
			schedule();
		finish_wait(&cgroup_rmdir_waitq, &wait);
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		if (signal_pending(current))
			return -EINTR;
		goto again;
	}
	/* all css refcounts are now 0 and the css are flagged CSS_REMOVED */
	finish_wait(&cgroup_rmdir_waitq, &wait);
	clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	/* mark removed and take the cgroup off the release list, if queued */
	spin_lock(&release_list_lock);
	set_bit(CGRP_REMOVED, &cgrp->flags);
	if (!list_empty(&cgrp->release_list))
		list_del(&cgrp->release_list);
	spin_unlock(&release_list_lock);

	cgroup_lock_hierarchy(cgrp->root);
	/* delete this cgroup from parent->children */
	list_del(&cgrp->sibling);
	cgroup_unlock_hierarchy(cgrp->root);

	/* hold a dentry reference across the directory removal */
	spin_lock(&cgrp->dentry->d_lock);
	d = dget(cgrp->dentry);
	spin_unlock(&d->d_lock);

	cgroup_d_remove_dir(d);
	dput(d);

	/* the parent may have become releasable now */
	set_bit(CGRP_RELEASABLE, &parent->flags);
	check_for_release(parent);

	/*
	 * Unregister events and notify userspace: each event is unhooked
	 * from its wait queue, its eventfd is signalled and its removal work
	 * is scheduled.
	 */
	spin_lock(&cgrp->event_list_lock);
	list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
		list_del(&event->list);
		remove_wait_queue(event->wqh, &event->wait);
		eventfd_signal(event->eventfd, 1);
		schedule_work(&event->remove);
	}
	spin_unlock(&cgrp->event_list_lock);

	mutex_unlock(&cgroup_mutex);
	return 0;
}
3575
/*
 * cgroup_init_subsys - boot-time initialisation of a built-in subsystem
 * @ss: subsystem to initialise
 *
 * Attaches @ss to rootnode, creates its state for dummytop and records that
 * state in init_css_set.  Failure of ->create() is fatal (BUG).
 */
static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);

	/* Create the top cgroup state for this subsystem */
	list_add(&ss->sibling, &rootnode.subsys_list);
	ss->root = &rootnode;
	css = ss->create(ss, dummytop);
	/* We don't handle early failures gracefully */
	BUG_ON(IS_ERR(css));
	init_cgroup_css(css, ss, dummytop);

	/* init_css_set shares the state just installed on dummytop */
	init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];

	need_forkexit_callback |= ss->fork || ss->exit;

	/* init_task's task list must still be empty at this point */
	BUG_ON(!list_empty(&init_task.tasks));

	mutex_init(&ss->hierarchy_mutex);
	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
	ss->active = 1;

	/* this function is only for subsystems without an owning module */
	BUG_ON(ss->module);
}
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
3622{
3623 int i;
3624 struct cgroup_subsys_state *css;
3625
3626
3627 if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
3628 ss->create == NULL || ss->destroy == NULL)
3629 return -EINVAL;
3630
3631
3632
3633
3634
3635
3636
3637 if (ss->fork || ss->exit)
3638 return -EINVAL;
3639
3640
3641
3642
3643
3644 if (ss->module == NULL) {
3645
3646 BUG_ON(ss->subsys_id >= CGROUP_BUILTIN_SUBSYS_COUNT);
3647 BUG_ON(subsys[ss->subsys_id] != ss);
3648 return 0;
3649 }
3650
3651
3652
3653
3654
3655 mutex_lock(&cgroup_mutex);
3656
3657 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
3658 if (subsys[i] == NULL)
3659 break;
3660 }
3661 if (i == CGROUP_SUBSYS_COUNT) {
3662
3663 mutex_unlock(&cgroup_mutex);
3664 return -EBUSY;
3665 }
3666
3667 ss->subsys_id = i;
3668 subsys[i] = ss;
3669
3670
3671
3672
3673
3674 css = ss->create(ss, dummytop);
3675 if (IS_ERR(css)) {
3676
3677 subsys[i] = NULL;
3678 mutex_unlock(&cgroup_mutex);
3679 return PTR_ERR(css);
3680 }
3681
3682 list_add(&ss->sibling, &rootnode.subsys_list);
3683 ss->root = &rootnode;
3684
3685
3686 init_cgroup_css(css, ss, dummytop);
3687
3688 if (ss->use_id) {
3689 int ret = cgroup_init_idr(ss, css);
3690 if (ret) {
3691 dummytop->subsys[ss->subsys_id] = NULL;
3692 ss->destroy(ss, dummytop);
3693 subsys[i] = NULL;
3694 mutex_unlock(&cgroup_mutex);
3695 return ret;
3696 }
3697 }
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707 write_lock(&css_set_lock);
3708 for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
3709 struct css_set *cg;
3710 struct hlist_node *node, *tmp;
3711 struct hlist_head *bucket = &css_set_table[i], *new_bucket;
3712
3713 hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) {
3714
3715 if (cg->subsys[ss->subsys_id])
3716 continue;
3717
3718 hlist_del(&cg->hlist);
3719
3720 cg->subsys[ss->subsys_id] = css;
3721
3722 new_bucket = css_set_hash(cg->subsys);
3723 hlist_add_head(&cg->hlist, new_bucket);
3724 }
3725 }
3726 write_unlock(&css_set_lock);
3727
3728 mutex_init(&ss->hierarchy_mutex);
3729 lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
3730 ss->active = 1;
3731
3732
3733 mutex_unlock(&cgroup_mutex);
3734 return 0;
3735}
3736EXPORT_SYMBOL_GPL(cgroup_load_subsys);
3737
3738
3739
3740
3741
3742
3743
3744
3745
/**
 * cgroup_unload_subsys - unregister a modular subsystem
 * @ss: the subsystem to unload
 *
 * Only valid for a subsystem that has an owning module, occupies a modular
 * slot and is attached to rootnode; anything else is a BUG.  The subsystem
 * is removed from subsys[] and rootnode's list, its pointer is cleared from
 * every css_set linked to dummytop (rehashing each), and its dummytop state
 * is destroyed.
 */
void cgroup_unload_subsys(struct cgroup_subsys *ss)
{
	struct cg_cgroup_link *link;
	struct hlist_head *hhead;

	BUG_ON(ss->module == NULL);

	/* the subsystem must not be attached to any hierarchy but rootnode */
	BUG_ON(ss->root != &rootnode);

	mutex_lock(&cgroup_mutex);
	/* deassign the subsys_id */
	BUG_ON(ss->subsys_id < CGROUP_BUILTIN_SUBSYS_COUNT);
	subsys[ss->subsys_id] = NULL;

	/* remove subsystem from rootnode's list of subsystems */
	list_del(&ss->sibling);

	/*
	 * Clear this subsystem's pointer in every css_set linked to dummytop.
	 * The hash depends on the subsys[] pointers, so each css_set is
	 * unhashed first and re-added afterwards.
	 */
	write_lock(&css_set_lock);
	list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) {
		struct css_set *cg = link->cg;

		hlist_del(&cg->hlist);
		BUG_ON(!cg->subsys[ss->subsys_id]);
		cg->subsys[ss->subsys_id] = NULL;
		hhead = css_set_hash(cg->subsys);
		hlist_add_head(&cg->hlist, hhead);
	}
	write_unlock(&css_set_lock);

	/*
	 * ->destroy() runs while dummytop->subsys[] still points at the
	 * state; the pointer is cleared only afterwards.
	 */
	ss->destroy(ss, dummytop);
	dummytop->subsys[ss->subsys_id] = NULL;

	mutex_unlock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
3796
3797
3798
3799
3800
3801
3802
/**
 * cgroup_init_early - early cgroup initialisation
 *
 * Sets up init_css_set and rootnode, links them together, initialises the
 * css_set hash table and initialises those built-in subsystems that request
 * early init.  Always returns 0; inconsistencies in a built-in subsystem
 * description are fatal (BUG).
 */
int __init cgroup_init_early(void)
{
	int i;
	atomic_set(&init_css_set.refcount, 1);
	INIT_LIST_HEAD(&init_css_set.cg_links);
	INIT_LIST_HEAD(&init_css_set.tasks);
	INIT_HLIST_NODE(&init_css_set.hlist);
	css_set_count = 1;
	init_cgroup_root(&rootnode);
	root_count = 1;
	init_task.cgroups = &init_css_set;

	/* link init_css_set and dummytop through the static link object */
	init_css_set_link.cg = &init_css_set;
	init_css_set_link.cgrp = dummytop;
	list_add(&init_css_set_link.cgrp_link_list,
		 &rootnode.top_cgroup.css_sets);
	list_add(&init_css_set_link.cg_link_list,
		 &init_css_set.cg_links);

	for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&css_set_table[i]);

	/* at bootup time, we don't worry about modular subsystems */
	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];

		BUG_ON(!ss->name);
		BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
		BUG_ON(!ss->create);
		BUG_ON(!ss->destroy);
		/* a built-in subsystem's id must equal its array index */
		if (ss->subsys_id != i) {
			printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
			       ss->name, ss->subsys_id);
			BUG();
		}

		if (ss->early_init)
			cgroup_init_subsys(ss);
	}
	return 0;
}
3844
3845
3846
3847
3848
3849
3850
3851int __init cgroup_init(void)
3852{
3853 int err;
3854 int i;
3855 struct hlist_head *hhead;
3856
3857 err = bdi_init(&cgroup_backing_dev_info);
3858 if (err)
3859 return err;
3860
3861
3862 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3863 struct cgroup_subsys *ss = subsys[i];
3864 if (!ss->early_init)
3865 cgroup_init_subsys(ss);
3866 if (ss->use_id)
3867 cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
3868 }
3869
3870
3871 hhead = css_set_hash(init_css_set.subsys);
3872 hlist_add_head(&init_css_set.hlist, hhead);
3873 BUG_ON(!init_root_id(&rootnode));
3874 err = register_filesystem(&cgroup_fs_type);
3875 if (err < 0)
3876 goto out;
3877
3878 proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);
3879
3880out:
3881 if (err)
3882 bdi_destroy(&cgroup_backing_dev_info);
3883
3884 return err;
3885}
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900static int proc_cgroup_show(struct seq_file *m, void *v)
3901{
3902 struct pid *pid;
3903 struct task_struct *tsk;
3904 char *buf;
3905 int retval;
3906 struct cgroupfs_root *root;
3907
3908 retval = -ENOMEM;
3909 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
3910 if (!buf)
3911 goto out;
3912
3913 retval = -ESRCH;
3914 pid = m->private;
3915 tsk = get_pid_task(pid, PIDTYPE_PID);
3916 if (!tsk)
3917 goto out_free;
3918
3919 retval = 0;
3920
3921 mutex_lock(&cgroup_mutex);
3922
3923 for_each_active_root(root) {
3924 struct cgroup_subsys *ss;
3925 struct cgroup *cgrp;
3926 int count = 0;
3927
3928 seq_printf(m, "%d:", root->hierarchy_id);
3929 for_each_subsys(root, ss)
3930 seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
3931 if (strlen(root->name))
3932 seq_printf(m, "%sname=%s", count ? "," : "",
3933 root->name);
3934 seq_putc(m, ':');
3935 cgrp = task_cgroup_from_root(tsk, root);
3936 retval = cgroup_path(cgrp, buf, PAGE_SIZE);
3937 if (retval < 0)
3938 goto out_unlock;
3939 seq_puts(m, buf);
3940 seq_putc(m, '\n');
3941 }
3942
3943out_unlock:
3944 mutex_unlock(&cgroup_mutex);
3945 put_task_struct(tsk);
3946out_free:
3947 kfree(buf);
3948out:
3949 return retval;
3950}
3951
3952static int cgroup_open(struct inode *inode, struct file *file)
3953{
3954 struct pid *pid = PROC_I(inode)->pid;
3955 return single_open(file, proc_cgroup_show, pid);
3956}
3957
/*
 * File operations of the per-task proc cgroup file: a single_open() style
 * seq_file whose content is produced by proc_cgroup_show().
 */
const struct file_operations proc_cgroup_operations = {
	.open = cgroup_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
3964
3965
3966static int proc_cgroupstats_show(struct seq_file *m, void *v)
3967{
3968 int i;
3969
3970 seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
3971
3972
3973
3974
3975
3976 mutex_lock(&cgroup_mutex);
3977 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3978 struct cgroup_subsys *ss = subsys[i];
3979 if (ss == NULL)
3980 continue;
3981 seq_printf(m, "%s\t%d\t%d\t%d\n",
3982 ss->name, ss->root->hierarchy_id,
3983 ss->root->number_of_cgroups, !ss->disabled);
3984 }
3985 mutex_unlock(&cgroup_mutex);
3986 return 0;
3987}
3988
/*
 * cgroupstats_open - open handler of the "cgroups" proc file; binds
 * proc_cgroupstats_show() to the file with no private data.
 */
static int cgroupstats_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_cgroupstats_show, NULL);
}
3993
/*
 * File operations of the "cgroups" proc entry created in cgroup_init(): a
 * single_open() style seq_file backed by proc_cgroupstats_show().
 */
static const struct file_operations proc_cgroupstats_operations = {
	.open = cgroupstats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
/**
 * cgroup_fork - attach a newly forked task to its parent's css_set
 * @child: the new task
 *
 * Under task_lock(current), @child is pointed at current's css_set and a
 * reference on that css_set is taken.  @child->cg_list is initialised empty;
 * cgroup_post_fork() may link it into the css_set's task list afterwards.
 */
void cgroup_fork(struct task_struct *child)
{
	task_lock(current);
	child->cgroups = current->cgroups;
	get_css_set(child->cgroups);
	task_unlock(current);
	INIT_LIST_HEAD(&child->cg_list);
}
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034void cgroup_fork_callbacks(struct task_struct *child)
4035{
4036 if (need_forkexit_callback) {
4037 int i;
4038
4039
4040
4041
4042
4043 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4044 struct cgroup_subsys *ss = subsys[i];
4045 if (ss->fork)
4046 ss->fork(ss, child);
4047 }
4048 }
4049}
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060void cgroup_post_fork(struct task_struct *child)
4061{
4062 if (use_task_css_set_links) {
4063 write_lock(&css_set_lock);
4064 task_lock(child);
4065 if (list_empty(&child->cg_list))
4066 list_add(&child->cg_list, &child->cgroups->tasks);
4067 task_unlock(child);
4068 write_unlock(&css_set_lock);
4069 }
4070}
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
/**
 * cgroup_exit - detach an exiting task from its css_set
 * @tsk: the exiting task
 * @run_callbacks: nonzero to run the subsystems' exit callbacks
 *
 * Optionally runs the exit callbacks of the built-in subsystems, unlinks
 * @tsk from its css_set's task list, repoints @tsk at init_css_set and drops
 * the reference on the old css_set.
 */
void cgroup_exit(struct task_struct *tsk, int run_callbacks)
{
	int i;
	struct css_set *cg;

	if (run_callbacks && need_forkexit_callback) {
		/* only the built-in part of subsys[] is walked */
		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss->exit)
				ss->exit(ss, tsk);
		}
	}

	/*
	 * Unlink from the css_set task list if necessary.  The emptiness
	 * check is done without the lock first and repeated once
	 * css_set_lock is held.
	 */
	if (!list_empty(&tsk->cg_list)) {
		write_lock(&css_set_lock);
		if (!list_empty(&tsk->cg_list))
			list_del(&tsk->cg_list);
		write_unlock(&css_set_lock);
	}

	/* Reassign the task to the init_css_set. */
	task_lock(tsk);
	cg = tsk->cgroups;
	tsk->cgroups = &init_css_set;
	task_unlock(tsk);
	/* drop the reference on the old css_set outside task_lock */
	if (cg)
		put_css_set_taskexit(cg);
}
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
/**
 * cgroup_clone - create a child cgroup of @tsk's cgroup and move @tsk into it
 * @tsk: the task to be moved
 * @subsys: the subsystem whose hierarchy is used
 * @nodename: name of the new cgroup directory
 *
 * Creates directory @nodename below the cgroup @tsk currently occupies in
 * @subsys's hierarchy, runs the post_clone callbacks and attaches @tsk to the
 * new cgroup.
 *
 * Returns 0 without doing anything when @subsys is attached to rootnode or
 * when no active superblock reference can be taken; otherwise returns the
 * result of the lookup, mkdir or attach step.
 */
int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
		 char *nodename)
{
	struct dentry *dentry;
	int ret = 0;
	struct cgroup *parent, *child;
	struct inode *inode;
	struct css_set *cg;
	struct cgroupfs_root *root;
	struct cgroup_subsys *ss;

	/* We shouldn't be called by an unregistered subsystem */
	BUG_ON(!subsys->active);

	/* First figure out what hierarchy and cgroup we're dealing
	 * with, and pin them so we can drop cgroup_mutex */
	mutex_lock(&cgroup_mutex);
 again:
	root = subsys->root;
	if (root == &rootnode) {
		mutex_unlock(&cgroup_mutex);
		return 0;
	}

	/* Pin the hierarchy */
	if (!atomic_inc_not_zero(&root->sb->s_active)) {
		/* We race with the final deactivate_super() */
		mutex_unlock(&cgroup_mutex);
		return 0;
	}

	/* Keep the cgroup alive */
	task_lock(tsk);
	parent = task_cgroup(tsk, subsys->subsys_id);
	cg = tsk->cgroups;
	get_css_set(cg);
	task_unlock(tsk);

	mutex_unlock(&cgroup_mutex);

	/* Now do the VFS work to create a cgroup */
	inode = parent->dentry->d_inode;

	/* Hold the parent directory mutex across this operation to
	 * stop anyone else deleting the new cgroup */
	mutex_lock(&inode->i_mutex);
	dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
	if (IS_ERR(dentry)) {
		printk(KERN_INFO
		       "cgroup: Couldn't allocate dentry for %s: %ld\n", nodename,
		       PTR_ERR(dentry));
		ret = PTR_ERR(dentry);
		goto out_release;
	}

	/* Create the cgroup directory, which also creates the cgroup */
	ret = vfs_mkdir(inode, dentry, 0755);
	child = __d_cgrp(dentry);
	dput(dentry);
	if (ret) {
		printk(KERN_INFO
		       "Failed to create cgroup %s: %d\n", nodename,
		       ret);
		goto out_release;
	}

	/* The cgroup now exists. Retake cgroup_mutex and check
	 * that we're still in the same state that we thought we
	 * were. */
	mutex_lock(&cgroup_mutex);
	if ((root != subsys->root) ||
	    (parent != task_cgroup(tsk, subsys->subsys_id))) {
		/* Aargh, we raced ... */
		mutex_unlock(&inode->i_mutex);
		put_css_set(cg);

		deactivate_super(root->sb);
		/*
		 * The cgroup just created is not removed here; the message
		 * below reports it as leaked.  Retry with cgroup_mutex still
		 * held, as the "again" label expects.
		 */
		printk(KERN_INFO
		       "Race in cgroup_clone() - leaking cgroup %s\n",
		       nodename);
		goto again;
	}

	/* do any required auto-setup */
	for_each_subsys(root, ss) {
		if (ss->post_clone)
			ss->post_clone(ss, child);
	}

	/* All seems fine. Finish by moving the task into the new cgroup */
	ret = cgroup_attach_task(child, tsk);
	mutex_unlock(&cgroup_mutex);

 out_release:
	mutex_unlock(&inode->i_mutex);

	/* drop the css_set and superblock references taken above */
	mutex_lock(&cgroup_mutex);
	put_css_set(cg);
	mutex_unlock(&cgroup_mutex);
	deactivate_super(root->sb);
	return ret;
}
4259
4260
4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
4273int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
4274{
4275 int ret;
4276 struct cgroup *target;
4277
4278 if (cgrp == dummytop)
4279 return 1;
4280
4281 target = task_cgroup_from_root(task, cgrp->root);
4282 while (cgrp != target && cgrp!= cgrp->top_cgroup)
4283 cgrp = cgrp->parent;
4284 ret = (cgrp == target);
4285 return ret;
4286}
4287
4288static void check_for_release(struct cgroup *cgrp)
4289{
4290
4291
4292 if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
4293 && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
4294
4295
4296
4297 int need_schedule_work = 0;
4298 spin_lock(&release_list_lock);
4299 if (!cgroup_is_removed(cgrp) &&
4300 list_empty(&cgrp->release_list)) {
4301 list_add(&cgrp->release_list, &release_list);
4302 need_schedule_work = 1;
4303 }
4304 spin_unlock(&release_list_lock);
4305 if (need_schedule_work)
4306 schedule_work(&release_agent_work);
4307 }
4308}
4309
4310
/*
 * __css_put - drop @count references from @css
 * @css: subsystem state being released
 * @count: number of references to drop
 *
 * When the count falls to exactly 1, the owning cgroup is checked for
 * release (if it has notify_on_release set) and any rmdir waiter is woken.
 * A resulting count below 1 triggers a one-time warning.
 */
void __css_put(struct cgroup_subsys_state *css, int count)
{
	struct cgroup *cgrp = css->cgroup;
	int val;
	/* the whole update and notification run in an RCU read-side section */
	rcu_read_lock();
	val = atomic_sub_return(count, &css->refcnt);
	if (val == 1) {
		if (notify_on_release(cgrp)) {
			set_bit(CGRP_RELEASABLE, &cgrp->flags);
			check_for_release(cgrp);
		}
		cgroup_wakeup_rmdir_waiter(cgrp);
	}
	rcu_read_unlock();
	WARN_ON_ONCE(val < 1);
}
EXPORT_SYMBOL_GPL(__css_put);
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
/*
 * cgroup_release_agent - work function that runs the release agent
 * @work: must be &release_agent_work
 *
 * Drains release_list.  For each cgroup it builds the argument vector
 * { <release agent path of the hierarchy>, <cgroup path>, NULL } and a
 * minimal environment, then starts the agent with call_usermodehelper().
 * Entries whose buffers cannot be allocated or whose path cannot be built
 * are skipped.
 */
static void cgroup_release_agent(struct work_struct *work)
{
	BUG_ON(work != &release_agent_work);
	mutex_lock(&cgroup_mutex);
	spin_lock(&release_list_lock);
	while (!list_empty(&release_list)) {
		char *argv[3], *envp[3];
		int i;
		char *pathbuf = NULL, *agentbuf = NULL;
		struct cgroup *cgrp = list_entry(release_list.next,
						    struct cgroup,
						    release_list);
		list_del_init(&cgrp->release_list);
		/* the spinlock is dropped before the GFP_KERNEL allocations */
		spin_unlock(&release_list_lock);
		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!pathbuf)
			goto continue_free;
		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
			goto continue_free;
		agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
		if (!agentbuf)
			goto continue_free;

		i = 0;
		argv[i++] = agentbuf;
		argv[i++] = pathbuf;
		argv[i] = NULL;

		i = 0;
		/* minimal command environment */
		envp[i++] = "HOME=/";
		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
		envp[i] = NULL;

		/*
		 * Drop cgroup_mutex around the helper invocation; its return
		 * value is not checked.
		 */
		mutex_unlock(&cgroup_mutex);
		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
		mutex_lock(&cgroup_mutex);
 continue_free:
		/* kfree(NULL) is fine for buffers that were never allocated */
		kfree(pathbuf);
		kfree(agentbuf);
		spin_lock(&release_list_lock);
	}
	spin_unlock(&release_list_lock);
	mutex_unlock(&cgroup_mutex);
}
4400
4401static int __init cgroup_disable(char *str)
4402{
4403 int i;
4404 char *token;
4405
4406 while ((token = strsep(&str, ",")) != NULL) {
4407 if (!*token)
4408 continue;
4409
4410
4411
4412
4413 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4414 struct cgroup_subsys *ss = subsys[i];
4415
4416 if (!strcmp(token, ss->name)) {
4417 ss->disabled = 1;
4418 printk(KERN_INFO "Disabling %s control group"
4419 " subsystem\n", ss->name);
4420 break;
4421 }
4422 }
4423 }
4424 return 1;
4425}
4426__setup("cgroup_disable=", cgroup_disable);
4427
4428
4429
4430
4431
4432
4433
4434
4435unsigned short css_id(struct cgroup_subsys_state *css)
4436{
4437 struct css_id *cssid;
4438
4439
4440
4441
4442
4443
4444 cssid = rcu_dereference_check(css->id,
4445 rcu_read_lock_held() || atomic_read(&css->refcnt));
4446
4447 if (cssid)
4448 return cssid->id;
4449 return 0;
4450}
4451EXPORT_SYMBOL_GPL(css_id);
4452
4453unsigned short css_depth(struct cgroup_subsys_state *css)
4454{
4455 struct css_id *cssid;
4456
4457 cssid = rcu_dereference_check(css->id,
4458 rcu_read_lock_held() || atomic_read(&css->refcnt));
4459
4460 if (cssid)
4461 return cssid->depth;
4462 return 0;
4463}
4464EXPORT_SYMBOL_GPL(css_depth);
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479bool css_is_ancestor(struct cgroup_subsys_state *child,
4480 const struct cgroup_subsys_state *root)
4481{
4482 struct css_id *child_id;
4483 struct css_id *root_id;
4484 bool ret = true;
4485
4486 rcu_read_lock();
4487 child_id = rcu_dereference(child->id);
4488 root_id = rcu_dereference(root->id);
4489 if (!child_id
4490 || !root_id
4491 || (child_id->depth < root_id->depth)
4492 || (child_id->stack[root_id->depth] != root_id->id))
4493 ret = false;
4494 rcu_read_unlock();
4495 return ret;
4496}
4497
4498static void __free_css_id_cb(struct rcu_head *head)
4499{
4500 struct css_id *id;
4501
4502 id = container_of(head, struct css_id, rcu_head);
4503 kfree(id);
4504}
4505
/*
 * free_css_id - release the css_id attached to @css
 * @ss: subsystem owning the id space (must have use_id set)
 * @css: subsystem state whose id is released
 *
 * No-op when @css has no id.  Otherwise the two cross pointers are cleared,
 * the id is removed from @ss's idr under id_lock, and the css_id structure
 * itself is freed through call_rcu().
 */
void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
{
	struct css_id *id = css->id;
	/* When this is called before css_id initialization, id can be NULL */
	if (!id)
		return;

	BUG_ON(!ss->use_id);

	/* break the css <-> css_id links before the id leaves the idr */
	rcu_assign_pointer(id->css, NULL);
	rcu_assign_pointer(css->id, NULL);
	spin_lock(&ss->id_lock);
	idr_remove(&ss->idr, id->id);
	spin_unlock(&ss->id_lock);
	/* the actual kfree() is deferred to __free_css_id_cb() */
	call_rcu(&id->rcu_head, __free_css_id_cb);
}
EXPORT_SYMBOL_GPL(free_css_id);
4523
4524
4525
4526
4527
4528
/*
 * get_new_cssid - allocate a css_id with a fresh id in @ss's idr
 * @ss: subsystem owning the id space (must have use_id set)
 * @depth: depth of the new id; the trailing stack gets @depth + 1 entries
 *
 * Returns the new, zero-initialised css_id with ->id and ->depth filled in,
 * or ERR_PTR(-ENOMEM) / ERR_PTR(-ENOSPC) on failure.  The caller fills in
 * the stack.
 */
static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
{
	struct css_id *newid;
	int myid, error, size;

	BUG_ON(!ss->use_id);

	/* header plus one unsigned short per level, 0..depth inclusive */
	size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
	newid = kzalloc(size, GFP_KERNEL);
	if (!newid)
		return ERR_PTR(-ENOMEM);
	/* get id */
	if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) {
		error = -ENOMEM;
		goto err_out;
	}
	spin_lock(&ss->id_lock);
	/* Don't use 0. allocates an ID of 1-65535 */
	error = idr_get_new_above(&ss->idr, newid, 1, &myid);
	spin_unlock(&ss->id_lock);

	/* Returns error when there are no free spaces for new ID.*/
	if (error) {
		error = -ENOSPC;
		goto err_out;
	}
	/* ids above CSS_ID_MAX are handed back to the idr */
	if (myid > CSS_ID_MAX)
		goto remove_idr;

	newid->id = myid;
	newid->depth = depth;
	return newid;
remove_idr:
	error = -ENOSPC;
	spin_lock(&ss->id_lock);
	idr_remove(&ss->idr, myid);
	spin_unlock(&ss->id_lock);
err_out:
	kfree(newid);
	return ERR_PTR(error);

}
4571
/*
 * cgroup_init_idr - set up the css_id space of @ss and the id of its root css
 * @ss: subsystem whose idr and id_lock are initialised
 * @rootcss: root subsystem state; receives a depth-0 css_id
 *
 * Returns 0 on success or the error from get_new_cssid().
 */
static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
					    struct cgroup_subsys_state *rootcss)
{
	struct css_id *newid;

	spin_lock_init(&ss->id_lock);
	idr_init(&ss->idr);

	newid = get_new_cssid(ss, 0);
	if (IS_ERR(newid))
		return PTR_ERR(newid);

	/* the root's id stack holds only its own id */
	newid->stack[0] = newid->id;
	newid->css = rootcss;
	rootcss->id = newid;
	return 0;
}
4589
4590static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
4591 struct cgroup *child)
4592{
4593 int subsys_id, i, depth = 0;
4594 struct cgroup_subsys_state *parent_css, *child_css;
4595 struct css_id *child_id, *parent_id;
4596
4597 subsys_id = ss->subsys_id;
4598 parent_css = parent->subsys[subsys_id];
4599 child_css = child->subsys[subsys_id];
4600 parent_id = parent_css->id;
4601 depth = parent_id->depth + 1;
4602
4603 child_id = get_new_cssid(ss, depth);
4604 if (IS_ERR(child_id))
4605 return PTR_ERR(child_id);
4606
4607 for (i = 0; i < depth; i++)
4608 child_id->stack[i] = parent_id->stack[i];
4609 child_id->stack[depth] = child_id->id;
4610
4611
4612
4613
4614 rcu_assign_pointer(child_css->id, child_id);
4615
4616 return 0;
4617}
4618
4619
4620
4621
4622
4623
4624
4625
4626
4627struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
4628{
4629 struct css_id *cssid = NULL;
4630
4631 BUG_ON(!ss->use_id);
4632 cssid = idr_find(&ss->idr, id);
4633
4634 if (unlikely(!cssid))
4635 return NULL;
4636
4637 return rcu_dereference(cssid->css);
4638}
4639EXPORT_SYMBOL_GPL(css_lookup);
4640
4641
4642
4643
4644
4645
4646
4647
4648
4649
4650
4651struct cgroup_subsys_state *
4652css_get_next(struct cgroup_subsys *ss, int id,
4653 struct cgroup_subsys_state *root, int *foundid)
4654{
4655 struct cgroup_subsys_state *ret = NULL;
4656 struct css_id *tmp;
4657 int tmpid;
4658 int rootid = css_id(root);
4659 int depth = css_depth(root);
4660
4661 if (!rootid)
4662 return NULL;
4663
4664 BUG_ON(!ss->use_id);
4665
4666 tmpid = id;
4667 while (1) {
4668
4669
4670
4671
4672 spin_lock(&ss->id_lock);
4673 tmp = idr_get_next(&ss->idr, &tmpid);
4674 spin_unlock(&ss->id_lock);
4675
4676 if (!tmp)
4677 break;
4678 if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
4679 ret = rcu_dereference(tmp->css);
4680 if (ret) {
4681 *foundid = tmpid;
4682 break;
4683 }
4684 }
4685
4686 tmpid = tmpid + 1;
4687 }
4688 return ret;
4689}
4690
4691#ifdef CONFIG_CGROUP_DEBUG
4692static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
4693 struct cgroup *cont)
4694{
4695 struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
4696
4697 if (!css)
4698 return ERR_PTR(-ENOMEM);
4699
4700 return css;
4701}
4702
/*
 * debug_destroy - free the debug subsystem state of @cont.  The state is
 * looked up through cont->subsys[], so that pointer must still be set.
 */
static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
{
	kfree(cont->subsys[debug_subsys_id]);
}
4707
/* "cgroup_refcount" file: current value of @cont->count. */
static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
{
	return atomic_read(&cont->count);
}
4712
/* "taskcount" file: value returned by cgroup_task_count() for @cont. */
static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft)
{
	return cgroup_task_count(cont);
}
4717
/*
 * "current_css_set" file: address of the reading task's css_set, reported
 * as an integer.  @cont is not used.
 */
static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
{
	return (u64)(unsigned long)current->cgroups;
}
4722
4723static u64 current_css_set_refcount_read(struct cgroup *cont,
4724 struct cftype *cft)
4725{
4726 u64 count;
4727
4728 rcu_read_lock();
4729 count = atomic_read(¤t->cgroups->refcount);
4730 rcu_read_unlock();
4731 return count;
4732}
4733
4734static int current_css_set_cg_links_read(struct cgroup *cont,
4735 struct cftype *cft,
4736 struct seq_file *seq)
4737{
4738 struct cg_cgroup_link *link;
4739 struct css_set *cg;
4740
4741 read_lock(&css_set_lock);
4742 rcu_read_lock();
4743 cg = rcu_dereference(current->cgroups);
4744 list_for_each_entry(link, &cg->cg_links, cg_link_list) {
4745 struct cgroup *c = link->cgrp;
4746 const char *name;
4747
4748 if (c->dentry)
4749 name = c->dentry->d_name.name;
4750 else
4751 name = "?";
4752 seq_printf(seq, "Root %d group %s\n",
4753 c->root->hierarchy_id, name);
4754 }
4755 rcu_read_unlock();
4756 read_unlock(&css_set_lock);
4757 return 0;
4758}
4759
4760#define MAX_TASKS_SHOWN_PER_CSS 25
4761static int cgroup_css_links_read(struct cgroup *cont,
4762 struct cftype *cft,
4763 struct seq_file *seq)
4764{
4765 struct cg_cgroup_link *link;
4766
4767 read_lock(&css_set_lock);
4768 list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
4769 struct css_set *cg = link->cg;
4770 struct task_struct *task;
4771 int count = 0;
4772 seq_printf(seq, "css_set %p\n", cg);
4773 list_for_each_entry(task, &cg->tasks, cg_list) {
4774 if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
4775 seq_puts(seq, " ...\n");
4776 break;
4777 } else {
4778 seq_printf(seq, " task %d\n",
4779 task_pid_vnr(task));
4780 }
4781 }
4782 }
4783 read_unlock(&css_set_lock);
4784 return 0;
4785}
4786
4787static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
4788{
4789 return test_bit(CGRP_RELEASABLE, &cgrp->flags);
4790}
4791
4792static struct cftype debug_files[] = {
4793 {
4794 .name = "cgroup_refcount",
4795 .read_u64 = cgroup_refcount_read,
4796 },
4797 {
4798 .name = "taskcount",
4799 .read_u64 = debug_taskcount_read,
4800 },
4801
4802 {
4803 .name = "current_css_set",
4804 .read_u64 = current_css_set_read,
4805 },
4806
4807 {
4808 .name = "current_css_set_refcount",
4809 .read_u64 = current_css_set_refcount_read,
4810 },
4811
4812 {
4813 .name = "current_css_set_cg_links",
4814 .read_seq_string = current_css_set_cg_links_read,
4815 },
4816
4817 {
4818 .name = "cgroup_css_links",
4819 .read_seq_string = cgroup_css_links_read,
4820 },
4821
4822 {
4823 .name = "releasable",
4824 .read_u64 = releasable_read,
4825 },
4826};
4827
4828static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
4829{
4830 return cgroup_add_files(cont, ss, debug_files,
4831 ARRAY_SIZE(debug_files));
4832}
4833
4834struct cgroup_subsys debug_subsys = {
4835 .name = "debug",
4836 .create = debug_create,
4837 .destroy = debug_destroy,
4838 .populate = debug_populate,
4839 .subsys_id = debug_subsys_id,
4840};
4841#endif
4842