1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include <linux/cgroup.h>
26#include <linux/errno.h>
27#include <linux/fs.h>
28#include <linux/kernel.h>
29#include <linux/list.h>
30#include <linux/mm.h>
31#include <linux/mutex.h>
32#include <linux/mount.h>
33#include <linux/pagemap.h>
34#include <linux/proc_fs.h>
35#include <linux/rcupdate.h>
36#include <linux/sched.h>
37#include <linux/backing-dev.h>
38#include <linux/seq_file.h>
39#include <linux/slab.h>
40#include <linux/magic.h>
41#include <linux/spinlock.h>
42#include <linux/string.h>
43#include <linux/sort.h>
44#include <linux/kmod.h>
45#include <linux/delayacct.h>
46#include <linux/cgroupstats.h>
47#include <linux/hash.h>
48#include <linux/namei.h>
49#include <linux/smp_lock.h>
50#include <linux/pid_namespace.h>
51
52#include <asm/atomic.h>
53
/*
 * Global cgroup mutex.  Taken by cgroup_lock()/cgroup_unlock() and directly
 * by the mount, remount, teardown and dentry-release paths in this file.
 */
static DEFINE_MUTEX(cgroup_mutex);

/*
 * Table of subsystem descriptors, indexed by subsystem id.  SUBSYS(x)
 * expands to "&x_subsys,"; <linux/cgroup_subsys.h> is presumably a list of
 * SUBSYS() invocations, one per compiled-in subsystem (verify against the
 * header).
 */
#define SUBSYS(_x) &_x ## _subsys,

static struct cgroup_subsys *subsys[] = {
#include <linux/cgroup_subsys.h>
};
62
63
64
65
66
67
/*
 * Per-hierarchy state.  One instance is allocated for each mounted cgroup
 * filesystem in cgroup_get_sb(); a static instance (rootnode) also exists.
 */
struct cgroupfs_root {
	/* Superblock this hierarchy is attached to (set in cgroup_set_super()) */
	struct super_block *sb;

	/*
	 * Bitmask of subsystems requested for this hierarchy (bit i refers
	 * to subsys[i]); compared by cgroup_test_super() when looking for an
	 * existing superblock.
	 */
	unsigned long subsys_bits;

	/* Bitmask of subsystems currently bound; updated by rebind_subsystems() */
	unsigned long actual_subsys_bits;

	/* List of bound subsystems, linked through cgroup_subsys.sibling */
	struct list_head subsys_list;

	/* The top-level cgroup of this hierarchy */
	struct cgroup top_cgroup;

	/* Number of cgroups in the hierarchy; starts at 1 for top_cgroup */
	int number_of_cgroups;

	/* Entry in the global "roots" list */
	struct list_head root_list;

	/* Hierarchy flags; bit numbers such as ROOT_NOPREFIX */
	unsigned long flags;

	/* Path of the release agent, settable via mount option or control file */
	char release_agent_path[PATH_MAX];
};
98
99
100
101
102
103
/*
 * Statically allocated hierarchy.  rebind_subsystems() treats a subsystem
 * whose ->root points here as free, and returns subsystems here when they
 * are removed from a mounted hierarchy.
 */
static struct cgroupfs_root rootnode;
105
106
107
108
109
/* Largest id value; matches the range of the unsigned short id field below */
#define CSS_ID_MAX	(65535)
struct css_id {
	/* The cgroup_subsys_state this id record belongs to */
	struct cgroup_subsys_state *css;

	/* The id itself */
	unsigned short id;

	/* NOTE(review): presumably the depth of the css in its hierarchy - confirm */
	unsigned short depth;

	/* RCU callback head; presumably used to free this record after a grace period */
	struct rcu_head rcu_head;

	/*
	 * Variable-length trailing array.
	 * NOTE(review): presumably the ids of the ancestors, indexed by depth - confirm
	 */
	unsigned short stack[0];
};
137
138
139
140
/* List of mounted hierarchies (linked via cgroupfs_root.root_list) and its length */
static LIST_HEAD(roots);
static int root_count;

/* Shorthand for the top cgroup of the static rootnode hierarchy */
#define dummytop (&rootnode.top_cgroup)

/*
 * NOTE(review): not referenced in the visible part of the file; presumably
 * records whether any subsystem needs fork/exit callbacks - confirm.
 */
static int need_forkexit_callback __read_mostly;
153
154
155inline int cgroup_is_removed(const struct cgroup *cgrp)
156{
157 return test_bit(CGRP_REMOVED, &cgrp->flags);
158}
159
160
/* Bit numbers used in cgroupfs_root.flags */
enum {
	ROOT_NOPREFIX, /* set by the "noprefix" mount option */
};
164
165static int cgroup_is_releasable(const struct cgroup *cgrp)
166{
167 const int bits =
168 (1 << CGRP_RELEASABLE) |
169 (1 << CGRP_NOTIFY_ON_RELEASE);
170 return (cgrp->flags & bits) == bits;
171}
172
173static int notify_on_release(const struct cgroup *cgrp)
174{
175 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
176}
177
178
179
180
181
/*
 * for_each_subsys() iterates _ss over every subsystem linked on
 * _root->subsys_list (through cgroup_subsys.sibling).
 */
#define for_each_subsys(_root, _ss) \
list_for_each_entry(_ss, &_root->subsys_list, sibling)

/*
 * for_each_active_root() iterates _root over every hierarchy on the global
 * "roots" list (through cgroupfs_root.root_list).
 */
#define for_each_active_root(_root) \
list_for_each_entry(_root, &roots, root_list)
188
189
190
/*
 * Release-agent machinery: a list head, the spinlock declared alongside it,
 * and the work item whose handler is cgroup_release_agent().  The users of
 * release_list are outside the visible part of this file.
 */
static LIST_HEAD(release_list);
static DEFINE_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
196
197
/*
 * Link object connecting one css_set to one cgroup.  Each link sits on two
 * lists at once (see link_css_set()).
 */
struct cg_cgroup_link {
	/*
	 * Entry in the cgroup's ->css_sets list.  Also used to chain
	 * not-yet-used links on a temporary list (allocate_cg_links()).
	 */
	struct list_head cgrp_link_list;

	/* Entry in the css_set's ->cg_links list */
	struct list_head cg_link_list;
	/* The css_set this link belongs to */
	struct css_set *cg;
};
211
212
213
214
215
216
217
218
/*
 * Statically allocated css_set and link.
 * NOTE(review): presumably the css_set used by the initial task(s); the
 * initialisation code is outside the visible part of the file - confirm.
 */
static struct css_set init_css_set;
static struct cg_cgroup_link init_css_set_link;

static int cgroup_subsys_init_idr(struct cgroup_subsys *ss);

/*
 * css_set_lock is held (read or write) around accesses to the css_set hash
 * table and the css_set<->cgroup link lists in this file; css_set_count is
 * the number of css_sets currently hashed.
 */
static DEFINE_RWLOCK(css_set_lock);
static int css_set_count;

/* Hash table of all css_sets, bucket chosen by css_set_hash() */
#define CSS_SET_HASH_BITS	7
#define CSS_SET_TABLE_SIZE	(1 << CSS_SET_HASH_BITS)
static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
235
236static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
237{
238 int i;
239 int index;
240 unsigned long tmp = 0UL;
241
242 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
243 tmp += (unsigned long)css[i];
244 tmp = (tmp >> 16) ^ tmp;
245
246 index = hash_long(tmp, CSS_SET_HASH_BITS);
247
248 return &css_set_table[index];
249}
250
251
252
253
254
/*
 * NOTE(review): not referenced in the visible part of the file; presumably
 * records whether tasks are being linked onto css_set->tasks - confirm.
 */
static int use_task_css_set_links __read_mostly;
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274static void unlink_css_set(struct css_set *cg)
275{
276 struct cg_cgroup_link *link;
277 struct cg_cgroup_link *saved_link;
278
279 hlist_del(&cg->hlist);
280 css_set_count--;
281
282 list_for_each_entry_safe(link, saved_link, &cg->cg_links,
283 cg_link_list) {
284 list_del(&link->cg_link_list);
285 list_del(&link->cgrp_link_list);
286 kfree(link);
287 }
288}
289
/*
 * __put_css_set - drop one reference on @cg, destroying it on the last put.
 * @cg: css_set to release
 * @taskexit: when non-zero, cgroups that become unused here and have
 *            notify_on_release set are also flagged CGRP_RELEASABLE
 */
static void __put_css_set(struct css_set *cg, int taskexit)
{
	int i;

	/*
	 * Fast path: decrement unless the count is exactly 1, in which case
	 * the final decrement must happen under css_set_lock.
	 */
	if (atomic_add_unless(&cg->refcount, -1, 1))
		return;
	write_lock(&css_set_lock);
	/* Re-check under the lock: someone may have taken a new reference */
	if (!atomic_dec_and_test(&cg->refcount)) {
		write_unlock(&css_set_lock);
		return;
	}
	unlink_css_set(cg);
	write_unlock(&css_set_lock);

	/* Drop the per-cgroup counts this css_set contributed */
	rcu_read_lock();
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup);
		if (atomic_dec_and_test(&cgrp->count) &&
		    notify_on_release(cgrp)) {
			if (taskexit)
				set_bit(CGRP_RELEASABLE, &cgrp->flags);
			check_for_release(cgrp);
		}
	}
	rcu_read_unlock();
	kfree(cg);
}
321
322
323
324
/* Take an additional reference on @cg. */
static inline void get_css_set(struct css_set *cg)
{
	atomic_inc(&cg->refcount);
}
329
/* Drop a reference on @cg (taskexit == 0 variant of __put_css_set()). */
static inline void put_css_set(struct css_set *cg)
{
	__put_css_set(cg, 0);
}
334
/*
 * Drop a reference on @cg with taskexit == 1, so that cgroups left unused
 * with notify_on_release set are marked CGRP_RELEASABLE.
 */
static inline void put_css_set_taskexit(struct css_set *cg)
{
	__put_css_set(cg, 1);
}
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353static struct css_set *find_existing_css_set(
354 struct css_set *oldcg,
355 struct cgroup *cgrp,
356 struct cgroup_subsys_state *template[])
357{
358 int i;
359 struct cgroupfs_root *root = cgrp->root;
360 struct hlist_head *hhead;
361 struct hlist_node *node;
362 struct css_set *cg;
363
364
365
366 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
367 if (root->subsys_bits & (1UL << i)) {
368
369
370
371 template[i] = cgrp->subsys[i];
372 } else {
373
374
375 template[i] = oldcg->subsys[i];
376 }
377 }
378
379 hhead = css_set_hash(template);
380 hlist_for_each_entry(cg, node, hhead, hlist) {
381 if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
382
383 return cg;
384 }
385 }
386
387
388 return NULL;
389}
390
391static void free_cg_links(struct list_head *tmp)
392{
393 struct cg_cgroup_link *link;
394 struct cg_cgroup_link *saved_link;
395
396 list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
397 list_del(&link->cgrp_link_list);
398 kfree(link);
399 }
400}
401
402
403
404
405
406
407static int allocate_cg_links(int count, struct list_head *tmp)
408{
409 struct cg_cgroup_link *link;
410 int i;
411 INIT_LIST_HEAD(tmp);
412 for (i = 0; i < count; i++) {
413 link = kmalloc(sizeof(*link), GFP_KERNEL);
414 if (!link) {
415 free_cg_links(tmp);
416 return -ENOMEM;
417 }
418 list_add(&link->cgrp_link_list, tmp);
419 }
420 return 0;
421}
422
423
424
425
426
427
428
429static void link_css_set(struct list_head *tmp_cg_links,
430 struct css_set *cg, struct cgroup *cgrp)
431{
432 struct cg_cgroup_link *link;
433
434 BUG_ON(list_empty(tmp_cg_links));
435 link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
436 cgrp_link_list);
437 link->cg = cg;
438 list_move(&link->cgrp_link_list, &cgrp->css_sets);
439 list_add(&link->cg_link_list, &cg->cg_links);
440}
441
442
443
444
445
446
447
448
/*
 * find_css_set - return a referenced css_set equal to @oldcg with @cgrp
 * substituted for the subsystems of @cgrp's hierarchy.
 *
 * An existing css_set is reused when one matches; otherwise a new one is
 * allocated, linked to one cgroup per hierarchy and hashed.  Returns NULL
 * if memory allocation fails.
 */
static struct css_set *find_css_set(
	struct css_set *oldcg, struct cgroup *cgrp)
{
	struct css_set *res;
	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
	int i;

	struct list_head tmp_cg_links;

	struct hlist_head *hhead;

	/* First see if an equivalent css_set already exists */
	read_lock(&css_set_lock);
	res = find_existing_css_set(oldcg, cgrp, template);
	if (res)
		get_css_set(res);
	read_unlock(&css_set_lock);

	if (res)
		return res;

	res = kmalloc(sizeof(*res), GFP_KERNEL);
	if (!res)
		return NULL;

	/* Preallocate one link per mounted hierarchy before taking the lock */
	if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
		kfree(res);
		return NULL;
	}

	atomic_set(&res->refcount, 1);
	INIT_LIST_HEAD(&res->cg_links);
	INIT_LIST_HEAD(&res->tasks);
	INIT_HLIST_NODE(&res->hlist);

	/* template[] was filled in by find_existing_css_set() above */
	memcpy(res->subsys, template, sizeof(res->subsys));

	write_lock(&css_set_lock);

	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		/* Note: this local shadows the cgrp parameter inside the loop */
		struct cgroup *cgrp = res->subsys[i]->cgroup;
		struct cgroup_subsys *ss = subsys[i];
		atomic_inc(&cgrp->count);

		/*
		 * Link only once per hierarchy: do it when this subsystem is
		 * the first entry on its root's subsys_list.
		 */
		if (ss->root->subsys_list.next == &ss->sibling)
			link_css_set(&tmp_cg_links, res, cgrp);
	}
	/* rootnode has no subsystems bound: link to dummytop explicitly */
	if (list_empty(&rootnode.subsys_list))
		link_css_set(&tmp_cg_links, res, dummytop);

	/* Every preallocated link must have been consumed */
	BUG_ON(!list_empty(&tmp_cg_links));

	css_set_count++;

	/* Publish the new css_set in the hash table */
	hhead = css_set_hash(res->subsys);
	hlist_add_head(&res->hlist, hhead);

	write_unlock(&css_set_lock);

	return res;
}
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
/* cgroup_lock - acquire the global cgroup_mutex. */
void cgroup_lock(void)
{
	mutex_lock(&cgroup_mutex);
}
578
579
580
581
582
583
/* cgroup_unlock - release the global cgroup_mutex taken by cgroup_lock(). */
void cgroup_unlock(void)
{
	mutex_unlock(&cgroup_mutex);
}
588
589
590
591
592
593
594
595
/* Forward declarations for operations defined later in the file */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp);
static struct inode_operations cgroup_dir_inode_operations;
static struct file_operations proc_cgroupstats_operations;

/* backing_dev_info assigned to every inode created by cgroup_new_inode() */
static struct backing_dev_info cgroup_backing_dev_info = {
	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
};

static int alloc_css_id(struct cgroup_subsys *ss,
			struct cgroup *parent, struct cgroup *child);
608
609static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
610{
611 struct inode *inode = new_inode(sb);
612
613 if (inode) {
614 inode->i_mode = mode;
615 inode->i_uid = current_fsuid();
616 inode->i_gid = current_fsgid();
617 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
618 inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
619 }
620 return inode;
621}
622
623
624
625
626
627static int cgroup_call_pre_destroy(struct cgroup *cgrp)
628{
629 struct cgroup_subsys *ss;
630 int ret = 0;
631
632 for_each_subsys(cgrp->root, ss)
633 if (ss->pre_destroy) {
634 ret = ss->pre_destroy(ss, cgrp);
635 if (ret)
636 break;
637 }
638 return ret;
639}
640
641static void free_cgroup_rcu(struct rcu_head *obj)
642{
643 struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head);
644
645 kfree(cgrp);
646}
647
/*
 * cgroup_diput - release the inode behind a cgroup dentry.
 *
 * For directory inodes the dentry's d_fsdata is the cgroup itself: the
 * subsystems' ->destroy() callbacks are run, the hierarchy's cgroup count
 * is decremented, a superblock reference is dropped and the cgroup is
 * freed after an RCU grace period.  For all inodes, the inode reference is
 * dropped with iput().
 */
static void cgroup_diput(struct dentry *dentry, struct inode *inode)
{
	/* is dentry a directory ? if so, kfree() associated cgroup */
	if (S_ISDIR(inode->i_mode)) {
		struct cgroup *cgrp = dentry->d_fsdata;
		struct cgroup_subsys *ss;
		/* Only a cgroup already marked removed may get here */
		BUG_ON(!(cgroup_is_removed(cgrp)));

		/*
		 * Wait for an RCU grace period before tearing down subsystem
		 * state (presumably so that RCU readers still using the
		 * cgroup have finished - confirm against the readers).
		 */
		synchronize_rcu();

		mutex_lock(&cgroup_mutex);

		/* Let each bound subsystem release its per-cgroup state */
		for_each_subsys(cgrp->root, ss)
			ss->destroy(ss, cgrp);

		cgrp->root->number_of_cgroups--;
		mutex_unlock(&cgroup_mutex);

		/*
		 * Drop a reference on the superblock (presumably the one
		 * taken when the cgroup was created - the creation path is
		 * outside the visible code).
		 */
		deactivate_super(cgrp->root->sb);

		/* Free the cgroup itself only after another grace period */
		call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
	}
	iput(inode);
}
683
/*
 * remove_dir - delete directory dentry @d from its parent.
 *
 * A reference on the parent is held across the operation so that it stays
 * valid for the simple_rmdir() call.
 */
static void remove_dir(struct dentry *d)
{
	struct dentry *parent = dget(d->d_parent);

	d_delete(d);
	simple_rmdir(parent->d_inode, d);
	dput(parent);
}
692
/*
 * cgroup_clear_directory - unlink every file in directory @dentry.
 *
 * The caller must hold the directory inode's i_mutex (checked with BUG_ON).
 * Children are expected to be regular entries: a child with a directory
 * inode triggers BUG.
 */
static void cgroup_clear_directory(struct dentry *dentry)
{
	struct list_head *node;

	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
	spin_lock(&dcache_lock);
	node = dentry->d_subdirs.next;
	while (node != &dentry->d_subdirs) {
		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
		/* Detach the child first so the loop always makes progress */
		list_del_init(node);
		if (d->d_inode) {
			/* This should never be called on a cgroup
			 * directory with child cgroups */
			BUG_ON(d->d_inode->i_mode & S_IFDIR);
			d = dget_locked(d);
			/* d_delete()/simple_unlink()/dput() run without dcache_lock */
			spin_unlock(&dcache_lock);
			d_delete(d);
			simple_unlink(dentry->d_inode, d);
			dput(d);
			spin_lock(&dcache_lock);
		}
		/* Restart from the head: the list may have changed while unlocked */
		node = dentry->d_subdirs.next;
	}
	spin_unlock(&dcache_lock);
}
718
719
720
721
/*
 * cgroup_d_remove_dir - remove a cgroup directory: unlink its files, detach
 * the dentry from its parent's child list under dcache_lock, then remove
 * the directory itself.
 */
static void cgroup_d_remove_dir(struct dentry *dentry)
{
	cgroup_clear_directory(dentry);

	spin_lock(&dcache_lock);
	list_del_init(&dentry->d_u.d_child);
	spin_unlock(&dcache_lock);
	remove_dir(dentry);
}
731
732
733
734
735
736
737
738
739
/*
 * Wait queue woken by cgroup_wakeup_rmdir_waiter().
 * NOTE(review): presumably rmdir sleeps here after setting
 * CGRP_WAIT_ON_RMDIR; the waiter is outside the visible code - confirm.
 */
DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);

/*
 * If CGRP_WAIT_ON_RMDIR is set on @cgrp, clear it atomically and wake
 * everything sleeping on cgroup_rmdir_waitq.
 */
static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
{
	if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
		wake_up_all(&cgroup_rmdir_waitq);
}
747
/*
 * cgroup_exclude_rmdir - take a reference on @css.  Paired with
 * cgroup_release_and_wakeup_rmdir(), which drops it again.
 */
void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
{
	css_get(css);
}
752
/*
 * cgroup_release_and_wakeup_rmdir - wake any rmdir waiter on @css's cgroup,
 * then drop the reference on @css.
 */
void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
{
	cgroup_wakeup_rmdir_waiter(css->cgroup);
	css_put(css);
}
758
759
/*
 * rebind_subsystems - change the set of subsystems bound to @root.
 * @root: hierarchy to update
 * @final_bits: bitmask of subsystems that should be bound afterwards
 *
 * Returns -EBUSY if a subsystem to be added is already bound to a
 * hierarchy other than rootnode, or if @root has more than its top cgroup.
 * Returns 0 on success.  All visible callers hold cgroup_mutex.
 */
static int rebind_subsystems(struct cgroupfs_root *root,
			      unsigned long final_bits)
{
	unsigned long added_bits, removed_bits;
	struct cgroup *cgrp = &root->top_cgroup;
	int i;

	removed_bits = root->actual_subsys_bits & ~final_bits;
	added_bits = final_bits & ~root->actual_subsys_bits;
	/* Check that any added subsystems are currently free */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		unsigned long bit = 1UL << i;
		struct cgroup_subsys *ss = subsys[i];
		if (!(bit & added_bits))
			continue;
		if (ss->root != &rootnode) {
			/* Subsystem isn't free */
			return -EBUSY;
		}
	}

	/*
	 * Rebinding is refused once the hierarchy contains anything besides
	 * its top cgroup.
	 */
	if (root->number_of_cgroups > 1)
		return -EBUSY;

	/* Process each subsystem */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		unsigned long bit = 1UL << i;
		if (bit & added_bits) {
			/* We're binding this subsystem to this hierarchy */
			BUG_ON(cgrp->subsys[i]);
			BUG_ON(!dummytop->subsys[i]);
			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
			mutex_lock(&ss->hierarchy_mutex);
			/* The state object moves from dummytop to this root's top cgroup */
			cgrp->subsys[i] = dummytop->subsys[i];
			cgrp->subsys[i]->cgroup = cgrp;
			list_move(&ss->sibling, &root->subsys_list);
			ss->root = root;
			if (ss->bind)
				ss->bind(ss, cgrp);
			mutex_unlock(&ss->hierarchy_mutex);
		} else if (bit & removed_bits) {
			/* We're removing this subsystem */
			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
			mutex_lock(&ss->hierarchy_mutex);
			if (ss->bind)
				ss->bind(ss, dummytop);
			/* Hand the state object back to dummytop / rootnode */
			dummytop->subsys[i]->cgroup = dummytop;
			cgrp->subsys[i] = NULL;
			subsys[i]->root = &rootnode;
			list_move(&ss->sibling, &rootnode.subsys_list);
			mutex_unlock(&ss->hierarchy_mutex);
		} else if (bit & final_bits) {
			/* Subsystem state should already exist */
			BUG_ON(!cgrp->subsys[i]);
		} else {
			/* Subsystem state shouldn't exist */
			BUG_ON(cgrp->subsys[i]);
		}
	}
	root->subsys_bits = root->actual_subsys_bits = final_bits;
	/* Wait for a grace period before returning to the caller */
	synchronize_rcu();

	return 0;
}
830
831static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
832{
833 struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;
834 struct cgroup_subsys *ss;
835
836 mutex_lock(&cgroup_mutex);
837 for_each_subsys(root, ss)
838 seq_printf(seq, ",%s", ss->name);
839 if (test_bit(ROOT_NOPREFIX, &root->flags))
840 seq_puts(seq, ",noprefix");
841 if (strlen(root->release_agent_path))
842 seq_printf(seq, ",release_agent=%s", root->release_agent_path);
843 mutex_unlock(&cgroup_mutex);
844 return 0;
845}
846
/* Parsed mount options, filled in by parse_cgroupfs_options() */
struct cgroup_sb_opts {
	unsigned long subsys_bits;	/* requested subsystems, bit i == subsys[i] */
	unsigned long flags;		/* ROOT_* flag bits */
	char *release_agent;		/* kzalloc'ed path or NULL; caller must kfree() */
};
852
853
854
855static int parse_cgroupfs_options(char *data,
856 struct cgroup_sb_opts *opts)
857{
858 char *token, *o = data ?: "all";
859 unsigned long mask = (unsigned long)-1;
860
861#ifdef CONFIG_CPUSETS
862 mask = ~(1UL << cpuset_subsys_id);
863#endif
864
865 opts->subsys_bits = 0;
866 opts->flags = 0;
867 opts->release_agent = NULL;
868
869 while ((token = strsep(&o, ",")) != NULL) {
870 if (!*token)
871 return -EINVAL;
872 if (!strcmp(token, "all")) {
873
874 int i;
875 opts->subsys_bits = 0;
876 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
877 struct cgroup_subsys *ss = subsys[i];
878 if (!ss->disabled)
879 opts->subsys_bits |= 1ul << i;
880 }
881 } else if (!strcmp(token, "noprefix")) {
882 set_bit(ROOT_NOPREFIX, &opts->flags);
883 } else if (!strncmp(token, "release_agent=", 14)) {
884
885 if (opts->release_agent)
886 return -EINVAL;
887 opts->release_agent = kzalloc(PATH_MAX, GFP_KERNEL);
888 if (!opts->release_agent)
889 return -ENOMEM;
890 strncpy(opts->release_agent, token + 14, PATH_MAX - 1);
891 opts->release_agent[PATH_MAX - 1] = 0;
892 } else {
893 struct cgroup_subsys *ss;
894 int i;
895 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
896 ss = subsys[i];
897 if (!strcmp(token, ss->name)) {
898 if (!ss->disabled)
899 set_bit(i, &opts->subsys_bits);
900 break;
901 }
902 }
903 if (i == CGROUP_SUBSYS_COUNT)
904 return -ENOENT;
905 }
906 }
907
908
909
910
911
912
913 if (test_bit(ROOT_NOPREFIX, &opts->flags) &&
914 (opts->subsys_bits & mask))
915 return -EINVAL;
916
917
918 if (!opts->subsys_bits)
919 return -EINVAL;
920
921 return 0;
922}
923
/*
 * cgroup_remount - super_operations.remount_fs handler.
 *
 * Re-parses the mount options, refuses any change of flags, rebinds the
 * subsystem set, repopulates the top directory and updates the release
 * agent path if one was given.  Returns 0 or a negative errno from
 * parsing/rebinding (-EINVAL if the flags differ).
 */
static int cgroup_remount(struct super_block *sb, int *flags, char *data)
{
	int ret = 0;
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	struct cgroup_sb_opts opts;

	/* Lock order: BKL, then the top directory's i_mutex, then cgroup_mutex */
	lock_kernel();
	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
	mutex_lock(&cgroup_mutex);

	/* See what subsystems are wanted */
	ret = parse_cgroupfs_options(data, &opts);
	if (ret)
		goto out_unlock;

	/* Don't allow flags to change at remount */
	if (opts.flags != root->flags) {
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = rebind_subsystems(root, opts.subsys_bits);
	if (ret)
		goto out_unlock;

	/* (re)populate subsystem files */
	cgroup_populate_dir(cgrp);

	if (opts.release_agent)
		strcpy(root->release_agent_path, opts.release_agent);
 out_unlock:
	/* opts.release_agent is NULL or kzalloc'ed on every path reaching here */
	kfree(opts.release_agent);
	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
	unlock_kernel();
	return ret;
}
962
/* Superblock operations installed on cgroup superblocks by cgroup_set_super() */
static struct super_operations cgroup_ops = {
	.statfs = simple_statfs,
	.drop_inode = generic_delete_inode,
	.show_options = cgroup_show_options,
	.remount_fs = cgroup_remount,
};
969
970static void init_cgroup_housekeeping(struct cgroup *cgrp)
971{
972 INIT_LIST_HEAD(&cgrp->sibling);
973 INIT_LIST_HEAD(&cgrp->children);
974 INIT_LIST_HEAD(&cgrp->css_sets);
975 INIT_LIST_HEAD(&cgrp->release_list);
976 INIT_LIST_HEAD(&cgrp->pids_list);
977 init_rwsem(&cgrp->pids_mutex);
978}
979static void init_cgroup_root(struct cgroupfs_root *root)
980{
981 struct cgroup *cgrp = &root->top_cgroup;
982 INIT_LIST_HEAD(&root->subsys_list);
983 INIT_LIST_HEAD(&root->root_list);
984 root->number_of_cgroups = 1;
985 cgrp->root = root;
986 cgrp->top_cgroup = cgrp;
987 init_cgroup_housekeeping(cgrp);
988}
989
990static int cgroup_test_super(struct super_block *sb, void *data)
991{
992 struct cgroupfs_root *new = data;
993 struct cgroupfs_root *root = sb->s_fs_info;
994
995
996 if (new->subsys_bits != root->subsys_bits)
997 return 0;
998
999
1000 if (new->flags != root->flags)
1001 return 0;
1002
1003 return 1;
1004}
1005
1006static int cgroup_set_super(struct super_block *sb, void *data)
1007{
1008 int ret;
1009 struct cgroupfs_root *root = data;
1010
1011 ret = set_anon_super(sb, NULL);
1012 if (ret)
1013 return ret;
1014
1015 sb->s_fs_info = root;
1016 root->sb = sb;
1017
1018 sb->s_blocksize = PAGE_CACHE_SIZE;
1019 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1020 sb->s_magic = CGROUP_SUPER_MAGIC;
1021 sb->s_op = &cgroup_ops;
1022
1023 return 0;
1024}
1025
1026static int cgroup_get_rootdir(struct super_block *sb)
1027{
1028 struct inode *inode =
1029 cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
1030 struct dentry *dentry;
1031
1032 if (!inode)
1033 return -ENOMEM;
1034
1035 inode->i_fop = &simple_dir_operations;
1036 inode->i_op = &cgroup_dir_inode_operations;
1037
1038 inc_nlink(inode);
1039 dentry = d_alloc_root(inode);
1040 if (!dentry) {
1041 iput(inode);
1042 return -ENOMEM;
1043 }
1044 sb->s_root = dentry;
1045 return 0;
1046}
1047
/*
 * cgroup_get_sb - file_system_type.get_sb handler: mount a cgroup hierarchy.
 *
 * Parses the mount options in @data, then either reuses an existing
 * superblock with the same subsystems and flags or sets up a new hierarchy:
 * root directory, subsystem binding, and a link from every existing css_set
 * to the new top cgroup.  Returns 0 on success or a negative errno.
 */
static int cgroup_get_sb(struct file_system_type *fs_type,
			 int flags, const char *unused_dev_name,
			 void *data, struct vfsmount *mnt)
{
	struct cgroup_sb_opts opts;
	int ret = 0;
	struct super_block *sb;
	struct cgroupfs_root *root;
	struct list_head tmp_cg_links;

	/* First find the desired set of subsystems */
	ret = parse_cgroupfs_options(data, &opts);
	if (ret) {
		/* The parser may have allocated release_agent before failing */
		kfree(opts.release_agent);
		return ret;
	}

	root = kzalloc(sizeof(*root), GFP_KERNEL);
	if (!root) {
		kfree(opts.release_agent);
		return -ENOMEM;
	}

	init_cgroup_root(root);
	root->subsys_bits = opts.subsys_bits;
	root->flags = opts.flags;
	if (opts.release_agent) {
		strcpy(root->release_agent_path, opts.release_agent);
		kfree(opts.release_agent);
	}

	/* Find a matching superblock (cgroup_test_super) or set up a new one */
	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root);

	if (IS_ERR(sb)) {
		kfree(root);
		return PTR_ERR(sb);
	}

	if (sb->s_fs_info != root) {
		/* Reusing an existing superblock */
		BUG_ON(sb->s_root == NULL);
		kfree(root);
		root = NULL;
	} else {
		/* New superblock */
		struct cgroup *root_cgrp = &root->top_cgroup;
		struct inode *inode;
		int i;

		BUG_ON(sb->s_root != NULL);

		ret = cgroup_get_rootdir(sb);
		if (ret)
			goto drop_new_super;
		inode = sb->s_root->d_inode;

		mutex_lock(&inode->i_mutex);
		mutex_lock(&cgroup_mutex);

		/*
		 * Preallocate one link per existing css_set while sleeping is
		 * still allowed; they are consumed under css_set_lock below.
		 */
		ret = allocate_cg_links(css_set_count, &tmp_cg_links);
		if (ret) {
			mutex_unlock(&cgroup_mutex);
			mutex_unlock(&inode->i_mutex);
			goto drop_new_super;
		}

		ret = rebind_subsystems(root, root->subsys_bits);
		if (ret == -EBUSY) {
			mutex_unlock(&cgroup_mutex);
			mutex_unlock(&inode->i_mutex);
			goto free_cg_links;
		}

		/* EBUSY should be the only error here */
		BUG_ON(ret);

		list_add(&root->root_list, &roots);
		root_count++;

		sb->s_root->d_fsdata = root_cgrp;
		root->top_cgroup.dentry = sb->s_root;

		/* Link the top cgroup in this hierarchy into all
		 * the css_set objects */
		write_lock(&css_set_lock);
		for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
			struct hlist_head *hhead = &css_set_table[i];
			struct hlist_node *node;
			struct css_set *cg;

			hlist_for_each_entry(cg, node, hhead, hlist)
				link_css_set(&tmp_cg_links, cg, root_cgrp);
		}
		write_unlock(&css_set_lock);

		/* Release any links that were not needed */
		free_cg_links(&tmp_cg_links);

		BUG_ON(!list_empty(&root_cgrp->sibling));
		BUG_ON(!list_empty(&root_cgrp->children));
		BUG_ON(root->number_of_cgroups != 1);

		cgroup_populate_dir(root_cgrp);
		mutex_unlock(&inode->i_mutex);
		mutex_unlock(&cgroup_mutex);
	}

	simple_set_mnt(mnt, sb);
	return 0;

 free_cg_links:
	free_cg_links(&tmp_cg_links);
 drop_new_super:
	/* root is not freed here; presumably cgroup_kill_sb() does it - confirm */
	deactivate_locked_super(sb);
	return ret;
}
1170
/*
 * cgroup_kill_sb - file_system_type.kill_sb handler: tear down a hierarchy.
 *
 * The hierarchy must consist of the top cgroup only (enforced with BUG_ON).
 * All subsystems are unbound, every css_set link to the top cgroup is
 * freed, the root is taken off the roots list, and finally the superblock
 * and the cgroupfs_root are released.
 */
static void cgroup_kill_sb(struct super_block *sb) {
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	int ret;
	struct cg_cgroup_link *link;
	struct cg_cgroup_link *saved_link;

	BUG_ON(!root);

	BUG_ON(root->number_of_cgroups != 1);
	BUG_ON(!list_empty(&cgrp->children));
	BUG_ON(!list_empty(&cgrp->sibling));

	mutex_lock(&cgroup_mutex);

	/* Rebind all subsystems back to the default hierarchy */
	ret = rebind_subsystems(root, 0);
	/* Shouldn't be able to fail ... */
	BUG_ON(ret);

	/*
	 * Release all the links from css_sets to this hierarchy's
	 * root cgroup
	 */
	write_lock(&css_set_lock);

	list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
				 cgrp_link_list) {
		list_del(&link->cg_link_list);
		list_del(&link->cgrp_link_list);
		kfree(link);
	}
	write_unlock(&css_set_lock);

	/* root_list is empty if the mount failed before the root was listed */
	if (!list_empty(&root->root_list)) {
		list_del(&root->root_list);
		root_count--;
	}

	mutex_unlock(&cgroup_mutex);

	kill_litter_super(sb);
	kfree(root);
}
1215
/* Filesystem type descriptor for the "cgroup" filesystem */
static struct file_system_type cgroup_fs_type = {
	.name = "cgroup",
	.get_sb = cgroup_get_sb,
	.kill_sb = cgroup_kill_sb,
};
1221
/* Interpret @dentry's d_fsdata as a cgroup pointer (valid for directory dentries) */
static inline struct cgroup *__d_cgrp(struct dentry *dentry)
{
	return dentry->d_fsdata;
}
1226
/* Interpret @dentry's d_fsdata as a cftype pointer (used on control-file dentries) */
static inline struct cftype *__d_cft(struct dentry *dentry)
{
	return dentry->d_fsdata;
}
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1243{
1244 char *start;
1245 struct dentry *dentry = rcu_dereference(cgrp->dentry);
1246
1247 if (!dentry || cgrp == dummytop) {
1248
1249
1250
1251
1252 strcpy(buf, "/");
1253 return 0;
1254 }
1255
1256 start = buf + buflen;
1257
1258 *--start = '\0';
1259 for (;;) {
1260 int len = dentry->d_name.len;
1261 if ((start -= len) < buf)
1262 return -ENAMETOOLONG;
1263 memcpy(start, cgrp->dentry->d_name.name, len);
1264 cgrp = cgrp->parent;
1265 if (!cgrp)
1266 break;
1267 dentry = rcu_dereference(cgrp->dentry);
1268 if (!cgrp->parent)
1269 continue;
1270 if (--start < buf)
1271 return -ENAMETOOLONG;
1272 *start = '/';
1273 }
1274 memmove(buf, start, buf + buflen - start);
1275 return 0;
1276}
1277
1278
1279
1280
1281
1282
1283static void get_first_subsys(const struct cgroup *cgrp,
1284 struct cgroup_subsys_state **css, int *subsys_id)
1285{
1286 const struct cgroupfs_root *root = cgrp->root;
1287 const struct cgroup_subsys *test_ss;
1288 BUG_ON(list_empty(&root->subsys_list));
1289 test_ss = list_entry(root->subsys_list.next,
1290 struct cgroup_subsys, sibling);
1291 if (css) {
1292 *css = cgrp->subsys[test_ss->subsys_id];
1293 BUG_ON(!*css);
1294 }
1295 if (subsys_id)
1296 *subsys_id = test_ss->subsys_id;
1297}
1298
1299
1300
1301
1302
1303
1304
1305
1306
/**
 * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
 * @cgrp: the cgroup the task is attaching to
 * @tsk: the task to be attached
 *
 * Returns 0 on success (including when the task is already in @cgrp), the
 * error of a subsystem's ->can_attach() veto, -ENOMEM if no css_set could
 * be obtained, or -ESRCH if the task is exiting.  The visible caller
 * (cgroup_tasks_write() via attach_task_by_pid()) holds cgroup_mutex.
 */
int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
	int retval = 0;
	struct cgroup_subsys *ss;
	struct cgroup *oldcgrp;
	struct css_set *cg;
	struct css_set *newcg;
	struct cgroupfs_root *root = cgrp->root;
	int subsys_id;

	get_first_subsys(cgrp, NULL, &subsys_id);

	/* Nothing to do if the task is already in that cgroup */
	oldcgrp = task_cgroup(tsk, subsys_id);
	if (cgrp == oldcgrp)
		return 0;

	/* Give every bound subsystem the chance to veto the move */
	for_each_subsys(root, ss) {
		if (ss->can_attach) {
			retval = ss->can_attach(ss, cgrp, tsk);
			if (retval)
				return retval;
		}
	}

	/* Pin the task's current css_set while a replacement is looked up */
	task_lock(tsk);
	cg = tsk->cgroups;
	get_css_set(cg);
	task_unlock(tsk);

	/*
	 * Locate or allocate a new css_set for this task,
	 * based on its final set of cgroups
	 */
	newcg = find_css_set(cg, cgrp);
	/* Balances the get_css_set() just above */
	put_css_set(cg);
	if (!newcg)
		return -ENOMEM;

	task_lock(tsk);
	if (tsk->flags & PF_EXITING) {
		task_unlock(tsk);
		put_css_set(newcg);
		return -ESRCH;
	}
	rcu_assign_pointer(tsk->cgroups, newcg);
	task_unlock(tsk);

	/* Update the css_set linked lists if we're using them */
	write_lock(&css_set_lock);
	if (!list_empty(&tsk->cg_list)) {
		list_del(&tsk->cg_list);
		list_add(&tsk->cg_list, &newcg->tasks);
	}
	write_unlock(&css_set_lock);

	for_each_subsys(root, ss) {
		if (ss->attach)
			ss->attach(ss, cgrp, oldcgrp, tsk);
	}
	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
	/* Wait for a grace period before dropping the old css_set */
	synchronize_rcu();
	/*
	 * NOTE(review): presumably drops the reference the task itself held
	 * on its old css_set through tsk->cgroups - confirm.
	 */
	put_css_set(cg);

	/*
	 * Wake up rmdir() waiter (if CGRP_WAIT_ON_RMDIR is set on the
	 * destination cgroup).
	 */
	cgroup_wakeup_rmdir_waiter(cgrp);
	return 0;
}
1377
1378
1379
1380
1381
1382static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
1383{
1384 struct task_struct *tsk;
1385 const struct cred *cred = current_cred(), *tcred;
1386 int ret;
1387
1388 if (pid) {
1389 rcu_read_lock();
1390 tsk = find_task_by_vpid(pid);
1391 if (!tsk || tsk->flags & PF_EXITING) {
1392 rcu_read_unlock();
1393 return -ESRCH;
1394 }
1395
1396 tcred = __task_cred(tsk);
1397 if (cred->euid &&
1398 cred->euid != tcred->uid &&
1399 cred->euid != tcred->suid) {
1400 rcu_read_unlock();
1401 return -EACCES;
1402 }
1403 get_task_struct(tsk);
1404 rcu_read_unlock();
1405 } else {
1406 tsk = current;
1407 get_task_struct(tsk);
1408 }
1409
1410 ret = cgroup_attach_task(cgrp, tsk);
1411 put_task_struct(tsk);
1412 return ret;
1413}
1414
1415static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
1416{
1417 int ret;
1418 if (!cgroup_lock_live_group(cgrp))
1419 return -ENODEV;
1420 ret = attach_task_by_pid(cgrp, pid);
1421 cgroup_unlock();
1422 return ret;
1423}
1424
1425
/*
 * Identifiers for the core control files.
 * NOTE(review): the users are outside the visible part of the file;
 * presumably stored in cftype.private - confirm.
 */
enum cgroup_filetype {
	FILE_ROOT,
	FILE_DIR,
	FILE_TASKLIST,
	FILE_NOTIFY_ON_RELEASE,
	FILE_RELEASE_AGENT,
};
1433
1434
1435
1436
1437
1438
1439
1440
1441bool cgroup_lock_live_group(struct cgroup *cgrp)
1442{
1443 mutex_lock(&cgroup_mutex);
1444 if (cgroup_is_removed(cgrp)) {
1445 mutex_unlock(&cgroup_mutex);
1446 return false;
1447 }
1448 return true;
1449}
1450
1451static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
1452 const char *buffer)
1453{
1454 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
1455 if (!cgroup_lock_live_group(cgrp))
1456 return -ENODEV;
1457 strcpy(cgrp->root->release_agent_path, buffer);
1458 cgroup_unlock();
1459 return 0;
1460}
1461
1462static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
1463 struct seq_file *seq)
1464{
1465 if (!cgroup_lock_live_group(cgrp))
1466 return -ENODEV;
1467 seq_puts(seq, cgrp->root->release_agent_path);
1468 seq_putc(seq, '\n');
1469 cgroup_unlock();
1470 return 0;
1471}
1472
1473
1474#define CGROUP_LOCAL_BUFFER_SIZE 64
1475
1476static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
1477 struct file *file,
1478 const char __user *userbuf,
1479 size_t nbytes, loff_t *unused_ppos)
1480{
1481 char buffer[CGROUP_LOCAL_BUFFER_SIZE];
1482 int retval = 0;
1483 char *end;
1484
1485 if (!nbytes)
1486 return -EINVAL;
1487 if (nbytes >= sizeof(buffer))
1488 return -E2BIG;
1489 if (copy_from_user(buffer, userbuf, nbytes))
1490 return -EFAULT;
1491
1492 buffer[nbytes] = 0;
1493 strstrip(buffer);
1494 if (cft->write_u64) {
1495 u64 val = simple_strtoull(buffer, &end, 0);
1496 if (*end)
1497 return -EINVAL;
1498 retval = cft->write_u64(cgrp, cft, val);
1499 } else {
1500 s64 val = simple_strtoll(buffer, &end, 0);
1501 if (*end)
1502 return -EINVAL;
1503 retval = cft->write_s64(cgrp, cft, val);
1504 }
1505 if (!retval)
1506 retval = nbytes;
1507 return retval;
1508}
1509
1510static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
1511 struct file *file,
1512 const char __user *userbuf,
1513 size_t nbytes, loff_t *unused_ppos)
1514{
1515 char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
1516 int retval = 0;
1517 size_t max_bytes = cft->max_write_len;
1518 char *buffer = local_buffer;
1519
1520 if (!max_bytes)
1521 max_bytes = sizeof(local_buffer) - 1;
1522 if (nbytes >= max_bytes)
1523 return -E2BIG;
1524
1525 if (nbytes >= sizeof(local_buffer)) {
1526 buffer = kmalloc(nbytes + 1, GFP_KERNEL);
1527 if (buffer == NULL)
1528 return -ENOMEM;
1529 }
1530 if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
1531 retval = -EFAULT;
1532 goto out;
1533 }
1534
1535 buffer[nbytes] = 0;
1536 strstrip(buffer);
1537 retval = cft->write_string(cgrp, cft, buffer);
1538 if (!retval)
1539 retval = nbytes;
1540out:
1541 if (buffer != local_buffer)
1542 kfree(buffer);
1543 return retval;
1544}
1545
1546static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
1547 size_t nbytes, loff_t *ppos)
1548{
1549 struct cftype *cft = __d_cft(file->f_dentry);
1550 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
1551
1552 if (cgroup_is_removed(cgrp))
1553 return -ENODEV;
1554 if (cft->write)
1555 return cft->write(cgrp, cft, file, buf, nbytes, ppos);
1556 if (cft->write_u64 || cft->write_s64)
1557 return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
1558 if (cft->write_string)
1559 return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
1560 if (cft->trigger) {
1561 int ret = cft->trigger(cgrp, (unsigned int)cft->private);
1562 return ret ? ret : nbytes;
1563 }
1564 return -EINVAL;
1565}
1566
1567static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
1568 struct file *file,
1569 char __user *buf, size_t nbytes,
1570 loff_t *ppos)
1571{
1572 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
1573 u64 val = cft->read_u64(cgrp, cft);
1574 int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
1575
1576 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
1577}
1578
1579static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
1580 struct file *file,
1581 char __user *buf, size_t nbytes,
1582 loff_t *ppos)
1583{
1584 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
1585 s64 val = cft->read_s64(cgrp, cft);
1586 int len = sprintf(tmp, "%lld\n", (long long) val);
1587
1588 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
1589}
1590
1591static ssize_t cgroup_file_read(struct file *file, char __user *buf,
1592 size_t nbytes, loff_t *ppos)
1593{
1594 struct cftype *cft = __d_cft(file->f_dentry);
1595 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
1596
1597 if (cgroup_is_removed(cgrp))
1598 return -ENODEV;
1599
1600 if (cft->read)
1601 return cft->read(cgrp, cft, file, buf, nbytes, ppos);
1602 if (cft->read_u64)
1603 return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
1604 if (cft->read_s64)
1605 return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
1606 return -EINVAL;
1607}
1608
1609
1610
1611
1612
1613
/*
 * Per-open state for cgroup control files that are served through
 * seq_file.  Allocated in cgroup_file_open(), passed to single_open() as
 * the seq_file private pointer and freed in cgroup_seqfile_release().
 */
struct cgroup_seqfile_state {
	struct cftype *cft;	/* control file type that was opened */
	struct cgroup *cgroup;	/* cgroup the opened file belongs to */
};
1618
1619static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
1620{
1621 struct seq_file *sf = cb->state;
1622 return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
1623}
1624
1625static int cgroup_seqfile_show(struct seq_file *m, void *arg)
1626{
1627 struct cgroup_seqfile_state *state = m->private;
1628 struct cftype *cft = state->cft;
1629 if (cft->read_map) {
1630 struct cgroup_map_cb cb = {
1631 .fill = cgroup_map_add,
1632 .state = m,
1633 };
1634 return cft->read_map(state->cgroup, cft, &cb);
1635 }
1636 return cft->read_seq_string(state->cgroup, cft, m);
1637}
1638
1639static int cgroup_seqfile_release(struct inode *inode, struct file *file)
1640{
1641 struct seq_file *seq = file->private_data;
1642 kfree(seq->private);
1643 return single_release(inode, file);
1644}
1645
/*
 * File operations that cgroup_file_open() installs on files whose cftype
 * provides read_map() or read_seq_string(): reads and seeks go through
 * seq_file, writes still go through cgroup_file_write().
 */
static struct file_operations cgroup_seqfile_operations = {
	.read = seq_read,
	.write = cgroup_file_write,
	.llseek = seq_lseek,
	.release = cgroup_seqfile_release,
};
1652
1653static int cgroup_file_open(struct inode *inode, struct file *file)
1654{
1655 int err;
1656 struct cftype *cft;
1657
1658 err = generic_file_open(inode, file);
1659 if (err)
1660 return err;
1661 cft = __d_cft(file->f_dentry);
1662
1663 if (cft->read_map || cft->read_seq_string) {
1664 struct cgroup_seqfile_state *state =
1665 kzalloc(sizeof(*state), GFP_USER);
1666 if (!state)
1667 return -ENOMEM;
1668 state->cft = cft;
1669 state->cgroup = __d_cgrp(file->f_dentry->d_parent);
1670 file->f_op = &cgroup_seqfile_operations;
1671 err = single_open(file, cgroup_seqfile_show, state);
1672 if (err < 0)
1673 kfree(state);
1674 } else if (cft->open)
1675 err = cft->open(inode, file);
1676 else
1677 err = 0;
1678
1679 return err;
1680}
1681
1682static int cgroup_file_release(struct inode *inode, struct file *file)
1683{
1684 struct cftype *cft = __d_cft(file->f_dentry);
1685 if (cft->release)
1686 return cft->release(inode, file);
1687 return 0;
1688}
1689
1690
1691
1692
1693static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
1694 struct inode *new_dir, struct dentry *new_dentry)
1695{
1696 if (!S_ISDIR(old_dentry->d_inode->i_mode))
1697 return -ENOTDIR;
1698 if (new_dentry->d_inode)
1699 return -EEXIST;
1700 if (old_dir != new_dir)
1701 return -EIO;
1702 return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
1703}
1704
/*
 * Default file operations for regular cgroup control files; installed as
 * i_fop by cgroup_create_file().  cgroup_file_open() may replace f_op for
 * seq_file-backed files.
 */
static struct file_operations cgroup_file_operations = {
	.read = cgroup_file_read,
	.write = cgroup_file_write,
	.llseek = generic_file_llseek,
	.open = cgroup_file_open,
	.release = cgroup_file_release,
};
1712
/*
 * Inode operations for cgroup directories; installed as i_op by
 * cgroup_create_file() when it creates a directory inode.
 */
static struct inode_operations cgroup_dir_inode_operations = {
	.lookup = simple_lookup,
	.mkdir = cgroup_mkdir,
	.rmdir = cgroup_rmdir,
	.rename = cgroup_rename,
};
1719
1720static int cgroup_create_file(struct dentry *dentry, mode_t mode,
1721 struct super_block *sb)
1722{
1723 static const struct dentry_operations cgroup_dops = {
1724 .d_iput = cgroup_diput,
1725 };
1726
1727 struct inode *inode;
1728
1729 if (!dentry)
1730 return -ENOENT;
1731 if (dentry->d_inode)
1732 return -EEXIST;
1733
1734 inode = cgroup_new_inode(mode, sb);
1735 if (!inode)
1736 return -ENOMEM;
1737
1738 if (S_ISDIR(mode)) {
1739 inode->i_op = &cgroup_dir_inode_operations;
1740 inode->i_fop = &simple_dir_operations;
1741
1742
1743 inc_nlink(inode);
1744
1745
1746
1747 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
1748 } else if (S_ISREG(mode)) {
1749 inode->i_size = 0;
1750 inode->i_fop = &cgroup_file_operations;
1751 }
1752 dentry->d_op = &cgroup_dops;
1753 d_instantiate(dentry, inode);
1754 dget(dentry);
1755 return 0;
1756}
1757
1758
1759
1760
1761
1762
1763
1764
1765static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
1766 mode_t mode)
1767{
1768 struct dentry *parent;
1769 int error = 0;
1770
1771 parent = cgrp->parent->dentry;
1772 error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
1773 if (!error) {
1774 dentry->d_fsdata = cgrp;
1775 inc_nlink(parent->d_inode);
1776 rcu_assign_pointer(cgrp->dentry, dentry);
1777 dget(dentry);
1778 }
1779 dput(dentry);
1780
1781 return error;
1782}
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793static mode_t cgroup_file_mode(const struct cftype *cft)
1794{
1795 mode_t mode = 0;
1796
1797 if (cft->mode)
1798 return cft->mode;
1799
1800 if (cft->read || cft->read_u64 || cft->read_s64 ||
1801 cft->read_map || cft->read_seq_string)
1802 mode |= S_IRUGO;
1803
1804 if (cft->write || cft->write_u64 || cft->write_s64 ||
1805 cft->write_string || cft->trigger)
1806 mode |= S_IWUSR;
1807
1808 return mode;
1809}
1810
1811int cgroup_add_file(struct cgroup *cgrp,
1812 struct cgroup_subsys *subsys,
1813 const struct cftype *cft)
1814{
1815 struct dentry *dir = cgrp->dentry;
1816 struct dentry *dentry;
1817 int error;
1818 mode_t mode;
1819
1820 char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
1821 if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
1822 strcpy(name, subsys->name);
1823 strcat(name, ".");
1824 }
1825 strcat(name, cft->name);
1826 BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
1827 dentry = lookup_one_len(name, dir, strlen(name));
1828 if (!IS_ERR(dentry)) {
1829 mode = cgroup_file_mode(cft);
1830 error = cgroup_create_file(dentry, mode | S_IFREG,
1831 cgrp->root->sb);
1832 if (!error)
1833 dentry->d_fsdata = (void *)cft;
1834 dput(dentry);
1835 } else
1836 error = PTR_ERR(dentry);
1837 return error;
1838}
1839
1840int cgroup_add_files(struct cgroup *cgrp,
1841 struct cgroup_subsys *subsys,
1842 const struct cftype cft[],
1843 int count)
1844{
1845 int i, err;
1846 for (i = 0; i < count; i++) {
1847 err = cgroup_add_file(cgrp, subsys, &cft[i]);
1848 if (err)
1849 return err;
1850 }
1851 return 0;
1852}
1853
1854
1855
1856
1857
1858
1859
1860int cgroup_task_count(const struct cgroup *cgrp)
1861{
1862 int count = 0;
1863 struct cg_cgroup_link *link;
1864
1865 read_lock(&css_set_lock);
1866 list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
1867 count += atomic_read(&link->cg->refcount);
1868 }
1869 read_unlock(&css_set_lock);
1870 return count;
1871}
1872
1873
1874
1875
1876
1877static void cgroup_advance_iter(struct cgroup *cgrp,
1878 struct cgroup_iter *it)
1879{
1880 struct list_head *l = it->cg_link;
1881 struct cg_cgroup_link *link;
1882 struct css_set *cg;
1883
1884
1885 do {
1886 l = l->next;
1887 if (l == &cgrp->css_sets) {
1888 it->cg_link = NULL;
1889 return;
1890 }
1891 link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
1892 cg = link->cg;
1893 } while (list_empty(&cg->tasks));
1894 it->cg_link = l;
1895 it->task = cg->tasks.next;
1896}
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
/*
 * cgroup_enable_task_cg_lists - start linking tasks into their css_set's
 * task list.
 *
 * Sets use_task_css_set_links and then, for every existing thread, adds
 * it to the tasks list of its css_set unless it is already linked or is
 * exiting.  Runs with css_set_lock held for writing; each task is
 * examined under its task_lock.
 */
static void cgroup_enable_task_cg_lists(void)
{
	struct task_struct *p, *g;
	write_lock(&css_set_lock);
	use_task_css_set_links = 1;
	do_each_thread(g, p) {
		task_lock(p);
		/*
		 * Skip tasks that are exiting: cgroup_exit() only unlinks a
		 * task whose cg_list is non-empty when it runs, so linking
		 * an exiting task here could leave a stale entry behind.
		 * NOTE(review): inferred from cgroup_exit() — confirm.
		 */
		if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
			list_add(&p->cg_list, &p->cgroups->tasks);
		task_unlock(p);
	} while_each_thread(g, p);
	write_unlock(&css_set_lock);
}
1925
/*
 * cgroup_iter_start - begin iterating over the tasks of a cgroup.
 * @cgrp: cgroup whose tasks are walked
 * @it: iterator state to initialise
 *
 * Returns with css_set_lock held for reading; the lock is released by
 * cgroup_iter_end(), so every call must be paired with one.
 */
void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
{
	/*
	 * The per-css_set task lists are only maintained once
	 * use_task_css_set_links is set; the first iteration turns that on
	 * and links all existing tasks.
	 */
	if (!use_task_css_set_links)
		cgroup_enable_task_cg_lists();

	read_lock(&css_set_lock);
	/* Start at the list head and advance to the first css_set with tasks. */
	it->cg_link = &cgrp->css_sets;
	cgroup_advance_iter(cgrp, it);
}
1940
1941struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
1942 struct cgroup_iter *it)
1943{
1944 struct task_struct *res;
1945 struct list_head *l = it->task;
1946 struct cg_cgroup_link *link;
1947
1948
1949 if (!it->cg_link)
1950 return NULL;
1951 res = list_entry(l, struct task_struct, cg_list);
1952
1953 l = l->next;
1954 link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
1955 if (l == &link->cg->tasks) {
1956
1957
1958 cgroup_advance_iter(cgrp, it);
1959 } else {
1960 it->task = l;
1961 }
1962 return res;
1963}
1964
/*
 * cgroup_iter_end - finish a task iteration.
 *
 * Drops the css_set_lock read lock taken by cgroup_iter_start().  Neither
 * argument is used.
 */
void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
{
	read_unlock(&css_set_lock);
}
1969
1970static inline int started_after_time(struct task_struct *t1,
1971 struct timespec *time,
1972 struct task_struct *t2)
1973{
1974 int start_diff = timespec_compare(&t1->start_time, time);
1975 if (start_diff > 0) {
1976 return 1;
1977 } else if (start_diff < 0) {
1978 return 0;
1979 } else {
1980
1981
1982
1983
1984
1985
1986
1987
1988 return t1 > t2;
1989 }
1990}
1991
1992
1993
1994
1995
1996
1997static inline int started_after(void *p1, void *p2)
1998{
1999 struct task_struct *t1 = p1;
2000 struct task_struct *t2 = p2;
2001 return started_after_time(t1, &t2->start_time, t2);
2002}
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
/*
 * cgroup_scan_tasks - run a callback on the tasks of a cgroup.
 * @scan: scanner describing the cgroup (scan->cg), an optional filter
 *        (scan->test_task), the per-task action (scan->process_task) and
 *        an optional caller-provided heap (scan->heap)
 *
 * Tasks are collected into a heap while the iteration lock is held and
 * processed only after cgroup_iter_end(), so process_task() is never
 * called under that lock.  The scan repeats in passes: each pass only
 * admits tasks ordered after the (start time, task) pair recorded from
 * the previous pass, and the loop ends when a pass collects nothing.
 *
 * Returns 0 on success, or the heap_init() error if a temporary heap had
 * to be set up and that failed.
 */
int cgroup_scan_tasks(struct cgroup_scanner *scan)
{
	int retval, i;
	struct cgroup_iter it;
	struct task_struct *p, *dropped;
	/* Only ever compared by address, never dereferenced. */
	struct task_struct *latest_task = NULL;
	struct ptr_heap tmp_heap;
	struct ptr_heap *heap;
	struct timespec latest_time = { 0, 0 };

	if (scan->heap) {
		/* Caller supplied a heap; just install our ordering. */
		heap = scan->heap;
		heap->gt = &started_after;
	} else {
		/* No heap supplied: set up a temporary one. */
		heap = &tmp_heap;
		retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
		if (retval)
			/* could not set up the heap */
			return retval;
	}

 again:
	/*
	 * One pass: refill the heap from scratch with tasks that pass the
	 * filter and sort after the cut-off left by the previous pass.
	 */
	heap->size = 0;
	cgroup_iter_start(scan->cg, &it);
	while ((p = cgroup_iter_next(scan->cg, &it))) {
		/* Apply the caller's filter, if there is one. */
		if (scan->test_task && !scan->test_task(p, scan))
			continue;
		/* Skip tasks at or before the previous pass's cut-off. */
		if (!started_after_time(p, &latest_time, latest_task))
			continue;
		dropped = heap_insert(heap, p);
		/*
		 * Keep one task reference per heap entry.
		 * NOTE(review): assumes heap_insert() returns NULL when
		 * nothing was evicted, the evicted entry otherwise, and @p
		 * itself when @p was not admitted — confirm.
		 */
		if (dropped == NULL) {
			/* p was added and nothing fell out */
			get_task_struct(p);
		} else if (dropped != p) {
			/* p replaced another task: swap the references */
			get_task_struct(p);
			put_task_struct(dropped);
		}
		/* dropped == p: p was not kept, so no reference is taken */
	}
	cgroup_iter_end(scan->cg, &it);

	if (heap->size) {
		for (i = 0; i < heap->size; i++) {
			struct task_struct *q = heap->ptrs[i];
			if (i == 0) {
				/*
				 * Entry 0 becomes the cut-off for the next
				 * pass (presumably the heap's top element
				 * under started_after — confirm).
				 */
				latest_time = q->start_time;
				latest_task = q;
			}
			/* Process the task, then drop the heap's reference. */
			scan->process_task(q, scan);
			put_task_struct(q);
		}
		/*
		 * Something was processed, so there may be more tasks
		 * beyond the cut-off; scan again.
		 */
		goto again;
	}
	if (heap == &tmp_heap)
		heap_free(&tmp_heap);
	return 0;
}
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
2148{
2149 int n = 0, pid;
2150 struct cgroup_iter it;
2151 struct task_struct *tsk;
2152 cgroup_iter_start(cgrp, &it);
2153 while ((tsk = cgroup_iter_next(cgrp, &it))) {
2154 if (unlikely(n == npids))
2155 break;
2156 pid = task_pid_vnr(tsk);
2157 if (pid > 0)
2158 pidarray[n++] = pid;
2159 }
2160 cgroup_iter_end(cgrp, &it);
2161 return n;
2162}
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
2174{
2175 int ret = -EINVAL;
2176 struct cgroup *cgrp;
2177 struct cgroup_iter it;
2178 struct task_struct *tsk;
2179
2180
2181
2182
2183
2184 if (dentry->d_sb->s_op != &cgroup_ops ||
2185 !S_ISDIR(dentry->d_inode->i_mode))
2186 goto err;
2187
2188 ret = 0;
2189 cgrp = dentry->d_fsdata;
2190
2191 cgroup_iter_start(cgrp, &it);
2192 while ((tsk = cgroup_iter_next(cgrp, &it))) {
2193 switch (tsk->state) {
2194 case TASK_RUNNING:
2195 stats->nr_running++;
2196 break;
2197 case TASK_INTERRUPTIBLE:
2198 stats->nr_sleeping++;
2199 break;
2200 case TASK_UNINTERRUPTIBLE:
2201 stats->nr_uninterruptible++;
2202 break;
2203 case TASK_STOPPED:
2204 stats->nr_stopped++;
2205 break;
2206 default:
2207 if (delayacct_is_task_waiting_on_io(tsk))
2208 stats->nr_io_wait++;
2209 break;
2210 }
2211 }
2212 cgroup_iter_end(cgrp, &it);
2213
2214err:
2215 return ret;
2216}
2217
2218
2219
2220
2221
/*
 * Cached, sorted pid listing of a cgroup as seen from one pid namespace.
 * Created or refreshed by cgroup_tasks_open() and torn down by
 * release_cgroup_pid_array() when the last user goes away.  Instances
 * are modified under the owning cgroup's pids_mutex.
 */
struct cgroup_pids {
	/* entry in the owning cgroup's pids_list */
	struct list_head list;
	/* cgroup this listing belongs to */
	struct cgroup *cgrp;
	/* pid namespace the listing was built for (a reference is held) */
	struct pid_namespace *ns;
	/* array of pids, sorted with cmppid() */
	pid_t *tasks_pids;
	/* number of opens currently using this listing */
	int use_count;
	/* number of valid entries in tasks_pids */
	int length;
};
2236
/*
 * cmppid - sort() comparison callback for arrays of pid_t.
 * Returns the difference between the two pids: negative, zero or
 * positive as *a is less than, equal to or greater than *b.
 */
static int cmppid(const void *a, const void *b)
{
	const pid_t *lhs = a;
	const pid_t *rhs = b;

	return *lhs - *rhs;
}
2241
2242
2243
2244
2245
2246
2247
2248static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
2249{
2250
2251
2252
2253
2254
2255
2256 struct cgroup_pids *cp = s->private;
2257 struct cgroup *cgrp = cp->cgrp;
2258 int index = 0, pid = *pos;
2259 int *iter;
2260
2261 down_read(&cgrp->pids_mutex);
2262 if (pid) {
2263 int end = cp->length;
2264
2265 while (index < end) {
2266 int mid = (index + end) / 2;
2267 if (cp->tasks_pids[mid] == pid) {
2268 index = mid;
2269 break;
2270 } else if (cp->tasks_pids[mid] <= pid)
2271 index = mid + 1;
2272 else
2273 end = mid;
2274 }
2275 }
2276
2277 if (index >= cp->length)
2278 return NULL;
2279
2280 iter = cp->tasks_pids + index;
2281 *pos = *iter;
2282 return iter;
2283}
2284
2285static void cgroup_tasks_stop(struct seq_file *s, void *v)
2286{
2287 struct cgroup_pids *cp = s->private;
2288 struct cgroup *cgrp = cp->cgrp;
2289 up_read(&cgrp->pids_mutex);
2290}
2291
2292static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
2293{
2294 struct cgroup_pids *cp = s->private;
2295 int *p = v;
2296 int *end = cp->tasks_pids + cp->length;
2297
2298
2299
2300
2301
2302 p++;
2303 if (p >= end) {
2304 return NULL;
2305 } else {
2306 *pos = *p;
2307 return p;
2308 }
2309}
2310
/*
 * cgroup_tasks_show - seq_file show callback for the "tasks" file.
 * Prints the pid that @v points at, followed by a newline.
 */
static int cgroup_tasks_show(struct seq_file *s, void *v)
{
	int *pid_entry = v;

	return seq_printf(s, "%d\n", *pid_entry);
}
2315
/*
 * seq_file iterator over a cgroup_pids listing; passed to seq_open() by
 * cgroup_tasks_open().
 */
static struct seq_operations cgroup_tasks_seq_operations = {
	.start = cgroup_tasks_start,
	.stop = cgroup_tasks_stop,
	.next = cgroup_tasks_next,
	.show = cgroup_tasks_show,
};
2322
2323static void release_cgroup_pid_array(struct cgroup_pids *cp)
2324{
2325 struct cgroup *cgrp = cp->cgrp;
2326
2327 down_write(&cgrp->pids_mutex);
2328 BUG_ON(!cp->use_count);
2329 if (!--cp->use_count) {
2330 list_del(&cp->list);
2331 put_pid_ns(cp->ns);
2332 kfree(cp->tasks_pids);
2333 kfree(cp);
2334 }
2335 up_write(&cgrp->pids_mutex);
2336}
2337
2338static int cgroup_tasks_release(struct inode *inode, struct file *file)
2339{
2340 struct seq_file *seq;
2341 struct cgroup_pids *cp;
2342
2343 if (!(file->f_mode & FMODE_READ))
2344 return 0;
2345
2346 seq = file->private_data;
2347 cp = seq->private;
2348
2349 release_cgroup_pid_array(cp);
2350 return seq_release(inode, file);
2351}
2352
/*
 * File operations that cgroup_tasks_open() installs on a "tasks" file
 * opened for reading: reads go through seq_file, writes through
 * cgroup_file_write().
 */
static struct file_operations cgroup_tasks_operations = {
	.read = seq_read,
	.llseek = seq_lseek,
	.write = cgroup_file_write,
	.release = cgroup_tasks_release,
};
2359
2360
2361
2362
2363
2364
2365static int cgroup_tasks_open(struct inode *unused, struct file *file)
2366{
2367 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2368 struct pid_namespace *ns = current->nsproxy->pid_ns;
2369 struct cgroup_pids *cp;
2370 pid_t *pidarray;
2371 int npids;
2372 int retval;
2373
2374
2375 if (!(file->f_mode & FMODE_READ))
2376 return 0;
2377
2378
2379
2380
2381
2382
2383
2384 npids = cgroup_task_count(cgrp);
2385 pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
2386 if (!pidarray)
2387 return -ENOMEM;
2388 npids = pid_array_load(pidarray, npids, cgrp);
2389 sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
2390
2391
2392
2393
2394
2395 down_write(&cgrp->pids_mutex);
2396
2397 list_for_each_entry(cp, &cgrp->pids_list, list) {
2398 if (ns == cp->ns)
2399 goto found;
2400 }
2401
2402 cp = kzalloc(sizeof(*cp), GFP_KERNEL);
2403 if (!cp) {
2404 up_write(&cgrp->pids_mutex);
2405 kfree(pidarray);
2406 return -ENOMEM;
2407 }
2408 cp->cgrp = cgrp;
2409 cp->ns = ns;
2410 get_pid_ns(ns);
2411 list_add(&cp->list, &cgrp->pids_list);
2412found:
2413 kfree(cp->tasks_pids);
2414 cp->tasks_pids = pidarray;
2415 cp->length = npids;
2416 cp->use_count++;
2417 up_write(&cgrp->pids_mutex);
2418
2419 file->f_op = &cgroup_tasks_operations;
2420
2421 retval = seq_open(file, &cgroup_tasks_seq_operations);
2422 if (retval) {
2423 release_cgroup_pid_array(cp);
2424 return retval;
2425 }
2426 ((struct seq_file *)file->private_data)->private = cp;
2427 return 0;
2428}
2429
/*
 * cgroup_read_notify_on_release - read_u64 handler for the
 * "notify_on_release" file; reports notify_on_release() for @cgrp.
 */
static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
					    struct cftype *cft)
{
	return notify_on_release(cgrp);
}
2435
2436static int cgroup_write_notify_on_release(struct cgroup *cgrp,
2437 struct cftype *cft,
2438 u64 val)
2439{
2440 clear_bit(CGRP_RELEASABLE, &cgrp->flags);
2441 if (val)
2442 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
2443 else
2444 clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
2445 return 0;
2446}
2447
2448
2449
2450
/*
 * Control files that cgroup_populate_dir() creates in every cgroup
 * directory, independent of any subsystem.
 */
static struct cftype files[] = {
	{
		/* task membership: read lists pids, write goes to cgroup_tasks_write */
		.name = "tasks",
		.open = cgroup_tasks_open,
		.write_u64 = cgroup_tasks_write,
		.release = cgroup_tasks_release,
		.private = FILE_TASKLIST,
		.mode = S_IRUGO | S_IWUSR,
	},

	{
		/* per-cgroup notify-on-release flag */
		.name = "notify_on_release",
		.read_u64 = cgroup_read_notify_on_release,
		.write_u64 = cgroup_write_notify_on_release,
		.private = FILE_NOTIFY_ON_RELEASE,
	},
};
2468
/*
 * The "release_agent" control file.  cgroup_populate_dir() adds it only
 * to a hierarchy's top cgroup.  Writes of up to PATH_MAX bytes are
 * accepted (see max_write_len handling in cgroup_write_string()).
 */
static struct cftype cft_release_agent = {
	.name = "release_agent",
	.read_seq_string = cgroup_release_agent_show,
	.write_string = cgroup_release_agent_write,
	.max_write_len = PATH_MAX,
	.private = FILE_RELEASE_AGENT,
};
2476
2477static int cgroup_populate_dir(struct cgroup *cgrp)
2478{
2479 int err;
2480 struct cgroup_subsys *ss;
2481
2482
2483 cgroup_clear_directory(cgrp->dentry);
2484
2485 err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
2486 if (err < 0)
2487 return err;
2488
2489 if (cgrp == cgrp->top_cgroup) {
2490 if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0)
2491 return err;
2492 }
2493
2494 for_each_subsys(cgrp->root, ss) {
2495 if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
2496 return err;
2497 }
2498
2499 for_each_subsys(cgrp->root, ss) {
2500 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
2501
2502
2503
2504
2505
2506 if (css->id)
2507 rcu_assign_pointer(css->id->css, css);
2508 }
2509
2510 return 0;
2511}
2512
2513static void init_cgroup_css(struct cgroup_subsys_state *css,
2514 struct cgroup_subsys *ss,
2515 struct cgroup *cgrp)
2516{
2517 css->cgroup = cgrp;
2518 atomic_set(&css->refcnt, 1);
2519 css->flags = 0;
2520 css->id = NULL;
2521 if (cgrp == dummytop)
2522 set_bit(CSS_ROOT, &css->flags);
2523 BUG_ON(cgrp->subsys[ss->subsys_id]);
2524 cgrp->subsys[ss->subsys_id] = css;
2525}
2526
2527static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
2528{
2529
2530 int i;
2531
2532 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2533 struct cgroup_subsys *ss = subsys[i];
2534 if (ss->root == root)
2535 mutex_lock(&ss->hierarchy_mutex);
2536 }
2537}
2538
2539static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
2540{
2541 int i;
2542
2543 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2544 struct cgroup_subsys *ss = subsys[i];
2545 if (ss->root == root)
2546 mutex_unlock(&ss->hierarchy_mutex);
2547 }
2548}
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
2559 mode_t mode)
2560{
2561 struct cgroup *cgrp;
2562 struct cgroupfs_root *root = parent->root;
2563 int err = 0;
2564 struct cgroup_subsys *ss;
2565 struct super_block *sb = root->sb;
2566
2567 cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
2568 if (!cgrp)
2569 return -ENOMEM;
2570
2571
2572
2573
2574
2575
2576 atomic_inc(&sb->s_active);
2577
2578 mutex_lock(&cgroup_mutex);
2579
2580 init_cgroup_housekeeping(cgrp);
2581
2582 cgrp->parent = parent;
2583 cgrp->root = parent->root;
2584 cgrp->top_cgroup = parent->top_cgroup;
2585
2586 if (notify_on_release(parent))
2587 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
2588
2589 for_each_subsys(root, ss) {
2590 struct cgroup_subsys_state *css = ss->create(ss, cgrp);
2591 if (IS_ERR(css)) {
2592 err = PTR_ERR(css);
2593 goto err_destroy;
2594 }
2595 init_cgroup_css(css, ss, cgrp);
2596 if (ss->use_id)
2597 if (alloc_css_id(ss, parent, cgrp))
2598 goto err_destroy;
2599
2600 }
2601
2602 cgroup_lock_hierarchy(root);
2603 list_add(&cgrp->sibling, &cgrp->parent->children);
2604 cgroup_unlock_hierarchy(root);
2605 root->number_of_cgroups++;
2606
2607 err = cgroup_create_dir(cgrp, dentry, mode);
2608 if (err < 0)
2609 goto err_remove;
2610
2611
2612 BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
2613
2614 err = cgroup_populate_dir(cgrp);
2615
2616
2617 mutex_unlock(&cgroup_mutex);
2618 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
2619
2620 return 0;
2621
2622 err_remove:
2623
2624 cgroup_lock_hierarchy(root);
2625 list_del(&cgrp->sibling);
2626 cgroup_unlock_hierarchy(root);
2627 root->number_of_cgroups--;
2628
2629 err_destroy:
2630
2631 for_each_subsys(root, ss) {
2632 if (cgrp->subsys[ss->subsys_id])
2633 ss->destroy(ss, cgrp);
2634 }
2635
2636 mutex_unlock(&cgroup_mutex);
2637
2638
2639 deactivate_super(sb);
2640
2641 kfree(cgrp);
2642 return err;
2643}
2644
2645static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2646{
2647 struct cgroup *c_parent = dentry->d_parent->d_fsdata;
2648
2649
2650 return cgroup_create(c_parent, dentry, mode | S_IFDIR);
2651}
2652
2653static int cgroup_has_css_refs(struct cgroup *cgrp)
2654{
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664 int i;
2665 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2666 struct cgroup_subsys *ss = subsys[i];
2667 struct cgroup_subsys_state *css;
2668
2669 if (ss->root != cgrp->root)
2670 continue;
2671 css = cgrp->subsys[ss->subsys_id];
2672
2673
2674
2675
2676
2677
2678 if (css && (atomic_read(&css->refcnt) > 1))
2679 return 1;
2680 }
2681 return 0;
2682}
2683
2684
2685
2686
2687
2688
2689
/*
 * cgroup_clear_css_refs - try to mark all of a cgroup's subsystem states
 * as removed, all or nothing.
 *
 * Phase one drops the reference count of each state from 1 to 0 and gives
 * up as soon as a state with a count above 1 is found.  Phase two either
 * sets CSS_REMOVED on every state (success) or restores any count that
 * was dropped to 0 back to 1 (failure).  Interrupts are disabled on the
 * local CPU for the duration.
 *
 * Returns non-zero on success, 0 if some state still had extra
 * references.
 */
static int cgroup_clear_css_refs(struct cgroup *cgrp)
{
	struct cgroup_subsys *ss;
	unsigned long flags;
	bool failed = false;
	local_irq_save(flags);
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		int refcnt;
		while (1) {
			/* Only a state holding just its base reference can go. */
			refcnt = atomic_read(&css->refcnt);
			if (refcnt > 1) {
				failed = true;
				goto done;
			}
			/* A count of 0 here would mean it was already cleared. */
			BUG_ON(!refcnt);
			/*
			 * Swap the count to 0 only if it is unchanged since
			 * it was read; otherwise re-read and try again.
			 * NOTE(review): the zero count is presumably what
			 * makes concurrent reference grabs hold off — confirm
			 * against the css get/tryget helpers.
			 */
			if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt)
				break;
			cpu_relax();
		}
	}
 done:
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		if (failed) {
			/*
			 * Undo phase one: restore the base reference on the
			 * states that were already dropped to 0.
			 */
			if (!atomic_read(&css->refcnt))
				atomic_set(&css->refcnt, 1);
		} else {
			/* Commit: the state is now marked as removed. */
			set_bit(CSS_REMOVED, &css->flags);
		}
	}
	local_irq_restore(flags);
	return !failed;
}
2736
/*
 * cgroup_rmdir - rmdir handler for cgroup directories.
 * @unused_dir: parent directory inode (unused)
 * @dentry: dentry of the cgroup directory to remove
 *
 * Removal is refused with -EBUSY while the cgroup has a non-zero count
 * or child cgroups.  Otherwise the pre-destroy callbacks are run and the
 * subsystem states are marked as removed; if some state still has extra
 * references the caller sleeps on cgroup_rmdir_waitq and the whole
 * sequence is retried.
 *
 * Returns 0 on success, -EBUSY, -EINTR if a signal is pending after the
 * wait, or the error returned by cgroup_call_pre_destroy().
 */
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
{
	struct cgroup *cgrp = dentry->d_fsdata;
	struct dentry *d;
	struct cgroup *parent;
	DEFINE_WAIT(wait);
	int ret;

again:
	/* First check, before any callbacks are run. */
	mutex_lock(&cgroup_mutex);
	if (atomic_read(&cgrp->count) != 0) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	if (!list_empty(&cgrp->children)) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	mutex_unlock(&cgroup_mutex);

	/*
	 * Announce that an rmdir is in progress before calling into the
	 * subsystems.  The flag is tested again before sleeping below.
	 * NOTE(review): presumably it is cleared by whoever wakes
	 * cgroup_rmdir_waitq — confirm against the wake-up helper.
	 */
	set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	/*
	 * Run the pre-destroy callbacks.  cgroup_mutex is not held across
	 * this call.
	 */
	ret = cgroup_call_pre_destroy(cgrp);
	if (ret) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		return ret;
	}

	/* Re-check under the mutex: things may have changed meanwhile. */
	mutex_lock(&cgroup_mutex);
	parent = cgrp->parent;
	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	/* Get on the wait queue before testing, so no wake-up is missed. */
	prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
	if (!cgroup_clear_css_refs(cgrp)) {
		mutex_unlock(&cgroup_mutex);
		/*
		 * Some state still has extra references.  Sleep only if the
		 * flag is still set; if it has been cleared in the meantime,
		 * skip the sleep and retry straight away.
		 */
		if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
			schedule();
		finish_wait(&cgroup_rmdir_waitq, &wait);
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		if (signal_pending(current))
			return -EINTR;
		goto again;
	}
	/* All states are marked removed; no waiting was needed. */
	finish_wait(&cgroup_rmdir_waitq, &wait);
	clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	/* Mark the cgroup removed and take it off the release list. */
	spin_lock(&release_list_lock);
	set_bit(CGRP_REMOVED, &cgrp->flags);
	if (!list_empty(&cgrp->release_list))
		list_del(&cgrp->release_list);
	spin_unlock(&release_list_lock);

	cgroup_lock_hierarchy(cgrp->root);
	/* Unlink from the parent's list of children. */
	list_del(&cgrp->sibling);
	cgroup_unlock_hierarchy(cgrp->root);

	/* Hold the dentry across the directory removal. */
	spin_lock(&cgrp->dentry->d_lock);
	d = dget(cgrp->dentry);
	spin_unlock(&d->d_lock);

	cgroup_d_remove_dir(d);
	dput(d);

	/* The parent lost a child, so it may now be releasable itself. */
	set_bit(CGRP_RELEASABLE, &parent->flags);
	check_for_release(parent);

	mutex_unlock(&cgroup_mutex);
	return 0;
}
2829
/*
 * cgroup_init_subsys - boot-time initialisation of one subsystem.
 *
 * Binds the subsystem to the default hierarchy (rootnode), creates its
 * state for the dummy top cgroup and records that state in init_css_set.
 * Failure of the subsystem's create() callback is fatal.
 */
static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);

	/* Attach the subsystem to the default hierarchy. */
	list_add(&ss->sibling, &rootnode.subsys_list);
	ss->root = &rootnode;
	css = ss->create(ss, dummytop);
	/* There is no way to recover from a failure this early. */
	BUG_ON(IS_ERR(css));
	init_cgroup_css(css, ss, dummytop);

	/*
	 * Make init_css_set use the state just created for the dummy top
	 * cgroup.
	 */
	init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];

	/* Remember whether any subsystem wants fork/exit callbacks. */
	need_forkexit_callback |= ss->fork || ss->exit;

	/*
	 * init_task's task list must still be empty here.
	 * NOTE(review): presumably this asserts that no other task exists
	 * yet, so nothing else needs updating — confirm.
	 */
	BUG_ON(!list_empty(&init_task.tasks));

	mutex_init(&ss->hierarchy_mutex);
	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
	ss->active = 1;
}
2861
2862
2863
2864
2865
2866
2867
/*
 * cgroup_init_early - first stage of cgroup initialisation.
 *
 * Sets up init_css_set and the default hierarchy, links the two
 * together, points init_task at init_css_set, initialises the css_set
 * hash table, sanity-checks every compiled-in subsystem and initialises
 * those that ask for early initialisation.  Always returns 0; any
 * inconsistency is treated as a bug.
 */
int __init cgroup_init_early(void)
{
	int i;
	atomic_set(&init_css_set.refcount, 1);
	INIT_LIST_HEAD(&init_css_set.cg_links);
	INIT_LIST_HEAD(&init_css_set.tasks);
	INIT_HLIST_NODE(&init_css_set.hlist);
	css_set_count = 1;
	init_cgroup_root(&rootnode);
	root_count = 1;
	init_task.cgroups = &init_css_set;

	/* Link init_css_set and the default hierarchy's top cgroup. */
	init_css_set_link.cg = &init_css_set;
	list_add(&init_css_set_link.cgrp_link_list,
		 &rootnode.top_cgroup.css_sets);
	list_add(&init_css_set_link.cg_link_list,
		 &init_css_set.cg_links);

	for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&css_set_table[i]);

	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];

		/* Every subsystem needs a name and create/destroy callbacks. */
		BUG_ON(!ss->name);
		BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
		BUG_ON(!ss->create);
		BUG_ON(!ss->destroy);
		/* A subsystem's id must equal its index in subsys[]. */
		if (ss->subsys_id != i) {
			printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
			       ss->name, ss->subsys_id);
			BUG();
		}

		if (ss->early_init)
			cgroup_init_subsys(ss);
	}
	return 0;
}
2907
2908
2909
2910
2911
2912
2913
2914int __init cgroup_init(void)
2915{
2916 int err;
2917 int i;
2918 struct hlist_head *hhead;
2919
2920 err = bdi_init(&cgroup_backing_dev_info);
2921 if (err)
2922 return err;
2923
2924 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2925 struct cgroup_subsys *ss = subsys[i];
2926 if (!ss->early_init)
2927 cgroup_init_subsys(ss);
2928 if (ss->use_id)
2929 cgroup_subsys_init_idr(ss);
2930 }
2931
2932
2933 hhead = css_set_hash(init_css_set.subsys);
2934 hlist_add_head(&init_css_set.hlist, hhead);
2935
2936 err = register_filesystem(&cgroup_fs_type);
2937 if (err < 0)
2938 goto out;
2939
2940 proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);
2941
2942out:
2943 if (err)
2944 bdi_destroy(&cgroup_backing_dev_info);
2945
2946 return err;
2947}
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962static int proc_cgroup_show(struct seq_file *m, void *v)
2963{
2964 struct pid *pid;
2965 struct task_struct *tsk;
2966 char *buf;
2967 int retval;
2968 struct cgroupfs_root *root;
2969
2970 retval = -ENOMEM;
2971 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
2972 if (!buf)
2973 goto out;
2974
2975 retval = -ESRCH;
2976 pid = m->private;
2977 tsk = get_pid_task(pid, PIDTYPE_PID);
2978 if (!tsk)
2979 goto out_free;
2980
2981 retval = 0;
2982
2983 mutex_lock(&cgroup_mutex);
2984
2985 for_each_active_root(root) {
2986 struct cgroup_subsys *ss;
2987 struct cgroup *cgrp;
2988 int subsys_id;
2989 int count = 0;
2990
2991 seq_printf(m, "%lu:", root->subsys_bits);
2992 for_each_subsys(root, ss)
2993 seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
2994 seq_putc(m, ':');
2995 get_first_subsys(&root->top_cgroup, NULL, &subsys_id);
2996 cgrp = task_cgroup(tsk, subsys_id);
2997 retval = cgroup_path(cgrp, buf, PAGE_SIZE);
2998 if (retval < 0)
2999 goto out_unlock;
3000 seq_puts(m, buf);
3001 seq_putc(m, '\n');
3002 }
3003
3004out_unlock:
3005 mutex_unlock(&cgroup_mutex);
3006 put_task_struct(tsk);
3007out_free:
3008 kfree(buf);
3009out:
3010 return retval;
3011}
3012
3013static int cgroup_open(struct inode *inode, struct file *file)
3014{
3015 struct pid *pid = PROC_I(inode)->pid;
3016 return single_open(file, proc_cgroup_show, pid);
3017}
3018
/*
 * File operations for the per-task cgroup proc file (non-static, so it
 * can be referenced from outside this file); a single-record seq_file
 * opened through cgroup_open().
 */
struct file_operations proc_cgroup_operations = {
	.open = cgroup_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
3025
3026
3027static int proc_cgroupstats_show(struct seq_file *m, void *v)
3028{
3029 int i;
3030
3031 seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
3032 mutex_lock(&cgroup_mutex);
3033 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3034 struct cgroup_subsys *ss = subsys[i];
3035 seq_printf(m, "%s\t%lu\t%d\t%d\n",
3036 ss->name, ss->root->subsys_bits,
3037 ss->root->number_of_cgroups, !ss->disabled);
3038 }
3039 mutex_unlock(&cgroup_mutex);
3040 return 0;
3041}
3042
/*
 * cgroupstats_open - open handler for the "cgroups" proc file; a
 * single-record seq_file driven by proc_cgroupstats_show() with no
 * private data.
 */
static int cgroupstats_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_cgroupstats_show, NULL);
}
3047
/*
 * File operations for the "cgroups" proc entry that cgroup_init()
 * creates with proc_create().
 */
static struct file_operations proc_cgroupstats_operations = {
	.open = cgroupstats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071void cgroup_fork(struct task_struct *child)
3072{
3073 task_lock(current);
3074 child->cgroups = current->cgroups;
3075 get_css_set(child->cgroups);
3076 task_unlock(current);
3077 INIT_LIST_HEAD(&child->cg_list);
3078}
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088void cgroup_fork_callbacks(struct task_struct *child)
3089{
3090 if (need_forkexit_callback) {
3091 int i;
3092 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3093 struct cgroup_subsys *ss = subsys[i];
3094 if (ss->fork)
3095 ss->fork(ss, child);
3096 }
3097 }
3098}
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109void cgroup_post_fork(struct task_struct *child)
3110{
3111 if (use_task_css_set_links) {
3112 write_lock(&css_set_lock);
3113 task_lock(child);
3114 if (list_empty(&child->cg_list))
3115 list_add(&child->cg_list, &child->cgroups->tasks);
3116 task_unlock(child);
3117 write_unlock(&css_set_lock);
3118 }
3119}
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155void cgroup_exit(struct task_struct *tsk, int run_callbacks)
3156{
3157 int i;
3158 struct css_set *cg;
3159
3160 if (run_callbacks && need_forkexit_callback) {
3161 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3162 struct cgroup_subsys *ss = subsys[i];
3163 if (ss->exit)
3164 ss->exit(ss, tsk);
3165 }
3166 }
3167
3168
3169
3170
3171
3172
3173 if (!list_empty(&tsk->cg_list)) {
3174 write_lock(&css_set_lock);
3175 if (!list_empty(&tsk->cg_list))
3176 list_del(&tsk->cg_list);
3177 write_unlock(&css_set_lock);
3178 }
3179
3180
3181 task_lock(tsk);
3182 cg = tsk->cgroups;
3183 tsk->cgroups = &init_css_set;
3184 task_unlock(tsk);
3185 if (cg)
3186 put_css_set_taskexit(cg);
3187}
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
/*
 * cgroup_clone - create a child cgroup and move a task into it.
 * @tsk: the task to move
 * @subsys: subsystem whose hierarchy the new cgroup is created in
 *          (note: this parameter shadows the file-level subsys[] array)
 * @nodename: directory name for the new cgroup
 *
 * A directory called @nodename is created, via vfs_mkdir(), beneath the
 * cgroup that @tsk currently occupies in @subsys's hierarchy, every
 * subsystem on that hierarchy gets its ->post_clone hook called for the
 * new cgroup, and @tsk is attached to it.
 *
 * Returns 0 without doing anything if @subsys is attached to rootnode or
 * if the hierarchy's superblock active count is already zero.  Otherwise
 * returns the error from the dentry lookup or vfs_mkdir(), or the result
 * of cgroup_attach_task().
 */
int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
 char *nodename)
{
 struct dentry *dentry;
 int ret = 0;
 struct cgroup *parent, *child;
 struct inode *inode;
 struct css_set *cg;
 struct cgroupfs_root *root;
 struct cgroup_subsys *ss;

 /* Callers must only pass a subsystem that is marked active. */
 BUG_ON(!subsys->active);

 /*
  * subsys->root is sampled under cgroup_mutex.  The "again" label sits
  * after the lock call, so the retry path below jumps here with
  * cgroup_mutex still held.
  */
 mutex_lock(&cgroup_mutex);
 again:
 root = subsys->root;
 if (root == &rootnode) {
 mutex_unlock(&cgroup_mutex);
 return 0;
 }

 /* Take an active reference on the hierarchy's superblock. */
 if (!atomic_inc_not_zero(&root->sb->s_active)) {
 /* Active count already zero: presumably the sb is going away. */
 mutex_unlock(&cgroup_mutex);
 return 0;
 }

 /*
  * Snapshot the task's current cgroup in this hierarchy and take a
  * reference on its css_set, all under the task lock.
  */
 task_lock(tsk);
 parent = task_cgroup(tsk, subsys->subsys_id);
 cg = tsk->cgroups;
 get_css_set(cg);
 task_unlock(tsk);

 mutex_unlock(&cgroup_mutex);

 /* The new directory is created inside the parent cgroup's directory. */
 inode = parent->dentry->d_inode;

 /*
  * cgroup_mutex was dropped above before taking the directory's
  * i_mutex; the two are only nested in the order i_mutex ->
  * cgroup_mutex further down.
  */
 mutex_lock(&inode->i_mutex);
 dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
 if (IS_ERR(dentry)) {
 printk(KERN_INFO
 "cgroup: Couldn't allocate dentry for %s: %ld\n", nodename,
 PTR_ERR(dentry));
 ret = PTR_ERR(dentry);
 goto out_release;
 }

 /*
  * Create the directory.  child is read from the dentry before the
  * dentry reference is dropped and before ret is examined; it is only
  * used on the success path.
  */
 ret = vfs_mkdir(inode, dentry, 0755);
 child = __d_cgrp(dentry);
 dput(dentry);
 if (ret) {
 printk(KERN_INFO
 "Failed to create cgroup %s: %d\n", nodename,
 ret);
 goto out_release;
 }

 /*
  * cgroup_mutex was not held while the directory was created, so
  * re-validate: the subsystem must still be on the same root and the
  * task must still be in the same parent cgroup.
  */
 mutex_lock(&cgroup_mutex);
 if ((root != subsys->root) ||
 (parent != task_cgroup(tsk, subsys->subsys_id))) {
 /* Raced: release everything taken for this attempt and retry. */
 mutex_unlock(&inode->i_mutex);
 put_css_set(cg);

 deactivate_super(root->sb);
 /*
  * The directory created above is not removed here; it is left
  * behind, as the message below reports.
  */
 printk(KERN_INFO
 "Race in cgroup_clone() - leaking cgroup %s\n",
 nodename);
 goto again;
 }

 /* Let each subsystem on this hierarchy set up the new cgroup. */
 for_each_subsys(root, ss) {
 if (ss->post_clone)
 ss->post_clone(ss, child);
 }

 /* Finally move the task into the freshly created cgroup. */
 ret = cgroup_attach_task(child, tsk);
 mutex_unlock(&cgroup_mutex);

 out_release:
 mutex_unlock(&inode->i_mutex);

 /* The css_set reference is dropped with cgroup_mutex held. */
 mutex_lock(&cgroup_mutex);
 put_css_set(cg);
 mutex_unlock(&cgroup_mutex);
 deactivate_super(root->sb);
 return ret;
}
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
3319{
3320 int ret;
3321 struct cgroup *target;
3322 int subsys_id;
3323
3324 if (cgrp == dummytop)
3325 return 1;
3326
3327 get_first_subsys(cgrp, NULL, &subsys_id);
3328 target = task_cgroup(task, subsys_id);
3329 while (cgrp != target && cgrp!= cgrp->top_cgroup)
3330 cgrp = cgrp->parent;
3331 ret = (cgrp == target);
3332 return ret;
3333}
3334
3335static void check_for_release(struct cgroup *cgrp)
3336{
3337
3338
3339 if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
3340 && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
3341
3342
3343
3344 int need_schedule_work = 0;
3345 spin_lock(&release_list_lock);
3346 if (!cgroup_is_removed(cgrp) &&
3347 list_empty(&cgrp->release_list)) {
3348 list_add(&cgrp->release_list, &release_list);
3349 need_schedule_work = 1;
3350 }
3351 spin_unlock(&release_list_lock);
3352 if (need_schedule_work)
3353 schedule_work(&release_agent_work);
3354 }
3355}
3356
3357void __css_put(struct cgroup_subsys_state *css)
3358{
3359 struct cgroup *cgrp = css->cgroup;
3360 rcu_read_lock();
3361 if (atomic_dec_return(&css->refcnt) == 1) {
3362 if (notify_on_release(cgrp)) {
3363 set_bit(CGRP_RELEASABLE, &cgrp->flags);
3364 check_for_release(cgrp);
3365 }
3366 cgroup_wakeup_rmdir_waiter(cgrp);
3367 }
3368 rcu_read_unlock();
3369}
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394static void cgroup_release_agent(struct work_struct *work)
3395{
3396 BUG_ON(work != &release_agent_work);
3397 mutex_lock(&cgroup_mutex);
3398 spin_lock(&release_list_lock);
3399 while (!list_empty(&release_list)) {
3400 char *argv[3], *envp[3];
3401 int i;
3402 char *pathbuf = NULL, *agentbuf = NULL;
3403 struct cgroup *cgrp = list_entry(release_list.next,
3404 struct cgroup,
3405 release_list);
3406 list_del_init(&cgrp->release_list);
3407 spin_unlock(&release_list_lock);
3408 pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
3409 if (!pathbuf)
3410 goto continue_free;
3411 if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
3412 goto continue_free;
3413 agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
3414 if (!agentbuf)
3415 goto continue_free;
3416
3417 i = 0;
3418 argv[i++] = agentbuf;
3419 argv[i++] = pathbuf;
3420 argv[i] = NULL;
3421
3422 i = 0;
3423
3424 envp[i++] = "HOME=/";
3425 envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
3426 envp[i] = NULL;
3427
3428
3429
3430
3431 mutex_unlock(&cgroup_mutex);
3432 call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
3433 mutex_lock(&cgroup_mutex);
3434 continue_free:
3435 kfree(pathbuf);
3436 kfree(agentbuf);
3437 spin_lock(&release_list_lock);
3438 }
3439 spin_unlock(&release_list_lock);
3440 mutex_unlock(&cgroup_mutex);
3441}
3442
3443static int __init cgroup_disable(char *str)
3444{
3445 int i;
3446 char *token;
3447
3448 while ((token = strsep(&str, ",")) != NULL) {
3449 if (!*token)
3450 continue;
3451
3452 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3453 struct cgroup_subsys *ss = subsys[i];
3454
3455 if (!strcmp(token, ss->name)) {
3456 ss->disabled = 1;
3457 printk(KERN_INFO "Disabling %s control group"
3458 " subsystem\n", ss->name);
3459 break;
3460 }
3461 }
3462 }
3463 return 1;
3464}
3465__setup("cgroup_disable=", cgroup_disable);
3466
3467
3468
3469
3470
3471
3472
3473
3474unsigned short css_id(struct cgroup_subsys_state *css)
3475{
3476 struct css_id *cssid = rcu_dereference(css->id);
3477
3478 if (cssid)
3479 return cssid->id;
3480 return 0;
3481}
3482
3483unsigned short css_depth(struct cgroup_subsys_state *css)
3484{
3485 struct css_id *cssid = rcu_dereference(css->id);
3486
3487 if (cssid)
3488 return cssid->depth;
3489 return 0;
3490}
3491
3492bool css_is_ancestor(struct cgroup_subsys_state *child,
3493 const struct cgroup_subsys_state *root)
3494{
3495 struct css_id *child_id = rcu_dereference(child->id);
3496 struct css_id *root_id = rcu_dereference(root->id);
3497
3498 if (!child_id || !root_id || (child_id->depth < root_id->depth))
3499 return false;
3500 return child_id->stack[root_id->depth] == root_id->id;
3501}
3502
3503static void __free_css_id_cb(struct rcu_head *head)
3504{
3505 struct css_id *id;
3506
3507 id = container_of(head, struct css_id, rcu_head);
3508 kfree(id);
3509}
3510
3511void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
3512{
3513 struct css_id *id = css->id;
3514
3515 if (!id)
3516 return;
3517
3518 BUG_ON(!ss->use_id);
3519
3520 rcu_assign_pointer(id->css, NULL);
3521 rcu_assign_pointer(css->id, NULL);
3522 spin_lock(&ss->id_lock);
3523 idr_remove(&ss->idr, id->id);
3524 spin_unlock(&ss->id_lock);
3525 call_rcu(&id->rcu_head, __free_css_id_cb);
3526}
3527
3528
3529
3530
3531
3532
3533static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
3534{
3535 struct css_id *newid;
3536 int myid, error, size;
3537
3538 BUG_ON(!ss->use_id);
3539
3540 size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
3541 newid = kzalloc(size, GFP_KERNEL);
3542 if (!newid)
3543 return ERR_PTR(-ENOMEM);
3544
3545 if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) {
3546 error = -ENOMEM;
3547 goto err_out;
3548 }
3549 spin_lock(&ss->id_lock);
3550
3551 error = idr_get_new_above(&ss->idr, newid, 1, &myid);
3552 spin_unlock(&ss->id_lock);
3553
3554
3555 if (error) {
3556 error = -ENOSPC;
3557 goto err_out;
3558 }
3559 if (myid > CSS_ID_MAX)
3560 goto remove_idr;
3561
3562 newid->id = myid;
3563 newid->depth = depth;
3564 return newid;
3565remove_idr:
3566 error = -ENOSPC;
3567 spin_lock(&ss->id_lock);
3568 idr_remove(&ss->idr, myid);
3569 spin_unlock(&ss->id_lock);
3570err_out:
3571 kfree(newid);
3572 return ERR_PTR(error);
3573
3574}
3575
3576static int __init cgroup_subsys_init_idr(struct cgroup_subsys *ss)
3577{
3578 struct css_id *newid;
3579 struct cgroup_subsys_state *rootcss;
3580
3581 spin_lock_init(&ss->id_lock);
3582 idr_init(&ss->idr);
3583
3584 rootcss = init_css_set.subsys[ss->subsys_id];
3585 newid = get_new_cssid(ss, 0);
3586 if (IS_ERR(newid))
3587 return PTR_ERR(newid);
3588
3589 newid->stack[0] = newid->id;
3590 newid->css = rootcss;
3591 rootcss->id = newid;
3592 return 0;
3593}
3594
3595static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
3596 struct cgroup *child)
3597{
3598 int subsys_id, i, depth = 0;
3599 struct cgroup_subsys_state *parent_css, *child_css;
3600 struct css_id *child_id, *parent_id = NULL;
3601
3602 subsys_id = ss->subsys_id;
3603 parent_css = parent->subsys[subsys_id];
3604 child_css = child->subsys[subsys_id];
3605 depth = css_depth(parent_css) + 1;
3606 parent_id = parent_css->id;
3607
3608 child_id = get_new_cssid(ss, depth);
3609 if (IS_ERR(child_id))
3610 return PTR_ERR(child_id);
3611
3612 for (i = 0; i < depth; i++)
3613 child_id->stack[i] = parent_id->stack[i];
3614 child_id->stack[depth] = child_id->id;
3615
3616
3617
3618
3619 rcu_assign_pointer(child_css->id, child_id);
3620
3621 return 0;
3622}
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
3633{
3634 struct css_id *cssid = NULL;
3635
3636 BUG_ON(!ss->use_id);
3637 cssid = idr_find(&ss->idr, id);
3638
3639 if (unlikely(!cssid))
3640 return NULL;
3641
3642 return rcu_dereference(cssid->css);
3643}
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655struct cgroup_subsys_state *
3656css_get_next(struct cgroup_subsys *ss, int id,
3657 struct cgroup_subsys_state *root, int *foundid)
3658{
3659 struct cgroup_subsys_state *ret = NULL;
3660 struct css_id *tmp;
3661 int tmpid;
3662 int rootid = css_id(root);
3663 int depth = css_depth(root);
3664
3665 if (!rootid)
3666 return NULL;
3667
3668 BUG_ON(!ss->use_id);
3669
3670 tmpid = id;
3671 while (1) {
3672
3673
3674
3675
3676 spin_lock(&ss->id_lock);
3677 tmp = idr_get_next(&ss->idr, &tmpid);
3678 spin_unlock(&ss->id_lock);
3679
3680 if (!tmp)
3681 break;
3682 if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
3683 ret = rcu_dereference(tmp->css);
3684 if (ret) {
3685 *foundid = tmpid;
3686 break;
3687 }
3688 }
3689
3690 tmpid = tmpid + 1;
3691 }
3692 return ret;
3693}
3694
3695