1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include <linux/cgroup.h>
26#include <linux/errno.h>
27#include <linux/fs.h>
28#include <linux/kernel.h>
29#include <linux/list.h>
30#include <linux/mm.h>
31#include <linux/mutex.h>
32#include <linux/mount.h>
33#include <linux/pagemap.h>
34#include <linux/proc_fs.h>
35#include <linux/rcupdate.h>
36#include <linux/sched.h>
37#include <linux/backing-dev.h>
38#include <linux/seq_file.h>
39#include <linux/slab.h>
40#include <linux/magic.h>
41#include <linux/spinlock.h>
42#include <linux/string.h>
43#include <linux/sort.h>
44#include <linux/kmod.h>
45#include <linux/delayacct.h>
46#include <linux/cgroupstats.h>
47#include <linux/hash.h>
48
49#include <asm/atomic.h>
50
/*
 * Mutex serializing the cgroup code in this file: it is taken by
 * cgroup_lock() and by the mount, remount and control-file paths below.
 */
static DEFINE_MUTEX(cgroup_mutex);

/*
 * Turn every SUBSYS(x) line of <linux/cgroup_subsys.h> into "&x_subsys,"
 * so that including the header inside the initializer fills in subsys[].
 */
#define SUBSYS(_x) &_x ## _subsys,

/* Table of all subsystems; indexed 0..CGROUP_SUBSYS_COUNT-1 by the loops below. */
static struct cgroup_subsys *subsys[] = {
#include <linux/cgroup_subsys.h>
};
59
60
61
62
63
64
/*
 * A cgroupfs_root is the root of one cgroup hierarchy. It is tied to one
 * superblock: cgroup_set_super() stores it in sb->s_fs_info.
 */
struct cgroupfs_root {
	/* Superblock of the filesystem instance backing this hierarchy */
	struct super_block *sb;

	/* Bitmask of subsystems requested for this hierarchy (mount options) */
	unsigned long subsys_bits;

	/* Bitmask of subsystems currently bound; updated by rebind_subsystems() */
	unsigned long actual_subsys_bits;

	/* Subsystems bound to this hierarchy, linked via cgroup_subsys.sibling */
	struct list_head subsys_list;

	/* The top-level cgroup of this hierarchy */
	struct cgroup top_cgroup;

	/* Count of cgroups in the hierarchy; init_cgroup_root() starts it at 1 */
	int number_of_cgroups;

	/* Entry in the global "roots" list */
	struct list_head root_list;

	/* Hierarchy-wide ROOT_* flag bits (e.g. ROOT_NOPREFIX) */
	unsigned long flags;

	/*
	 * Release agent path for this hierarchy; an empty string means none
	 * is set (see cgroup_show_options()).
	 */
	char release_agent_path[PATH_MAX];
};
99
100
101
102
103
104
105
/*
 * The "dummy" hierarchy. A subsystem that is not bound to a mounted
 * hierarchy has ss->root == &rootnode (see rebind_subsystems()).
 */
static struct cgroupfs_root rootnode;

/* List of mounted hierarchies (linked via root_list) and its length */
static LIST_HEAD(roots);
static int root_count;

/* Shorthand for the top cgroup of the dummy hierarchy */
#define dummytop (&rootnode.top_cgroup)

/*
 * Callback-needed flags. Neither is read or written in the part of the
 * file visible here.
 * NOTE(review): presumably set when a registered subsystem supplies
 * fork/exit or mm-owner callbacks - confirm against the init code.
 */
static int need_forkexit_callback;
static int need_mm_owner_callback __read_mostly;
123
124
/* Return non-zero if the CGRP_REMOVED bit is set in @cgrp's flags. */
inline int cgroup_is_removed(const struct cgroup *cgrp)
{
	return test_bit(CGRP_REMOVED, &cgrp->flags);
}
129
130
/* Bit numbers for cgroupfs_root.flags */
enum {
	ROOT_NOPREFIX, /* set by the "noprefix" mount option */
};
134
135static int cgroup_is_releasable(const struct cgroup *cgrp)
136{
137 const int bits =
138 (1 << CGRP_RELEASABLE) |
139 (1 << CGRP_NOTIFY_ON_RELEASE);
140 return (cgrp->flags & bits) == bits;
141}
142
/* Return non-zero if CGRP_NOTIFY_ON_RELEASE is set in @cgrp's flags. */
static int notify_on_release(const struct cgroup *cgrp)
{
	return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
}
147
148
149
150
151
/*
 * for_each_subsys() iterates @_ss over every subsystem bound to the
 * hierarchy @_root. @_root is parenthesized so that any pointer
 * expression can be passed safely.
 */
#define for_each_subsys(_root, _ss) \
list_for_each_entry(_ss, &(_root)->subsys_list, sibling)
154
155
/* for_each_root() iterates @_root over every entry of the global "roots" list. */
#define for_each_root(_root) \
list_for_each_entry(_root, &roots, root_list)
158
159
160
/*
 * Release-agent machinery: a list with its own spinlock, plus a work item
 * whose handler is cgroup_release_agent(). The users of these are outside
 * the part of the file visible here.
 * NOTE(review): presumably release_list queues cgroups that have become
 * releasable - confirm against check_for_release().
 */
static LIST_HEAD(release_list);
static DEFINE_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
166
167
/* Link object tying one css_set to one cgroup; it sits on two lists at once. */
struct cg_cgroup_link {
	/*
	 * Entry in the css_sets list of the cgroup this link belongs to.
	 * Also used to chain not-yet-used links on temporary lists.
	 */
	struct list_head cgrp_link_list;

	/* Entry in the cg_links list of the css_set pointed to by @cg */
	struct list_head cg_link_list;
	struct css_set *cg;
};
181
182
183
184
185
186
187
188
/*
 * Statically allocated css_set and link object. Their users are outside
 * the part of the file visible here.
 * NOTE(review): presumably the css_set used by the initial tasks before
 * any hierarchy is mounted - confirm against the init code.
 */
static struct css_set init_css_set;
static struct cg_cgroup_link init_css_set_link;

/*
 * css_set_lock protects the css_set hash table, css_set_count and the
 * cg_cgroup_link lists (every modification below is under write_lock).
 */
static DEFINE_RWLOCK(css_set_lock);
static int css_set_count;
197
198
199
/* Hash table of all css_sets: 2^CSS_SET_HASH_BITS buckets, see css_set_hash() */
#define CSS_SET_HASH_BITS 7
#define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS)
static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
203
204static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
205{
206 int i;
207 int index;
208 unsigned long tmp = 0UL;
209
210 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
211 tmp += (unsigned long)css[i];
212 tmp = (tmp >> 16) ^ tmp;
213
214 index = hash_long(tmp, CSS_SET_HASH_BITS);
215
216 return &css_set_table[index];
217}
218
219
220
221
222
/*
 * Not referenced in the part of the file visible here.
 * NOTE(review): presumably non-zero once tasks are being linked onto
 * css_set->tasks via task->cg_list - confirm against the users.
 */
static int use_task_css_set_links;
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242static void unlink_css_set(struct css_set *cg)
243{
244 write_lock(&css_set_lock);
245 hlist_del(&cg->hlist);
246 css_set_count--;
247 while (!list_empty(&cg->cg_links)) {
248 struct cg_cgroup_link *link;
249 link = list_entry(cg->cg_links.next,
250 struct cg_cgroup_link, cg_link_list);
251 list_del(&link->cg_link_list);
252 list_del(&link->cgrp_link_list);
253 kfree(link);
254 }
255 write_unlock(&css_set_lock);
256}
257
258static void __release_css_set(struct kref *k, int taskexit)
259{
260 int i;
261 struct css_set *cg = container_of(k, struct css_set, ref);
262
263 unlink_css_set(cg);
264
265 rcu_read_lock();
266 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
267 struct cgroup *cgrp = cg->subsys[i]->cgroup;
268 if (atomic_dec_and_test(&cgrp->count) &&
269 notify_on_release(cgrp)) {
270 if (taskexit)
271 set_bit(CGRP_RELEASABLE, &cgrp->flags);
272 check_for_release(cgrp);
273 }
274 }
275 rcu_read_unlock();
276 kfree(cg);
277}
278
/* kref release callback for the normal (non task-exit) path. */
static void release_css_set(struct kref *k)
{
	__release_css_set(k, 0);
}
283
/* kref release callback for when the reference is dropped by an exiting task. */
static void release_css_set_taskexit(struct kref *k)
{
	__release_css_set(k, 1);
}
288
289
290
291
/* Take a reference on @cg. */
static inline void get_css_set(struct css_set *cg)
{
	kref_get(&cg->ref);
}
296
/* Drop a reference on @cg; the last put runs release_css_set(). */
static inline void put_css_set(struct css_set *cg)
{
	kref_put(&cg->ref, release_css_set);
}
301
/* Drop a reference on @cg; the last put runs release_css_set_taskexit(). */
static inline void put_css_set_taskexit(struct css_set *cg)
{
	kref_put(&cg->ref, release_css_set_taskexit);
}
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320static struct css_set *find_existing_css_set(
321 struct css_set *oldcg,
322 struct cgroup *cgrp,
323 struct cgroup_subsys_state *template[])
324{
325 int i;
326 struct cgroupfs_root *root = cgrp->root;
327 struct hlist_head *hhead;
328 struct hlist_node *node;
329 struct css_set *cg;
330
331
332
333 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
334 if (root->subsys_bits & (1UL << i)) {
335
336
337
338 template[i] = cgrp->subsys[i];
339 } else {
340
341
342 template[i] = oldcg->subsys[i];
343 }
344 }
345
346 hhead = css_set_hash(template);
347 hlist_for_each_entry(cg, node, hhead, hlist) {
348 if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
349
350 return cg;
351 }
352 }
353
354
355 return NULL;
356}
357
358
359
360
361
362
363static int allocate_cg_links(int count, struct list_head *tmp)
364{
365 struct cg_cgroup_link *link;
366 int i;
367 INIT_LIST_HEAD(tmp);
368 for (i = 0; i < count; i++) {
369 link = kmalloc(sizeof(*link), GFP_KERNEL);
370 if (!link) {
371 while (!list_empty(tmp)) {
372 link = list_entry(tmp->next,
373 struct cg_cgroup_link,
374 cgrp_link_list);
375 list_del(&link->cgrp_link_list);
376 kfree(link);
377 }
378 return -ENOMEM;
379 }
380 list_add(&link->cgrp_link_list, tmp);
381 }
382 return 0;
383}
384
385static void free_cg_links(struct list_head *tmp)
386{
387 while (!list_empty(tmp)) {
388 struct cg_cgroup_link *link;
389 link = list_entry(tmp->next,
390 struct cg_cgroup_link,
391 cgrp_link_list);
392 list_del(&link->cgrp_link_list);
393 kfree(link);
394 }
395}
396
397
398
399
400
401
402
403
/*
 * find_css_set() returns a css_set equal to @oldcg except that the
 * subsystems of @cgrp's hierarchy point at @cgrp's state objects.
 *
 * An already hashed css_set is reused (with an extra reference taken);
 * otherwise a new one is allocated, linked to one cgroup per hierarchy and
 * inserted into the hash table. In both cases the caller owns one
 * reference on the result. Returns NULL if memory allocation fails.
 */
static struct css_set *find_css_set(
	struct css_set *oldcg, struct cgroup *cgrp)
{
	struct css_set *res;
	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
	int i;

	struct list_head tmp_cg_links;
	struct cg_cgroup_link *link;

	struct hlist_head *hhead;

	/*
	 * First look for an existing css_set; this also fills in
	 * template[], which is reused below if a new set is needed.
	 */
	write_lock(&css_set_lock);
	res = find_existing_css_set(oldcg, cgrp, template);
	if (res)
		get_css_set(res);
	write_unlock(&css_set_lock);

	if (res)
		return res;

	res = kmalloc(sizeof(*res), GFP_KERNEL);
	if (!res)
		return NULL;

	/* Pre-allocate one link per mounted hierarchy, outside the lock */
	if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
		kfree(res);
		return NULL;
	}

	kref_init(&res->ref);
	INIT_LIST_HEAD(&res->cg_links);
	INIT_LIST_HEAD(&res->tasks);
	INIT_HLIST_NODE(&res->hlist);

	/* Copy the set of subsystem state objects computed by the lookup */
	memcpy(res->subsys, template, sizeof(res->subsys));

	write_lock(&css_set_lock);
	/* Account the new css_set against each cgroup it references */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup *cgrp = res->subsys[i]->cgroup;
		struct cgroup_subsys *ss = subsys[i];
		atomic_inc(&cgrp->count);
		/*
		 * Consume a link only for the first subsystem on each
		 * hierarchy's subsys_list, so that every hierarchy gets
		 * exactly one link no matter how many subsystems it has.
		 */
		if (ss->root->subsys_list.next == &ss->sibling) {
			BUG_ON(list_empty(&tmp_cg_links));
			link = list_entry(tmp_cg_links.next,
					  struct cg_cgroup_link,
					  cgrp_link_list);
			list_del(&link->cgrp_link_list);
			list_add(&link->cgrp_link_list, &cgrp->css_sets);
			link->cg = res;
			list_add(&link->cg_link_list, &res->cg_links);
		}
	}
	/*
	 * With no subsystem left on the dummy hierarchy the loop above never
	 * linked dummytop, so link it explicitly here.
	 */
	if (list_empty(&rootnode.subsys_list)) {
		link = list_entry(tmp_cg_links.next,
				  struct cg_cgroup_link,
				  cgrp_link_list);
		list_del(&link->cgrp_link_list);
		list_add(&link->cgrp_link_list, &dummytop->css_sets);
		link->cg = res;
		list_add(&link->cg_link_list, &res->cg_links);
	}

	/* Every pre-allocated link must have been used */
	BUG_ON(!list_empty(&tmp_cg_links));

	css_set_count++;

	/* Make the new css_set visible to later lookups */
	hhead = css_set_hash(res->subsys);
	hlist_add_head(&res->hlist, hhead);

	write_unlock(&css_set_lock);

	return res;
}
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
/* cgroup_lock - acquire cgroup_mutex on behalf of code outside this file. */
void cgroup_lock(void)
{
	mutex_lock(&cgroup_mutex);
}
553
554
555
556
557
558
/* cgroup_unlock - release cgroup_mutex taken with cgroup_lock(). */
void cgroup_unlock(void)
{
	mutex_unlock(&cgroup_mutex);
}
563
564
565
566
567
568
569
570
/* Forward declarations for objects defined further down in this file */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp);
static struct inode_operations cgroup_dir_inode_operations;
static struct file_operations proc_cgroupstats_operations;

/* Backing device info attached to every cgroup inode's mapping: no accounting, no writeback */
static struct backing_dev_info cgroup_backing_dev_info = {
	.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
};
580
581static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
582{
583 struct inode *inode = new_inode(sb);
584
585 if (inode) {
586 inode->i_mode = mode;
587 inode->i_uid = current->fsuid;
588 inode->i_gid = current->fsgid;
589 inode->i_blocks = 0;
590 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
591 inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
592 }
593 return inode;
594}
595
596
597
598
599
600static void cgroup_call_pre_destroy(struct cgroup *cgrp)
601{
602 struct cgroup_subsys *ss;
603 for_each_subsys(cgrp->root, ss)
604 if (ss->pre_destroy && cgrp->subsys[ss->subsys_id])
605 ss->pre_destroy(ss, cgrp);
606 return;
607}
608
609static void cgroup_diput(struct dentry *dentry, struct inode *inode)
610{
611
612 if (S_ISDIR(inode->i_mode)) {
613 struct cgroup *cgrp = dentry->d_fsdata;
614 struct cgroup_subsys *ss;
615 BUG_ON(!(cgroup_is_removed(cgrp)));
616
617
618
619
620
621
622 synchronize_rcu();
623
624 mutex_lock(&cgroup_mutex);
625
626
627
628 for_each_subsys(cgrp->root, ss) {
629 if (cgrp->subsys[ss->subsys_id])
630 ss->destroy(ss, cgrp);
631 }
632
633 cgrp->root->number_of_cgroups--;
634 mutex_unlock(&cgroup_mutex);
635
636
637
638 deactivate_super(cgrp->root->sb);
639
640 kfree(cgrp);
641 }
642 iput(inode);
643}
644
/*
 * Remove directory dentry @d from its parent. A reference on the parent is
 * held across the operation so that it stays valid while d is deleted.
 */
static void remove_dir(struct dentry *d)
{
	struct dentry *parent = dget(d->d_parent);

	d_delete(d);
	simple_rmdir(parent->d_inode, d);
	dput(parent);
}
653
654static void cgroup_clear_directory(struct dentry *dentry)
655{
656 struct list_head *node;
657
658 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
659 spin_lock(&dcache_lock);
660 node = dentry->d_subdirs.next;
661 while (node != &dentry->d_subdirs) {
662 struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
663 list_del_init(node);
664 if (d->d_inode) {
665
666
667 BUG_ON(d->d_inode->i_mode & S_IFDIR);
668 d = dget_locked(d);
669 spin_unlock(&dcache_lock);
670 d_delete(d);
671 simple_unlink(dentry->d_inode, d);
672 dput(d);
673 spin_lock(&dcache_lock);
674 }
675 node = dentry->d_subdirs.next;
676 }
677 spin_unlock(&dcache_lock);
678}
679
680
681
682
/*
 * Remove a cgroup directory: first unlink its files, then take the dentry
 * off its parent's child list under dcache_lock, then remove the directory.
 */
static void cgroup_d_remove_dir(struct dentry *dentry)
{
	cgroup_clear_directory(dentry);

	spin_lock(&dcache_lock);
	list_del_init(&dentry->d_u.d_child);
	spin_unlock(&dcache_lock);
	remove_dir(dentry);
}
692
/*
 * rebind_subsystems() changes the set of subsystems bound to @root so that
 * it equals @final_bits, moving subsystems between @root and the dummy
 * hierarchy (rootnode).
 *
 * Returns 0 on success, or -EBUSY (with nothing changed) when a subsystem
 * to be added is bound to another hierarchy or when @root's top cgroup has
 * children. Ends with synchronize_rcu() after updating ss->root pointers.
 */
static int rebind_subsystems(struct cgroupfs_root *root,
			      unsigned long final_bits)
{
	unsigned long added_bits, removed_bits;
	struct cgroup *cgrp = &root->top_cgroup;
	int i;

	removed_bits = root->actual_subsys_bits & ~final_bits;
	added_bits = final_bits & ~root->actual_subsys_bits;
	/* Check that every subsystem to be added is currently free */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		unsigned long bit = 1UL << i;
		struct cgroup_subsys *ss = subsys[i];
		if (!(bit & added_bits))
			continue;
		if (ss->root != &rootnode) {
			/* Subsystem is bound to some other hierarchy */
			return -EBUSY;
		}
	}

	/*
	 * Rebinding is only done on a hierarchy whose top cgroup has no
	 * child cgroups.
	 */
	if (!list_empty(&cgrp->children))
		return -EBUSY;

	/* Process each subsystem */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		unsigned long bit = 1UL << i;
		if (bit & added_bits) {
			/* Bind: move the state object from dummytop to cgrp */
			BUG_ON(cgrp->subsys[i]);
			BUG_ON(!dummytop->subsys[i]);
			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
			cgrp->subsys[i] = dummytop->subsys[i];
			cgrp->subsys[i]->cgroup = cgrp;
			list_add(&ss->sibling, &root->subsys_list);
			rcu_assign_pointer(ss->root, root);
			if (ss->bind)
				ss->bind(ss, cgrp);

		} else if (bit & removed_bits) {
			/* Unbind: hand the state object back to dummytop */
			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
			if (ss->bind)
				ss->bind(ss, dummytop);
			dummytop->subsys[i]->cgroup = dummytop;
			cgrp->subsys[i] = NULL;
			rcu_assign_pointer(subsys[i]->root, &rootnode);
			list_del(&ss->sibling);
		} else if (bit & final_bits) {
			/* Subsystem stays bound to this hierarchy */
			BUG_ON(!cgrp->subsys[i]);
		} else {
			/* Subsystem stays unbound from this hierarchy */
			BUG_ON(cgrp->subsys[i]);
		}
	}
	root->subsys_bits = root->actual_subsys_bits = final_bits;
	synchronize_rcu();

	return 0;
}
760
761static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
762{
763 struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;
764 struct cgroup_subsys *ss;
765
766 mutex_lock(&cgroup_mutex);
767 for_each_subsys(root, ss)
768 seq_printf(seq, ",%s", ss->name);
769 if (test_bit(ROOT_NOPREFIX, &root->flags))
770 seq_puts(seq, ",noprefix");
771 if (strlen(root->release_agent_path))
772 seq_printf(seq, ",release_agent=%s", root->release_agent_path);
773 mutex_unlock(&cgroup_mutex);
774 return 0;
775}
776
/* Parsed mount options, filled in by parse_cgroupfs_options() */
struct cgroup_sb_opts {
	unsigned long subsys_bits;	/* bitmask of requested subsystems */
	unsigned long flags;		/* ROOT_* flag bits */
	char *release_agent;		/* kzalloc'd path or NULL; caller frees */
};
782
783
784
785static int parse_cgroupfs_options(char *data,
786 struct cgroup_sb_opts *opts)
787{
788 char *token, *o = data ?: "all";
789
790 opts->subsys_bits = 0;
791 opts->flags = 0;
792 opts->release_agent = NULL;
793
794 while ((token = strsep(&o, ",")) != NULL) {
795 if (!*token)
796 return -EINVAL;
797 if (!strcmp(token, "all")) {
798
799 int i;
800 opts->subsys_bits = 0;
801 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
802 struct cgroup_subsys *ss = subsys[i];
803 if (!ss->disabled)
804 opts->subsys_bits |= 1ul << i;
805 }
806 } else if (!strcmp(token, "noprefix")) {
807 set_bit(ROOT_NOPREFIX, &opts->flags);
808 } else if (!strncmp(token, "release_agent=", 14)) {
809
810 if (opts->release_agent)
811 return -EINVAL;
812 opts->release_agent = kzalloc(PATH_MAX, GFP_KERNEL);
813 if (!opts->release_agent)
814 return -ENOMEM;
815 strncpy(opts->release_agent, token + 14, PATH_MAX - 1);
816 opts->release_agent[PATH_MAX - 1] = 0;
817 } else {
818 struct cgroup_subsys *ss;
819 int i;
820 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
821 ss = subsys[i];
822 if (!strcmp(token, ss->name)) {
823 if (!ss->disabled)
824 set_bit(i, &opts->subsys_bits);
825 break;
826 }
827 }
828 if (i == CGROUP_SUBSYS_COUNT)
829 return -ENOENT;
830 }
831 }
832
833
834 if (!opts->subsys_bits)
835 return -EINVAL;
836
837 return 0;
838}
839
840static int cgroup_remount(struct super_block *sb, int *flags, char *data)
841{
842 int ret = 0;
843 struct cgroupfs_root *root = sb->s_fs_info;
844 struct cgroup *cgrp = &root->top_cgroup;
845 struct cgroup_sb_opts opts;
846
847 mutex_lock(&cgrp->dentry->d_inode->i_mutex);
848 mutex_lock(&cgroup_mutex);
849
850
851 ret = parse_cgroupfs_options(data, &opts);
852 if (ret)
853 goto out_unlock;
854
855
856 if (opts.flags != root->flags) {
857 ret = -EINVAL;
858 goto out_unlock;
859 }
860
861 ret = rebind_subsystems(root, opts.subsys_bits);
862
863
864 if (!ret)
865 cgroup_populate_dir(cgrp);
866
867 if (opts.release_agent)
868 strcpy(root->release_agent_path, opts.release_agent);
869 out_unlock:
870 if (opts.release_agent)
871 kfree(opts.release_agent);
872 mutex_unlock(&cgroup_mutex);
873 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
874 return ret;
875}
876
/* Superblock operations for cgroupfs; installed by cgroup_set_super() */
static struct super_operations cgroup_ops = {
	.statfs = simple_statfs,
	.drop_inode = generic_delete_inode,
	.show_options = cgroup_show_options,
	.remount_fs = cgroup_remount,
};
883
884static void init_cgroup_root(struct cgroupfs_root *root)
885{
886 struct cgroup *cgrp = &root->top_cgroup;
887 INIT_LIST_HEAD(&root->subsys_list);
888 INIT_LIST_HEAD(&root->root_list);
889 root->number_of_cgroups = 1;
890 cgrp->root = root;
891 cgrp->top_cgroup = cgrp;
892 INIT_LIST_HEAD(&cgrp->sibling);
893 INIT_LIST_HEAD(&cgrp->children);
894 INIT_LIST_HEAD(&cgrp->css_sets);
895 INIT_LIST_HEAD(&cgrp->release_list);
896}
897
898static int cgroup_test_super(struct super_block *sb, void *data)
899{
900 struct cgroupfs_root *new = data;
901 struct cgroupfs_root *root = sb->s_fs_info;
902
903
904 if (new->subsys_bits != root->subsys_bits)
905 return 0;
906
907
908 if (new->flags != root->flags)
909 return 0;
910
911 return 1;
912}
913
914static int cgroup_set_super(struct super_block *sb, void *data)
915{
916 int ret;
917 struct cgroupfs_root *root = data;
918
919 ret = set_anon_super(sb, NULL);
920 if (ret)
921 return ret;
922
923 sb->s_fs_info = root;
924 root->sb = sb;
925
926 sb->s_blocksize = PAGE_CACHE_SIZE;
927 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
928 sb->s_magic = CGROUP_SUPER_MAGIC;
929 sb->s_op = &cgroup_ops;
930
931 return 0;
932}
933
934static int cgroup_get_rootdir(struct super_block *sb)
935{
936 struct inode *inode =
937 cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
938 struct dentry *dentry;
939
940 if (!inode)
941 return -ENOMEM;
942
943 inode->i_fop = &simple_dir_operations;
944 inode->i_op = &cgroup_dir_inode_operations;
945
946 inc_nlink(inode);
947 dentry = d_alloc_root(inode);
948 if (!dentry) {
949 iput(inode);
950 return -ENOMEM;
951 }
952 sb->s_root = dentry;
953 return 0;
954}
955
/*
 * ->get_sb() for cgroupfs: mount a cgroup hierarchy.
 *
 * Parses the options in @data and builds a candidate cgroupfs_root. If
 * sget() finds an existing superblock with the same subsystems and flags,
 * that one is reused and the candidate is freed. Otherwise the new
 * hierarchy is set up: root directory, subsystem binding, registration on
 * the "roots" list and a link from every existing css_set to the new top
 * cgroup. Returns 0 (via simple_set_mnt()) or a negative errno.
 */
static int cgroup_get_sb(struct file_system_type *fs_type,
			 int flags, const char *unused_dev_name,
			 void *data, struct vfsmount *mnt)
{
	struct cgroup_sb_opts opts;
	int ret = 0;
	struct super_block *sb;
	struct cgroupfs_root *root;
	struct list_head tmp_cg_links;
	INIT_LIST_HEAD(&tmp_cg_links);

	/* First find the desired set of subsystems */
	ret = parse_cgroupfs_options(data, &opts);
	if (ret) {
		/* The parser may have allocated the path before failing */
		if (opts.release_agent)
			kfree(opts.release_agent);
		return ret;
	}

	root = kzalloc(sizeof(*root), GFP_KERNEL);
	if (!root) {
		if (opts.release_agent)
			kfree(opts.release_agent);
		return -ENOMEM;
	}

	init_cgroup_root(root);
	root->subsys_bits = opts.subsys_bits;
	root->flags = opts.flags;
	if (opts.release_agent) {
		strcpy(root->release_agent_path, opts.release_agent);
		kfree(opts.release_agent);
	}

	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root);

	if (IS_ERR(sb)) {
		kfree(root);
		return PTR_ERR(sb);
	}

	if (sb->s_fs_info != root) {
		/* Reusing an existing superblock; drop the candidate root */
		BUG_ON(sb->s_root == NULL);
		kfree(root);
		root = NULL;
	} else {
		/* New superblock */
		struct cgroup *cgrp = &root->top_cgroup;
		struct inode *inode;
		int i;

		BUG_ON(sb->s_root != NULL);

		ret = cgroup_get_rootdir(sb);
		if (ret)
			goto drop_new_super;
		inode = sb->s_root->d_inode;

		mutex_lock(&inode->i_mutex);
		mutex_lock(&cgroup_mutex);

		/*
		 * Allocate one link per existing css_set up front, while
		 * sleeping is still allowed; they are consumed further down
		 * under css_set_lock.
		 */
		ret = allocate_cg_links(css_set_count, &tmp_cg_links);
		if (ret) {
			mutex_unlock(&cgroup_mutex);
			mutex_unlock(&inode->i_mutex);
			goto drop_new_super;
		}

		ret = rebind_subsystems(root, root->subsys_bits);
		if (ret == -EBUSY) {
			mutex_unlock(&cgroup_mutex);
			mutex_unlock(&inode->i_mutex);
			goto drop_new_super;
		}

		/* -EBUSY is the only error rebind_subsystems() can return */
		BUG_ON(ret);

		list_add(&root->root_list, &roots);
		root_count++;

		sb->s_root->d_fsdata = &root->top_cgroup;
		root->top_cgroup.dentry = sb->s_root;

		/* Link the new top cgroup to every css_set in the hash table */
		write_lock(&css_set_lock);
		for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
			struct hlist_head *hhead = &css_set_table[i];
			struct hlist_node *node;
			struct css_set *cg;

			hlist_for_each_entry(cg, node, hhead, hlist) {
				struct cg_cgroup_link *link;

				BUG_ON(list_empty(&tmp_cg_links));
				link = list_entry(tmp_cg_links.next,
						  struct cg_cgroup_link,
						  cgrp_link_list);
				list_del(&link->cgrp_link_list);
				link->cg = cg;
				list_add(&link->cgrp_link_list,
					 &root->top_cgroup.css_sets);
				list_add(&link->cg_link_list, &cg->cg_links);
			}
		}
		write_unlock(&css_set_lock);

		/* Free any links that were not needed */
		free_cg_links(&tmp_cg_links);

		BUG_ON(!list_empty(&cgrp->sibling));
		BUG_ON(!list_empty(&cgrp->children));
		BUG_ON(root->number_of_cgroups != 1);

		cgroup_populate_dir(cgrp);
		mutex_unlock(&inode->i_mutex);
		mutex_unlock(&cgroup_mutex);
	}

	return simple_set_mnt(mnt, sb);

 drop_new_super:
	/* Release the s_umount write lock and the superblock reference */
	up_write(&sb->s_umount);
	deactivate_super(sb);
	free_cg_links(&tmp_cg_links);
	return ret;
}
1091
/*
 * ->kill_sb() for cgroupfs: tear down the hierarchy behind @sb. The
 * hierarchy must consist of just its top cgroup. All subsystems are
 * rebound to the dummy hierarchy, every css_set link to the top cgroup is
 * freed, the root is taken off the "roots" list and freed, and the
 * superblock is killed.
 */
static void cgroup_kill_sb(struct super_block *sb) {
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	int ret;

	BUG_ON(!root);

	BUG_ON(root->number_of_cgroups != 1);
	BUG_ON(!list_empty(&cgrp->children));
	BUG_ON(!list_empty(&cgrp->sibling));

	mutex_lock(&cgroup_mutex);

	/* Rebind all subsystems back to the dummy hierarchy */
	ret = rebind_subsystems(root, 0);
	/* Cannot fail: nothing is added and the top cgroup has no children */
	BUG_ON(ret);

	/* Release every link from this hierarchy's top cgroup to a css_set */
	write_lock(&css_set_lock);
	while (!list_empty(&cgrp->css_sets)) {
		struct cg_cgroup_link *link;
		link = list_entry(cgrp->css_sets.next,
				  struct cg_cgroup_link, cgrp_link_list);
		list_del(&link->cg_link_list);
		list_del(&link->cgrp_link_list);
		kfree(link);
	}
	write_unlock(&css_set_lock);

	/* root_list is still empty if the mount failed before registration */
	if (!list_empty(&root->root_list)) {
		list_del(&root->root_list);
		root_count--;
	}
	mutex_unlock(&cgroup_mutex);

	kfree(root);
	kill_litter_super(sb);
}
1134
/* Filesystem type descriptor for "cgroup" mounts */
static struct file_system_type cgroup_fs_type = {
	.name = "cgroup",
	.get_sb = cgroup_get_sb,
	.kill_sb = cgroup_kill_sb,
};
1140
/* Interpret a directory dentry's d_fsdata as its struct cgroup. */
static inline struct cgroup *__d_cgrp(struct dentry *dentry)
{
	return dentry->d_fsdata;
}
1145
/* Interpret a control-file dentry's d_fsdata as its struct cftype. */
static inline struct cftype *__d_cft(struct dentry *dentry)
{
	return dentry->d_fsdata;
}
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1161{
1162 char *start;
1163
1164 if (cgrp == dummytop) {
1165
1166
1167
1168
1169 strcpy(buf, "/");
1170 return 0;
1171 }
1172
1173 start = buf + buflen;
1174
1175 *--start = '\0';
1176 for (;;) {
1177 int len = cgrp->dentry->d_name.len;
1178 if ((start -= len) < buf)
1179 return -ENAMETOOLONG;
1180 memcpy(start, cgrp->dentry->d_name.name, len);
1181 cgrp = cgrp->parent;
1182 if (!cgrp)
1183 break;
1184 if (!cgrp->parent)
1185 continue;
1186 if (--start < buf)
1187 return -ENAMETOOLONG;
1188 *start = '/';
1189 }
1190 memmove(buf, start, buf + buflen - start);
1191 return 0;
1192}
1193
1194
1195
1196
1197
1198
1199static void get_first_subsys(const struct cgroup *cgrp,
1200 struct cgroup_subsys_state **css, int *subsys_id)
1201{
1202 const struct cgroupfs_root *root = cgrp->root;
1203 const struct cgroup_subsys *test_ss;
1204 BUG_ON(list_empty(&root->subsys_list));
1205 test_ss = list_entry(root->subsys_list.next,
1206 struct cgroup_subsys, sibling);
1207 if (css) {
1208 *css = cgrp->subsys[test_ss->subsys_id];
1209 BUG_ON(!*css);
1210 }
1211 if (subsys_id)
1212 *subsys_id = test_ss->subsys_id;
1213}
1214
1215
1216
1217
1218
1219
1220
1221
1222
/**
 * cgroup_attach_task - attach task @tsk to cgroup @cgrp
 * @cgrp: the cgroup the task is attaching to
 * @tsk:  the task to be attached
 *
 * Returns 0 on success (including when @tsk is already in @cgrp), the
 * error of a subsystem's ->can_attach() veto, -ENOMEM when no css_set can
 * be obtained, or -ESRCH when the task is exiting.
 */
int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
	int retval = 0;
	struct cgroup_subsys *ss;
	struct cgroup *oldcgrp;
	struct css_set *cg = tsk->cgroups;
	struct css_set *newcg;
	struct cgroupfs_root *root = cgrp->root;
	int subsys_id;

	get_first_subsys(cgrp, NULL, &subsys_id);

	/* Nothing to do if the task is already in that cgroup */
	oldcgrp = task_cgroup(tsk, subsys_id);
	if (cgrp == oldcgrp)
		return 0;

	/* Give every subsystem of the hierarchy a chance to veto */
	for_each_subsys(root, ss) {
		if (ss->can_attach) {
			retval = ss->can_attach(ss, cgrp, tsk);
			if (retval)
				return retval;
		}
	}

	/*
	 * Locate or allocate the css_set for the task's new position,
	 * based on its current css_set and the target cgroup.
	 */
	newcg = find_css_set(cg, cgrp);
	if (!newcg)
		return -ENOMEM;

	task_lock(tsk);
	if (tsk->flags & PF_EXITING) {
		task_unlock(tsk);
		put_css_set(newcg);
		return -ESRCH;
	}
	rcu_assign_pointer(tsk->cgroups, newcg);
	task_unlock(tsk);

	/* Move the task to the new css_set's task list, if it is on one */
	write_lock(&css_set_lock);
	if (!list_empty(&tsk->cg_list)) {
		list_del(&tsk->cg_list);
		list_add(&tsk->cg_list, &newcg->tasks);
	}
	write_unlock(&css_set_lock);

	for_each_subsys(root, ss) {
		if (ss->attach)
			ss->attach(ss, cgrp, oldcgrp, tsk);
	}
	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
	/* Wait for RCU readers of the old pointer before dropping its ref */
	synchronize_rcu();
	put_css_set(cg);
	return 0;
}
1282
1283
1284
1285
1286
1287static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
1288{
1289 pid_t pid;
1290 struct task_struct *tsk;
1291 int ret;
1292
1293 if (sscanf(pidbuf, "%d", &pid) != 1)
1294 return -EIO;
1295
1296 if (pid) {
1297 rcu_read_lock();
1298 tsk = find_task_by_vpid(pid);
1299 if (!tsk || tsk->flags & PF_EXITING) {
1300 rcu_read_unlock();
1301 return -ESRCH;
1302 }
1303 get_task_struct(tsk);
1304 rcu_read_unlock();
1305
1306 if ((current->euid) && (current->euid != tsk->uid)
1307 && (current->euid != tsk->suid)) {
1308 put_task_struct(tsk);
1309 return -EACCES;
1310 }
1311 } else {
1312 tsk = current;
1313 get_task_struct(tsk);
1314 }
1315
1316 ret = cgroup_attach_task(cgrp, tsk);
1317 put_task_struct(tsk);
1318 return ret;
1319}
1320
1321
/* Identifies a built-in control file; stored in cftype->private */
enum cgroup_filetype {
	FILE_ROOT,
	FILE_DIR,
	FILE_TASKLIST,
	FILE_NOTIFY_ON_RELEASE,
	FILE_RELEASE_AGENT,
};
1329
1330static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
1331 struct file *file,
1332 const char __user *userbuf,
1333 size_t nbytes, loff_t *unused_ppos)
1334{
1335 char buffer[64];
1336 int retval = 0;
1337 char *end;
1338
1339 if (!nbytes)
1340 return -EINVAL;
1341 if (nbytes >= sizeof(buffer))
1342 return -E2BIG;
1343 if (copy_from_user(buffer, userbuf, nbytes))
1344 return -EFAULT;
1345
1346 buffer[nbytes] = 0;
1347 strstrip(buffer);
1348 if (cft->write_u64) {
1349 u64 val = simple_strtoull(buffer, &end, 0);
1350 if (*end)
1351 return -EINVAL;
1352 retval = cft->write_u64(cgrp, cft, val);
1353 } else {
1354 s64 val = simple_strtoll(buffer, &end, 0);
1355 if (*end)
1356 return -EINVAL;
1357 retval = cft->write_s64(cgrp, cft, val);
1358 }
1359 if (!retval)
1360 retval = nbytes;
1361 return retval;
1362}
1363
1364static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
1365 struct cftype *cft,
1366 struct file *file,
1367 const char __user *userbuf,
1368 size_t nbytes, loff_t *unused_ppos)
1369{
1370 enum cgroup_filetype type = cft->private;
1371 char *buffer;
1372 int retval = 0;
1373
1374 if (nbytes >= PATH_MAX)
1375 return -E2BIG;
1376
1377
1378 buffer = kmalloc(nbytes + 1, GFP_KERNEL);
1379 if (buffer == NULL)
1380 return -ENOMEM;
1381
1382 if (copy_from_user(buffer, userbuf, nbytes)) {
1383 retval = -EFAULT;
1384 goto out1;
1385 }
1386 buffer[nbytes] = 0;
1387 strstrip(buffer);
1388
1389 mutex_lock(&cgroup_mutex);
1390
1391
1392
1393
1394
1395 if (cgroup_is_removed(cgrp)) {
1396 retval = -ENODEV;
1397 goto out2;
1398 }
1399
1400 switch (type) {
1401 case FILE_TASKLIST:
1402 retval = attach_task_by_pid(cgrp, buffer);
1403 break;
1404 case FILE_NOTIFY_ON_RELEASE:
1405 clear_bit(CGRP_RELEASABLE, &cgrp->flags);
1406 if (simple_strtoul(buffer, NULL, 10) != 0)
1407 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
1408 else
1409 clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
1410 break;
1411 case FILE_RELEASE_AGENT:
1412 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
1413 strcpy(cgrp->root->release_agent_path, buffer);
1414 break;
1415 default:
1416 retval = -EINVAL;
1417 goto out2;
1418 }
1419
1420 if (retval == 0)
1421 retval = nbytes;
1422out2:
1423 mutex_unlock(&cgroup_mutex);
1424out1:
1425 kfree(buffer);
1426 return retval;
1427}
1428
1429static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
1430 size_t nbytes, loff_t *ppos)
1431{
1432 struct cftype *cft = __d_cft(file->f_dentry);
1433 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
1434
1435 if (!cft || cgroup_is_removed(cgrp))
1436 return -ENODEV;
1437 if (cft->write)
1438 return cft->write(cgrp, cft, file, buf, nbytes, ppos);
1439 if (cft->write_u64 || cft->write_s64)
1440 return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
1441 if (cft->trigger) {
1442 int ret = cft->trigger(cgrp, (unsigned int)cft->private);
1443 return ret ? ret : nbytes;
1444 }
1445 return -EINVAL;
1446}
1447
1448static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
1449 struct file *file,
1450 char __user *buf, size_t nbytes,
1451 loff_t *ppos)
1452{
1453 char tmp[64];
1454 u64 val = cft->read_u64(cgrp, cft);
1455 int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
1456
1457 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
1458}
1459
1460static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
1461 struct file *file,
1462 char __user *buf, size_t nbytes,
1463 loff_t *ppos)
1464{
1465 char tmp[64];
1466 s64 val = cft->read_s64(cgrp, cft);
1467 int len = sprintf(tmp, "%lld\n", (long long) val);
1468
1469 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
1470}
1471
1472static ssize_t cgroup_common_file_read(struct cgroup *cgrp,
1473 struct cftype *cft,
1474 struct file *file,
1475 char __user *buf,
1476 size_t nbytes, loff_t *ppos)
1477{
1478 enum cgroup_filetype type = cft->private;
1479 char *page;
1480 ssize_t retval = 0;
1481 char *s;
1482
1483 if (!(page = (char *)__get_free_page(GFP_KERNEL)))
1484 return -ENOMEM;
1485
1486 s = page;
1487
1488 switch (type) {
1489 case FILE_RELEASE_AGENT:
1490 {
1491 struct cgroupfs_root *root;
1492 size_t n;
1493 mutex_lock(&cgroup_mutex);
1494 root = cgrp->root;
1495 n = strnlen(root->release_agent_path,
1496 sizeof(root->release_agent_path));
1497 n = min(n, (size_t) PAGE_SIZE);
1498 strncpy(s, root->release_agent_path, n);
1499 mutex_unlock(&cgroup_mutex);
1500 s += n;
1501 break;
1502 }
1503 default:
1504 retval = -EINVAL;
1505 goto out;
1506 }
1507 *s++ = '\n';
1508
1509 retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
1510out:
1511 free_page((unsigned long)page);
1512 return retval;
1513}
1514
1515static ssize_t cgroup_file_read(struct file *file, char __user *buf,
1516 size_t nbytes, loff_t *ppos)
1517{
1518 struct cftype *cft = __d_cft(file->f_dentry);
1519 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
1520
1521 if (!cft || cgroup_is_removed(cgrp))
1522 return -ENODEV;
1523
1524 if (cft->read)
1525 return cft->read(cgrp, cft, file, buf, nbytes, ppos);
1526 if (cft->read_u64)
1527 return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
1528 if (cft->read_s64)
1529 return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
1530 return -EINVAL;
1531}
1532
1533
1534
1535
1536
1537
/*
 * Per-open state of a seq_file based control file; allocated in
 * cgroup_file_open() and freed in cgroup_seqfile_release().
 */
struct cgroup_seqfile_state {
	struct cftype *cft;	/* control file type being read */
	struct cgroup *cgroup;	/* cgroup the file belongs to */
};
1542
1543static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
1544{
1545 struct seq_file *sf = cb->state;
1546 return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
1547}
1548
1549static int cgroup_seqfile_show(struct seq_file *m, void *arg)
1550{
1551 struct cgroup_seqfile_state *state = m->private;
1552 struct cftype *cft = state->cft;
1553 if (cft->read_map) {
1554 struct cgroup_map_cb cb = {
1555 .fill = cgroup_map_add,
1556 .state = m,
1557 };
1558 return cft->read_map(state->cgroup, cft, &cb);
1559 }
1560 return cft->read_seq_string(state->cgroup, cft, m);
1561}
1562
1563int cgroup_seqfile_release(struct inode *inode, struct file *file)
1564{
1565 struct seq_file *seq = file->private_data;
1566 kfree(seq->private);
1567 return single_release(inode, file);
1568}
1569
/*
 * File operations installed by cgroup_file_open() on files whose cftype
 * provides read_map or read_seq_string.
 */
static struct file_operations cgroup_seqfile_operations = {
	.read = seq_read,
	.llseek = seq_lseek,
	.release = cgroup_seqfile_release,
};
1575
1576static int cgroup_file_open(struct inode *inode, struct file *file)
1577{
1578 int err;
1579 struct cftype *cft;
1580
1581 err = generic_file_open(inode, file);
1582 if (err)
1583 return err;
1584
1585 cft = __d_cft(file->f_dentry);
1586 if (!cft)
1587 return -ENODEV;
1588 if (cft->read_map || cft->read_seq_string) {
1589 struct cgroup_seqfile_state *state =
1590 kzalloc(sizeof(*state), GFP_USER);
1591 if (!state)
1592 return -ENOMEM;
1593 state->cft = cft;
1594 state->cgroup = __d_cgrp(file->f_dentry->d_parent);
1595 file->f_op = &cgroup_seqfile_operations;
1596 err = single_open(file, cgroup_seqfile_show, state);
1597 if (err < 0)
1598 kfree(state);
1599 } else if (cft->open)
1600 err = cft->open(inode, file);
1601 else
1602 err = 0;
1603
1604 return err;
1605}
1606
1607static int cgroup_file_release(struct inode *inode, struct file *file)
1608{
1609 struct cftype *cft = __d_cft(file->f_dentry);
1610 if (cft->release)
1611 return cft->release(inode, file);
1612 return 0;
1613}
1614
1615
1616
1617
1618static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
1619 struct inode *new_dir, struct dentry *new_dentry)
1620{
1621 if (!S_ISDIR(old_dentry->d_inode->i_mode))
1622 return -ENOTDIR;
1623 if (new_dentry->d_inode)
1624 return -EEXIST;
1625 if (old_dir != new_dir)
1626 return -EIO;
1627 return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
1628}
1629
/*
 * File operations assigned by cgroup_create_file() to every regular
 * file it creates.
 */
static struct file_operations cgroup_file_operations = {
	.read = cgroup_file_read,
	.write = cgroup_file_write,
	.llseek = generic_file_llseek,
	.open = cgroup_file_open,
	.release = cgroup_file_release,
};
1637
/*
 * Inode operations assigned by cgroup_create_file() to every directory
 * it creates.
 */
static struct inode_operations cgroup_dir_inode_operations = {
	.lookup = simple_lookup,
	.mkdir = cgroup_mkdir,
	.rmdir = cgroup_rmdir,
	.rename = cgroup_rename,
};
1644
/*
 * Create the inode for @dentry on superblock @sb and instantiate it.
 *
 * @mode selects the flavour: directories get the cgroup directory
 * inode operations, regular files get cgroup_file_operations.
 *
 * NOTE: for a directory this function returns with the new inode's
 * i_mutex held (taken with the I_MUTEX_CHILD nesting class); the caller
 * is responsible for dropping it.
 *
 * Returns 0 on success, -ENOENT if @dentry is NULL, -EEXIST if it
 * already has an inode, -ENOMEM if no inode could be allocated.
 */
static int cgroup_create_file(struct dentry *dentry, int mode,
				struct super_block *sb)
{
	static struct dentry_operations cgroup_dops = {
		.d_iput = cgroup_diput,
	};

	struct inode *inode;

	if (!dentry)
		return -ENOENT;
	if (dentry->d_inode)
		return -EEXIST;

	inode = cgroup_new_inode(mode, sb);
	if (!inode)
		return -ENOMEM;

	if (S_ISDIR(mode)) {
		inode->i_op = &cgroup_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;

		/* one extra link on the new directory, presumably for "." */
		inc_nlink(inode);

		/*
		 * Hand the directory back locked so the caller can fill it
		 * in before releasing i_mutex.
		 */
		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
	} else if (S_ISREG(mode)) {
		inode->i_size = 0;
		inode->i_fop = &cgroup_file_operations;
	}
	dentry->d_op = &cgroup_dops;
	d_instantiate(dentry, inode);
	/* extra reference keeps the dentry around after the caller's dput() */
	dget(dentry);
	return 0;
}
1682
1683
1684
1685
1686
1687
1688
1689
1690static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
1691 int mode)
1692{
1693 struct dentry *parent;
1694 int error = 0;
1695
1696 parent = cgrp->parent->dentry;
1697 error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
1698 if (!error) {
1699 dentry->d_fsdata = cgrp;
1700 inc_nlink(parent->d_inode);
1701 cgrp->dentry = dentry;
1702 dget(dentry);
1703 }
1704 dput(dentry);
1705
1706 return error;
1707}
1708
1709int cgroup_add_file(struct cgroup *cgrp,
1710 struct cgroup_subsys *subsys,
1711 const struct cftype *cft)
1712{
1713 struct dentry *dir = cgrp->dentry;
1714 struct dentry *dentry;
1715 int error;
1716
1717 char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
1718 if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
1719 strcpy(name, subsys->name);
1720 strcat(name, ".");
1721 }
1722 strcat(name, cft->name);
1723 BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
1724 dentry = lookup_one_len(name, dir, strlen(name));
1725 if (!IS_ERR(dentry)) {
1726 error = cgroup_create_file(dentry, 0644 | S_IFREG,
1727 cgrp->root->sb);
1728 if (!error)
1729 dentry->d_fsdata = (void *)cft;
1730 dput(dentry);
1731 } else
1732 error = PTR_ERR(dentry);
1733 return error;
1734}
1735
1736int cgroup_add_files(struct cgroup *cgrp,
1737 struct cgroup_subsys *subsys,
1738 const struct cftype cft[],
1739 int count)
1740{
1741 int i, err;
1742 for (i = 0; i < count; i++) {
1743 err = cgroup_add_file(cgrp, subsys, &cft[i]);
1744 if (err)
1745 return err;
1746 }
1747 return 0;
1748}
1749
1750
1751
1752
1753
1754
1755
1756int cgroup_task_count(const struct cgroup *cgrp)
1757{
1758 int count = 0;
1759 struct list_head *l;
1760
1761 read_lock(&css_set_lock);
1762 l = cgrp->css_sets.next;
1763 while (l != &cgrp->css_sets) {
1764 struct cg_cgroup_link *link =
1765 list_entry(l, struct cg_cgroup_link, cgrp_link_list);
1766 count += atomic_read(&link->cg->ref.refcount);
1767 l = l->next;
1768 }
1769 read_unlock(&css_set_lock);
1770 return count;
1771}
1772
1773
1774
1775
1776
1777static void cgroup_advance_iter(struct cgroup *cgrp,
1778 struct cgroup_iter *it)
1779{
1780 struct list_head *l = it->cg_link;
1781 struct cg_cgroup_link *link;
1782 struct css_set *cg;
1783
1784
1785 do {
1786 l = l->next;
1787 if (l == &cgrp->css_sets) {
1788 it->cg_link = NULL;
1789 return;
1790 }
1791 link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
1792 cg = link->cg;
1793 } while (list_empty(&cg->tasks));
1794 it->cg_link = l;
1795 it->task = cg->tasks.next;
1796}
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807static void cgroup_enable_task_cg_lists(void)
1808{
1809 struct task_struct *p, *g;
1810 write_lock(&css_set_lock);
1811 use_task_css_set_links = 1;
1812 do_each_thread(g, p) {
1813 task_lock(p);
1814
1815
1816
1817
1818
1819 if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
1820 list_add(&p->cg_list, &p->cgroups->tasks);
1821 task_unlock(p);
1822 } while_each_thread(g, p);
1823 write_unlock(&css_set_lock);
1824}
1825
1826void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
1827{
1828
1829
1830
1831
1832
1833 if (!use_task_css_set_links)
1834 cgroup_enable_task_cg_lists();
1835
1836 read_lock(&css_set_lock);
1837 it->cg_link = &cgrp->css_sets;
1838 cgroup_advance_iter(cgrp, it);
1839}
1840
1841struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
1842 struct cgroup_iter *it)
1843{
1844 struct task_struct *res;
1845 struct list_head *l = it->task;
1846
1847
1848 if (!it->cg_link)
1849 return NULL;
1850 res = list_entry(l, struct task_struct, cg_list);
1851
1852 l = l->next;
1853 if (l == &res->cgroups->tasks) {
1854
1855
1856 cgroup_advance_iter(cgrp, it);
1857 } else {
1858 it->task = l;
1859 }
1860 return res;
1861}
1862
1863void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
1864{
1865 read_unlock(&css_set_lock);
1866}
1867
1868static inline int started_after_time(struct task_struct *t1,
1869 struct timespec *time,
1870 struct task_struct *t2)
1871{
1872 int start_diff = timespec_compare(&t1->start_time, time);
1873 if (start_diff > 0) {
1874 return 1;
1875 } else if (start_diff < 0) {
1876 return 0;
1877 } else {
1878
1879
1880
1881
1882
1883
1884
1885
1886 return t1 > t2;
1887 }
1888}
1889
1890
1891
1892
1893
1894
1895static inline int started_after(void *p1, void *p2)
1896{
1897 struct task_struct *t1 = p1;
1898 struct task_struct *t2 = p2;
1899 return started_after_time(t1, &t2->start_time, t2);
1900}
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
/*
 * Visit the tasks of scan->cg in batches.
 *
 * Each pass walks the cgroup's tasks under the iterator (which holds
 * css_set_lock for reading), collects candidates into a pointer heap
 * ordered by started_after(), and then - after the iterator has been
 * ended - calls scan->process_task() on every collected task.  Passes
 * repeat until one of them collects nothing.
 *
 * scan->test_task, when set, filters tasks before they are considered.
 * scan->heap, when set, is used as the heap (its ->gt is overwritten);
 * otherwise a temporary PAGE_SIZE heap is allocated and freed here.
 *
 * Returns 0 on completion or the error from heap_init().
 */
int cgroup_scan_tasks(struct cgroup_scanner *scan)
{
	int retval, i;
	struct cgroup_iter it;
	struct task_struct *p, *dropped;
	/*
	 * latest_task is only ever compared by address (see
	 * started_after_time()); no reference is held on it once its
	 * batch has been processed.
	 */
	struct task_struct *latest_task = NULL;
	struct ptr_heap tmp_heap;
	struct ptr_heap *heap;
	struct timespec latest_time = { 0, 0 };

	if (scan->heap) {
		/* caller-supplied heap: just install our comparator */
		heap = scan->heap;
		heap->gt = &started_after;
	} else {
		/* no heap supplied: allocate a temporary one */
		heap = &tmp_heap;
		retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
		if (retval)
			/* cannot allocate the heap */
			return retval;
	}

 again:
	/*
	 * One pass: gather tasks that pass the filter and that started
	 * after (latest_time, latest_task), the marker left by the
	 * previous pass.  A reference is held on every task that is in
	 * the heap.
	 */
	heap->size = 0;
	cgroup_iter_start(scan->cg, &it);
	while ((p = cgroup_iter_next(scan->cg, &it))) {
		/* skip tasks rejected by the optional filter */
		if (scan->test_task && !scan->test_task(p, scan))
			continue;
		/* skip tasks at or before the marker from the last pass */
		if (!started_after_time(p, &latest_time, latest_task))
			continue;
		dropped = heap_insert(heap, p);
		if (dropped == NULL) {
			/*
			 * Nothing came back from the heap, so p was
			 * presumably stored without evicting anything;
			 * pin it.
			 */
			get_task_struct(p);
		} else if (dropped != p) {
			/*
			 * A different task came back: p took its place in
			 * the heap, so move the reference over.
			 */
			get_task_struct(p);
			put_task_struct(dropped);
		}
		/*
		 * Remaining case: dropped == p, i.e. p itself was handed
		 * back; no reference changes hands.
		 */
	}
	cgroup_iter_end(scan->cg, &it);

	if (heap->size) {
		for (i = 0; i < heap->size; i++) {
			struct task_struct *q = heap->ptrs[i];
			if (i == 0) {
				/* slot 0 becomes the marker for the next pass */
				latest_time = q->start_time;
				latest_task = q;
			}
			/* process the task, then drop the heap's reference */
			scan->process_task(q, scan);
			put_task_struct(q);
		}
		/*
		 * The heap was non-empty, so there may be more tasks that
		 * did not fit (or that appeared meanwhile); run another
		 * pass, which will skip everything up to the marker.
		 */
		goto again;
	}
	if (heap == &tmp_heap)
		heap_free(&tmp_heap);
	return 0;
}
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
/*
 * Snapshot of a "tasks" file, built in cgroup_tasks_open() and kept in
 * file->private_data until cgroup_tasks_release() frees it.
 */
struct ctr_struct {
	/* formatted pid list, or NULL when the cgroup had no tasks */
	char *buf;
	/* number of bytes of @buf served to readers */
	int bufsz;
};
2046
2047
2048
2049
2050
2051
2052
2053
2054static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
2055{
2056 int n = 0;
2057 struct cgroup_iter it;
2058 struct task_struct *tsk;
2059 cgroup_iter_start(cgrp, &it);
2060 while ((tsk = cgroup_iter_next(cgrp, &it))) {
2061 if (unlikely(n == npids))
2062 break;
2063 pidarray[n++] = task_pid_vnr(tsk);
2064 }
2065 cgroup_iter_end(cgrp, &it);
2066 return n;
2067}
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
2079{
2080 int ret = -EINVAL;
2081 struct cgroup *cgrp;
2082 struct cgroup_iter it;
2083 struct task_struct *tsk;
2084
2085
2086
2087 if (dentry->d_sb->s_op != &cgroup_ops)
2088 goto err;
2089
2090 ret = 0;
2091 cgrp = dentry->d_fsdata;
2092 rcu_read_lock();
2093
2094 cgroup_iter_start(cgrp, &it);
2095 while ((tsk = cgroup_iter_next(cgrp, &it))) {
2096 switch (tsk->state) {
2097 case TASK_RUNNING:
2098 stats->nr_running++;
2099 break;
2100 case TASK_INTERRUPTIBLE:
2101 stats->nr_sleeping++;
2102 break;
2103 case TASK_UNINTERRUPTIBLE:
2104 stats->nr_uninterruptible++;
2105 break;
2106 case TASK_STOPPED:
2107 stats->nr_stopped++;
2108 break;
2109 default:
2110 if (delayacct_is_task_waiting_on_io(tsk))
2111 stats->nr_io_wait++;
2112 break;
2113 }
2114 }
2115 cgroup_iter_end(cgrp, &it);
2116
2117 rcu_read_unlock();
2118err:
2119 return ret;
2120}
2121
/*
 * sort() comparison callback for arrays of pid_t: returns the
 * difference *a - *b, i.e. negative, zero or positive for ascending
 * order.
 */
static int cmppid(const void *a, const void *b)
{
	const pid_t *lhs = a;
	const pid_t *rhs = b;

	return *lhs - *rhs;
}
2126
2127
2128
2129
2130
2131
2132static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
2133{
2134 int cnt = 0;
2135 int i;
2136
2137 for (i = 0; i < npids; i++)
2138 cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
2139 return cnt;
2140}
2141
2142
2143
2144
2145
2146
2147
2148static int cgroup_tasks_open(struct inode *unused, struct file *file)
2149{
2150 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2151 struct ctr_struct *ctr;
2152 pid_t *pidarray;
2153 int npids;
2154 char c;
2155
2156 if (!(file->f_mode & FMODE_READ))
2157 return 0;
2158
2159 ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
2160 if (!ctr)
2161 goto err0;
2162
2163
2164
2165
2166
2167
2168
2169 npids = cgroup_task_count(cgrp);
2170 if (npids) {
2171 pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
2172 if (!pidarray)
2173 goto err1;
2174
2175 npids = pid_array_load(pidarray, npids, cgrp);
2176 sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
2177
2178
2179 ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
2180 ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
2181 if (!ctr->buf)
2182 goto err2;
2183 ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
2184
2185 kfree(pidarray);
2186 } else {
2187 ctr->buf = NULL;
2188 ctr->bufsz = 0;
2189 }
2190 file->private_data = ctr;
2191 return 0;
2192
2193err2:
2194 kfree(pidarray);
2195err1:
2196 kfree(ctr);
2197err0:
2198 return -ENOMEM;
2199}
2200
2201static ssize_t cgroup_tasks_read(struct cgroup *cgrp,
2202 struct cftype *cft,
2203 struct file *file, char __user *buf,
2204 size_t nbytes, loff_t *ppos)
2205{
2206 struct ctr_struct *ctr = file->private_data;
2207
2208 return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
2209}
2210
2211static int cgroup_tasks_release(struct inode *unused_inode,
2212 struct file *file)
2213{
2214 struct ctr_struct *ctr;
2215
2216 if (file->f_mode & FMODE_READ) {
2217 ctr = file->private_data;
2218 kfree(ctr->buf);
2219 kfree(ctr);
2220 }
2221 return 0;
2222}
2223
2224static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
2225 struct cftype *cft)
2226{
2227 return notify_on_release(cgrp);
2228}
2229
2230
2231
2232
/*
 * Control files that cgroup_populate_dir() adds to every cgroup
 * directory.  They are added without a subsystem, so their names get no
 * prefix.
 */
static struct cftype files[] = {
	/* "tasks": pid list snapshot on read, shared write handler */
	{
		.name = "tasks",
		.open = cgroup_tasks_open,
		.read = cgroup_tasks_read,
		.write = cgroup_common_file_write,
		.release = cgroup_tasks_release,
		.private = FILE_TASKLIST,
	},

	/* "notify_on_release": read as a u64, shared write handler */
	{
		.name = "notify_on_release",
		.read_u64 = cgroup_read_notify_on_release,
		.write = cgroup_common_file_write,
		.private = FILE_NOTIFY_ON_RELEASE,
	},
};
2250
/*
 * The "release_agent" control file.  cgroup_populate_dir() adds it only
 * to the top cgroup of a hierarchy.
 */
static struct cftype cft_release_agent = {
	.name = "release_agent",
	.read = cgroup_common_file_read,
	.write = cgroup_common_file_write,
	.private = FILE_RELEASE_AGENT,
};
2257
2258static int cgroup_populate_dir(struct cgroup *cgrp)
2259{
2260 int err;
2261 struct cgroup_subsys *ss;
2262
2263
2264 cgroup_clear_directory(cgrp->dentry);
2265
2266 err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
2267 if (err < 0)
2268 return err;
2269
2270 if (cgrp == cgrp->top_cgroup) {
2271 if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0)
2272 return err;
2273 }
2274
2275 for_each_subsys(cgrp->root, ss) {
2276 if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
2277 return err;
2278 }
2279
2280 return 0;
2281}
2282
2283static void init_cgroup_css(struct cgroup_subsys_state *css,
2284 struct cgroup_subsys *ss,
2285 struct cgroup *cgrp)
2286{
2287 css->cgroup = cgrp;
2288 atomic_set(&css->refcnt, 0);
2289 css->flags = 0;
2290 if (cgrp == dummytop)
2291 set_bit(CSS_ROOT, &css->flags);
2292 BUG_ON(cgrp->subsys[ss->subsys_id]);
2293 cgrp->subsys[ss->subsys_id] = css;
2294}
2295
2296
2297
2298
2299
2300
2301
2302
2303
/*
 * Create a new cgroup as a child of @parent, represented by the
 * directory @dentry with permissions @mode.
 *
 * Allocates the cgroup, asks every subsystem bound to the hierarchy to
 * create its state, links the cgroup into the parent, creates the
 * directory and populates it with control files.
 *
 * Returns 0 on success, -ENOMEM if the cgroup cannot be allocated, or
 * the error from a subsystem's ->create() or from cgroup_create_dir().
 */
static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
			     int mode)
{
	struct cgroup *cgrp;
	struct cgroupfs_root *root = parent->root;
	int err = 0;
	struct cgroup_subsys *ss;
	struct super_block *sb = root->sb;

	cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
	if (!cgrp)
		return -ENOMEM;

	/*
	 * Take an active reference on the superblock for the lifetime of
	 * this cgroup; the error path below gives it back with
	 * deactivate_super().
	 */
	atomic_inc(&sb->s_active);

	mutex_lock(&cgroup_mutex);

	INIT_LIST_HEAD(&cgrp->sibling);
	INIT_LIST_HEAD(&cgrp->children);
	INIT_LIST_HEAD(&cgrp->css_sets);
	INIT_LIST_HEAD(&cgrp->release_list);

	cgrp->parent = parent;
	cgrp->root = parent->root;
	cgrp->top_cgroup = parent->top_cgroup;

	/* children inherit the parent's notify_on_release setting */
	if (notify_on_release(parent))
		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);

	for_each_subsys(root, ss) {
		struct cgroup_subsys_state *css = ss->create(ss, cgrp);
		if (IS_ERR(css)) {
			err = PTR_ERR(css);
			goto err_destroy;
		}
		init_cgroup_css(css, ss, cgrp);
	}

	list_add(&cgrp->sibling, &cgrp->parent->children);
	root->number_of_cgroups++;

	err = cgroup_create_dir(cgrp, dentry, mode);
	if (err < 0)
		goto err_remove;

	/* cgroup_create_file() hands the new directory back locked */
	BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));

	err = cgroup_populate_dir(cgrp);
	/*
	 * NOTE(review): the result of cgroup_populate_dir() is not acted
	 * upon - the function returns 0 below even if population failed,
	 * leaving a partially filled directory.  Presumably deliberate;
	 * confirm before changing.
	 */

	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);

	return 0;

 err_remove:

	/* undo the linkage into the parent */
	list_del(&cgrp->sibling);
	root->number_of_cgroups--;

 err_destroy:

	/* tear down only the subsystem states that were actually created */
	for_each_subsys(root, ss) {
		if (cgrp->subsys[ss->subsys_id])
			ss->destroy(ss, cgrp);
	}

	mutex_unlock(&cgroup_mutex);

	/* release the superblock reference taken above */
	deactivate_super(sb);

	kfree(cgrp);
	return err;
}
2385
2386static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2387{
2388 struct cgroup *c_parent = dentry->d_parent->d_fsdata;
2389
2390
2391 return cgroup_create(c_parent, dentry, mode | S_IFDIR);
2392}
2393
2394static inline int cgroup_has_css_refs(struct cgroup *cgrp)
2395{
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405 int i;
2406 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2407 struct cgroup_subsys *ss = subsys[i];
2408 struct cgroup_subsys_state *css;
2409
2410 if (ss->root != cgrp->root)
2411 continue;
2412 css = cgrp->subsys[ss->subsys_id];
2413
2414
2415
2416
2417
2418
2419 if (css && atomic_read(&css->refcnt))
2420 return 1;
2421 }
2422 return 0;
2423}
2424
2425static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
2426{
2427 struct cgroup *cgrp = dentry->d_fsdata;
2428 struct dentry *d;
2429 struct cgroup *parent;
2430 struct super_block *sb;
2431 struct cgroupfs_root *root;
2432
2433
2434
2435 mutex_lock(&cgroup_mutex);
2436 if (atomic_read(&cgrp->count) != 0) {
2437 mutex_unlock(&cgroup_mutex);
2438 return -EBUSY;
2439 }
2440 if (!list_empty(&cgrp->children)) {
2441 mutex_unlock(&cgroup_mutex);
2442 return -EBUSY;
2443 }
2444
2445 parent = cgrp->parent;
2446 root = cgrp->root;
2447 sb = root->sb;
2448
2449
2450
2451
2452
2453 cgroup_call_pre_destroy(cgrp);
2454
2455 if (cgroup_has_css_refs(cgrp)) {
2456 mutex_unlock(&cgroup_mutex);
2457 return -EBUSY;
2458 }
2459
2460 spin_lock(&release_list_lock);
2461 set_bit(CGRP_REMOVED, &cgrp->flags);
2462 if (!list_empty(&cgrp->release_list))
2463 list_del(&cgrp->release_list);
2464 spin_unlock(&release_list_lock);
2465
2466 list_del(&cgrp->sibling);
2467 spin_lock(&cgrp->dentry->d_lock);
2468 d = dget(cgrp->dentry);
2469 cgrp->dentry = NULL;
2470 spin_unlock(&d->d_lock);
2471
2472 cgroup_d_remove_dir(d);
2473 dput(d);
2474
2475 set_bit(CGRP_RELEASABLE, &parent->flags);
2476 check_for_release(parent);
2477
2478 mutex_unlock(&cgroup_mutex);
2479 return 0;
2480}
2481
2482static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
2483{
2484 struct cgroup_subsys_state *css;
2485
2486 printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
2487
2488
2489 ss->root = &rootnode;
2490 css = ss->create(ss, dummytop);
2491
2492 BUG_ON(IS_ERR(css));
2493 init_cgroup_css(css, ss, dummytop);
2494
2495
2496
2497
2498
2499 init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
2500
2501 need_forkexit_callback |= ss->fork || ss->exit;
2502 need_mm_owner_callback |= !!ss->mm_owner_changed;
2503
2504
2505
2506
2507 BUG_ON(!list_empty(&init_task.tasks));
2508
2509 ss->active = 1;
2510}
2511
2512
2513
2514
2515
2516
2517
2518int __init cgroup_init_early(void)
2519{
2520 int i;
2521 kref_init(&init_css_set.ref);
2522 kref_get(&init_css_set.ref);
2523 INIT_LIST_HEAD(&init_css_set.cg_links);
2524 INIT_LIST_HEAD(&init_css_set.tasks);
2525 INIT_HLIST_NODE(&init_css_set.hlist);
2526 css_set_count = 1;
2527 init_cgroup_root(&rootnode);
2528 list_add(&rootnode.root_list, &roots);
2529 root_count = 1;
2530 init_task.cgroups = &init_css_set;
2531
2532 init_css_set_link.cg = &init_css_set;
2533 list_add(&init_css_set_link.cgrp_link_list,
2534 &rootnode.top_cgroup.css_sets);
2535 list_add(&init_css_set_link.cg_link_list,
2536 &init_css_set.cg_links);
2537
2538 for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
2539 INIT_HLIST_HEAD(&css_set_table[i]);
2540
2541 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2542 struct cgroup_subsys *ss = subsys[i];
2543
2544 BUG_ON(!ss->name);
2545 BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
2546 BUG_ON(!ss->create);
2547 BUG_ON(!ss->destroy);
2548 if (ss->subsys_id != i) {
2549 printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
2550 ss->name, ss->subsys_id);
2551 BUG();
2552 }
2553
2554 if (ss->early_init)
2555 cgroup_init_subsys(ss);
2556 }
2557 return 0;
2558}
2559
2560
2561
2562
2563
2564
2565
2566int __init cgroup_init(void)
2567{
2568 int err;
2569 int i;
2570 struct hlist_head *hhead;
2571
2572 err = bdi_init(&cgroup_backing_dev_info);
2573 if (err)
2574 return err;
2575
2576 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2577 struct cgroup_subsys *ss = subsys[i];
2578 if (!ss->early_init)
2579 cgroup_init_subsys(ss);
2580 }
2581
2582
2583 hhead = css_set_hash(init_css_set.subsys);
2584 hlist_add_head(&init_css_set.hlist, hhead);
2585
2586 err = register_filesystem(&cgroup_fs_type);
2587 if (err < 0)
2588 goto out;
2589
2590 proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);
2591
2592out:
2593 if (err)
2594 bdi_destroy(&cgroup_backing_dev_info);
2595
2596 return err;
2597}
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612static int proc_cgroup_show(struct seq_file *m, void *v)
2613{
2614 struct pid *pid;
2615 struct task_struct *tsk;
2616 char *buf;
2617 int retval;
2618 struct cgroupfs_root *root;
2619
2620 retval = -ENOMEM;
2621 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
2622 if (!buf)
2623 goto out;
2624
2625 retval = -ESRCH;
2626 pid = m->private;
2627 tsk = get_pid_task(pid, PIDTYPE_PID);
2628 if (!tsk)
2629 goto out_free;
2630
2631 retval = 0;
2632
2633 mutex_lock(&cgroup_mutex);
2634
2635 for_each_root(root) {
2636 struct cgroup_subsys *ss;
2637 struct cgroup *cgrp;
2638 int subsys_id;
2639 int count = 0;
2640
2641
2642 if (!root->actual_subsys_bits)
2643 continue;
2644 seq_printf(m, "%lu:", root->subsys_bits);
2645 for_each_subsys(root, ss)
2646 seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
2647 seq_putc(m, ':');
2648 get_first_subsys(&root->top_cgroup, NULL, &subsys_id);
2649 cgrp = task_cgroup(tsk, subsys_id);
2650 retval = cgroup_path(cgrp, buf, PAGE_SIZE);
2651 if (retval < 0)
2652 goto out_unlock;
2653 seq_puts(m, buf);
2654 seq_putc(m, '\n');
2655 }
2656
2657out_unlock:
2658 mutex_unlock(&cgroup_mutex);
2659 put_task_struct(tsk);
2660out_free:
2661 kfree(buf);
2662out:
2663 return retval;
2664}
2665
2666static int cgroup_open(struct inode *inode, struct file *file)
2667{
2668 struct pid *pid = PROC_I(inode)->pid;
2669 return single_open(file, proc_cgroup_show, pid);
2670}
2671
/*
 * File operations for the per-task cgroup proc file; not static, so it
 * can be referenced from outside this file.
 */
struct file_operations proc_cgroup_operations = {
	.open = cgroup_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
2678
2679
2680static int proc_cgroupstats_show(struct seq_file *m, void *v)
2681{
2682 int i;
2683
2684 seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
2685 mutex_lock(&cgroup_mutex);
2686 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2687 struct cgroup_subsys *ss = subsys[i];
2688 seq_printf(m, "%s\t%lu\t%d\t%d\n",
2689 ss->name, ss->root->subsys_bits,
2690 ss->root->number_of_cgroups, !ss->disabled);
2691 }
2692 mutex_unlock(&cgroup_mutex);
2693 return 0;
2694}
2695
2696static int cgroupstats_open(struct inode *inode, struct file *file)
2697{
2698 return single_open(file, proc_cgroupstats_show, NULL);
2699}
2700
/*
 * File operations for the "cgroups" proc file registered by
 * cgroup_init().
 */
static struct file_operations proc_cgroupstats_operations = {
	.open = cgroupstats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724void cgroup_fork(struct task_struct *child)
2725{
2726 task_lock(current);
2727 child->cgroups = current->cgroups;
2728 get_css_set(child->cgroups);
2729 task_unlock(current);
2730 INIT_LIST_HEAD(&child->cg_list);
2731}
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741void cgroup_fork_callbacks(struct task_struct *child)
2742{
2743 if (need_forkexit_callback) {
2744 int i;
2745 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2746 struct cgroup_subsys *ss = subsys[i];
2747 if (ss->fork)
2748 ss->fork(ss, child);
2749 }
2750 }
2751}
2752
2753#ifdef CONFIG_MM_OWNER
2754
2755
2756
2757
2758
2759
2760
2761
2762void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
2763{
2764 struct cgroup *oldcgrp, *newcgrp = NULL;
2765
2766 if (need_mm_owner_callback) {
2767 int i;
2768 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2769 struct cgroup_subsys *ss = subsys[i];
2770 oldcgrp = task_cgroup(old, ss->subsys_id);
2771 if (new)
2772 newcgrp = task_cgroup(new, ss->subsys_id);
2773 if (oldcgrp == newcgrp)
2774 continue;
2775 if (ss->mm_owner_changed)
2776 ss->mm_owner_changed(ss, oldcgrp, newcgrp);
2777 }
2778 }
2779}
2780#endif
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791void cgroup_post_fork(struct task_struct *child)
2792{
2793 if (use_task_css_set_links) {
2794 write_lock(&css_set_lock);
2795 if (list_empty(&child->cg_list))
2796 list_add(&child->cg_list, &child->cgroups->tasks);
2797 write_unlock(&css_set_lock);
2798 }
2799}
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835void cgroup_exit(struct task_struct *tsk, int run_callbacks)
2836{
2837 int i;
2838 struct css_set *cg;
2839
2840 if (run_callbacks && need_forkexit_callback) {
2841 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2842 struct cgroup_subsys *ss = subsys[i];
2843 if (ss->exit)
2844 ss->exit(ss, tsk);
2845 }
2846 }
2847
2848
2849
2850
2851
2852
2853 if (!list_empty(&tsk->cg_list)) {
2854 write_lock(&css_set_lock);
2855 if (!list_empty(&tsk->cg_list))
2856 list_del(&tsk->cg_list);
2857 write_unlock(&css_set_lock);
2858 }
2859
2860
2861 task_lock(tsk);
2862 cg = tsk->cgroups;
2863 tsk->cgroups = &init_css_set;
2864 task_unlock(tsk);
2865 if (cg)
2866 put_css_set_taskexit(cg);
2867}
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
/*
 * Create a child cgroup, named after tsk->pid, underneath the cgroup
 * that @tsk currently occupies in the hierarchy of @subsys, and move
 * @tsk into it.
 *
 * Returns 0 when the subsystem is not bound to a real hierarchy (nothing
 * to do) or on success; otherwise the error from the dentry lookup,
 * vfs_mkdir() or cgroup_attach_task(), or -ENOMEM if the new cgroup
 * cannot be found after mkdir.
 */
int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
{
	struct dentry *dentry;
	int ret = 0;
	char nodename[MAX_CGROUP_TYPE_NAMELEN];
	struct cgroup *parent, *child;
	struct inode *inode;
	struct css_set *cg;
	struct cgroupfs_root *root;
	struct cgroup_subsys *ss;

	/* the subsystem must have been initialised */
	BUG_ON(!subsys->active);

	/*
	 * First work out what is going to be created, under cgroup_mutex.
	 */
	mutex_lock(&cgroup_mutex);
 again:
	root = subsys->root;
	if (root == &rootnode) {
		/* still on the dummy hierarchy: there is nothing to clone */
		printk(KERN_INFO
		       "Not cloning cgroup for unused subsystem %s\n",
		       subsys->name);
		mutex_unlock(&cgroup_mutex);
		return 0;
	}
	cg = tsk->cgroups;
	parent = task_cgroup(tsk, subsys->subsys_id);

	snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "%d", tsk->pid);

	/* pin the hierarchy's superblock while cgroup_mutex is dropped */
	atomic_inc(&parent->root->sb->s_active);

	/* likewise keep the task's css_set alive */
	get_css_set(cg);
	mutex_unlock(&cgroup_mutex);

	/* the directory is created with the parent's i_mutex held */
	inode = parent->dentry->d_inode;

	/*
	 * cgroup_mutex was released above before i_mutex is taken here;
	 * it is re-acquired further down while i_mutex is still held.
	 */
	mutex_lock(&inode->i_mutex);
	dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
	if (IS_ERR(dentry)) {
		printk(KERN_INFO
		       "cgroup: Couldn't allocate dentry for %s: %ld\n", nodename,
		       PTR_ERR(dentry));
		ret = PTR_ERR(dentry);
		goto out_release;
	}

	/* create the cgroup directory through the regular mkdir path */
	ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755);
	child = __d_cgrp(dentry);
	dput(dentry);
	if (ret) {
		printk(KERN_INFO
		       "Failed to create cgroup %s: %d\n", nodename,
		       ret);
		goto out_release;
	}

	if (!child) {
		printk(KERN_INFO
		       "Couldn't find new cgroup %s\n", nodename);
		ret = -ENOMEM;
		goto out_release;
	}

	/*
	 * The task may have moved, or the subsystem may have been rebound,
	 * while cgroup_mutex was not held; re-check under the lock.
	 */
	mutex_lock(&cgroup_mutex);
	if ((root != subsys->root) ||
	    (parent != task_cgroup(tsk, subsys->subsys_id))) {
		/* something changed: drop everything taken above and retry */
		mutex_unlock(&inode->i_mutex);
		put_css_set(cg);

		deactivate_super(parent->root->sb);
		/*
		 * The directory created above is not removed on this path
		 * (see the message below); cgroup_mutex stays held for the
		 * retry.
		 */
		printk(KERN_INFO
		       "Race in cgroup_clone() - leaking cgroup %s\n",
		       nodename);
		goto again;
	}

	/* let the subsystems set up the new child */
	for_each_subsys(root, ss) {
		if (ss->post_clone)
			ss->post_clone(ss, child);
	}

	/* finally move the task into the new cgroup */
	ret = cgroup_attach_task(child, tsk);
	mutex_unlock(&cgroup_mutex);

 out_release:
	mutex_unlock(&inode->i_mutex);

	/* put_css_set() is called with cgroup_mutex held here */
	mutex_lock(&cgroup_mutex);
	put_css_set(cg);
	mutex_unlock(&cgroup_mutex);
	deactivate_super(parent->root->sb);
	return ret;
}
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001int cgroup_is_descendant(const struct cgroup *cgrp)
3002{
3003 int ret;
3004 struct cgroup *target;
3005 int subsys_id;
3006
3007 if (cgrp == dummytop)
3008 return 1;
3009
3010 get_first_subsys(cgrp, NULL, &subsys_id);
3011 target = task_cgroup(current, subsys_id);
3012 while (cgrp != target && cgrp!= cgrp->top_cgroup)
3013 cgrp = cgrp->parent;
3014 ret = (cgrp == target);
3015 return ret;
3016}
3017
3018static void check_for_release(struct cgroup *cgrp)
3019{
3020
3021
3022 if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
3023 && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
3024
3025
3026
3027 int need_schedule_work = 0;
3028 spin_lock(&release_list_lock);
3029 if (!cgroup_is_removed(cgrp) &&
3030 list_empty(&cgrp->release_list)) {
3031 list_add(&cgrp->release_list, &release_list);
3032 need_schedule_work = 1;
3033 }
3034 spin_unlock(&release_list_lock);
3035 if (need_schedule_work)
3036 schedule_work(&release_agent_work);
3037 }
3038}
3039
3040void __css_put(struct cgroup_subsys_state *css)
3041{
3042 struct cgroup *cgrp = css->cgroup;
3043 rcu_read_lock();
3044 if (atomic_dec_and_test(&css->refcnt) && notify_on_release(cgrp)) {
3045 set_bit(CGRP_RELEASABLE, &cgrp->flags);
3046 check_for_release(cgrp);
3047 }
3048 rcu_read_unlock();
3049}
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
/*
 * Work function that drains the release list: for every queued cgroup
 * it runs the hierarchy's release agent as a usermode helper, passing
 * the cgroup's path as the only argument.
 *
 * cgroup_mutex is held while entries are taken off the list and the
 * arguments are set up; it is dropped around the helper invocation.
 * release_list_lock protects the list itself.
 */
static void cgroup_release_agent(struct work_struct *work)
{
	BUG_ON(work != &release_agent_work);
	mutex_lock(&cgroup_mutex);
	spin_lock(&release_list_lock);
	while (!list_empty(&release_list)) {
		char *argv[3], *envp[3];
		int i;
		char *pathbuf;
		struct cgroup *cgrp = list_entry(release_list.next,
						    struct cgroup,
						    release_list);
		/* dequeue first; the spinlock is not held across allocation */
		list_del_init(&cgrp->release_list);
		spin_unlock(&release_list_lock);
		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!pathbuf) {
			/*
			 * No memory: this cgroup's notification is dropped
			 * (it is already off the list); move on to the next.
			 */
			spin_lock(&release_list_lock);
			continue;
		}

		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0) {
			/* path could not be built: skip this cgroup too */
			kfree(pathbuf);
			spin_lock(&release_list_lock);
			continue;
		}

		i = 0;
		argv[i++] = cgrp->root->release_agent_path;
		argv[i++] = (char *)pathbuf;
		argv[i] = NULL;

		i = 0;
		/* minimal environment for the helper */
		envp[i++] = "HOME=/";
		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
		envp[i] = NULL;

		/*
		 * Do not hold cgroup_mutex while the helper is being
		 * launched.
		 */
		mutex_unlock(&cgroup_mutex);
		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
		kfree(pathbuf);
		mutex_lock(&cgroup_mutex);
		spin_lock(&release_list_lock);
	}
	spin_unlock(&release_list_lock);
	mutex_unlock(&cgroup_mutex);
}
3123
3124static int __init cgroup_disable(char *str)
3125{
3126 int i;
3127 char *token;
3128
3129 while ((token = strsep(&str, ",")) != NULL) {
3130 if (!*token)
3131 continue;
3132
3133 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3134 struct cgroup_subsys *ss = subsys[i];
3135
3136 if (!strcmp(token, ss->name)) {
3137 ss->disabled = 1;
3138 printk(KERN_INFO "Disabling %s control group"
3139 " subsystem\n", ss->name);
3140 break;
3141 }
3142 }
3143 }
3144 return 1;
3145}
3146__setup("cgroup_disable=", cgroup_disable);
3147