#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include "cgroup-internal.h"

#include <linux/cred.h>
#include <linux/errno.h>
#include <linux/init_task.h>
#include <linux/kernel.h>
#include <linux/magic.h>
#include <linux/mutex.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/percpu-rwsem.h>
#include <linux/string.h>
#include <linux/hashtable.h>
#include <linux/idr.h>
#include <linux/kthread.h>
#include <linux/atomic.h>
#include <linux/cpuset.h>
#include <linux/proc_ns.h>
#include <linux/nsproxy.h>
#include <linux/file.h>
#include <linux/fs_parser.h>
#include <linux/sched/cputime.h>
#include <linux/psi.h>
#include <net/sock.h>

#define CREATE_TRACE_POINTS
#include <trace/events/cgroup.h>

#define CGROUP_FILE_NAME_MAX		(MAX_CGROUP_TYPE_NAMELEN +	\
					 MAX_CFTYPE_NAME + 2)

/* let's not notify more than 100 times per second */
#define CGROUP_FILE_NOTIFY_MIN_INTV	DIV_ROUND_UP(HZ, 100)
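/*
 * cgroup_mutex is the master lock.  Any modification to cgroup or its
 * hierarchy must be performed while holding it.
 *
 * css_set_lock protects task->cgroups pointer, the list of css_set
 * objects, and the chain of tasks off each css_set.
 *
 * These locks are exported if CONFIG_PROVE_RCU so that accessors in
 * cgroup.h can use them for lockdep annotations.
 */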
DEFINE_MUTEX(cgroup_mutex);
DEFINE_SPINLOCK(css_set_lock);

#ifdef CONFIG_PROVE_RCU
EXPORT_SYMBOL_GPL(cgroup_mutex);
EXPORT_SYMBOL_GPL(css_set_lock);
#endif

DEFINE_SPINLOCK(trace_cgroup_path_lock);
char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
bool cgroup_debug __read_mostly;

/*
 * Protects cgroup_idr and css_idr so that IDs can be released without
 * grabbing cgroup_mutex.
 */
static DEFINE_SPINLOCK(cgroup_idr_lock);

/*
 * Protects cgroup_file->kn for !self csses.  It synchronizes notifications
 * against file removal/re-creation across css hiding.
 */
static DEFINE_SPINLOCK(cgroup_file_kn_lock);

DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem);

#define cgroup_assert_mutex_or_rcu_locked()				\
	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&			\
			   !lockdep_is_held(&cgroup_mutex),		\
			   "cgroup_mutex or RCU read lock required");
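/*
 * cgroup destruction makes heavy use of work items and there can be a lot
 * of concurrent destructions.  Use a separate workqueue so that cgroup
 * destruction work items don't end up filling up max_active of system_wq
 * which may lead to deadlock.
 */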
static struct workqueue_struct *cgroup_destroy_wq;

/* generate an array of cgroup subsystem pointers */
#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys,
struct cgroup_subsys *cgroup_subsys[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

/* array of cgroup subsystem names */
#define SUBSYS(_x) [_x ## _cgrp_id] = #_x,
static const char *cgroup_subsys_name[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

/*
 * array of static keys for cgroup_subsys_enabled() and
 * cgroup_subsys_on_dfl()
 */
#define SUBSYS(_x)							\
	DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_enabled_key);	\
	DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_on_dfl_key);		\
	EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_enabled_key);		\
	EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_on_dfl_key);
#include <linux/cgroup_subsys.h>
#undef SUBSYS

#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_enabled_key,
static struct static_key_true *cgroup_subsys_enabled_key[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_on_dfl_key,
static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

static DEFINE_PER_CPU(struct cgroup_rstat_cpu, cgrp_dfl_root_rstat_cpu);

/* the default hierarchy */
struct cgroup_root cgrp_dfl_root = { .cgrp.rstat_cpu = &cgrp_dfl_root_rstat_cpu };
EXPORT_SYMBOL_GPL(cgrp_dfl_root);

/*
 * The default hierarchy always exists but is hidden until mounted for the
 * first time.  This is for backward compatibility.
 */
static bool cgrp_dfl_visible;

/* some controllers are not supported on the default hierarchy */
static u16 cgrp_dfl_inhibit_ss_mask;

/* some controllers are implicitly enabled on the default hierarchy */
static u16 cgrp_dfl_implicit_ss_mask;

/* some controllers can be threaded on the default hierarchy */
static u16 cgrp_dfl_threaded_ss_mask;

/* The list of hierarchy roots */
LIST_HEAD(cgroup_roots);
static int cgroup_root_count;

/* hierarchy ID allocation and mapping, protected by cgroup_mutex */
static DEFINE_IDR(cgroup_hierarchy_idr);

/*
 * Assign a monotonically increasing serial number to csses.  It guarantees
 * cgroups with bigger numbers are newer than those with smaller numbers.
 * Also, as csses are always appended to the parent's ->children list, it
 * guarantees that sibling csses are always sorted in the ascending serial
 * number order on the list.  Protected by cgroup_mutex.
 */
static u64 css_serial_nr_next = 1;

/*
 * These bitmasks identify subsystems with specific features to avoid
 * having to do iterative checks repeatedly.
 */
static u16 have_fork_callback __read_mostly;
static u16 have_exit_callback __read_mostly;
static u16 have_release_callback __read_mostly;
static u16 have_canfork_callback __read_mostly;

/* cgroup namespace for init task */
struct cgroup_namespace init_cgroup_ns = {
	.ns.count	= REFCOUNT_INIT(2),
	.user_ns	= &init_user_ns,
	.ns.ops		= &cgroupns_operations,
	.ns.inum	= PROC_CGROUP_INIT_INO,
	.root_cset	= &init_css_set,
};

static struct file_system_type cgroup2_fs_type;
static struct cftype cgroup_base_files[];

/* cgroup optional features */
enum cgroup_opt_features {
#ifdef CONFIG_PSI
	OPT_FEATURE_PRESSURE,
#endif
	OPT_FEATURE_COUNT
};

static const char *cgroup_opt_feature_names[OPT_FEATURE_COUNT] = {
#ifdef CONFIG_PSI
	"pressure",
#endif
};

static u16 cgroup_feature_disable_mask __read_mostly;

static int cgroup_apply_control(struct cgroup *cgrp);
static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
static void css_task_iter_skip(struct css_task_iter *it,
			       struct task_struct *task);
static int cgroup_destroy_locked(struct cgroup *cgrp);
static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
					      struct cgroup_subsys *ss);
static void css_release(struct percpu_ref *ref);
static void kill_css(struct cgroup_subsys_state *css);
static int cgroup_addrm_files(struct cgroup_subsys_state *css,
			      struct cgroup *cgrp, struct cftype cfts[],
			      bool is_add);
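/**
 * cgroup_ssid_enabled - cgroup subsys enabled test by subsys ID
 * @ssid: subsys ID of interest
 *
 * Test whether the subsystem identified by @ssid is enabled.  A
 * subsystem's enabled static key is cleared when the subsystem is
 * disabled, e.g. via the cgroup_disable= boot option.
 */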
bool cgroup_ssid_enabled(int ssid)
{
	if (CGROUP_SUBSYS_COUNT == 0)
		return false;

	return static_key_enabled(cgroup_subsys_enabled_key[ssid]);
}
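/**
 * cgroup_on_dfl - test whether a cgroup is on the default hierarchy
 * @cgrp: the cgroup of interest
 *
 * The default hierarchy is the v2 interface of cgroup.  This function can
 * be used to test whether a cgroup is on the default hierarchy for cases
 * where a subsystem should behave differently depending on the interface
 * version.
 */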
bool cgroup_on_dfl(const struct cgroup *cgrp)
{
	return cgrp->root == &cgrp_dfl_root;
}

/* IDR wrappers which synchronize using cgroup_idr_lock */
static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
			    gfp_t gfp_mask)
{
	int ret;

	idr_preload(gfp_mask);
	spin_lock_bh(&cgroup_idr_lock);
	ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM);
	spin_unlock_bh(&cgroup_idr_lock);
	idr_preload_end();
	return ret;
}

static void *cgroup_idr_replace(struct idr *idr, void *ptr, int id)
{
	void *ret;

	spin_lock_bh(&cgroup_idr_lock);
	ret = idr_replace(idr, ptr, id);
	spin_unlock_bh(&cgroup_idr_lock);
	return ret;
}

static void cgroup_idr_remove(struct idr *idr, int id)
{
	spin_lock_bh(&cgroup_idr_lock);
	idr_remove(idr, id);
	spin_unlock_bh(&cgroup_idr_lock);
}

static bool cgroup_has_tasks(struct cgroup *cgrp)
{
	return cgrp->nr_populated_csets;
}
bool cgroup_is_threaded(struct cgroup *cgrp)
{
	return cgrp->dom_cgrp != cgrp;
}

/* can @cgrp host both domain and threaded children? */
static bool cgroup_is_mixable(struct cgroup *cgrp)
{
	/*
	 * Root isn't under domain level resource control exempting it from
	 * the no-internal-process constraint, so it can serve as a thread
	 * root and a parent of domain cgroups.
	 */
	return !cgroup_parent(cgrp);
}

/* can @cgrp become a thread root?  Should always be true for a thread root */
static bool cgroup_can_be_thread_root(struct cgroup *cgrp)
{
	/* mixables don't care */
	if (cgroup_is_mixable(cgrp))
		return true;

	/* domain roots can't be nested under threaded */
	if (cgroup_is_threaded(cgrp))
		return false;

	/* can only have either domain or threaded children */
	if (cgrp->nr_populated_domain_children)
		return false;

	/* and no domain controllers can be enabled */
	if (cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
		return false;

	return true;
}

/* is @cgrp root of a threaded subtree? */
bool cgroup_is_thread_root(struct cgroup *cgrp)
{
	/* thread root should be a domain */
	if (cgroup_is_threaded(cgrp))
		return false;

	/* a domain w/ threaded children is a thread root */
	if (cgrp->nr_threaded_children)
		return true;

	/*
	 * A domain which has tasks and explicit threaded controllers
	 * enabled is a thread root.
	 */
	if (cgroup_has_tasks(cgrp) &&
	    (cgrp->subtree_control & cgrp_dfl_threaded_ss_mask))
		return true;

	return false;
}

/* a domain which isn't connected to the root without breakage can't be used */
static bool cgroup_is_valid_domain(struct cgroup *cgrp)
{
	/* the cgroup itself can be a thread root */
	if (cgroup_is_threaded(cgrp))
		return false;

	/* but the ancestors can't be unless mixable */
	while ((cgrp = cgroup_parent(cgrp))) {
		if (!cgroup_is_mixable(cgrp) && cgroup_is_thread_root(cgrp))
			return false;
		if (cgroup_is_threaded(cgrp))
			return false;
	}

	return true;
}

/* subsystems visibly enabled on a cgroup */
static u16 cgroup_control(struct cgroup *cgrp)
{
	struct cgroup *parent = cgroup_parent(cgrp);
	u16 root_ss_mask = cgrp->root->subsys_mask;

	if (parent) {
		u16 ss_mask = parent->subtree_control;

		/* threaded cgroups can only have threaded controllers */
		if (cgroup_is_threaded(cgrp))
			ss_mask &= cgrp_dfl_threaded_ss_mask;
		return ss_mask;
	}

	if (cgroup_on_dfl(cgrp))
		root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask |
				  cgrp_dfl_implicit_ss_mask);
	return root_ss_mask;
}

/* subsystems enabled on a cgroup */
static u16 cgroup_ss_mask(struct cgroup *cgrp)
{
	struct cgroup *parent = cgroup_parent(cgrp);

	if (parent) {
		u16 ss_mask = parent->subtree_ss_mask;

		/* threaded cgroups can only have threaded controllers */
		if (cgroup_is_threaded(cgrp))
			ss_mask &= cgrp_dfl_threaded_ss_mask;
		return ss_mask;
	}

	return cgrp->root->subsys_mask;
}
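/**
 * cgroup_css - obtain a cgroup's css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest (%NULL returns @cgrp->self)
 *
 * Return @cgrp's css (cgroup_subsys_state) associated with @ss.  This
 * function must be called either under cgroup_mutex or rcu_read_lock() and
 * the caller is responsible for pinning the returned css if it wants to
 * keep accessing it outside the said locks.  This function may return
 * %NULL if @cgrp doesn't have @ss enabled.
 */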
static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
					      struct cgroup_subsys *ss)
{
	if (ss)
		return rcu_dereference_check(cgrp->subsys[ss->id],
					lockdep_is_held(&cgroup_mutex));
	else
		return &cgrp->self;
}
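/**
 * cgroup_tryget_css - try to get a cgroup's css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest
 *
 * Find and get @cgrp's css associated with @ss.  If the css doesn't exist
 * or is offline, %NULL is returned.
 */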
static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
						     struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	rcu_read_lock();
	css = cgroup_css(cgrp, ss);
	if (css && !css_tryget_online(css))
		css = NULL;
	rcu_read_unlock();

	return css;
}
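/**
 * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest (%NULL returns @cgrp->self)
 *
 * Similar to cgroup_css() but returns the effective css, which is defined
 * as the matching css of the nearest ancestor including self which has @ss
 * enabled.  If @ss is associated with the hierarchy @cgrp is on, this
 * function is guaranteed to return non-NULL css.
 */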
static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
							struct cgroup_subsys *ss)
{
	lockdep_assert_held(&cgroup_mutex);

	if (!ss)
		return &cgrp->self;

	/*
	 * This function is used while updating css associations and thus
	 * can't test the csses directly.  Test ss_mask.
	 */
	while (!(cgroup_ss_mask(cgrp) & (1 << ss->id))) {
		cgrp = cgroup_parent(cgrp);
		if (!cgrp)
			return NULL;
	}

	return cgroup_css(cgrp, ss);
}
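/**
 * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest
 *
 * Find and get the effective css of @cgrp for @ss.  The effective css is
 * defined as the matching css of the nearest ancestor including self which
 * has @ss enabled.  If @ss is not mounted on the hierarchy @cgrp is on,
 * the root css is returned, so this function always returns a valid css.
 *
 * The returned css is not guaranteed to be online, and therefore it is the
 * caller's responsibility to try get a reference for it.
 */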
struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
					 struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	do {
		css = cgroup_css(cgrp, ss);

		if (css)
			return css;
		cgrp = cgroup_parent(cgrp);
	} while (cgrp);

	return init_css_set.subsys[ss->id];
}
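/**
 * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest
 *
 * Find and get the effective css of @cgrp for @ss.  The effective css is
 * defined as the matching css of the nearest ancestor including self which
 * has @ss enabled.  If @ss is not mounted on the hierarchy @cgrp is on,
 * the root css is returned, so this function always returns a valid css.
 * The returned css must be put using css_put().
 */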
struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
					     struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	rcu_read_lock();

	do {
		css = cgroup_css(cgrp, ss);

		if (css && css_tryget_online(css))
			goto out_unlock;
		cgrp = cgroup_parent(cgrp);
	} while (cgrp);

	css = init_css_set.subsys[ss->id];
	css_get(css);
out_unlock:
	rcu_read_unlock();
	return css;
}
EXPORT_SYMBOL_GPL(cgroup_get_e_css);

static void cgroup_get_live(struct cgroup *cgrp)
{
	WARN_ON_ONCE(cgroup_is_dead(cgrp));
	css_get(&cgrp->self);
}

/**
 * __cgroup_task_count - count the number of tasks in a cgroup. The caller
 * is responsible for taking the css_set_lock.
 * @cgrp: the cgroup in question
 */
int __cgroup_task_count(const struct cgroup *cgrp)
{
	int count = 0;
	struct cgrp_cset_link *link;

	lockdep_assert_held(&css_set_lock);

	list_for_each_entry(link, &cgrp->cset_links, cset_link)
		count += link->cset->nr_tasks;

	return count;
}

/**
 * cgroup_task_count - count the number of tasks in a cgroup.
 * @cgrp: the cgroup in question
 */
int cgroup_task_count(const struct cgroup *cgrp)
{
	int count;

	spin_lock_irq(&css_set_lock);
	count = __cgroup_task_count(cgrp);
	spin_unlock_irq(&css_set_lock);

	return count;
}

struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
{
	struct cgroup *cgrp = of->kn->parent->priv;
	struct cftype *cft = of_cft(of);

	/*
	 * This is open and unprotected implementation of cgroup_css().
	 * seq_css() is only called from a kernfs file operation which takes
	 * an active reference on the file.  Because all the subsystem
	 * files are drained before a css is disassociated with a cgroup,
	 * the matching css from the cgroup's subsys table is guaranteed to
	 * be and stay valid until the enclosing operation ends.
	 */
	if (cft->ss)
		return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
	else
		return &cgrp->self;
}
EXPORT_SYMBOL_GPL(of_css);

/**
 * for_each_css - iterate all css's of a cgroup
 * @css: the iteration cursor
 * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
 * @cgrp: the target cgroup to iterate css's of
 *
 * Should be called under cgroup_mutex.
 */
#define for_each_css(css, ssid, cgrp)					\
	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	\
		if (!((css) = rcu_dereference_check(			\
				(cgrp)->subsys[(ssid)],			\
				lockdep_is_held(&cgroup_mutex)))) { }	\
		else

/**
 * for_each_e_css - iterate all effective css's of a cgroup
 * @css: the iteration cursor
 * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
 * @cgrp: the target cgroup to iterate css's of
 *
 * Should be called under cgroup_mutex.
 */
#define for_each_e_css(css, ssid, cgrp)					    \
	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	    \
		if (!((css) = cgroup_e_css_by_mask(cgrp,		    \
						   cgroup_subsys[(ssid)]))) \
			;						    \
		else

/**
 * do_each_subsys_mask - filter for_each_subsys with a bitmask
 * @ss: the iteration cursor
 * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
 * @ss_mask: the bitmask
 *
 * The block will only run for cases where the ssid-th bit (1 << ssid) of
 * @ss_mask is set.
 */
#define do_each_subsys_mask(ss, ssid, ss_mask) do {			\
	unsigned long __ss_mask = (ss_mask);				\
	if (!CGROUP_SUBSYS_COUNT) { /* to avoid spurious gcc warning */	\
		(ssid) = 0;						\
		break;							\
	}								\
	for_each_set_bit(ssid, &__ss_mask, CGROUP_SUBSYS_COUNT) {	\
		(ss) = cgroup_subsys[ssid];				\
		{

#define while_each_subsys_mask()					\
		}							\
	}								\
} while (false)

/* iterate over child cgrps, lock should be held throughout iteration */
#define cgroup_for_each_live_child(child, cgrp)				\
	list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \
		if (({ lockdep_assert_held(&cgroup_mutex);		\
		       cgroup_is_dead(child); }))			\
			;						\
		else

/* walk live descendants in preorder */
#define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)		\
	css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL))	\
		if (({ lockdep_assert_held(&cgroup_mutex);		\
		       (dsct) = (d_css)->cgroup;			\
		       cgroup_is_dead(dsct); }))			\
			;						\
		else

/* walk live descendants in postorder */
#define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp)	\
	css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \
		if (({ lockdep_assert_held(&cgroup_mutex);		\
		       (dsct) = (d_css)->cgroup;			\
		       cgroup_is_dead(dsct); }))			\
			;						\
		else
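/*
 * The default css_set - used by init and its children prior to any
 * hierarchies being mounted.  It contains a pointer to the root state
 * for each subsystem and is used to anchor the list of css_sets.
 */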
struct css_set init_css_set = {
	.refcount		= REFCOUNT_INIT(1),
	.dom_cset		= &init_css_set,
	.tasks			= LIST_HEAD_INIT(init_css_set.tasks),
	.mg_tasks		= LIST_HEAD_INIT(init_css_set.mg_tasks),
	.dying_tasks		= LIST_HEAD_INIT(init_css_set.dying_tasks),
	.task_iters		= LIST_HEAD_INIT(init_css_set.task_iters),
	.threaded_csets		= LIST_HEAD_INIT(init_css_set.threaded_csets),
	.cgrp_links		= LIST_HEAD_INIT(init_css_set.cgrp_links),
	.mg_preload_node	= LIST_HEAD_INIT(init_css_set.mg_preload_node),
	.mg_node		= LIST_HEAD_INIT(init_css_set.mg_node),

	/*
	 * The following field is re-initialized when this cset gets linked
	 * in cgroup_init().  However, let's initialize the field
	 * statically too so that the default cgroup can be accessed safely
	 * early during boot.
	 */
	.dfl_cgrp		= &cgrp_dfl_root.cgrp,
};

static int css_set_count	= 1;	/* 1 for init_css_set */

static bool css_set_threaded(struct css_set *cset)
{
	return cset->dom_cset != cset;
}

/**
 * css_set_populated - does a css_set contain any tasks?
 * @cset: target css_set
 *
 * css_set_populated() should be the same as !!cset->nr_tasks at steady
 * state.  However, css_set_populated() can be called while a task is being
 * added to or removed from the linked list before the nr_tasks is
 * properly updated.  Hence, it can't be used reliably during such window.
 */
static bool css_set_populated(struct css_set *cset)
{
	lockdep_assert_held(&css_set_lock);

	return !list_empty(&cset->tasks) || !list_empty(&cset->mg_tasks);
}
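/**
 * cgroup_update_populated - update the populated count of a cgroup
 * @cgrp: the target cgroup
 * @populated: inc or dec populated count
 *
 * One of the css_sets associated with @cgrp is either getting its first
 * task or losing the last.  Update @cgrp->nr_populated_* accordingly.  The
 * count is propagated towards root so that a given cgroup's
 * nr_populated_children is zero iff none of its descendants contain any
 * tasks.
 *
 * When the populated state changes, @cgrp's "cgroup.events" interface
 * file is kept up to date and a kernfs notification is delivered on it.
 */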
static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
{
	struct cgroup *child = NULL;
	int adj = populated ? 1 : -1;

	lockdep_assert_held(&css_set_lock);

	do {
		bool was_populated = cgroup_is_populated(cgrp);

		if (!child) {
			cgrp->nr_populated_csets += adj;
		} else {
			if (cgroup_is_threaded(child))
				cgrp->nr_populated_threaded_children += adj;
			else
				cgrp->nr_populated_domain_children += adj;
		}

		if (was_populated == cgroup_is_populated(cgrp))
			break;

		cgroup1_check_for_release(cgrp);
		TRACE_CGROUP_PATH(notify_populated, cgrp,
				  cgroup_is_populated(cgrp));
		cgroup_file_notify(&cgrp->events_file);

		child = cgrp;
		cgrp = cgroup_parent(cgrp);
	} while (cgrp);
}

/**
 * css_set_update_populated - update populated state of a css_set
 * @cset: target css_set
 * @populated: whether @cset is populated or depopulated
 *
 * @cset is either getting the first task or losing the last.  Update the
 * populated counters of all associated cgroups accordingly.
 */
static void css_set_update_populated(struct css_set *cset, bool populated)
{
	struct cgrp_cset_link *link;

	lockdep_assert_held(&css_set_lock);

	list_for_each_entry(link, &cset->cgrp_links, cgrp_link)
		cgroup_update_populated(link->cgrp, populated);
}

/*
 * @task is leaving, advance task iterators which are pointing to it so
 * that they can resume at the next position.  Advancing an iterator might
 * remove it from the list, use safe walk.  See css_task_iter_skip() for
 * details.
 */
static void css_set_skip_task_iters(struct css_set *cset,
				    struct task_struct *task)
{
	struct css_task_iter *it, *pos;

	list_for_each_entry_safe(it, pos, &cset->task_iters, iters_node)
		css_task_iter_skip(it, task);
}
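/**
 * css_set_move_task - move a task from one css_set to another
 * @task: task being moved
 * @from_cset: css_set @task currently belongs to (may be NULL)
 * @to_cset: new css_set @task is being moved to (may be NULL)
 * @use_mg_tasks: move to @to_cset->mg_tasks instead of ->tasks
 *
 * Move @task from @from_cset to @to_cset.  If @task didn't belong to any
 * css_set, @from_cset can be NULL.  If @task is being disassociated
 * instead of moved, @to_cset can be NULL.
 *
 * This function automatically handles populated counter updates and
 * css_task_iter adjustments but the caller is responsible for managing
 * @from_cset and @to_cset's reference counts.
 */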
static void css_set_move_task(struct task_struct *task,
			      struct css_set *from_cset, struct css_set *to_cset,
			      bool use_mg_tasks)
{
	lockdep_assert_held(&css_set_lock);

	if (to_cset && !css_set_populated(to_cset))
		css_set_update_populated(to_cset, true);

	if (from_cset) {
		WARN_ON_ONCE(list_empty(&task->cg_list));

		css_set_skip_task_iters(from_cset, task);
		list_del_init(&task->cg_list);
		if (!css_set_populated(from_cset))
			css_set_update_populated(from_cset, false);
	} else {
		WARN_ON_ONCE(!list_empty(&task->cg_list));
	}

	if (to_cset) {
		/*
		 * We are synchronized through cgroup_threadgroup_rwsem
		 * against PF_EXITING setting such that we can't race
		 * against cgroup_exit()/cgroup_free() dropping the css_set.
		 */
		WARN_ON_ONCE(task->flags & PF_EXITING);

		cgroup_move_task(task, to_cset);
		list_add_tail(&task->cg_list, use_mg_tasks ? &to_cset->mg_tasks :
							     &to_cset->tasks);
	}
}

/*
 * hash table for cgroup groups.  This improves the performance to find
 * an existing css_set.  This hash doesn't (currently) take into
 * account cgroups in empty hierarchies.
 */
#define CSS_SET_HASH_BITS	7
static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);

static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
{
	unsigned long key = 0UL;
	struct cgroup_subsys *ss;
	int i;

	for_each_subsys(ss, i)
		key += (unsigned long)css[i];
	key = (key >> 16) ^ key;

	return key;
}

void put_css_set_locked(struct css_set *cset)
{
	struct cgrp_cset_link *link, *tmp_link;
	struct cgroup_subsys *ss;
	int ssid;

	lockdep_assert_held(&css_set_lock);

	if (!refcount_dec_and_test(&cset->refcount))
		return;

	WARN_ON_ONCE(!list_empty(&cset->threaded_csets));

	/* This css_set is dead.  Unlink it and release cgroup and css refs */
	for_each_subsys(ss, ssid) {
		list_del(&cset->e_cset_node[ssid]);
		css_put(cset->subsys[ssid]);
	}
	hash_del(&cset->hlist);
	css_set_count--;

	list_for_each_entry_safe(link, tmp_link, &cset->cgrp_links, cgrp_link) {
		list_del(&link->cset_link);
		list_del(&link->cgrp_link);
		if (cgroup_parent(link->cgrp))
			cgroup_put(link->cgrp);
		kfree(link);
	}

	if (css_set_threaded(cset)) {
		list_del(&cset->threaded_csets_node);
		put_css_set_locked(cset->dom_cset);
	}

	kfree_rcu(cset, rcu_head);
}
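/**
 * compare_css_sets - helper function for find_existing_css_set().
 * @cset: candidate css_set being tested
 * @old_cset: existing css_set for a task
 * @new_cgrp: cgroup that's being entered by the task
 * @template: desired set of css pointers in css_set (pre-calculated)
 *
 * Returns true if "cset" matches "old_cset" except for the hierarchy
 * which "new_cgrp" belongs to, for which it should match "new_cgrp".
 */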
static bool compare_css_sets(struct css_set *cset,
			     struct css_set *old_cset,
			     struct cgroup *new_cgrp,
			     struct cgroup_subsys_state *template[])
{
	struct cgroup *new_dfl_cgrp;
	struct list_head *l1, *l2;

	/*
	 * On the default hierarchy, there can be csets which are
	 * associated with the same set of cgroups but different csses.
	 * Let's first ensure that csses match.
	 */
	if (memcmp(template, cset->subsys, sizeof(cset->subsys)))
		return false;

	/* @cset's domain should match the default cgroup's */
	if (cgroup_on_dfl(new_cgrp))
		new_dfl_cgrp = new_cgrp;
	else
		new_dfl_cgrp = old_cset->dfl_cgrp;

	if (new_dfl_cgrp->dom_cgrp != cset->dom_cset->dfl_cgrp)
		return false;

	/*
	 * Compare cgroup pointers in order to distinguish between
	 * different cgroups in hierarchies.  As different cgroups may
	 * share the same effective css, this comparison is always
	 * necessary.
	 */
	l1 = &cset->cgrp_links;
	l2 = &old_cset->cgrp_links;
	while (1) {
		struct cgrp_cset_link *link1, *link2;
		struct cgroup *cgrp1, *cgrp2;

		l1 = l1->next;
		l2 = l2->next;
		/* See if we reached the end - both lists are equal length */
		if (l1 == &cset->cgrp_links) {
			BUG_ON(l2 != &old_cset->cgrp_links);
			break;
		} else {
			BUG_ON(l2 == &old_cset->cgrp_links);
		}
		/* Locate the cgroups associated with these links */
		link1 = list_entry(l1, struct cgrp_cset_link, cgrp_link);
		link2 = list_entry(l2, struct cgrp_cset_link, cgrp_link);
		cgrp1 = link1->cgrp;
		cgrp2 = link2->cgrp;
		/* Hierarchies should be linked in the same order */
		BUG_ON(cgrp1->root != cgrp2->root);

		/*
		 * If this hierarchy is the hierarchy of the cgroup
		 * that's changing, then we need to check that this
		 * css_set points to the new cgroup; if it's any other
		 * hierarchy, then this css_set should point to the
		 * same cgroup as the old css_set.
		 */
		if (cgrp1->root == new_cgrp->root) {
			if (cgrp1 != new_cgrp)
				return false;
		} else {
			if (cgrp1 != cgrp2)
				return false;
		}
	}
	return true;
}

/**
 * find_existing_css_set - init css array and find the matching css_set
 * @old_cset: the css_set that we're using before the cgroup transition
 * @cgrp: the cgroup that we're moving into
 * @template: out param for the new set of csses, should be clear on entry
 */
static struct css_set *find_existing_css_set(struct css_set *old_cset,
					struct cgroup *cgrp,
					struct cgroup_subsys_state *template[])
{
	struct cgroup_root *root = cgrp->root;
	struct cgroup_subsys *ss;
	struct css_set *cset;
	unsigned long key;
	int i;

	/*
	 * Build the set of subsystem state objects that we want to see in the
	 * new css_set.  While subsystems can change globally, the entries here
	 * won't change, so no need for locking.
	 */
	for_each_subsys(ss, i) {
		if (root->subsys_mask & (1UL << i)) {
			/*
			 * @ss is in this hierarchy, so we want the
			 * effective css from @cgrp.
			 */
			template[i] = cgroup_e_css_by_mask(cgrp, ss);
		} else {
			/*
			 * @ss is not in this hierarchy, so we don't want
			 * to change the css.
			 */
			template[i] = old_cset->subsys[i];
		}
	}

	key = css_set_hash(template);
	hash_for_each_possible(css_set_table, cset, hlist, key) {
		if (!compare_css_sets(cset, old_cset, cgrp, template))
			continue;

		/* This css_set matches what we need */
		return cset;
	}

	/* No existing cgroup group matched */
	return NULL;
}

static void free_cgrp_cset_links(struct list_head *links_to_free)
{
	struct cgrp_cset_link *link, *tmp_link;

	list_for_each_entry_safe(link, tmp_link, links_to_free, cset_link) {
		list_del(&link->cset_link);
		kfree(link);
	}
}

/**
 * allocate_cgrp_cset_links - allocate cgrp_cset_links
 * @count: the number of links to allocate
 * @tmp_links: list_head the allocated links are put on
 *
 * Allocate @count cgrp_cset_link structures and chain them on @tmp_links
 * through ->cset_link.  Returns 0 on success or -errno.
 */
static int allocate_cgrp_cset_links(int count, struct list_head *tmp_links)
{
	struct cgrp_cset_link *link;
	int i;

	INIT_LIST_HEAD(tmp_links);

	for (i = 0; i < count; i++) {
		link = kzalloc(sizeof(*link), GFP_KERNEL);
		if (!link) {
			free_cgrp_cset_links(tmp_links);
			return -ENOMEM;
		}
		list_add(&link->cset_link, tmp_links);
	}
	return 0;
}

/**
 * link_css_set - a helper function to link a css_set to a cgroup
 * @tmp_links: cgrp_cset_link objects allocated by allocate_cgrp_cset_links()
 * @cset: the css_set to be linked
 * @cgrp: the destination cgroup
 */
static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
			 struct cgroup *cgrp)
{
	struct cgrp_cset_link *link;

	BUG_ON(list_empty(tmp_links));

	if (cgroup_on_dfl(cgrp))
		cset->dfl_cgrp = cgrp;

	link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
	link->cset = cset;
	link->cgrp = cgrp;

	/*
	 * Always add links to the tail of the lists so that the lists are
	 * in chronological order.
	 */
	list_move_tail(&link->cset_link, &cgrp->cset_links);
	list_add_tail(&link->cgrp_link, &cset->cgrp_links);

	if (cgroup_parent(cgrp))
		cgroup_get_live(cgrp);
}
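/**
 * find_css_set - return a new css_set with one cgroup updated
 * @old_cset: the baseline css_set
 * @cgrp: the cgroup to be updated
 *
 * Return a new css_set that's equivalent to @old_cset, but with @cgrp
 * substituted into the appropriate hierarchy.
 */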
static struct css_set *find_css_set(struct css_set *old_cset,
				    struct cgroup *cgrp)
{
	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT] = { };
	struct css_set *cset;
	struct list_head tmp_links;
	struct cgrp_cset_link *link;
	struct cgroup_subsys *ss;
	unsigned long key;
	int ssid;

	lockdep_assert_held(&cgroup_mutex);

	/* First see if we already have a cgroup group that matches
	 * the desired set */
	spin_lock_irq(&css_set_lock);
	cset = find_existing_css_set(old_cset, cgrp, template);
	if (cset)
		get_css_set(cset);
	spin_unlock_irq(&css_set_lock);

	if (cset)
		return cset;

	cset = kzalloc(sizeof(*cset), GFP_KERNEL);
	if (!cset)
		return NULL;

	/* Allocate all the cgrp_cset_link objects that we'll need */
	if (allocate_cgrp_cset_links(cgroup_root_count, &tmp_links) < 0) {
		kfree(cset);
		return NULL;
	}

	refcount_set(&cset->refcount, 1);
	cset->dom_cset = cset;
	INIT_LIST_HEAD(&cset->tasks);
	INIT_LIST_HEAD(&cset->mg_tasks);
	INIT_LIST_HEAD(&cset->dying_tasks);
	INIT_LIST_HEAD(&cset->task_iters);
	INIT_LIST_HEAD(&cset->threaded_csets);
	INIT_HLIST_NODE(&cset->hlist);
	INIT_LIST_HEAD(&cset->cgrp_links);
	INIT_LIST_HEAD(&cset->mg_preload_node);
	INIT_LIST_HEAD(&cset->mg_node);

	/* Copy the set of subsystem state objects generated in
	 * find_existing_css_set() */
	memcpy(cset->subsys, template, sizeof(cset->subsys));

	spin_lock_irq(&css_set_lock);
	/* Add reference counts and links from the new css_set. */
	list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
		struct cgroup *c = link->cgrp;

		if (c->root == cgrp->root)
			c = cgrp;
		link_css_set(&tmp_links, cset, c);
	}

	BUG_ON(!list_empty(&tmp_links));

	css_set_count++;

	/* Add @cset to the hash table */
	key = css_set_hash(cset->subsys);
	hash_add(css_set_table, &cset->hlist, key);

	for_each_subsys(ss, ssid) {
		struct cgroup_subsys_state *css = cset->subsys[ssid];

		list_add_tail(&cset->e_cset_node[ssid],
			      &css->cgroup->e_csets[ssid]);
		css_get(css);
	}

	spin_unlock_irq(&css_set_lock);

	/*
	 * If @cset should be threaded, look up the matching dom_cset and
	 * link them up.  We first fully initialize @cset then look for the
	 * dom_cset.  It doesn't matter whether @cset will be in the
	 * threaded hierarchy or not as @cset->dom_cset can only be looked
	 * up by a parent during migration.
	 */
	if (cgroup_is_threaded(cset->dfl_cgrp)) {
		struct css_set *dcset;

		dcset = find_css_set(cset, cset->dfl_cgrp->dom_cgrp);
		if (!dcset) {
			put_css_set(cset);
			return NULL;
		}

		spin_lock_irq(&css_set_lock);
		cset->dom_cset = dcset;
		list_add_tail(&cset->threaded_csets_node,
			      &dcset->threaded_csets);
		spin_unlock_irq(&css_set_lock);
	}

	return cset;
}

struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
{
	struct cgroup *root_cgrp = kf_root->kn->priv;

	return root_cgrp->root;
}

static int cgroup_init_root_id(struct cgroup_root *root)
{
	int id;

	lockdep_assert_held(&cgroup_mutex);

	id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, 0, 0, GFP_KERNEL);
	if (id < 0)
		return id;

	root->hierarchy_id = id;
	return 0;
}

static void cgroup_exit_root_id(struct cgroup_root *root)
{
	lockdep_assert_held(&cgroup_mutex);

	idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id);
}

void cgroup_free_root(struct cgroup_root *root)
{
	kfree(root);
}

static void cgroup_destroy_root(struct cgroup_root *root)
{
	struct cgroup *cgrp = &root->cgrp;
	struct cgrp_cset_link *link, *tmp_link;

	trace_cgroup_destroy_root(root);

	cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);

	BUG_ON(atomic_read(&root->nr_cgrps));
	BUG_ON(!list_empty(&cgrp->self.children));

	/* Rebind all subsystems back to the default hierarchy */
	WARN_ON(rebind_subsystems(&cgrp_dfl_root, root->subsys_mask));

	/*
	 * Release all the links from cset_links to this hierarchy's
	 * root cgroup
	 */
	spin_lock_irq(&css_set_lock);

	list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
		list_del(&link->cset_link);
		list_del(&link->cgrp_link);
		kfree(link);
	}

	spin_unlock_irq(&css_set_lock);

	if (!list_empty(&root->root_list)) {
		list_del(&root->root_list);
		cgroup_root_count--;
	}

	cgroup_exit_root_id(root);

	mutex_unlock(&cgroup_mutex);

	cgroup_rstat_exit(cgrp);
	kernfs_destroy_root(root->kf_root);
	cgroup_free_root(root);
}

/*
 * look up cgroup associated with current task's cgroup namespace on the
 * specified hierarchy
 */
static struct cgroup *
current_cgns_cgroup_from_root(struct cgroup_root *root)
{
	struct cgroup *res = NULL;
	struct css_set *cset;

	lockdep_assert_held(&css_set_lock);

	rcu_read_lock();

	cset = current->nsproxy->cgroup_ns->root_cset;
	if (cset == &init_css_set) {
		res = &root->cgrp;
	} else if (root == &cgrp_dfl_root) {
		res = cset->dfl_cgrp;
	} else {
		struct cgrp_cset_link *link;

		list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
			struct cgroup *c = link->cgrp;

			if (c->root == root) {
				res = c;
				break;
			}
		}
	}
	rcu_read_unlock();

	BUG_ON(!res);
	return res;
}

/* look up cgroup associated with given css_set on the specified hierarchy */
static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
					    struct cgroup_root *root)
{
	struct cgroup *res = NULL;

	lockdep_assert_held(&cgroup_mutex);
	lockdep_assert_held(&css_set_lock);

	if (cset == &init_css_set) {
		res = &root->cgrp;
	} else if (root == &cgrp_dfl_root) {
		res = cset->dfl_cgrp;
	} else {
		struct cgrp_cset_link *link;

		list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
			struct cgroup *c = link->cgrp;

			if (c->root == root) {
				res = c;
				break;
			}
		}
	}

	BUG_ON(!res);
	return res;
}

/*
 * Return the cgroup for "task" from the given hierarchy.  Must be
 * called with cgroup_mutex and css_set_lock held.
 */
struct cgroup *task_cgroup_from_root(struct task_struct *task,
				     struct cgroup_root *root)
{
	/*
	 * No need to lock the task - since we hold css_set_lock the
	 * task can't change groups.
	 */
	return cset_cgroup_from_root(task_css_set(task), root);
}

/*
 * A task must hold cgroup_mutex to modify cgroups.
 *
 * Any task can increment and decrement the count field without lock.
 * So in general, code holding cgroup_mutex can't rely on the count
 * field not changing.  However, if the count goes to zero, then only
 * cgroup_attach_task() can increment it again.  Because a count of zero
 * means that no tasks are currently attached, therefore there is no
 * way a task attached to that cgroup can fork (the other way to
 * increment the count).  So code holding cgroup_mutex can safely
 * assume that if the count is zero, it will stay zero.
 */
static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
			      char *buf)
{
	struct cgroup_subsys *ss = cft->ss;

	if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) &&
	    !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) {
		const char *dbg = (cft->flags & CFTYPE_DEBUG) ? ".__DEBUG__." : "";

		snprintf(buf, CGROUP_FILE_NAME_MAX, "%s%s.%s",
			 dbg, cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
			 cft->name);
	} else {
		strscpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
	}
	return buf;
}

/**
 * cgroup_file_mode - deduce file mode of a control file
 * @cft: the control file in question
 *
 * S_IRUGO for read, S_IWUSR for write.
 */
static umode_t cgroup_file_mode(const struct cftype *cft)
{
	umode_t mode = 0;

	if (cft->read_u64 || cft->read_s64 || cft->seq_show)
		mode |= S_IRUGO;

	if (cft->write_u64 || cft->write_s64 || cft->write) {
		if (cft->flags & CFTYPE_WORLD_WRITABLE)
			mode |= S_IWUGO;
		else
			mode |= S_IWUSR;
	}

	return mode;
}

/**
 * cgroup_calc_subtree_ss_mask - calculate subtree_ss_mask
 * @subtree_control: the new subtree_control mask to consider
 * @this_ss_mask: available subsystems
 *
 * On the default hierarchy, a subsystem may request other subsystems to be
 * enabled together through its ->depends_on mask.  In such cases, more
 * subsystems than specified in "cgroup.subtree_control" may be enabled.
 *
 * This function calculates which subsystems need to be enabled if
 * @subtree_control is to be applied while restricted to @this_ss_mask.
 */
static u16 cgroup_calc_subtree_ss_mask(u16 subtree_control, u16 this_ss_mask)
{
	u16 cur_ss_mask = subtree_control;
	struct cgroup_subsys *ss;
	int ssid;

	lockdep_assert_held(&cgroup_mutex);

	cur_ss_mask |= cgrp_dfl_implicit_ss_mask;

	while (true) {
		u16 new_ss_mask = cur_ss_mask;

		do_each_subsys_mask(ss, ssid, cur_ss_mask) {
			new_ss_mask |= ss->depends_on;
		} while_each_subsys_mask();

		/*
		 * Mask out subsystems which aren't available.  This can
		 * happen only if some depended-upon subsystems were bound
		 * to non-default hierarchies.
		 */
		new_ss_mask &= this_ss_mask;

		if (new_ss_mask == cur_ss_mask)
			break;
		cur_ss_mask = new_ss_mask;
	}

	return cur_ss_mask;
}

/**
 * cgroup_kn_unlock - unlocking helper for cgroup kernfs methods
 * @kn: the kernfs_node being serviced
 *
 * This helper undoes cgroup_kn_lock_live() and should be invoked before
 * the method finishes if locking succeeded.
 */
void cgroup_kn_unlock(struct kernfs_node *kn)
{
	struct cgroup *cgrp;

	if (kernfs_type(kn) == KERNFS_DIR)
		cgrp = kn->priv;
	else
		cgrp = kn->parent->priv;

	mutex_unlock(&cgroup_mutex);

	kernfs_unbreak_active_protection(kn);
	cgroup_put(cgrp);
}
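/**
 * cgroup_kn_lock_live - locking helper for cgroup kernfs methods
 * @kn: the kernfs_node being serviced
 * @drain_offline: perform offline draining on the cgroup
 *
 * This helper is to be used by a cgroup kernfs method currently servicing
 * @kn.  It breaks the active protection, performs cgroup locking and
 * verifies that the associated cgroup is alive.  Returns the cgroup if
 * alive; otherwise, %NULL.  A successful return should be undone by a
 * matching cgroup_kn_unlock() invocation.  If @drain_offline is %true, the
 * cgroup is drained of offlining csses before return.
 *
 * Any cgroup kernfs method implementation which requires locking the
 * associated cgroup should use this helper.  It avoids nesting cgroup
 * locking under kernfs active protection and allows all kernfs operations
 * including self-removal.
 */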
struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline)
{
	struct cgroup *cgrp;

	if (kernfs_type(kn) == KERNFS_DIR)
		cgrp = kn->priv;
	else
		cgrp = kn->parent->priv;

	/*
	 * We're gonna grab cgroup_mutex which nests outside kernfs
	 * active_ref.  cgroup liveliness check alone provides enough
	 * protection against removal.  Ensure @cgrp stays accessible and
	 * break the active_ref protection.
	 */
	if (!cgroup_tryget(cgrp))
		return NULL;
	kernfs_break_active_protection(kn);

	if (drain_offline)
		cgroup_lock_and_drain_offline(cgrp);
	else
		mutex_lock(&cgroup_mutex);

	if (!cgroup_is_dead(cgrp))
		return cgrp;

	cgroup_kn_unlock(kn);
	return NULL;
}

static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
{
	char name[CGROUP_FILE_NAME_MAX];

	lockdep_assert_held(&cgroup_mutex);

	if (cft->file_offset) {
		struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
		struct cgroup_file *cfile = (void *)css + cft->file_offset;

		spin_lock_irq(&cgroup_file_kn_lock);
		cfile->kn = NULL;
		spin_unlock_irq(&cgroup_file_kn_lock);

		del_timer_sync(&cfile->notify_timer);
	}

	kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
}

/**
 * css_clear_dir - remove subsys files in a cgroup directory
 * @css: target css
 */
static void css_clear_dir(struct cgroup_subsys_state *css)
{
	struct cgroup *cgrp = css->cgroup;
	struct cftype *cfts;

	if (!(css->flags & CSS_VISIBLE))
		return;

	css->flags &= ~CSS_VISIBLE;

	if (!css->ss) {
		if (cgroup_on_dfl(cgrp))
			cfts = cgroup_base_files;
		else
			cfts = cgroup1_base_files;

		cgroup_addrm_files(css, cgrp, cfts, false);
	} else {
		list_for_each_entry(cfts, &css->ss->cfts, node)
			cgroup_addrm_files(css, cgrp, cfts, false);
	}
}

/**
 * css_populate_dir - create subsys files in a cgroup directory
 * @css: target css
 *
 * On failure, no file is added.
 */
static int css_populate_dir(struct cgroup_subsys_state *css)
{
	struct cgroup *cgrp = css->cgroup;
	struct cftype *cfts, *failed_cfts;
	int ret;

	if ((css->flags & CSS_VISIBLE) || !cgrp->kn)
		return 0;

	if (!css->ss) {
		if (cgroup_on_dfl(cgrp))
			cfts = cgroup_base_files;
		else
			cfts = cgroup1_base_files;

		ret = cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
		if (ret < 0)
			return ret;
	} else {
		list_for_each_entry(cfts, &css->ss->cfts, node) {
			ret = cgroup_addrm_files(css, cgrp, cfts, true);
			if (ret < 0) {
				failed_cfts = cfts;
				goto err;
			}
		}
	}

	css->flags |= CSS_VISIBLE;

	return 0;
err:
	list_for_each_entry(cfts, &css->ss->cfts, node) {
		if (cfts == failed_cfts)
			break;
		cgroup_addrm_files(css, cgrp, cfts, false);
	}
	return ret;
}
int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
{
	struct cgroup *dcgrp = &dst_root->cgrp;
	struct cgroup_subsys *ss;
	int ssid, i, ret;

	lockdep_assert_held(&cgroup_mutex);

	do_each_subsys_mask(ss, ssid, ss_mask) {
		/*
		 * If @ss has non-root csses attached to it, can't move.
		 * If @ss is an implicit controller, it is exempt from this
		 * rule and can be stolen.
		 */
		if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)) &&
		    !ss->implicit_on_dfl)
			return -EBUSY;

		/* can't move between two non-dummy roots either */
		if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
			return -EBUSY;
	} while_each_subsys_mask();

	do_each_subsys_mask(ss, ssid, ss_mask) {
		struct cgroup_root *src_root = ss->root;
		struct cgroup *scgrp = &src_root->cgrp;
		struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
		struct css_set *cset;

		WARN_ON(!css || cgroup_css(dcgrp, ss));

		/* disable from the source */
		src_root->subsys_mask &= ~(1 << ssid);
		WARN_ON(cgroup_apply_control(scgrp));
		cgroup_finalize_control(scgrp, 0);

		/* rebind */
		RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
		rcu_assign_pointer(dcgrp->subsys[ssid], css);
		ss->root = dst_root;
		css->cgroup = dcgrp;

		spin_lock_irq(&css_set_lock);
		hash_for_each(css_set_table, i, cset, hlist)
			list_move_tail(&cset->e_cset_node[ss->id],
				       &dcgrp->e_csets[ss->id]);
		spin_unlock_irq(&css_set_lock);

		if (ss->css_rstat_flush) {
			list_del_rcu(&css->rstat_css_node);
			list_add_rcu(&css->rstat_css_node,
				     &dcgrp->rstat_css_list);
		}

		/* default hierarchy doesn't enable controllers by default */
		dst_root->subsys_mask |= 1 << ssid;
		if (dst_root == &cgrp_dfl_root) {
			static_branch_enable(cgroup_subsys_on_dfl_key[ssid]);
		} else {
			dcgrp->subtree_control |= 1 << ssid;
			static_branch_disable(cgroup_subsys_on_dfl_key[ssid]);
		}

		ret = cgroup_apply_control(dcgrp);
		if (ret)
			pr_warn("partial failure to rebind %s controller (err=%d)\n",
				ss->name, ret);

		if (ss->bind)
			ss->bind(css);
	} while_each_subsys_mask();

	kernfs_activate(dcgrp->kn);
	return 0;
}

int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
		     struct kernfs_root *kf_root)
{
	int len = 0;
	char *buf = NULL;
	struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
	struct cgroup *ns_cgroup;

	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	spin_lock_irq(&css_set_lock);
	ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
	len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX);
	spin_unlock_irq(&css_set_lock);

	if (len >= PATH_MAX)
		len = -ERANGE;
	else if (len > 0) {
		seq_escape(sf, buf, " \t\n\\");
		len = 0;
	}
	kfree(buf);
	return len;
}
enum cgroup2_param {
	Opt_nsdelegate,
	Opt_memory_localevents,
	Opt_memory_recursiveprot,
	nr__cgroup2_params
};

static const struct fs_parameter_spec cgroup2_fs_parameters[] = {
	fsparam_flag("nsdelegate",		Opt_nsdelegate),
	fsparam_flag("memory_localevents",	Opt_memory_localevents),
	fsparam_flag("memory_recursiveprot",	Opt_memory_recursiveprot),
	{}
};

static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
	struct fs_parse_result result;
	int opt;

	opt = fs_parse(fc, cgroup2_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_nsdelegate:
		ctx->flags |= CGRP_ROOT_NS_DELEGATE;
		return 0;
	case Opt_memory_localevents:
		ctx->flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
		return 0;
	case Opt_memory_recursiveprot:
		ctx->flags |= CGRP_ROOT_MEMORY_RECURSIVE_PROT;
		return 0;
	}
	return -EINVAL;
}

static void apply_cgroup_root_flags(unsigned int root_flags)
{
	if (current->nsproxy->cgroup_ns == &init_cgroup_ns) {
		if (root_flags & CGRP_ROOT_NS_DELEGATE)
			cgrp_dfl_root.flags |= CGRP_ROOT_NS_DELEGATE;
		else
			cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE;

		if (root_flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
			cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
		else
			cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_LOCAL_EVENTS;

		if (root_flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT)
			cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_RECURSIVE_PROT;
		else
			cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_RECURSIVE_PROT;
	}
}

static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
{
	if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE)
		seq_puts(seq, ",nsdelegate");
	if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
		seq_puts(seq, ",memory_localevents");
	if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT)
		seq_puts(seq, ",memory_recursiveprot");
	return 0;
}

static int cgroup_reconfigure(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);

	apply_cgroup_root_flags(ctx->flags);
	return 0;
}

static void init_cgroup_housekeeping(struct cgroup *cgrp)
{
	struct cgroup_subsys *ss;
	int ssid;

	INIT_LIST_HEAD(&cgrp->self.sibling);
	INIT_LIST_HEAD(&cgrp->self.children);
	INIT_LIST_HEAD(&cgrp->cset_links);
	INIT_LIST_HEAD(&cgrp->pidlists);
	mutex_init(&cgrp->pidlist_mutex);
	cgrp->self.cgroup = cgrp;
	cgrp->self.flags |= CSS_ONLINE;
	cgrp->dom_cgrp = cgrp;
	cgrp->max_descendants = INT_MAX;
	cgrp->max_depth = INT_MAX;
	INIT_LIST_HEAD(&cgrp->rstat_css_list);
	prev_cputime_init(&cgrp->prev_cputime);

	for_each_subsys(ss, ssid)
		INIT_LIST_HEAD(&cgrp->e_csets[ssid]);

	init_waitqueue_head(&cgrp->offline_waitq);
	INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent);
}

void init_cgroup_root(struct cgroup_fs_context *ctx)
{
	struct cgroup_root *root = ctx->root;
	struct cgroup *cgrp = &root->cgrp;

	INIT_LIST_HEAD(&root->root_list);
	atomic_set(&root->nr_cgrps, 1);
	cgrp->root = root;
	init_cgroup_housekeeping(cgrp);

	root->flags = ctx->flags;
	if (ctx->release_agent)
		strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX);
	if (ctx->name)
		strscpy(root->name, ctx->name, MAX_CGROUP_ROOT_NAMELEN);
	if (ctx->cpuset_clone_children)
		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
}
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
{
	LIST_HEAD(tmp_links);
	struct cgroup *root_cgrp = &root->cgrp;
	struct kernfs_syscall_ops *kf_sops;
	struct css_set *cset;
	int i, ret;

	lockdep_assert_held(&cgroup_mutex);

	ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
			      0, GFP_KERNEL);
	if (ret)
		goto out;

	/*
	 * We're accessing css_set_count without locking css_set_lock here,
	 * but that's OK - it can only be increased by someone holding
	 * cgroup_lock, and that's us.  Later rebinding may disable
	 * controllers on the default hierarchy and thus create new csets,
	 * which can't be more than the existing ones.  Allocate 2x.
	 */
	ret = allocate_cgrp_cset_links(2 * css_set_count, &tmp_links);
	if (ret)
		goto cancel_ref;

	ret = cgroup_init_root_id(root);
	if (ret)
		goto cancel_ref;

	kf_sops = root == &cgrp_dfl_root ?
		&cgroup_kf_syscall_ops : &cgroup1_kf_syscall_ops;

	root->kf_root = kernfs_create_root(kf_sops,
					   KERNFS_ROOT_CREATE_DEACTIVATED |
					   KERNFS_ROOT_SUPPORT_EXPORTOP |
					   KERNFS_ROOT_SUPPORT_USER_XATTR,
					   root_cgrp);
	if (IS_ERR(root->kf_root)) {
		ret = PTR_ERR(root->kf_root);
		goto exit_root_id;
	}
	root_cgrp->kn = root->kf_root->kn;
	WARN_ON_ONCE(cgroup_ino(root_cgrp) != 1);
	root_cgrp->ancestor_ids[0] = cgroup_id(root_cgrp);

	ret = css_populate_dir(&root_cgrp->self);
	if (ret)
		goto destroy_root;

	ret = cgroup_rstat_init(root_cgrp);
	if (ret)
		goto destroy_root;

	ret = rebind_subsystems(root, ss_mask);
	if (ret)
		goto exit_stats;

	ret = cgroup_bpf_inherit(root_cgrp);
	WARN_ON_ONCE(ret);

	trace_cgroup_setup_root(root);

	/*
	 * There must be no failure case after here, since rebinding takes
	 * care of subsystems' refcounts, which are explicitly dropped in
	 * the failure exit path.
	 */
	list_add(&root->root_list, &cgroup_roots);
	cgroup_root_count++;

	/*
	 * Link the root cgroup in this hierarchy into all the css_set
	 * objects.
	 */
	spin_lock_irq(&css_set_lock);
	hash_for_each(css_set_table, i, cset, hlist) {
		link_css_set(&tmp_links, cset, root_cgrp);
		if (css_set_populated(cset))
			cgroup_update_populated(root_cgrp, true);
	}
	spin_unlock_irq(&css_set_lock);

	BUG_ON(!list_empty(&root_cgrp->self.children));
	BUG_ON(atomic_read(&root->nr_cgrps) != 1);

	ret = 0;
	goto out;

exit_stats:
	cgroup_rstat_exit(root_cgrp);
destroy_root:
	kernfs_destroy_root(root->kf_root);
	root->kf_root = NULL;
exit_root_id:
	cgroup_exit_root_id(root);
cancel_ref:
	percpu_ref_exit(&root_cgrp->self.refcnt);
out:
	free_cgrp_cset_links(&tmp_links);
	return ret;
}
int cgroup_do_get_tree(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
	int ret;

	ctx->kfc.root = ctx->root->kf_root;
	if (fc->fs_type == &cgroup2_fs_type)
		ctx->kfc.magic = CGROUP2_SUPER_MAGIC;
	else
		ctx->kfc.magic = CGROUP_SUPER_MAGIC;
	ret = kernfs_get_tree(fc);

	/*
	 * In non-init cgroup namespace, instead of root cgroup's dentry,
	 * we return the dentry corresponding to the cgroupns->root_cgrp.
	 */
	if (!ret && ctx->ns != &init_cgroup_ns) {
		struct dentry *nsdentry;
		struct super_block *sb = fc->root->d_sb;
		struct cgroup *cgrp;

		mutex_lock(&cgroup_mutex);
		spin_lock_irq(&css_set_lock);

		cgrp = cset_cgroup_from_root(ctx->ns->root_cset, ctx->root);

		spin_unlock_irq(&css_set_lock);
		mutex_unlock(&cgroup_mutex);

		nsdentry = kernfs_node_dentry(cgrp->kn, sb);
		dput(fc->root);
		if (IS_ERR(nsdentry)) {
			deactivate_locked_super(sb);
			ret = PTR_ERR(nsdentry);
			nsdentry = NULL;
		}
		fc->root = nsdentry;
	}

	if (!ctx->kfc.new_sb_created)
		cgroup_put(&ctx->root->cgrp);

	return ret;
}

/*
 * Destroy a cgroup filesystem context.
 */
static void cgroup_fs_context_free(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);

	kfree(ctx->name);
	kfree(ctx->release_agent);
	put_cgroup_ns(ctx->ns);
	kernfs_free_fs_context(fc);
	kfree(ctx);
}

static int cgroup_get_tree(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
	int ret;

	cgrp_dfl_visible = true;
	cgroup_get_live(&cgrp_dfl_root.cgrp);
	ctx->root = &cgrp_dfl_root;

	ret = cgroup_do_get_tree(fc);
	if (!ret)
		apply_cgroup_root_flags(ctx->flags);
	return ret;
}

static const struct fs_context_operations cgroup_fs_context_ops = {
	.free		= cgroup_fs_context_free,
	.parse_param	= cgroup2_parse_param,
	.get_tree	= cgroup_get_tree,
	.reconfigure	= cgroup_reconfigure,
};

static const struct fs_context_operations cgroup1_fs_context_ops = {
	.free		= cgroup_fs_context_free,
	.parse_param	= cgroup1_parse_param,
	.get_tree	= cgroup1_get_tree,
	.reconfigure	= cgroup1_reconfigure,
};

/*
 * Initialise the cgroup filesystem creation/reconfiguration context.  Notably,
 * we select the namespace we're going to use.
 */
static int cgroup_init_fs_context(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx;

	ctx = kzalloc(sizeof(struct cgroup_fs_context), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	ctx->ns = current->nsproxy->cgroup_ns;
	get_cgroup_ns(ctx->ns);
	fc->fs_private = &ctx->kfc;
	if (fc->fs_type == &cgroup2_fs_type)
		fc->ops = &cgroup_fs_context_ops;
	else
		fc->ops = &cgroup1_fs_context_ops;
	put_user_ns(fc->user_ns);
	fc->user_ns = get_user_ns(ctx->ns->user_ns);
	fc->global = true;
	return 0;
}

static void cgroup_kill_sb(struct super_block *sb)
{
	struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
	struct cgroup_root *root = cgroup_root_from_kf(kf_root);

	/*
	 * If @root doesn't have any children, start killing it.
	 * This prevents new mounts by disabling percpu_ref_tryget_live().
	 *
	 * And don't kill the default root.
	 */
	if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root &&
	    !percpu_ref_is_dying(&root->cgrp.self.refcnt))
		percpu_ref_kill(&root->cgrp.self.refcnt);
	cgroup_put(&root->cgrp);
	kernfs_kill_sb(sb);
}

struct file_system_type cgroup_fs_type = {
	.name			= "cgroup",
	.init_fs_context	= cgroup_init_fs_context,
	.parameters		= cgroup1_fs_parameters,
	.kill_sb		= cgroup_kill_sb,
	.fs_flags		= FS_USERNS_MOUNT,
};

static struct file_system_type cgroup2_fs_type = {
	.name			= "cgroup2",
	.init_fs_context	= cgroup_init_fs_context,
	.parameters		= cgroup2_fs_parameters,
	.kill_sb		= cgroup_kill_sb,
	.fs_flags		= FS_USERNS_MOUNT,
};
#ifdef CONFIG_CPUSETS
static const struct fs_context_operations cpuset_fs_context_ops = {
	.get_tree	= cgroup1_get_tree,
	.free		= cgroup_fs_context_free,
};

/*
 * This is ugly, but preserves the userspace API for existing cpuset
 * users.  If someone tries to mount the "cpuset" filesystem, we
 * silently switch it to mount "cgroup" instead.
 */
static int cpuset_init_fs_context(struct fs_context *fc)
{
	char *agent = kstrdup("/sbin/cpuset_release_agent", GFP_USER);
	struct cgroup_fs_context *ctx;
	int err;

	err = cgroup_init_fs_context(fc);
	if (err) {
		kfree(agent);
		return err;
	}

	fc->ops = &cpuset_fs_context_ops;

	ctx = cgroup_fc2context(fc);
	ctx->subsys_mask = 1 << cpuset_cgrp_id;
	ctx->flags |= CGRP_ROOT_NOPREFIX;
	ctx->release_agent = agent;

	get_filesystem(&cgroup_fs_type);
	put_filesystem(fc->fs_type);
	fc->fs_type = &cgroup_fs_type;

	return 0;
}

static struct file_system_type cpuset_fs_type = {
	.name			= "cpuset",
	.init_fs_context	= cpuset_init_fs_context,
	.fs_flags		= FS_USERNS_MOUNT,
};
#endif

int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
			  struct cgroup_namespace *ns)
{
	struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);

	return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
}

int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
		   struct cgroup_namespace *ns)
{
	int ret;

	mutex_lock(&cgroup_mutex);
	spin_lock_irq(&css_set_lock);

	ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);

	spin_unlock_irq(&css_set_lock);
	mutex_unlock(&cgroup_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(cgroup_path_ns);

/**
 * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
 * @task: target task
 * @buf: the buffer to write the path into
 * @buflen: the length of the buffer
 *
 * Determine @task's cgroup on the first (the one with the lowest non-zero
 * hierarchy_id) cgroup hierarchy and copy its path into @buf.  This
 * function grabs cgroup_mutex and shouldn't be used inside locks used by
 * cgroup controller callbacks.
 *
 * Return value is the same as kernfs_path().
 */
int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
{
	struct cgroup_root *root;
	struct cgroup *cgrp;
	int hierarchy_id = 1;
	int ret;

	mutex_lock(&cgroup_mutex);
	spin_lock_irq(&css_set_lock);

	root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);

	if (root) {
		cgrp = task_cgroup_from_root(task, root);
		ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
	} else {
		/* if no hierarchy exists, everyone is in "/" */
		ret = strlcpy(buf, "/", buflen);
	}

	spin_unlock_irq(&css_set_lock);
	mutex_unlock(&cgroup_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(task_cgroup_path);
/**
 * cgroup_migrate_add_task - add a migration target task to a migration context
 * @task: target task
 * @mgctx: target migration context
 *
 * Add @task, which is a migration target, to @mgctx->tset.  This function
 * becomes noop if @task doesn't need to be migrated.  @task's css_set
 * should have been added as a migration source and @task->cg_list will be
 * moved from the css_set's tasks list to mg_tasks one.
 */
static void cgroup_migrate_add_task(struct task_struct *task,
				    struct cgroup_mgctx *mgctx)
{
	struct css_set *cset;

	lockdep_assert_held(&css_set_lock);

	/* @task either already exited or can't exit until the end */
	if (task->flags & PF_EXITING)
		return;

	/* cgroup_threadgroup_rwsem protects racing against forks */
	WARN_ON_ONCE(list_empty(&task->cg_list));

	cset = task_css_set(task);
	if (!cset->mg_src_cgrp)
		return;

	mgctx->tset.nr_tasks++;

	list_move_tail(&task->cg_list, &cset->mg_tasks);
	if (list_empty(&cset->mg_node))
		list_add_tail(&cset->mg_node,
			      &mgctx->tset.src_csets);
	if (list_empty(&cset->mg_dst_cset->mg_node))
		list_add_tail(&cset->mg_dst_cset->mg_node,
			      &mgctx->tset.dst_csets);
}

/**
 * cgroup_taskset_first - reset taskset and return the first task
 * @tset: taskset of interest
 * @dst_cssp: output variable for the destination css
 *
 * @tset iteration is initialized and the first task is returned.
 */
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
					 struct cgroup_subsys_state **dst_cssp)
{
	tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
	tset->cur_task = NULL;

	return cgroup_taskset_next(tset, dst_cssp);
}

/**
 * cgroup_taskset_next - iterate to the next task in taskset
 * @tset: taskset of interest
 * @dst_cssp: output variable for the destination css
 *
 * Return the next task in @tset.  Iteration must have been initialized
 * with cgroup_taskset_first().
 */
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
					struct cgroup_subsys_state **dst_cssp)
{
	struct css_set *cset = tset->cur_cset;
	struct task_struct *task = tset->cur_task;

	while (&cset->mg_node != tset->csets) {
		if (!task)
			task = list_first_entry(&cset->mg_tasks,
						struct task_struct, cg_list);
		else
			task = list_next_entry(task, cg_list);

		if (&task->cg_list != &cset->mg_tasks) {
			tset->cur_cset = cset;
			tset->cur_task = task;

			/*
			 * This function may be called both before and
			 * after cgroup_taskset_migrate().  The two cases
			 * can be distinguished by looking at whether @cset
			 * has its ->mg_dst_cset set.
			 */
			if (cset->mg_dst_cset)
				*dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
			else
				*dst_cssp = cset->subsys[tset->ssid];

			return task;
		}

		cset = list_next_entry(cset, mg_node);
		task = NULL;
	}

	return NULL;
}
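/**
 * cgroup_migrate_execute - migrate a taskset
 * @mgctx: migration context
 *
 * Migrate tasks in @mgctx as setup by migration preparation functions.
 * This function fails iff one of the ->can_attach callbacks fails and
 * guarantees that either all or none of the tasks in @mgctx are migrated.
 * @mgctx is consumed regardless of success.
 */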
static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
{
	struct cgroup_taskset *tset = &mgctx->tset;
	struct cgroup_subsys *ss;
	struct task_struct *task, *tmp_task;
	struct css_set *cset, *tmp_cset;
	int ssid, failed_ssid, ret;

	/* check that we can legitimately attach to the cgroup */
	if (tset->nr_tasks) {
		do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
			if (ss->can_attach) {
				tset->ssid = ssid;
				ret = ss->can_attach(tset);
				if (ret) {
					failed_ssid = ssid;
					goto out_cancel_attach;
				}
			}
		} while_each_subsys_mask();
	}

	/*
	 * Now that we're guaranteed success, proceed to move all tasks to
	 * the new cgroup.  There are no failure cases after here, so this
	 * is the commit point.
	 */
	spin_lock_irq(&css_set_lock);
	list_for_each_entry(cset, &tset->src_csets, mg_node) {
		list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
			struct css_set *from_cset = task_css_set(task);
			struct css_set *to_cset = cset->mg_dst_cset;

			get_css_set(to_cset);
			to_cset->nr_tasks++;
			css_set_move_task(task, from_cset, to_cset, true);
			from_cset->nr_tasks--;
			/*
			 * If the source or destination cgroup is frozen,
			 * the task might require to change its state.
			 */
			cgroup_freezer_migrate_task(task, from_cset->dfl_cgrp,
						    to_cset->dfl_cgrp);
			put_css_set_locked(from_cset);
		}
	}
	spin_unlock_irq(&css_set_lock);

	/*
	 * Migration is committed, all target tasks are now on dst_csets.
	 * Nothing is sensitive to fork() after this point.  Notify
	 * controllers that migration is complete.
	 */
	tset->csets = &tset->dst_csets;

	if (tset->nr_tasks) {
		do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
			if (ss->attach) {
				tset->ssid = ssid;
				ss->attach(tset);
			}
		} while_each_subsys_mask();
	}

	ret = 0;
	goto out_release_tset;

out_cancel_attach:
	if (tset->nr_tasks) {
		do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
			if (ssid == failed_ssid)
				break;
			if (ss->cancel_attach) {
				tset->ssid = ssid;
				ss->cancel_attach(tset);
			}
		} while_each_subsys_mask();
	}
out_release_tset:
	spin_lock_irq(&css_set_lock);
	list_splice_init(&tset->dst_csets, &tset->src_csets);
	list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
		list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
		list_del_init(&cset->mg_node);
	}
	spin_unlock_irq(&css_set_lock);

	/*
	 * Re-initialize the cgroup_taskset structure in case it is reused
	 * again in another cgroup_migrate_add_task()/cgroup_migrate_execute()
	 * iteration.
	 */
	tset->nr_tasks = 0;
	tset->csets = &tset->src_csets;
	return ret;
}
/**
 * cgroup_migrate_vet_dst - verify whether a cgroup can be migration destination
 * @dst_cgrp: destination cgroup to test
 *
 * On the default hierarchy, except for the mixable, (possible) thread root
 * and threaded cgroups, subtree_control must be zero for migration
 * destination cgroups with tasks so that child cgroups don't compete
 * against tasks.
 */
int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp)
{
	/* v1 doesn't have any restriction */
	if (!cgroup_on_dfl(dst_cgrp))
		return 0;

	/* verify @dst_cgrp can host resources */
	if (!cgroup_is_valid_domain(dst_cgrp->dom_cgrp))
		return -EOPNOTSUPP;

	/* mixables don't care */
	if (cgroup_is_mixable(dst_cgrp))
		return 0;

	/*
	 * If @dst_cgrp is already or can become a thread root or is
	 * threaded, it doesn't matter.
	 */
	if (cgroup_can_be_thread_root(dst_cgrp) || cgroup_is_threaded(dst_cgrp))
		return 0;

	/* apply no-internal-process constraint */
	if (dst_cgrp->subtree_control)
		return -EBUSY;

	return 0;
}

/**
 * cgroup_migrate_finish - cleanup after attach
 * @mgctx: migration context
 *
 * Undo whatever cgroup_migrate_add_src() and cgroup_migrate_prepare_dst()
 * have done.  See those functions for details.
 */
void cgroup_migrate_finish(struct cgroup_mgctx *mgctx)
{
	LIST_HEAD(preloaded);
	struct css_set *cset, *tmp_cset;

	lockdep_assert_held(&cgroup_mutex);

	spin_lock_irq(&css_set_lock);

	list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded);
	list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded);

	list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) {
		cset->mg_src_cgrp = NULL;
		cset->mg_dst_cgrp = NULL;
		cset->mg_dst_cset = NULL;
		list_del_init(&cset->mg_preload_node);
		put_css_set_locked(cset);
	}

	spin_unlock_irq(&css_set_lock);
}

/**
 * cgroup_migrate_add_src - add a migration source css_set
 * @src_cset: the source css_set to add
 * @dst_cgrp: the destination cgroup
 * @mgctx: migration context
 *
 * Tasks belonging to @src_cset are about to be migrated to @dst_cgrp.  Pin
 * @src_cset and add it to @mgctx->preloaded_src_csets, which should later
 * be cleaned up by cgroup_migrate_finish().
 *
 * This function may be called without holding cgroup_threadgroup_rwsem
 * even if the target is a process.  Threads may be created and destroyed
 * but as long as cgroup_mutex is not dropped, no new css_set can be put
 * into play and the preloaded css_sets are guaranteed to stay put.
 */
void cgroup_migrate_add_src(struct css_set *src_cset,
			    struct cgroup *dst_cgrp,
			    struct cgroup_mgctx *mgctx)
{
	struct cgroup *src_cgrp;

	lockdep_assert_held(&cgroup_mutex);
	lockdep_assert_held(&css_set_lock);

	/*
	 * If ->dead, @src_set is associated with one or more dead cgroups
	 * and doesn't contain any migratable tasks.  Ignore it early so
	 * that the rest of migration path doesn't get confused by it.
	 */
	if (src_cset->dead)
		return;

	src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);

	if (!list_empty(&src_cset->mg_preload_node))
		return;

	WARN_ON(src_cset->mg_src_cgrp);
	WARN_ON(src_cset->mg_dst_cgrp);
	WARN_ON(!list_empty(&src_cset->mg_tasks));
	WARN_ON(!list_empty(&src_cset->mg_node));

	src_cset->mg_src_cgrp = src_cgrp;
	src_cset->mg_dst_cgrp = dst_cgrp;
	get_css_set(src_cset);
	list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets);
}
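/**
 * cgroup_migrate_prepare_dst - prepare destination css_sets for migration
 * @mgctx: migration context
 *
 * Tasks are about to be moved and all the source css_sets have been
 * preloaded to @mgctx->preloaded_src_csets.  This function looks up and
 * pins all destination css_sets, links each to its source, and appends
 * them to @mgctx->preloaded_dst_csets.
 *
 * This function must be called after cgroup_migrate_add_src() has been
 * called on each migration source css_set.  After migration is performed
 * using cgroup_migrate(), cgroup_migrate_finish() must be called on
 * @mgctx.
 */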
int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
{
	struct css_set *src_cset, *tmp_cset;

	lockdep_assert_held(&cgroup_mutex);

	/* look up the dst cset for each src cset and link it to src */
	list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets,
				 mg_preload_node) {
		struct css_set *dst_cset;
		struct cgroup_subsys *ss;
		int ssid;

		dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
		if (!dst_cset)
			return -ENOMEM;

		WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);

		/*
		 * If src cset equals dst, it's noop.  Drop the src.
		 * cgroup_migrate() will skip the cset too.  Note that we
		 * can't handle src == dst as some nodes are used by both.
		 */
		if (src_cset == dst_cset) {
			src_cset->mg_src_cgrp = NULL;
			src_cset->mg_dst_cgrp = NULL;
			list_del_init(&src_cset->mg_preload_node);
			put_css_set(src_cset);
			put_css_set(dst_cset);
			continue;
		}

		src_cset->mg_dst_cset = dst_cset;

		if (list_empty(&dst_cset->mg_preload_node))
			list_add_tail(&dst_cset->mg_preload_node,
				      &mgctx->preloaded_dst_csets);
		else
			put_css_set(dst_cset);

		for_each_subsys(ss, ssid)
			if (src_cset->subsys[ssid] != dst_cset->subsys[ssid])
				mgctx->ss_mask |= 1 << ssid;
	}

	return 0;
}
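/**
 * cgroup_migrate - migrate a process or task to a cgroup
 * @leader: the leader of the process or the task to migrate
 * @threadgroup: whether @leader points to the whole process or a single task
 * @mgctx: migration context
 *
 * Migrate a process or task denoted by @leader.  If migrating a process,
 * the caller must be holding cgroup_threadgroup_rwsem.  The caller is also
 * responsible for invoking cgroup_migrate_add_src() and
 * cgroup_migrate_prepare_dst() on the targets before invoking this
 * function and following up with cgroup_migrate_finish().
 *
 * As long as a controller's ->can_attach() doesn't fail, this function is
 * guaranteed to succeed.  This means that, excluding ->can_attach()
 * failure, when migrating multiple targets, the success or failure can be
 * decided for all targets by invoking cgroup_migrate_prepare_dst() before
 * actually starting migrating.
 */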
2712int cgroup_migrate(struct task_struct *leader, bool threadgroup,
2713 struct cgroup_mgctx *mgctx)
2714{
2715 struct task_struct *task;
2716
2717
2718
2719
2720
2721
2722 spin_lock_irq(&css_set_lock);
2723 rcu_read_lock();
2724 task = leader;
2725 do {
2726 cgroup_migrate_add_task(task, mgctx);
2727 if (!threadgroup)
2728 break;
2729 } while_each_thread(leader, task);
2730 rcu_read_unlock();
2731 spin_unlock_irq(&css_set_lock);
2732
2733 return cgroup_migrate_execute(mgctx);
2734}
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
2745 bool threadgroup)
2746{
2747 DEFINE_CGROUP_MGCTX(mgctx);
2748 struct task_struct *task;
2749 int ret = 0;
2750
2751
2752 spin_lock_irq(&css_set_lock);
2753 rcu_read_lock();
2754 task = leader;
2755 do {
2756 cgroup_migrate_add_src(task_css_set(task), dst_cgrp, &mgctx);
2757 if (!threadgroup)
2758 break;
2759 } while_each_thread(leader, task);
2760 rcu_read_unlock();
2761 spin_unlock_irq(&css_set_lock);
2762
2763
2764 ret = cgroup_migrate_prepare_dst(&mgctx);
2765 if (!ret)
2766 ret = cgroup_migrate(leader, threadgroup, &mgctx);
2767
2768 cgroup_migrate_finish(&mgctx);
2769
2770 if (!ret)
2771 TRACE_CGROUP_PATH(attach_task, dst_cgrp, leader, threadgroup);
2772
2773 return ret;
2774}
2775
struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
					     bool *locked)
	__acquires(&cgroup_threadgroup_rwsem)
{
	struct task_struct *tsk;
	pid_t pid;

	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
		return ERR_PTR(-EINVAL);

	/*
	 * If we migrate a single thread, we don't care about threadgroup
	 * stability.  The threadgroup writer lock is only needed when a
	 * whole process is being moved or when the target is a task other
	 * than the current one; otherwise cgroup_mutex is sufficient.
	 */
	lockdep_assert_held(&cgroup_mutex);
	if (pid || threadgroup) {
		percpu_down_write(&cgroup_threadgroup_rwsem);
		*locked = true;
	} else {
		*locked = false;
	}

	rcu_read_lock();
	if (pid) {
		tsk = find_task_by_vpid(pid);
		if (!tsk) {
			tsk = ERR_PTR(-ESRCH);
			goto out_unlock_threadgroup;
		}
	} else {
		tsk = current;
	}

	if (threadgroup)
		tsk = tsk->group_leader;

	/*
	 * kthreads may acquire PF_NO_SETAFFINITY during initialization.
	 * If userland migrates such a kthread to a non-root cgroup, it can
	 * become trapped in a cpuset, or RT kthread may be born in a
	 * cgroup with no rt_runtime allocated.  Just say no.
	 */
	if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
		tsk = ERR_PTR(-EINVAL);
		goto out_unlock_threadgroup;
	}

	get_task_struct(tsk);
	goto out_unlock_rcu;

out_unlock_threadgroup:
	if (*locked) {
		percpu_up_write(&cgroup_threadgroup_rwsem);
		*locked = false;
	}
out_unlock_rcu:
	rcu_read_unlock();
	return tsk;
}

void cgroup_procs_write_finish(struct task_struct *task, bool locked)
	__releases(&cgroup_threadgroup_rwsem)
{
	struct cgroup_subsys *ss;
	int ssid;

	/* release reference from cgroup_procs_write_start() */
	put_task_struct(task);

	if (locked)
		percpu_up_write(&cgroup_threadgroup_rwsem);
	for_each_subsys(ss, ssid)
		if (ss->post_attach)
			ss->post_attach();
}

static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask)
{
	struct cgroup_subsys *ss;
	bool printed = false;
	int ssid;

	do_each_subsys_mask(ss, ssid, ss_mask) {
		if (printed)
			seq_putc(seq, ' ');
		seq_puts(seq, ss->name);
		printed = true;
	} while_each_subsys_mask();
	if (printed)
		seq_putc(seq, '\n');
}

/* show controllers which are enabled from the parent */
static int cgroup_controllers_show(struct seq_file *seq, void *v)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;

	cgroup_print_ss_mask(seq, cgroup_control(cgrp));
	return 0;
}

/* show controllers which are enabled for a given cgroup's children */
static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;

	cgroup_print_ss_mask(seq, cgrp->subtree_control);
	return 0;
}
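
/**
 * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy
 * @cgrp: root of the subtree to update csses for
 *
 * @cgrp's control masks have changed and its subtree's css associations
 * need to be updated accordingly.  This function looks up all css_sets
 * which are attached to the subtree, creates the matching updated css_sets
 * and migrates the tasks to the new ones.
 */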
static int cgroup_update_dfl_csses(struct cgroup *cgrp)
{
	DEFINE_CGROUP_MGCTX(mgctx);
	struct cgroup_subsys_state *d_css;
	struct cgroup *dsct;
	struct css_set *src_cset;
	int ret;

	lockdep_assert_held(&cgroup_mutex);

	percpu_down_write(&cgroup_threadgroup_rwsem);

	/* look up all csses currently attached to @cgrp's subtree */
	spin_lock_irq(&css_set_lock);
	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
		struct cgrp_cset_link *link;

		list_for_each_entry(link, &dsct->cset_links, cset_link)
			cgroup_migrate_add_src(link->cset, dsct, &mgctx);
	}
	spin_unlock_irq(&css_set_lock);

	ret = cgroup_migrate_prepare_dst(&mgctx);
	if (ret)
		goto out_finish;

	spin_lock_irq(&css_set_lock);
	list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) {
		struct task_struct *task, *ntask;

		/* all tasks in src_csets need to be migrated */
		list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
			cgroup_migrate_add_task(task, &mgctx);
	}
	spin_unlock_irq(&css_set_lock);

	ret = cgroup_migrate_execute(&mgctx);
out_finish:
	cgroup_migrate_finish(&mgctx);
	percpu_up_write(&cgroup_threadgroup_rwsem);
	return ret;
}
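
/**
 * cgroup_lock_and_drain_offline - lock cgroup_mutex and drain offlined csses
 * @cgrp: root of the target subtree
 *
 * Because css offlining is asynchronous, userland may try to re-enable a
 * controller while the previous css is still around.  This function grabs
 * cgroup_mutex and drains the previous css instances of @cgrp's subtree.
 */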
void cgroup_lock_and_drain_offline(struct cgroup *cgrp)
	__acquires(&cgroup_mutex)
{
	struct cgroup *dsct;
	struct cgroup_subsys_state *d_css;
	struct cgroup_subsys *ss;
	int ssid;

restart:
	mutex_lock(&cgroup_mutex);

	cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
		for_each_subsys(ss, ssid) {
			struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
			DEFINE_WAIT(wait);

			if (!css || !percpu_ref_is_dying(&css->refcnt))
				continue;

			cgroup_get_live(dsct);
			prepare_to_wait(&dsct->offline_waitq, &wait,
					TASK_UNINTERRUPTIBLE);

			mutex_unlock(&cgroup_mutex);
			schedule();
			finish_wait(&dsct->offline_waitq, &wait);

			cgroup_put(dsct);
			goto restart;
		}
	}
}
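
/**
 * cgroup_save_control - save control masks and dom_cgrp of a subtree
 * @cgrp: root of the target subtree
 *
 * Save ->subtree_control, ->subtree_ss_mask and ->dom_cgrp to the
 * respective old_ prefixed fields for @cgrp's subtree including @cgrp
 * itself.
 */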
static void cgroup_save_control(struct cgroup *cgrp)
{
	struct cgroup *dsct;
	struct cgroup_subsys_state *d_css;

	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
		dsct->old_subtree_control = dsct->subtree_control;
		dsct->old_subtree_ss_mask = dsct->subtree_ss_mask;
		dsct->old_dom_cgrp = dsct->dom_cgrp;
	}
}
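
/**
 * cgroup_propagate_control - refresh control masks of a subtree
 * @cgrp: root of the target subtree
 *
 * For @cgrp and its subtree, ensure ->subtree_ss_mask matches
 * ->subtree_control and propagate controller availability through the
 * subtree so that descendants don't have unavailable controllers enabled.
 */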
static void cgroup_propagate_control(struct cgroup *cgrp)
{
	struct cgroup *dsct;
	struct cgroup_subsys_state *d_css;

	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
		dsct->subtree_control &= cgroup_control(dsct);
		dsct->subtree_ss_mask =
			cgroup_calc_subtree_ss_mask(dsct->subtree_control,
						    cgroup_ss_mask(dsct));
	}
}
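
/**
 * cgroup_restore_control - restore control masks and dom_cgrp of a subtree
 * @cgrp: root of the target subtree
 *
 * Restore ->subtree_control, ->subtree_ss_mask and ->dom_cgrp from the
 * respective old_ prefixed fields for @cgrp's subtree including @cgrp
 * itself.
 */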
static void cgroup_restore_control(struct cgroup *cgrp)
{
	struct cgroup *dsct;
	struct cgroup_subsys_state *d_css;

	cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
		dsct->subtree_control = dsct->old_subtree_control;
		dsct->subtree_ss_mask = dsct->old_subtree_ss_mask;
		dsct->dom_cgrp = dsct->old_dom_cgrp;
	}
}

static bool css_visible(struct cgroup_subsys_state *css)
{
	struct cgroup_subsys *ss = css->ss;
	struct cgroup *cgrp = css->cgroup;

	if (cgroup_control(cgrp) & (1 << ss->id))
		return true;
	if (!(cgroup_ss_mask(cgrp) & (1 << ss->id)))
		return false;
	return cgroup_on_dfl(cgrp) && ss->implicit_on_dfl;
}
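
/**
 * cgroup_apply_control_enable - enable or show csses according to control
 * @cgrp: root of the target subtree
 *
 * Walk @cgrp's subtree and create new csses or make the existing ones
 * visible.  A css is created invisible if it's being implicitly enabled
 * through dependency.  An invisible css is made visible when the userland
 * explicitly enables it.
 *
 * Returns 0 on success, -errno on failure.  On failure, csses which have
 * been processed already aren't cleaned up.  The caller is responsible for
 * cleaning up with cgroup_apply_control_disable().
 */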
static int cgroup_apply_control_enable(struct cgroup *cgrp)
{
	struct cgroup *dsct;
	struct cgroup_subsys_state *d_css;
	struct cgroup_subsys *ss;
	int ssid, ret;

	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
		for_each_subsys(ss, ssid) {
			struct cgroup_subsys_state *css = cgroup_css(dsct, ss);

			if (!(cgroup_ss_mask(dsct) & (1 << ss->id)))
				continue;

			if (!css) {
				css = css_create(dsct, ss);
				if (IS_ERR(css))
					return PTR_ERR(css);
			}

			WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt));

			if (css_visible(css)) {
				ret = css_populate_dir(css);
				if (ret)
					return ret;
			}
		}
	}

	return 0;
}
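
/**
 * cgroup_apply_control_disable - kill or hide csses according to control
 * @cgrp: root of the target subtree
 *
 * Walk @cgrp's subtree and kill and hide csses so that they match
 * cgroup_ss_mask() and the visibility rules.
 *
 * A css is hidden when the userland requests it to be disabled while other
 * subsystems are still depending on it.  The css must not actively control
 * resources and be in the vanilla state if it's made visible again later.
 * Controllers which may be depended upon should provide ->css_reset() for
 * this purpose.
 */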
static void cgroup_apply_control_disable(struct cgroup *cgrp)
{
	struct cgroup *dsct;
	struct cgroup_subsys_state *d_css;
	struct cgroup_subsys *ss;
	int ssid;

	cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
		for_each_subsys(ss, ssid) {
			struct cgroup_subsys_state *css = cgroup_css(dsct, ss);

			if (!css)
				continue;

			WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt));

			if (css->parent &&
			    !(cgroup_ss_mask(dsct) & (1 << ss->id))) {
				kill_css(css);
			} else if (!css_visible(css)) {
				css_clear_dir(css);
				if (ss->css_reset)
					ss->css_reset(css);
			}
		}
	}
}
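
/**
 * cgroup_apply_control - apply control mask updates to the subtree
 * @cgrp: root of the target subtree
 *
 * subsystems can be enabled and disabled in a subtree using the following
 * steps.
 *
 * 1. Call cgroup_save_control() to stash the current state.
 * 2. Update ->subtree_control masks in the subtree as desired.
 * 3. Call cgroup_apply_control() to apply the changes.
 * 4. Optionally perform other related operations.
 * 5. Call cgroup_finalize_control() to finish up.
 *
 * This function implements step 3 and propagates the mask changes
 * throughout @cgrp's subtree, updates csses accordingly and performs
 * process migrations.
 */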
static int cgroup_apply_control(struct cgroup *cgrp)
{
	int ret;

	cgroup_propagate_control(cgrp);

	ret = cgroup_apply_control_enable(cgrp);
	if (ret)
		return ret;

	/*
	 * At this point, cgroup_e_css_by_mask() results reflect the new
	 * csses making the following cgroup_update_dfl_csses() properly
	 * update css associations of all tasks in the subtree.
	 */
	ret = cgroup_update_dfl_csses(cgrp);
	if (ret)
		return ret;

	return 0;
}
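
/**
 * cgroup_finalize_control - finalize control mask update
 * @cgrp: root of the target subtree
 * @ret: the result of the update
 *
 * Finalize control mask update.  See cgroup_apply_control() for more info.
 */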
static void cgroup_finalize_control(struct cgroup *cgrp, int ret)
{
	if (ret) {
		cgroup_restore_control(cgrp);
		cgroup_propagate_control(cgrp);
	}

	cgroup_apply_control_disable(cgrp);
}

static int cgroup_vet_subtree_control_enable(struct cgroup *cgrp, u16 enable)
{
	u16 domain_enable = enable & ~cgrp_dfl_threaded_ss_mask;

	/* if nothing is getting enabled, nothing to worry about */
	if (!enable)
		return 0;

	/* can @cgrp host any resources? */
	if (!cgroup_is_valid_domain(cgrp->dom_cgrp))
		return -EOPNOTSUPP;

	/* mixables don't care */
	if (cgroup_is_mixable(cgrp))
		return 0;

	if (domain_enable) {
		/* can't enable domain controllers inside a thread subtree */
		if (cgroup_is_thread_root(cgrp) || cgroup_is_threaded(cgrp))
			return -EOPNOTSUPP;
	} else {
		/*
		 * Threaded controllers can handle internal competitions
		 * and are always allowed inside a (prospective) thread
		 * subtree.
		 */
		if (cgroup_can_be_thread_root(cgrp) || cgroup_is_threaded(cgrp))
			return 0;
	}

	/*
	 * Controllers can't be enabled for a cgroup with tasks to avoid
	 * child cgroups competing against tasks.
	 */
	if (cgroup_has_tasks(cgrp))
		return -EBUSY;

	return 0;
}

/* change the enabled child controllers for a cgroup in the default hierarchy */
static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes,
					    loff_t off)
{
	u16 enable = 0, disable = 0;
	struct cgroup *cgrp, *child;
	struct cgroup_subsys *ss;
	char *tok;
	int ssid, ret;

	/*
	 * Parse input - space separated list of subsystem names prefixed
	 * with either + or -.
	 */
	buf = strstrip(buf);
	while ((tok = strsep(&buf, " "))) {
		if (tok[0] == '\0')
			continue;
		do_each_subsys_mask(ss, ssid, ~cgrp_dfl_inhibit_ss_mask) {
			if (!cgroup_ssid_enabled(ssid) ||
			    strcmp(tok + 1, ss->name))
				continue;

			if (*tok == '+') {
				enable |= 1 << ssid;
				disable &= ~(1 << ssid);
			} else if (*tok == '-') {
				disable |= 1 << ssid;
				enable &= ~(1 << ssid);
			} else {
				return -EINVAL;
			}
			break;
		} while_each_subsys_mask();
		if (ssid == CGROUP_SUBSYS_COUNT)
			return -EINVAL;
	}

	cgrp = cgroup_kn_lock_live(of->kn, true);
	if (!cgrp)
		return -ENODEV;

	for_each_subsys(ss, ssid) {
		if (enable & (1 << ssid)) {
			if (cgrp->subtree_control & (1 << ssid)) {
				enable &= ~(1 << ssid);
				continue;
			}

			if (!(cgroup_control(cgrp) & (1 << ssid))) {
				ret = -ENOENT;
				goto out_unlock;
			}
		} else if (disable & (1 << ssid)) {
			if (!(cgrp->subtree_control & (1 << ssid))) {
				disable &= ~(1 << ssid);
				continue;
			}

			/* a child has it enabled? */
			cgroup_for_each_live_child(child, cgrp) {
				if (child->subtree_control & (1 << ssid)) {
					ret = -EBUSY;
					goto out_unlock;
				}
			}
		}
	}

	if (!enable && !disable) {
		ret = 0;
		goto out_unlock;
	}

	ret = cgroup_vet_subtree_control_enable(cgrp, enable);
	if (ret)
		goto out_unlock;

	/* save and update control masks and prepare csses */
	cgroup_save_control(cgrp);

	cgrp->subtree_control |= enable;
	cgrp->subtree_control &= ~disable;

	ret = cgroup_apply_control(cgrp);
	cgroup_finalize_control(cgrp, ret);
	if (ret)
		goto out_unlock;

	kernfs_activate(cgrp->kn);
out_unlock:
	cgroup_kn_unlock(of->kn);
	return ret ?: nbytes;
}
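
/**
 * cgroup_enable_threaded - make @cgrp threaded
 * @cgrp: the target cgroup
 *
 * Called when "threaded" is written to the cgroup.type interface file and
 * tries to make @cgrp threaded and join the parent's resource domain.
 * This function is never called on the root cgroup as cgroup.type doesn't
 * exist on it.
 */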
static int cgroup_enable_threaded(struct cgroup *cgrp)
{
	struct cgroup *parent = cgroup_parent(cgrp);
	struct cgroup *dom_cgrp = parent->dom_cgrp;
	struct cgroup *dsct;
	struct cgroup_subsys_state *d_css;
	int ret;

	lockdep_assert_held(&cgroup_mutex);

	/* noop if already threaded */
	if (cgroup_is_threaded(cgrp))
		return 0;

	/*
	 * If @cgroup is populated or has domain controllers enabled, it
	 * can't be switched.  While the below cgroup_can_be_thread_root()
	 * test can catch the same conditions, that's only when @parent is
	 * not mixable, so let's check it explicitly.
	 */
	if (cgroup_is_populated(cgrp) ||
	    cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
		return -EOPNOTSUPP;

	/* we're joining the parent's domain, ensure its validity */
	if (!cgroup_is_valid_domain(dom_cgrp) ||
	    !cgroup_can_be_thread_root(dom_cgrp))
		return -EOPNOTSUPP;

	/*
	 * The following shouldn't cause actual migrations and should
	 * always succeed.
	 */
	cgroup_save_control(cgrp);

	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)
		if (dsct == cgrp || cgroup_is_threaded(dsct))
			dsct->dom_cgrp = dom_cgrp;

	ret = cgroup_apply_control(cgrp);
	if (!ret)
		parent->nr_threaded_children++;

	cgroup_finalize_control(cgrp, ret);
	return ret;
}

static int cgroup_type_show(struct seq_file *seq, void *v)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;

	if (cgroup_is_threaded(cgrp))
		seq_puts(seq, "threaded\n");
	else if (!cgroup_is_valid_domain(cgrp))
		seq_puts(seq, "domain invalid\n");
	else if (cgroup_is_thread_root(cgrp))
		seq_puts(seq, "domain threaded\n");
	else
		seq_puts(seq, "domain\n");

	return 0;
}

static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf,
				 size_t nbytes, loff_t off)
{
	struct cgroup *cgrp;
	int ret;

	/* only switching to threaded mode is supported */
	if (strcmp(strstrip(buf), "threaded"))
		return -EINVAL;

	/* drain dying csses before we re-apply (threaded) subtree control */
	cgrp = cgroup_kn_lock_live(of->kn, true);
	if (!cgrp)
		return -ENOENT;

	/* threaded can only be enabled */
	ret = cgroup_enable_threaded(cgrp);

	cgroup_kn_unlock(of->kn);
	return ret ?: nbytes;
}

static int cgroup_max_descendants_show(struct seq_file *seq, void *v)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;
	int descendants = READ_ONCE(cgrp->max_descendants);

	if (descendants == INT_MAX)
		seq_puts(seq, "max\n");
	else
		seq_printf(seq, "%d\n", descendants);

	return 0;
}

static ssize_t cgroup_max_descendants_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes, loff_t off)
{
	struct cgroup *cgrp;
	int descendants;
	ssize_t ret;

	buf = strstrip(buf);
	if (!strcmp(buf, "max")) {
		descendants = INT_MAX;
	} else {
		ret = kstrtoint(buf, 0, &descendants);
		if (ret)
			return ret;
	}

	if (descendants < 0)
		return -ERANGE;

	cgrp = cgroup_kn_lock_live(of->kn, false);
	if (!cgrp)
		return -ENOENT;

	cgrp->max_descendants = descendants;

	cgroup_kn_unlock(of->kn);

	return nbytes;
}

static int cgroup_max_depth_show(struct seq_file *seq, void *v)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;
	int depth = READ_ONCE(cgrp->max_depth);

	if (depth == INT_MAX)
		seq_puts(seq, "max\n");
	else
		seq_printf(seq, "%d\n", depth);

	return 0;
}

static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
				      char *buf, size_t nbytes, loff_t off)
{
	struct cgroup *cgrp;
	ssize_t ret;
	int depth;

	buf = strstrip(buf);
	if (!strcmp(buf, "max")) {
		depth = INT_MAX;
	} else {
		ret = kstrtoint(buf, 0, &depth);
		if (ret)
			return ret;
	}

	if (depth < 0)
		return -ERANGE;

	cgrp = cgroup_kn_lock_live(of->kn, false);
	if (!cgrp)
		return -ENOENT;

	cgrp->max_depth = depth;

	cgroup_kn_unlock(of->kn);

	return nbytes;
}

static int cgroup_events_show(struct seq_file *seq, void *v)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;

	seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp));
	seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags));

	return 0;
}

static int cgroup_stat_show(struct seq_file *seq, void *v)
{
	struct cgroup *cgroup = seq_css(seq)->cgroup;

	seq_printf(seq, "nr_descendants %d\n",
		   cgroup->nr_descendants);
	seq_printf(seq, "nr_dying_descendants %d\n",
		   cgroup->nr_dying_descendants);

	return 0;
}

static int __maybe_unused cgroup_extra_stat_show(struct seq_file *seq,
						 struct cgroup *cgrp, int ssid)
{
	struct cgroup_subsys *ss = cgroup_subsys[ssid];
	struct cgroup_subsys_state *css;
	int ret;

	if (!ss->css_extra_stat_show)
		return 0;

	css = cgroup_tryget_css(cgrp, ss);
	if (!css)
		return 0;

	ret = ss->css_extra_stat_show(seq, css);
	css_put(css);
	return ret;
}

static int cpu_stat_show(struct seq_file *seq, void *v)
{
	struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
	int ret = 0;

	cgroup_base_stat_cputime_show(seq);
#ifdef CONFIG_CGROUP_SCHED
	ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id);
#endif
	return ret;
}

#ifdef CONFIG_PSI
static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;
	struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;

	return psi_show(seq, psi, PSI_IO);
}
static int cgroup_memory_pressure_show(struct seq_file *seq, void *v)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;
	struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;

	return psi_show(seq, psi, PSI_MEM);
}
static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;
	struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;

	return psi_show(seq, psi, PSI_CPU);
}

static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
				     size_t nbytes, enum psi_res res)
{
	struct psi_trigger *new;
	struct cgroup *cgrp;
	struct psi_group *psi;

	cgrp = cgroup_kn_lock_live(of->kn, false);
	if (!cgrp)
		return -ENODEV;

	cgroup_get(cgrp);
	cgroup_kn_unlock(of->kn);

	psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
	new = psi_trigger_create(psi, buf, nbytes, res);
	if (IS_ERR(new)) {
		cgroup_put(cgrp);
		return PTR_ERR(new);
	}

	psi_trigger_replace(&of->priv, new);

	cgroup_put(cgrp);

	return nbytes;
}

static ssize_t cgroup_io_pressure_write(struct kernfs_open_file *of,
					char *buf, size_t nbytes,
					loff_t off)
{
	return cgroup_pressure_write(of, buf, nbytes, PSI_IO);
}

static ssize_t cgroup_memory_pressure_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes,
					    loff_t off)
{
	return cgroup_pressure_write(of, buf, nbytes, PSI_MEM);
}

static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of,
					 char *buf, size_t nbytes,
					 loff_t off)
{
	return cgroup_pressure_write(of, buf, nbytes, PSI_CPU);
}

static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
				     poll_table *pt)
{
	return psi_trigger_poll(&of->priv, of->file, pt);
}

static void cgroup_pressure_release(struct kernfs_open_file *of)
{
	psi_trigger_replace(&of->priv, NULL);
}

bool cgroup_psi_enabled(void)
{
	return (cgroup_feature_disable_mask & (1 << OPT_FEATURE_PRESSURE)) == 0;
}

#else /* CONFIG_PSI */
bool cgroup_psi_enabled(void)
{
	return false;
}

#endif /* CONFIG_PSI */

static int cgroup_freeze_show(struct seq_file *seq, void *v)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;

	seq_printf(seq, "%d\n", cgrp->freezer.freeze);

	return 0;
}

static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
				   char *buf, size_t nbytes, loff_t off)
{
	struct cgroup *cgrp;
	ssize_t ret;
	int freeze;

	ret = kstrtoint(strstrip(buf), 0, &freeze);
	if (ret)
		return ret;

	if (freeze < 0 || freeze > 1)
		return -ERANGE;

	cgrp = cgroup_kn_lock_live(of->kn, false);
	if (!cgrp)
		return -ENOENT;

	cgroup_freeze(cgrp, freeze);

	cgroup_kn_unlock(of->kn);

	return nbytes;
}

static void __cgroup_kill(struct cgroup *cgrp)
{
	struct css_task_iter it;
	struct task_struct *task;

	lockdep_assert_held(&cgroup_mutex);

	spin_lock_irq(&css_set_lock);
	set_bit(CGRP_KILL, &cgrp->flags);
	spin_unlock_irq(&css_set_lock);

	css_task_iter_start(&cgrp->self, CSS_TASK_ITER_PROCS | CSS_TASK_ITER_THREADED, &it);
	while ((task = css_task_iter_next(&it))) {
		/* ignore kernel threads here */
		if (task->flags & PF_KTHREAD)
			continue;

		/* skip tasks which already have a pending fatal signal */
		if (__fatal_signal_pending(task))
			continue;

		send_sig(SIGKILL, task, 0);
	}
	css_task_iter_end(&it);

	spin_lock_irq(&css_set_lock);
	clear_bit(CGRP_KILL, &cgrp->flags);
	spin_unlock_irq(&css_set_lock);
}

static void cgroup_kill(struct cgroup *cgrp)
{
	struct cgroup_subsys_state *css;
	struct cgroup *dsct;

	lockdep_assert_held(&cgroup_mutex);

	cgroup_for_each_live_descendant_pre(dsct, css, cgrp)
		__cgroup_kill(dsct);
}

static ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf,
				 size_t nbytes, loff_t off)
{
	ssize_t ret = 0;
	int kill;
	struct cgroup *cgrp;

	ret = kstrtoint(strstrip(buf), 0, &kill);
	if (ret)
		return ret;

	if (kill != 1)
		return -ERANGE;

	cgrp = cgroup_kn_lock_live(of->kn, false);
	if (!cgrp)
		return -ENOENT;

	/*
	 * Killing is a process directed operation, i.e. the whole
	 * thread-group is taken down so act like we do for cgroup.procs
	 * and only make this writable in non-threaded cgroups.
	 */
	if (cgroup_is_threaded(cgrp))
		ret = -EOPNOTSUPP;
	else
		cgroup_kill(cgrp);

	cgroup_kn_unlock(of->kn);

	return ret ?: nbytes;
}

static int cgroup_file_open(struct kernfs_open_file *of)
{
	struct cftype *cft = of_cft(of);

	if (cft->open)
		return cft->open(of);
	return 0;
}

static void cgroup_file_release(struct kernfs_open_file *of)
{
	struct cftype *cft = of_cft(of);

	if (cft->release)
		cft->release(of);
}

static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
				 size_t nbytes, loff_t off)
{
	struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
	struct cgroup *cgrp = of->kn->parent->priv;
	struct cftype *cft = of_cft(of);
	struct cgroup_subsys_state *css;
	int ret;

	if (!nbytes)
		return 0;

	/*
	 * If namespaces are delegation boundaries, disallow writes to
	 * files in a non-init namespace root from inside the namespace
	 * except for the files explicitly marked delegatable -
	 * cgroup.procs and cgroup.subtree_control.
	 */
	if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) &&
	    !(cft->flags & CFTYPE_NS_DELEGATABLE) &&
	    ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp)
		return -EPERM;

	if (cft->write)
		return cft->write(of, buf, nbytes, off);

	/*
	 * kernfs guarantees that a file isn't deleted with operations in
	 * flight, which means that the matching css is and stays alive and
	 * doesn't need to be pinned.  The RCU locking is not necessary
	 * either.  It's just for the convenience of using cgroup_css().
	 */
	rcu_read_lock();
	css = cgroup_css(cgrp, cft->ss);
	rcu_read_unlock();

	if (cft->write_u64) {
		unsigned long long v;
		ret = kstrtoull(buf, 0, &v);
		if (!ret)
			ret = cft->write_u64(css, cft, v);
	} else if (cft->write_s64) {
		long long v;
		ret = kstrtoll(buf, 0, &v);
		if (!ret)
			ret = cft->write_s64(css, cft, v);
	} else {
		ret = -EINVAL;
	}

	return ret ?: nbytes;
}

static __poll_t cgroup_file_poll(struct kernfs_open_file *of, poll_table *pt)
{
	struct cftype *cft = of_cft(of);

	if (cft->poll)
		return cft->poll(of, pt);

	return kernfs_generic_poll(of, pt);
}

static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
{
	return seq_cft(seq)->seq_start(seq, ppos);
}

static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos)
{
	return seq_cft(seq)->seq_next(seq, v, ppos);
}

static void cgroup_seqfile_stop(struct seq_file *seq, void *v)
{
	if (seq_cft(seq)->seq_stop)
		seq_cft(seq)->seq_stop(seq, v);
}

static int cgroup_seqfile_show(struct seq_file *m, void *arg)
{
	struct cftype *cft = seq_cft(m);
	struct cgroup_subsys_state *css = seq_css(m);

	if (cft->seq_show)
		return cft->seq_show(m, arg);

	if (cft->read_u64)
		seq_printf(m, "%llu\n", cft->read_u64(css, cft));
	else if (cft->read_s64)
		seq_printf(m, "%lld\n", cft->read_s64(css, cft));
	else
		return -EINVAL;
	return 0;
}

static struct kernfs_ops cgroup_kf_single_ops = {
	.atomic_write_len	= PAGE_SIZE,
	.open			= cgroup_file_open,
	.release		= cgroup_file_release,
	.write			= cgroup_file_write,
	.poll			= cgroup_file_poll,
	.seq_show		= cgroup_seqfile_show,
};

static struct kernfs_ops cgroup_kf_ops = {
	.atomic_write_len	= PAGE_SIZE,
	.open			= cgroup_file_open,
	.release		= cgroup_file_release,
	.write			= cgroup_file_write,
	.poll			= cgroup_file_poll,
	.seq_start		= cgroup_seqfile_start,
	.seq_next		= cgroup_seqfile_next,
	.seq_stop		= cgroup_seqfile_stop,
	.seq_show		= cgroup_seqfile_show,
};

/* set uid and gid of cgroup dirs and files to that of the creator */
static int cgroup_kn_set_ugid(struct kernfs_node *kn)
{
	struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
			       .ia_uid = current_fsuid(),
			       .ia_gid = current_fsgid(), };

	if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
	    gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
		return 0;

	return kernfs_setattr(kn, &iattr);
}

static void cgroup_file_notify_timer(struct timer_list *timer)
{
	cgroup_file_notify(container_of(timer, struct cgroup_file,
					notify_timer));
}

static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
			   struct cftype *cft)
{
	char name[CGROUP_FILE_NAME_MAX];
	struct kernfs_node *kn;
	struct lock_class_key *key = NULL;
	int ret;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	key = &cft->lockdep_key;
#endif
	kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
				  cgroup_file_mode(cft),
				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
				  0, cft->kf_ops, cft,
				  NULL, key);
	if (IS_ERR(kn))
		return PTR_ERR(kn);

	ret = cgroup_kn_set_ugid(kn);
	if (ret) {
		kernfs_remove(kn);
		return ret;
	}

	if (cft->file_offset) {
		struct cgroup_file *cfile = (void *)css + cft->file_offset;

		timer_setup(&cfile->notify_timer, cgroup_file_notify_timer, 0);

		spin_lock_irq(&cgroup_file_kn_lock);
		cfile->kn = kn;
		spin_unlock_irq(&cgroup_file_kn_lock);
	}

	return 0;
}
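
/**
 * cgroup_addrm_files - add or remove files to a cgroup directory
 * @css: the target css
 * @cgrp: the target cgroup (usually css->cgroup)
 * @cfts: array of cftypes to be added
 * @is_add: whether to add or remove
 *
 * Depending on @is_add, add or remove files defined by @cfts on @cgrp.
 * For removals, this function never fails.
 */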
static int cgroup_addrm_files(struct cgroup_subsys_state *css,
			      struct cgroup *cgrp, struct cftype cfts[],
			      bool is_add)
{
	struct cftype *cft, *cft_end = NULL;
	int ret = 0;

	lockdep_assert_held(&cgroup_mutex);

restart:
	for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
		/* does cft->flags tell us to skip this file on @cgrp? */
		if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
			continue;
		if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
			continue;
		if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
			continue;
		if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp))
			continue;
		if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp))
			continue;
		if ((cft->flags & CFTYPE_DEBUG) && !cgroup_debug)
			continue;
		if (is_add) {
			ret = cgroup_add_file(css, cgrp, cft);
			if (ret) {
				pr_warn("%s: failed to add %s, err=%d\n",
					__func__, cft->name, ret);
				cft_end = cft;
				is_add = false;
				goto restart;
			}
		} else {
			cgroup_rm_file(cgrp, cft);
		}
	}
	return ret;
}

static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
{
	struct cgroup_subsys *ss = cfts[0].ss;
	struct cgroup *root = &ss->root->cgrp;
	struct cgroup_subsys_state *css;
	int ret = 0;

	lockdep_assert_held(&cgroup_mutex);

	/* add/rm files for all cgroups created before */
	css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
		struct cgroup *cgrp = css->cgroup;

		if (!(css->flags & CSS_VISIBLE))
			continue;

		ret = cgroup_addrm_files(css, cgrp, cfts, is_add);
		if (ret)
			break;
	}

	if (is_add && !ret)
		kernfs_activate(root->kn);
	return ret;
}

static void cgroup_exit_cftypes(struct cftype *cfts)
{
	struct cftype *cft;

	for (cft = cfts; cft->name[0] != '\0'; cft++) {
		/* free copy for custom atomic_write_len, see init_cftypes() */
		if (cft->max_write_len && cft->max_write_len != PAGE_SIZE)
			kfree(cft->kf_ops);
		cft->kf_ops = NULL;
		cft->ss = NULL;

		/* revert flags set by cgroup core while adding @cfts */
		cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL);
	}
}

static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
{
	struct cftype *cft;

	for (cft = cfts; cft->name[0] != '\0'; cft++) {
		struct kernfs_ops *kf_ops;

		WARN_ON(cft->ss || cft->kf_ops);

		if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
			continue;

		if (cft->seq_start)
			kf_ops = &cgroup_kf_ops;
		else
			kf_ops = &cgroup_kf_single_ops;

		/*
		 * Ugh... if @cft wants a custom max_write_len, we need to
		 * make a copy of kf_ops to set its atomic_write_len.
		 */
		if (cft->max_write_len && cft->max_write_len != PAGE_SIZE) {
			kf_ops = kmemdup(kf_ops, sizeof(*kf_ops), GFP_KERNEL);
			if (!kf_ops) {
				cgroup_exit_cftypes(cfts);
				return -ENOMEM;
			}
			kf_ops->atomic_write_len = cft->max_write_len;
		}

		cft->kf_ops = kf_ops;
		cft->ss = ss;
	}

	return 0;
}

static int cgroup_rm_cftypes_locked(struct cftype *cfts)
{
	lockdep_assert_held(&cgroup_mutex);

	if (!cfts || !cfts[0].ss)
		return -ENOENT;

	list_del(&cfts->node);
	cgroup_apply_cftypes(cfts, false);
	cgroup_exit_cftypes(cfts);
	return 0;
}
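
/**
 * cgroup_rm_cftypes - remove an array of cftypes from a subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Unregister @cfts.  Files described by @cfts are removed from all
 * existing cgroups and all future cgroups won't have them either.  This
 * function can be called anytime whether @cfts' subsys is attached or not.
 *
 * Returns 0 on successful unregistration, -ENOENT if @cfts is not
 * registered.
 */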
int cgroup_rm_cftypes(struct cftype *cfts)
{
	int ret;

	mutex_lock(&cgroup_mutex);
	ret = cgroup_rm_cftypes_locked(cfts);
	mutex_unlock(&cgroup_mutex);
	return ret;
}
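
/**
 * cgroup_add_cftypes - add an array of cftypes to a subsystem
 * @ss: target cgroup subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Register @cfts to @ss.  Files described by @cfts are created for all
 * existing cgroups to which @ss is attached and all future cgroups will
 * have them too.  This function can be called anytime whether @ss is
 * attached or not.
 *
 * Returns 0 on successful registration, -errno on failure.  Note that this
 * function currently returns 0 as long as @cfts registration is successful
 * even if some file creation attempts on existing cgroups fail.
 */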
static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
{
	int ret;

	if (!cgroup_ssid_enabled(ss->id))
		return 0;

	if (!cfts || cfts[0].name[0] == '\0')
		return 0;

	ret = cgroup_init_cftypes(ss, cfts);
	if (ret)
		return ret;

	mutex_lock(&cgroup_mutex);

	list_add_tail(&cfts->node, &ss->cfts);
	ret = cgroup_apply_cftypes(cfts, true);
	if (ret)
		cgroup_rm_cftypes_locked(cfts);

	mutex_unlock(&cgroup_mutex);
	return ret;
}
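
/**
 * cgroup_add_dfl_cftypes - add an array of cftypes for default hierarchy
 * @ss: target cgroup subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Similar to cgroup_add_cftypes() but the added files are only used for
 * the default hierarchy.
 */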
int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
{
	struct cftype *cft;

	for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
		cft->flags |= __CFTYPE_ONLY_ON_DFL;
	return cgroup_add_cftypes(ss, cfts);
}
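
/**
 * cgroup_add_legacy_cftypes - add an array of cftypes for legacy hierarchies
 * @ss: target cgroup subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Similar to cgroup_add_cftypes() but the added files are only used for
 * the legacy hierarchies.
 */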
int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
{
	struct cftype *cft;

	for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
		cft->flags |= __CFTYPE_NOT_ON_DFL;
	return cgroup_add_cftypes(ss, cfts);
}
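
/**
 * cgroup_file_notify - generate a file modified event for a cgroup_file
 * @cfile: target cgroup_file
 *
 * @cfile must have been obtained by setting cftype->file_offset.
 */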
void cgroup_file_notify(struct cgroup_file *cfile)
{
	unsigned long flags;

	spin_lock_irqsave(&cgroup_file_kn_lock, flags);
	if (cfile->kn) {
		unsigned long last = cfile->notified_at;
		unsigned long next = last + CGROUP_FILE_NOTIFY_MIN_INTV;

		/*
		 * Rate-limit notifications: events within
		 * CGROUP_FILE_NOTIFY_MIN_INTV of the last one are deferred
		 * to the notify timer.
		 */
		if (time_in_range(jiffies, last, next)) {
			timer_reduce(&cfile->notify_timer, next);
		} else {
			kernfs_notify(cfile->kn);
			cfile->notified_at = jiffies;
		}
	}
	spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
}
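
/**
 * css_next_child - find the next child of a given css
 * @pos: the current position (%NULL to initiate traversal)
 * @parent: css whose children to walk
 *
 * This function returns the next child of @parent and should be called
 * under either cgroup_mutex or RCU read lock.  The only requirement is
 * that @parent and @pos are accessible.  The next sibling is guaranteed to
 * be returned regardless of their states.
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal.  It's each subsystem's
 * responsibility to synchronize against on/offlining.
 */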
struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
					   struct cgroup_subsys_state *parent)
{
	struct cgroup_subsys_state *next;

	cgroup_assert_mutex_or_rcu_locked();

	/*
	 * @pos could already have been unlinked from the sibling list.
	 * Once a cgroup is removed, its ->sibling.next is no longer
	 * updated when its next sibling changes.  CSS_RELEASED is set when
	 * @pos is taken off list, at which time its next pointer is valid,
	 * and, as releases are serialized, the one pointed to by the next
	 * pointer is guaranteed to not have started release yet.  This
	 * implies that if we observe !CSS_RELEASED on @pos in this RCU
	 * critical section, the one pointed to by its next pointer is
	 * guaranteed to not have finished its RCU grace period even if we
	 * have dropped rcu_read_lock() in-between iterations.
	 *
	 * If @pos has CSS_RELEASED set, its next pointer can't be
	 * dereferenced; however, as each css is given a monotonically
	 * increasing unique serial number and always appended to the
	 * sibling list, the next one can be found by walking the parent's
	 * children until the first css with higher serial number than
	 * @pos's.  While this path can be slower, it happens iff iteration
	 * races against release and the race window is very small.
	 */
	if (!pos) {
		next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling);
	} else if (likely(!(pos->flags & CSS_RELEASED))) {
		next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling);
	} else {
		list_for_each_entry_rcu(next, &parent->children, sibling,
					lockdep_is_held(&cgroup_mutex))
			if (next->serial_nr > pos->serial_nr)
				break;
	}

	/*
	 * @next, if not pointing to the head, can be dereferenced and is
	 * the next sibling.
	 */
	if (&next->sibling != &parent->children)
		return next;
	return NULL;
}
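
/**
 * css_next_descendant_pre - find the next descendant for pre-order walk
 * @pos: the current position (%NULL to initiate traversal)
 * @root: css whose descendants to walk
 *
 * To be visited are the descendants of @root.  Returns @root for the
 * first iteration and %NULL after the traversal is complete.
 *
 * While this function requires cgroup_mutex or RCU read locking, it
 * doesn't require the whole traversal to be contained in a single
 * critical section.  This function will return the correct next descendant
 * as long as both @pos and @root are accessible and @pos is a descendant
 * of @root.
 */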
struct cgroup_subsys_state *
css_next_descendant_pre(struct cgroup_subsys_state *pos,
			struct cgroup_subsys_state *root)
{
	struct cgroup_subsys_state *next;

	cgroup_assert_mutex_or_rcu_locked();

	/* if first iteration, visit @root */
	if (!pos)
		return root;

	/* visit the first child if exists */
	next = css_next_child(NULL, pos);
	if (next)
		return next;

	/* no child, visit my or the closest ancestor's next sibling */
	while (pos != root) {
		next = css_next_child(pos, pos->parent);
		if (next)
			return next;
		pos = pos->parent;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(css_next_descendant_pre);
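
/**
 * css_rightmost_descendant - return the rightmost descendant of a css
 * @pos: css of interest
 *
 * Return the rightmost descendant of @pos.  If there's no descendant, @pos
 * is returned.  This can be used during pre-order traversal to skip the
 * subtree of @pos.
 *
 * While this function requires cgroup_mutex or RCU read locking, it
 * doesn't require the whole traversal to be contained in a single critical
 * section.
 */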
struct cgroup_subsys_state *
css_rightmost_descendant(struct cgroup_subsys_state *pos)
{
	struct cgroup_subsys_state *last, *tmp;

	cgroup_assert_mutex_or_rcu_locked();

	do {
		last = pos;
		/* ->prev isn't RCU safe, walk ->next till the end */
		pos = NULL;
		css_for_each_child(tmp, last)
			pos = tmp;
	} while (pos);

	return last;
}

static struct cgroup_subsys_state *
css_leftmost_descendant(struct cgroup_subsys_state *pos)
{
	struct cgroup_subsys_state *last;

	do {
		last = pos;
		pos = css_next_child(NULL, pos);
	} while (pos);

	return last;
}
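
/**
 * css_next_descendant_post - find the next descendant for post-order walk
 * @pos: the current position (%NULL to initiate traversal)
 * @root: css whose descendants to walk
 *
 * To be visited are the descendants of @root.  Returns @root for the
 * last iteration and %NULL after the traversal is complete.
 *
 * While this function requires cgroup_mutex or RCU read locking, it
 * doesn't require the whole traversal to be contained in a single
 * critical section.  This function will return the correct next descendant
 * as long as both @pos and @root are accessible and @pos is a descendant
 * of @root.
 */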
struct cgroup_subsys_state *
css_next_descendant_post(struct cgroup_subsys_state *pos,
			 struct cgroup_subsys_state *root)
{
	struct cgroup_subsys_state *next;

	cgroup_assert_mutex_or_rcu_locked();

	/* if first iteration, visit leftmost descendant which may be @root */
	if (!pos)
		return css_leftmost_descendant(root);

	/* if we visited @root, we're done */
	if (pos == root)
		return NULL;

	/* if there's an unvisited sibling, visit its leftmost descendant */
	next = css_next_child(pos, pos->parent);
	if (next)
		return css_leftmost_descendant(next);

	/* no sibling left, visit parent */
	return pos->parent;
}
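
/**
 * css_has_online_children - does a css have online children
 * @css: the target css
 *
 * Returns %true if @css has any online children; otherwise, %false.  This
 * function can be called from any context but the caller is responsible
 * for synchronizing against on/offlining as necessary.
 */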
bool css_has_online_children(struct cgroup_subsys_state *css)
{
	struct cgroup_subsys_state *child;
	bool ret = false;

	rcu_read_lock();
	css_for_each_child(child, css) {
		if (child->flags & CSS_ONLINE) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();
	return ret;
}

static struct css_set *css_task_iter_next_css_set(struct css_task_iter *it)
{
	struct list_head *l;
	struct cgrp_cset_link *link;
	struct css_set *cset;

	lockdep_assert_held(&css_set_lock);

	/* find the next threaded cset */
	if (it->tcset_pos) {
		l = it->tcset_pos->next;

		if (l != it->tcset_head) {
			it->tcset_pos = l;
			return container_of(l, struct css_set,
					    threaded_csets_node);
		}

		it->tcset_pos = NULL;
	}

	/* find the next cset */
	l = it->cset_pos;
	l = l->next;
	if (l == it->cset_head) {
		it->cset_pos = NULL;
		return NULL;
	}

	if (it->ss) {
		cset = container_of(l, struct css_set, e_cset_node[it->ss->id]);
	} else {
		link = list_entry(l, struct cgrp_cset_link, cset_link);
		cset = link->cset;
	}

	it->cset_pos = l;

	/* initialize threaded css_set walking */
	if (it->flags & CSS_TASK_ITER_THREADED) {
		if (it->cur_dcset)
			put_css_set_locked(it->cur_dcset);
		it->cur_dcset = cset;
		get_css_set(cset);

		it->tcset_head = &cset->threaded_csets;
		it->tcset_pos = &cset->threaded_csets;
	}

	return cset;
}
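
/**
 * css_task_iter_advance_css_set - advance a task iterator to the next css_set
 * @it: the iterator to advance
 *
 * Advance @it to the next css_set to walk.
 */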
static void css_task_iter_advance_css_set(struct css_task_iter *it)
{
	struct css_set *cset;

	lockdep_assert_held(&css_set_lock);

	/* advance to the next css_set which has a non-empty task list */
	while ((cset = css_task_iter_next_css_set(it))) {
		if (!list_empty(&cset->tasks)) {
			it->cur_tasks_head = &cset->tasks;
			break;
		} else if (!list_empty(&cset->mg_tasks)) {
			it->cur_tasks_head = &cset->mg_tasks;
			break;
		} else if (!list_empty(&cset->dying_tasks)) {
			it->cur_tasks_head = &cset->dying_tasks;
			break;
		}
	}
	if (!cset) {
		it->task_pos = NULL;
		return;
	}
	it->task_pos = it->cur_tasks_head->next;

	/*
	 * We don't keep css_sets locked across iteration steps and thus
	 * need to take steps to ensure that iteration can be resumed after
	 * the lock is re-acquired.  Iteration is performed at two levels -
	 * css_sets and tasks in them.
	 *
	 * Once created, a css_set never leaves its cgroup lists, so a
	 * pinned css_set is guaranteed to stay put and we can resume
	 * iteration afterwards.
	 *
	 * Tasks may leave @cset across iteration steps.  This is resolved
	 * by registering each iterator with the css_set currently being
	 * walked and making css_set_move_task() advance iterators whose
	 * next task is leaving.
	 */
	if (it->cur_cset) {
		list_del(&it->iters_node);
		put_css_set_locked(it->cur_cset);
	}
	get_css_set(cset);
	it->cur_cset = cset;
	list_add(&it->iters_node, &cset->task_iters);
}

static void css_task_iter_skip(struct css_task_iter *it,
			       struct task_struct *task)
{
	lockdep_assert_held(&css_set_lock);

	if (it->task_pos == &task->cg_list) {
		it->task_pos = it->task_pos->next;
		it->flags |= CSS_TASK_ITER_SKIPPED;
	}
}

static void css_task_iter_advance(struct css_task_iter *it)
{
	struct task_struct *task;

	lockdep_assert_held(&css_set_lock);
repeat:
	if (it->task_pos) {
