1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23#include <linux/cpu.h>
24#include <linux/cpumask.h>
25#include <linux/cpuset.h>
26#include <linux/err.h>
27#include <linux/errno.h>
28#include <linux/file.h>
29#include <linux/fs.h>
30#include <linux/init.h>
31#include <linux/interrupt.h>
32#include <linux/kernel.h>
33#include <linux/kmod.h>
34#include <linux/list.h>
35#include <linux/mempolicy.h>
36#include <linux/mm.h>
37#include <linux/module.h>
38#include <linux/mount.h>
39#include <linux/namei.h>
40#include <linux/pagemap.h>
41#include <linux/proc_fs.h>
42#include <linux/rcupdate.h>
43#include <linux/sched.h>
44#include <linux/seq_file.h>
45#include <linux/security.h>
46#include <linux/slab.h>
47#include <linux/spinlock.h>
48#include <linux/stat.h>
49#include <linux/string.h>
50#include <linux/time.h>
51#include <linux/backing-dev.h>
52#include <linux/sort.h>
53
54#include <asm/uaccess.h>
55#include <asm/atomic.h>
56#include <linux/mutex.h>
57#include <linux/kfifo.h>
58#include <linux/workqueue.h>
59#include <linux/cgroup.h>
60
61
62
63
64
65
/*
 * Number of cpusets: set to 1 by cpuset_init(), incremented in
 * cpuset_create() and decremented in cpuset_destroy().  It also sizes
 * the scratch buffers allocated in rebuild_sched_domains().
 */
66int number_of_cpusets __read_mostly;
67
68
/* Forward declarations; both are defined later in this file. */
69struct cgroup_subsys cpuset_subsys;
70struct cpuset;
71
72
73
/*
 * Frequency meter state, maintained by the fmeter_*() helpers in this
 * file.  fmeter_markevent() and fmeter_getrate() update the fields with
 * 'lock' held.
 */
74struct fmeter {
75 int cnt; /* events (scaled by FM_SCALE) since the last fold; capped at FM_MAXCNT */
76 int val; /* decayed rate, the value returned by fmeter_getrate() */
77 time_t time; /* get_seconds() value at the last fold performed by fmeter_update() */
78 spinlock_t lock; /* taken by fmeter_markevent() and fmeter_getrate() */
79};
80
/*
 * Per-cgroup cpuset state.  The cgroup subsystem state is embedded as
 * 'css'; cgroup_cs() and task_cs() recover the cpuset from it with
 * container_of().
 */
81struct cpuset {
82 struct cgroup_subsys_state css;
83
84 unsigned long flags; /* bit field indexed by cpuset_flagbits_t */
85 cpumask_t cpus_allowed; /* CPUs configured for this cpuset */
86 nodemask_t mems_allowed; /* memory nodes configured for this cpuset */
87
88 struct cpuset *parent; /* parent cpuset, assigned in cpuset_create() */
89
90
91
92
93
/*
 * Stamped from cpuset_mems_generation whenever mems_allowed or a spread
 * flag is written; compared against the task's cached copy in
 * cpuset_update_task_memory_state().
 */
94 int mems_generation;
95
96 struct fmeter fmeter; /* read through the memory_pressure file */
97
98
/* Scratch partition number used only inside rebuild_sched_domains(). */
99 int pn;
100
101
/* Queue linkage used by scan_for_empty_cpusets(). */
102 struct list_head stack_list;
103};
104
105
106static inline struct cpuset *cgroup_cs(struct cgroup *cont)
107{
108 return container_of(cgroup_subsys_state(cont, cpuset_subsys_id),
109 struct cpuset, css);
110}
111
112
113static inline struct cpuset *task_cs(struct task_struct *task)
114{
115 return container_of(task_subsys_state(task, cpuset_subsys_id),
116 struct cpuset, css);
117}
/*
 * Scanner wrapper used by move_member_tasks_to_cpuset(): 'scan' is handed
 * to cgroup_scan_tasks() and cpuset_do_move_task() recovers this wrapper
 * from it with container_of() to find the destination cgroup.
 */
118struct cpuset_hotplug_scanner {
119 struct cgroup_scanner scan;
120 struct cgroup *to; /* cgroup the scanned tasks are attached to */
121};
122
123
/* Bit positions within cpuset.flags (used with test_bit/set_bit/clear_bit). */
124typedef enum {
125 CS_CPU_EXCLUSIVE, /* validate_change() rejects cpus_allowed overlap with siblings */
126 CS_MEM_EXCLUSIVE, /* validate_change() rejects mems_allowed overlap with siblings */
127 CS_MEMORY_MIGRATE, /* when set, pages are migrated via cpuset_migrate_mm() */
128 CS_SCHED_LOAD_BALANCE, /* cpuset takes part in rebuild_sched_domains() */
129 CS_SPREAD_PAGE, /* mirrored into PF_SPREAD_PAGE on member tasks */
130 CS_SPREAD_SLAB, /* mirrored into PF_SPREAD_SLAB on member tasks */
131} cpuset_flagbits_t;
132
133
134static inline int is_cpu_exclusive(const struct cpuset *cs)
135{
136 return test_bit(CS_CPU_EXCLUSIVE, &cs->flags);
137}
138
139static inline int is_mem_exclusive(const struct cpuset *cs)
140{
141 return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
142}
143
144static inline int is_sched_load_balance(const struct cpuset *cs)
145{
146 return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
147}
148
149static inline int is_memory_migrate(const struct cpuset *cs)
150{
151 return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
152}
153
154static inline int is_spread_page(const struct cpuset *cs)
155{
156 return test_bit(CS_SPREAD_PAGE, &cs->flags);
157}
158
159static inline int is_spread_slab(const struct cpuset *cs)
160{
161 return test_bit(CS_SPREAD_SLAB, &cs->flags);
162}
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
/*
 * Source of generation stamps: every assignment to a cpuset's
 * mems_generation in this file takes the current value of this counter
 * and post-increments it.
 */
183static int cpuset_mems_generation;
184
/*
 * The root cpuset.  It starts out CPU- and memory-exclusive with all CPUs
 * and nodes allowed; cpuset_init_smp() and the hotplug path later narrow
 * the masks to cpu_online_map and node_states[N_HIGH_MEMORY].
 */
185static struct cpuset top_cpuset = {
186 .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
187 .cpus_allowed = CPU_MASK_ALL,
188 .mems_allowed = NODE_MASK_ALL,
189};
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
/*
 * Mutex held in this file around writes to a cpuset's cpus_allowed,
 * mems_allowed, flags and mems_generation, and around the reads that
 * snapshot those fields or derive task masks from them.
 */
233static DEFINE_MUTEX(callback_mutex);
234
235
236
237
238static int cpuset_get_sb(struct file_system_type *fs_type,
239 int flags, const char *unused_dev_name,
240 void *data, struct vfsmount *mnt)
241{
242 struct file_system_type *cgroup_fs = get_fs_type("cgroup");
243 int ret = -ENODEV;
244 if (cgroup_fs) {
245 char mountopts[] =
246 "cpuset,noprefix,"
247 "release_agent=/sbin/cpuset_release_agent";
248 ret = cgroup_fs->get_sb(cgroup_fs, flags,
249 unused_dev_name, mountopts, mnt);
250 put_filesystem(cgroup_fs);
251 }
252 return ret;
253}
254
/*
 * The "cpuset" filesystem type, registered in cpuset_init(); mounting it
 * goes through cpuset_get_sb().
 */
255static struct file_system_type cpuset_fs_type = {
256 .name = "cpuset",
257 .get_sb = cpuset_get_sb,
258};
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask)
275{
276 while (cs && !cpus_intersects(cs->cpus_allowed, cpu_online_map))
277 cs = cs->parent;
278 if (cs)
279 cpus_and(*pmask, cs->cpus_allowed, cpu_online_map);
280 else
281 *pmask = cpu_online_map;
282 BUG_ON(!cpus_intersects(*pmask, cpu_online_map));
283}
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
299{
300 while (cs && !nodes_intersects(cs->mems_allowed,
301 node_states[N_HIGH_MEMORY]))
302 cs = cs->parent;
303 if (cs)
304 nodes_and(*pmask, cs->mems_allowed,
305 node_states[N_HIGH_MEMORY]);
306 else
307 *pmask = node_states[N_HIGH_MEMORY];
308 BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
309}
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352void cpuset_update_task_memory_state(void)
353{
354 int my_cpusets_mem_gen;
355 struct task_struct *tsk = current;
356 struct cpuset *cs;
357
358 if (task_cs(tsk) == &top_cpuset) {
359
360 my_cpusets_mem_gen = top_cpuset.mems_generation;
361 } else {
362 rcu_read_lock();
363 my_cpusets_mem_gen = task_cs(current)->mems_generation;
364 rcu_read_unlock();
365 }
366
367 if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
368 mutex_lock(&callback_mutex);
369 task_lock(tsk);
370 cs = task_cs(tsk);
371 guarantee_online_mems(cs, &tsk->mems_allowed);
372 tsk->cpuset_mems_generation = cs->mems_generation;
373 if (is_spread_page(cs))
374 tsk->flags |= PF_SPREAD_PAGE;
375 else
376 tsk->flags &= ~PF_SPREAD_PAGE;
377 if (is_spread_slab(cs))
378 tsk->flags |= PF_SPREAD_SLAB;
379 else
380 tsk->flags &= ~PF_SPREAD_SLAB;
381 task_unlock(tsk);
382 mutex_unlock(&callback_mutex);
383 mpol_rebind_task(tsk, &tsk->mems_allowed);
384 }
385}
386
387
388
389
390
391
392
393
394
395static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
396{
397 return cpus_subset(p->cpus_allowed, q->cpus_allowed) &&
398 nodes_subset(p->mems_allowed, q->mems_allowed) &&
399 is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
400 is_mem_exclusive(p) <= is_mem_exclusive(q);
401}
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
424{
425 struct cgroup *cont;
426 struct cpuset *c, *par;
427
428
429 list_for_each_entry(cont, &cur->css.cgroup->children, sibling) {
430 if (!is_cpuset_subset(cgroup_cs(cont), trial))
431 return -EBUSY;
432 }
433
434
435 if (cur == &top_cpuset)
436 return 0;
437
438 par = cur->parent;
439
440
441 if (!is_cpuset_subset(trial, par))
442 return -EACCES;
443
444
445
446
447
448 list_for_each_entry(cont, &par->css.cgroup->children, sibling) {
449 c = cgroup_cs(cont);
450 if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
451 c != cur &&
452 cpus_intersects(trial->cpus_allowed, c->cpus_allowed))
453 return -EINVAL;
454 if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
455 c != cur &&
456 nodes_intersects(trial->mems_allowed, c->mems_allowed))
457 return -EINVAL;
458 }
459
460
461 if (cgroup_task_count(cur->css.cgroup)) {
462 if (cpus_empty(trial->cpus_allowed) ||
463 nodes_empty(trial->mems_allowed)) {
464 return -ENOSPC;
465 }
466 }
467
468 return 0;
469}
470
471
472
473
474
475
476static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
477{
478 return cpus_intersects(a->cpus_allowed, b->cpus_allowed);
479}
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
/*
 * Recompute the scheduler domain partition from the cpuset hierarchy and
 * pass it to partition_sched_domains().
 *
 * If top_cpuset has sched_load_balance set, the result is one domain
 * covering top_cpuset.cpus_allowed.  Otherwise every cpuset with
 * sched_load_balance set is collected breadth-first, cpusets whose
 * cpus_allowed overlap are merged into the same partition (tracked in
 * cpuset->pn), and each partition becomes one cpumask in doms[].
 *
 * If the kfifo or the csa[] array cannot be allocated the function
 * returns without calling partition_sched_domains().  If doms cannot be
 * allocated, partition_sched_domains() is still called with doms == NULL.
 */
548static void rebuild_sched_domains(void)
549{
550 struct kfifo *q; /* queue of cpusets still to be visited */
551 struct cpuset *cp; /* cpuset currently taken from q */
552 struct cpuset **csa; /* load-balanced cpusets found so far */
553 int csn; /* number of entries used in csa[] */
554 int i, j, k; /* loop indices for the partition search */
555 cpumask_t *doms; /* resulting array of domain masks */
556 int ndoms; /* number of domains in the result */
557 int nslot; /* next unused slot in doms[] */
558
559 q = NULL;
560 csa = NULL;
561 doms = NULL;
562
563
/* Shortcut: the root balances load, so one domain spans its CPUs. */
564 if (is_sched_load_balance(&top_cpuset)) {
565 ndoms = 1;
566 doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
567 if (!doms)
568 goto rebuild;
569 *doms = top_cpuset.cpus_allowed;
570 goto rebuild;
571 }
572
/* Both buffers are sized for one pointer per existing cpuset. */
573 q = kfifo_alloc(number_of_cpusets * sizeof(cp), GFP_KERNEL, NULL);
574 if (IS_ERR(q))
575 goto done;
576 csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL);
577 if (!csa)
578 goto done;
579 csn = 0;
580
/* Breadth-first walk from the root, recording load-balanced cpusets. */
581 cp = &top_cpuset;
582 __kfifo_put(q, (void *)&cp, sizeof(cp));
583 while (__kfifo_get(q, (void *)&cp, sizeof(cp))) {
584 struct cgroup *cont;
585 struct cpuset *child;
586 if (is_sched_load_balance(cp))
587 csa[csn++] = cp;
588 list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
589 child = cgroup_cs(cont);
590 __kfifo_put(q, (void *)&child, sizeof(cp));
591 }
592 }
593
/* Start with every collected cpuset in its own partition. */
594 for (i = 0; i < csn; i++)
595 csa[i]->pn = i;
596 ndoms = csn;
597
598restart:
599
/*
 * Merge step: when two cpusets in different partitions overlap, relabel
 * every member of b's partition with a's number, count one partition
 * fewer, and rescan from the beginning.
 */
600 for (i = 0; i < csn; i++) {
601 struct cpuset *a = csa[i];
602 int apn = a->pn;
603
604 for (j = 0; j < csn; j++) {
605 struct cpuset *b = csa[j];
606 int bpn = b->pn;
607
608 if (apn != bpn && cpusets_overlap(a, b)) {
609 for (k = 0; k < csn; k++) {
610 struct cpuset *c = csa[k];
611
612 if (c->pn == bpn)
613 c->pn = apn;
614 }
615 ndoms--;
616 goto restart;
617 }
618 }
619 }
620
621
/* Turn the partition labels into ndoms cpumasks. */
622 doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
623 if (!doms)
624 goto rebuild;
625
626 for (nslot = 0, i = 0; i < csn; i++) {
627 struct cpuset *a = csa[i];
628 int apn = a->pn;
629
/* pn >= 0 means this partition has not been emitted yet. */
630 if (apn >= 0) {
631 cpumask_t *dp = doms + nslot;
632
/* More partitions than counted: warn (at most 10 times) and skip. */
633 if (nslot == ndoms) {
634 static int warnings = 10;
635 if (warnings) {
636 printk(KERN_WARNING
637 "rebuild_sched_domains confused:"
638 " nslot %d, ndoms %d, csn %d, i %d,"
639 " apn %d\n",
640 nslot, ndoms, csn, i, apn);
641 warnings--;
642 }
643 continue;
644 }
645
/* OR together the CPUs of all members; mark each member as emitted. */
646 cpus_clear(*dp);
647 for (j = i; j < csn; j++) {
648 struct cpuset *b = csa[j];
649
650 if (apn == b->pn) {
651 cpus_or(*dp, *dp, b->cpus_allowed);
652 b->pn = -1;
653 }
654 }
655 nslot++;
656 }
657 }
658 BUG_ON(nslot != ndoms);
659
660rebuild:
661
/* Hand the partition to the scheduler while CPU hotplug is held off. */
662 get_online_cpus();
663 partition_sched_domains(ndoms, doms);
664 put_online_cpus();
665
666done:
667 if (q && !IS_ERR(q))
668 kfifo_free(q);
669 kfree(csa);
/*
 * NOTE(review): doms is not freed here; presumably ownership passes to
 * partition_sched_domains() -- confirm against the scheduler code.
 */
670
671}
672
673static inline int started_after_time(struct task_struct *t1,
674 struct timespec *time,
675 struct task_struct *t2)
676{
677 int start_diff = timespec_compare(&t1->start_time, time);
678 if (start_diff > 0) {
679 return 1;
680 } else if (start_diff < 0) {
681 return 0;
682 } else {
683
684
685
686
687
688
689
690
691
692 return t1 > t2;
693 }
694}
695
696static inline int started_after(void *p1, void *p2)
697{
698 struct task_struct *t1 = p1;
699 struct task_struct *t2 = p2;
700 return started_after_time(t1, &t2->start_time, t2);
701}
702
703
704
705
706
707
708
709
710
711
712
713int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan)
714{
715 return !cpus_equal(tsk->cpus_allowed,
716 (cgroup_cs(scan->cg))->cpus_allowed);
717}
718
719
720
721
722
723
724
725
726
727
728
729
730void cpuset_change_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan)
731{
732 set_cpus_allowed(tsk, (cgroup_cs(scan->cg))->cpus_allowed);
733}
734
735
736
737
738
739
740static int update_cpumask(struct cpuset *cs, char *buf)
741{
742 struct cpuset trialcs;
743 struct cgroup_scanner scan;
744 struct ptr_heap heap;
745 int retval;
746 int is_load_balanced;
747
748
749 if (cs == &top_cpuset)
750 return -EACCES;
751
752 trialcs = *cs;
753
754
755
756
757
758
759
760 buf = strstrip(buf);
761 if (!*buf) {
762 cpus_clear(trialcs.cpus_allowed);
763 } else {
764 retval = cpulist_parse(buf, trialcs.cpus_allowed);
765 if (retval < 0)
766 return retval;
767 }
768 cpus_and(trialcs.cpus_allowed, trialcs.cpus_allowed, cpu_online_map);
769 retval = validate_change(cs, &trialcs);
770 if (retval < 0)
771 return retval;
772
773
774 if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
775 return 0;
776
777 retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after);
778 if (retval)
779 return retval;
780
781 is_load_balanced = is_sched_load_balance(&trialcs);
782
783 mutex_lock(&callback_mutex);
784 cs->cpus_allowed = trialcs.cpus_allowed;
785 mutex_unlock(&callback_mutex);
786
787
788
789
790
791 scan.cg = cs->css.cgroup;
792 scan.test_task = cpuset_test_cpumask;
793 scan.process_task = cpuset_change_cpumask;
794 scan.heap = &heap;
795 cgroup_scan_tasks(&scan);
796 heap_free(&heap);
797
798 if (is_load_balanced)
799 rebuild_sched_domains();
800 return 0;
801}
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
835 const nodemask_t *to)
836{
837 struct task_struct *tsk = current;
838
839 cpuset_update_task_memory_state();
840
841 mutex_lock(&callback_mutex);
842 tsk->mems_allowed = *to;
843 mutex_unlock(&callback_mutex);
844
845 do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
846
847 mutex_lock(&callback_mutex);
848 guarantee_online_mems(task_cs(tsk),&tsk->mems_allowed);
849 mutex_unlock(&callback_mutex);
850}
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
/*
 * Cpuset whose tasks are currently being rebound by update_nodemask(),
 * or NULL; queried by current_cpuset_is_being_rebound().
 */
866static void *cpuset_being_rebound;
867
868static int update_nodemask(struct cpuset *cs, char *buf)
869{
870 struct cpuset trialcs;
871 nodemask_t oldmem;
872 struct task_struct *p;
873 struct mm_struct **mmarray;
874 int i, n, ntasks;
875 int migrate;
876 int fudge;
877 int retval;
878 struct cgroup_iter it;
879
880
881
882
883
884 if (cs == &top_cpuset)
885 return -EACCES;
886
887 trialcs = *cs;
888
889
890
891
892
893
894
895 buf = strstrip(buf);
896 if (!*buf) {
897 nodes_clear(trialcs.mems_allowed);
898 } else {
899 retval = nodelist_parse(buf, trialcs.mems_allowed);
900 if (retval < 0)
901 goto done;
902 }
903 nodes_and(trialcs.mems_allowed, trialcs.mems_allowed,
904 node_states[N_HIGH_MEMORY]);
905 oldmem = cs->mems_allowed;
906 if (nodes_equal(oldmem, trialcs.mems_allowed)) {
907 retval = 0;
908 goto done;
909 }
910 retval = validate_change(cs, &trialcs);
911 if (retval < 0)
912 goto done;
913
914 mutex_lock(&callback_mutex);
915 cs->mems_allowed = trialcs.mems_allowed;
916 cs->mems_generation = cpuset_mems_generation++;
917 mutex_unlock(&callback_mutex);
918
919 cpuset_being_rebound = cs;
920
921 fudge = 10;
922 fudge += cpus_weight(cs->cpus_allowed);
923 retval = -ENOMEM;
924
925
926
927
928
929
930
931
932 while (1) {
933 ntasks = cgroup_task_count(cs->css.cgroup);
934 ntasks += fudge;
935 mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
936 if (!mmarray)
937 goto done;
938 read_lock(&tasklist_lock);
939 if (cgroup_task_count(cs->css.cgroup) <= ntasks)
940 break;
941 read_unlock(&tasklist_lock);
942 kfree(mmarray);
943 }
944
945 n = 0;
946
947
948 cgroup_iter_start(cs->css.cgroup, &it);
949 while ((p = cgroup_iter_next(cs->css.cgroup, &it))) {
950 struct mm_struct *mm;
951
952 if (n >= ntasks) {
953 printk(KERN_WARNING
954 "Cpuset mempolicy rebind incomplete.\n");
955 break;
956 }
957 mm = get_task_mm(p);
958 if (!mm)
959 continue;
960 mmarray[n++] = mm;
961 }
962 cgroup_iter_end(cs->css.cgroup, &it);
963 read_unlock(&tasklist_lock);
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978 migrate = is_memory_migrate(cs);
979 for (i = 0; i < n; i++) {
980 struct mm_struct *mm = mmarray[i];
981
982 mpol_rebind_mm(mm, &cs->mems_allowed);
983 if (migrate)
984 cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed);
985 mmput(mm);
986 }
987
988
989 kfree(mmarray);
990 cpuset_being_rebound = NULL;
991 retval = 0;
992done:
993 return retval;
994}
995
996int current_cpuset_is_being_rebound(void)
997{
998 return task_cs(current) == cpuset_being_rebound;
999}
1000
1001
1002
1003
1004
1005static int update_memory_pressure_enabled(struct cpuset *cs, char *buf)
1006{
1007 if (simple_strtoul(buf, NULL, 10) != 0)
1008 cpuset_memory_pressure_enabled = 1;
1009 else
1010 cpuset_memory_pressure_enabled = 0;
1011 return 0;
1012}
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
1027{
1028 int turning_on;
1029 struct cpuset trialcs;
1030 int err;
1031 int cpus_nonempty, balance_flag_changed;
1032
1033 turning_on = (simple_strtoul(buf, NULL, 10) != 0);
1034
1035 trialcs = *cs;
1036 if (turning_on)
1037 set_bit(bit, &trialcs.flags);
1038 else
1039 clear_bit(bit, &trialcs.flags);
1040
1041 err = validate_change(cs, &trialcs);
1042 if (err < 0)
1043 return err;
1044
1045 cpus_nonempty = !cpus_empty(trialcs.cpus_allowed);
1046 balance_flag_changed = (is_sched_load_balance(cs) !=
1047 is_sched_load_balance(&trialcs));
1048
1049 mutex_lock(&callback_mutex);
1050 cs->flags = trialcs.flags;
1051 mutex_unlock(&callback_mutex);
1052
1053 if (cpus_nonempty && balance_flag_changed)
1054 rebuild_sched_domains();
1055
1056 return 0;
1057}
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
/*
 * Frequency meter tuning.  fmeter_update() multiplies val by
 * FM_COEF / FM_SCALE once per elapsed second; 0.933^10 is about 0.5, so
 * val roughly halves every 10 seconds without new events.
 */
1104#define FM_COEF 933 /* per-tick decay numerator (denominator is FM_SCALE) */
1105#define FM_MAXTICKS ((time_t)99) /* upper bound on decay steps applied per update */
1106#define FM_MAXCNT 1000000 /* upper bound on the pending event count */
1107#define FM_SCALE 1000 /* fixed-point scale; one event adds FM_SCALE to cnt */
1108
1109
1110static void fmeter_init(struct fmeter *fmp)
1111{
1112 fmp->cnt = 0;
1113 fmp->val = 0;
1114 fmp->time = 0;
1115 spin_lock_init(&fmp->lock);
1116}
1117
1118
1119static void fmeter_update(struct fmeter *fmp)
1120{
1121 time_t now = get_seconds();
1122 time_t ticks = now - fmp->time;
1123
1124 if (ticks == 0)
1125 return;
1126
1127 ticks = min(FM_MAXTICKS, ticks);
1128 while (ticks-- > 0)
1129 fmp->val = (FM_COEF * fmp->val) / FM_SCALE;
1130 fmp->time = now;
1131
1132 fmp->val += ((FM_SCALE - FM_COEF) * fmp->cnt) / FM_SCALE;
1133 fmp->cnt = 0;
1134}
1135
1136
1137static void fmeter_markevent(struct fmeter *fmp)
1138{
1139 spin_lock(&fmp->lock);
1140 fmeter_update(fmp);
1141 fmp->cnt = min(FM_MAXCNT, fmp->cnt + FM_SCALE);
1142 spin_unlock(&fmp->lock);
1143}
1144
1145
1146static int fmeter_getrate(struct fmeter *fmp)
1147{
1148 int val;
1149
1150 spin_lock(&fmp->lock);
1151 fmeter_update(fmp);
1152 val = fmp->val;
1153 spin_unlock(&fmp->lock);
1154 return val;
1155}
1156
1157
1158static int cpuset_can_attach(struct cgroup_subsys *ss,
1159 struct cgroup *cont, struct task_struct *tsk)
1160{
1161 struct cpuset *cs = cgroup_cs(cont);
1162
1163 if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
1164 return -ENOSPC;
1165
1166 return security_task_setscheduler(tsk, 0, NULL);
1167}
1168
1169static void cpuset_attach(struct cgroup_subsys *ss,
1170 struct cgroup *cont, struct cgroup *oldcont,
1171 struct task_struct *tsk)
1172{
1173 cpumask_t cpus;
1174 nodemask_t from, to;
1175 struct mm_struct *mm;
1176 struct cpuset *cs = cgroup_cs(cont);
1177 struct cpuset *oldcs = cgroup_cs(oldcont);
1178
1179 mutex_lock(&callback_mutex);
1180 guarantee_online_cpus(cs, &cpus);
1181 set_cpus_allowed(tsk, cpus);
1182 mutex_unlock(&callback_mutex);
1183
1184 from = oldcs->mems_allowed;
1185 to = cs->mems_allowed;
1186 mm = get_task_mm(tsk);
1187 if (mm) {
1188 mpol_rebind_mm(mm, &to);
1189 if (is_memory_migrate(cs))
1190 cpuset_migrate_mm(mm, &from, &to);
1191 mmput(mm);
1192 }
1193
1194}
1195
1196
1197
/*
 * Identifies which cpuset control file a cftype describes.  The value is
 * stored in cftype.private and dispatched on in
 * cpuset_common_file_read() and cpuset_common_file_write().
 */
1198typedef enum {
1199 FILE_MEMORY_MIGRATE,
1200 FILE_CPULIST,
1201 FILE_MEMLIST,
1202 FILE_CPU_EXCLUSIVE,
1203 FILE_MEM_EXCLUSIVE,
1204 FILE_SCHED_LOAD_BALANCE,
1205 FILE_MEMORY_PRESSURE_ENABLED,
1206 FILE_MEMORY_PRESSURE,
1207 FILE_SPREAD_PAGE,
1208 FILE_SPREAD_SLAB,
1209} cpuset_filetype_t;
1210
1211static ssize_t cpuset_common_file_write(struct cgroup *cont,
1212 struct cftype *cft,
1213 struct file *file,
1214 const char __user *userbuf,
1215 size_t nbytes, loff_t *unused_ppos)
1216{
1217 struct cpuset *cs = cgroup_cs(cont);
1218 cpuset_filetype_t type = cft->private;
1219 char *buffer;
1220 int retval = 0;
1221
1222
1223 if (nbytes > 100U + 6 * max(NR_CPUS, MAX_NUMNODES))
1224 return -E2BIG;
1225
1226
1227 if ((buffer = kmalloc(nbytes + 1, GFP_KERNEL)) == 0)
1228 return -ENOMEM;
1229
1230 if (copy_from_user(buffer, userbuf, nbytes)) {
1231 retval = -EFAULT;
1232 goto out1;
1233 }
1234 buffer[nbytes] = 0;
1235
1236 cgroup_lock();
1237
1238 if (cgroup_is_removed(cont)) {
1239 retval = -ENODEV;
1240 goto out2;
1241 }
1242
1243 switch (type) {
1244 case FILE_CPULIST:
1245 retval = update_cpumask(cs, buffer);
1246 break;
1247 case FILE_MEMLIST:
1248 retval = update_nodemask(cs, buffer);
1249 break;
1250 case FILE_CPU_EXCLUSIVE:
1251 retval = update_flag(CS_CPU_EXCLUSIVE, cs, buffer);
1252 break;
1253 case FILE_MEM_EXCLUSIVE:
1254 retval = update_flag(CS_MEM_EXCLUSIVE, cs, buffer);
1255 break;
1256 case FILE_SCHED_LOAD_BALANCE:
1257 retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer);
1258 break;
1259 case FILE_MEMORY_MIGRATE:
1260 retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer);
1261 break;
1262 case FILE_MEMORY_PRESSURE_ENABLED:
1263 retval = update_memory_pressure_enabled(cs, buffer);
1264 break;
1265 case FILE_MEMORY_PRESSURE:
1266 retval = -EACCES;
1267 break;
1268 case FILE_SPREAD_PAGE:
1269 retval = update_flag(CS_SPREAD_PAGE, cs, buffer);
1270 cs->mems_generation = cpuset_mems_generation++;
1271 break;
1272 case FILE_SPREAD_SLAB:
1273 retval = update_flag(CS_SPREAD_SLAB, cs, buffer);
1274 cs->mems_generation = cpuset_mems_generation++;
1275 break;
1276 default:
1277 retval = -EINVAL;
1278 goto out2;
1279 }
1280
1281 if (retval == 0)
1282 retval = nbytes;
1283out2:
1284 cgroup_unlock();
1285out1:
1286 kfree(buffer);
1287 return retval;
1288}
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
1303{
1304 cpumask_t mask;
1305
1306 mutex_lock(&callback_mutex);
1307 mask = cs->cpus_allowed;
1308 mutex_unlock(&callback_mutex);
1309
1310 return cpulist_scnprintf(page, PAGE_SIZE, mask);
1311}
1312
1313static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
1314{
1315 nodemask_t mask;
1316
1317 mutex_lock(&callback_mutex);
1318 mask = cs->mems_allowed;
1319 mutex_unlock(&callback_mutex);
1320
1321 return nodelist_scnprintf(page, PAGE_SIZE, mask);
1322}
1323
1324static ssize_t cpuset_common_file_read(struct cgroup *cont,
1325 struct cftype *cft,
1326 struct file *file,
1327 char __user *buf,
1328 size_t nbytes, loff_t *ppos)
1329{
1330 struct cpuset *cs = cgroup_cs(cont);
1331 cpuset_filetype_t type = cft->private;
1332 char *page;
1333 ssize_t retval = 0;
1334 char *s;
1335
1336 if (!(page = (char *)__get_free_page(GFP_TEMPORARY)))
1337 return -ENOMEM;
1338
1339 s = page;
1340
1341 switch (type) {
1342 case FILE_CPULIST:
1343 s += cpuset_sprintf_cpulist(s, cs);
1344 break;
1345 case FILE_MEMLIST:
1346 s += cpuset_sprintf_memlist(s, cs);
1347 break;
1348 case FILE_CPU_EXCLUSIVE:
1349 *s++ = is_cpu_exclusive(cs) ? '1' : '0';
1350 break;
1351 case FILE_MEM_EXCLUSIVE:
1352 *s++ = is_mem_exclusive(cs) ? '1' : '0';
1353 break;
1354 case FILE_SCHED_LOAD_BALANCE:
1355 *s++ = is_sched_load_balance(cs) ? '1' : '0';
1356 break;
1357 case FILE_MEMORY_MIGRATE:
1358 *s++ = is_memory_migrate(cs) ? '1' : '0';
1359 break;
1360 case FILE_MEMORY_PRESSURE_ENABLED:
1361 *s++ = cpuset_memory_pressure_enabled ? '1' : '0';
1362 break;
1363 case FILE_MEMORY_PRESSURE:
1364 s += sprintf(s, "%d", fmeter_getrate(&cs->fmeter));
1365 break;
1366 case FILE_SPREAD_PAGE:
1367 *s++ = is_spread_page(cs) ? '1' : '0';
1368 break;
1369 case FILE_SPREAD_SLAB:
1370 *s++ = is_spread_slab(cs) ? '1' : '0';
1371 break;
1372 default:
1373 retval = -EINVAL;
1374 goto out;
1375 }
1376 *s++ = '\n';
1377
1378 retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
1379out:
1380 free_page((unsigned long)page);
1381 return retval;
1382}
1383
1384
1385
1386
1387
1388
1389
1390
1391
/*
 * Control file descriptors.  Every file uses the common read/write
 * handlers; 'private' carries the cpuset_filetype_t the handlers dispatch
 * on.  cpuset_populate() adds these files to each cpuset cgroup
 * (memory_pressure_enabled only to a cgroup without a parent).
 */
1392static struct cftype cft_cpus = {
1393 .name = "cpus",
1394 .read = cpuset_common_file_read,
1395 .write = cpuset_common_file_write,
1396 .private = FILE_CPULIST,
1397};
1398
1399static struct cftype cft_mems = {
1400 .name = "mems",
1401 .read = cpuset_common_file_read,
1402 .write = cpuset_common_file_write,
1403 .private = FILE_MEMLIST,
1404};
1405
1406static struct cftype cft_cpu_exclusive = {
1407 .name = "cpu_exclusive",
1408 .read = cpuset_common_file_read,
1409 .write = cpuset_common_file_write,
1410 .private = FILE_CPU_EXCLUSIVE,
1411};
1412
1413static struct cftype cft_mem_exclusive = {
1414 .name = "mem_exclusive",
1415 .read = cpuset_common_file_read,
1416 .write = cpuset_common_file_write,
1417 .private = FILE_MEM_EXCLUSIVE,
1418};
1419
1420static struct cftype cft_sched_load_balance = {
1421 .name = "sched_load_balance",
1422 .read = cpuset_common_file_read,
1423 .write = cpuset_common_file_write,
1424 .private = FILE_SCHED_LOAD_BALANCE,
1425};
1426
1427static struct cftype cft_memory_migrate = {
1428 .name = "memory_migrate",
1429 .read = cpuset_common_file_read,
1430 .write = cpuset_common_file_write,
1431 .private = FILE_MEMORY_MIGRATE,
1432};
1433
/* Added only to a cgroup without a parent; see cpuset_populate(). */
1434static struct cftype cft_memory_pressure_enabled = {
1435 .name = "memory_pressure_enabled",
1436 .read = cpuset_common_file_read,
1437 .write = cpuset_common_file_write,
1438 .private = FILE_MEMORY_PRESSURE_ENABLED,
1439};
1440
/* Writes to this file are rejected with -EACCES by the write handler. */
1441static struct cftype cft_memory_pressure = {
1442 .name = "memory_pressure",
1443 .read = cpuset_common_file_read,
1444 .write = cpuset_common_file_write,
1445 .private = FILE_MEMORY_PRESSURE,
1446};
1447
1448static struct cftype cft_spread_page = {
1449 .name = "memory_spread_page",
1450 .read = cpuset_common_file_read,
1451 .write = cpuset_common_file_write,
1452 .private = FILE_SPREAD_PAGE,
1453};
1454
1455static struct cftype cft_spread_slab = {
1456 .name = "memory_spread_slab",
1457 .read = cpuset_common_file_read,
1458 .write = cpuset_common_file_write,
1459 .private = FILE_SPREAD_SLAB,
1460};
1461
1462static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
1463{
1464 int err;
1465
1466 if ((err = cgroup_add_file(cont, ss, &cft_cpus)) < 0)
1467 return err;
1468 if ((err = cgroup_add_file(cont, ss, &cft_mems)) < 0)
1469 return err;
1470 if ((err = cgroup_add_file(cont, ss, &cft_cpu_exclusive)) < 0)
1471 return err;
1472 if ((err = cgroup_add_file(cont, ss, &cft_mem_exclusive)) < 0)
1473 return err;
1474 if ((err = cgroup_add_file(cont, ss, &cft_memory_migrate)) < 0)
1475 return err;
1476 if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0)
1477 return err;
1478 if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0)
1479 return err;
1480 if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0)
1481 return err;
1482 if ((err = cgroup_add_file(cont, ss, &cft_spread_slab)) < 0)
1483 return err;
1484
1485 if (err == 0 && !cont->parent)
1486 err = cgroup_add_file(cont, ss,
1487 &cft_memory_pressure_enabled);
1488 return 0;
1489}
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508static void cpuset_post_clone(struct cgroup_subsys *ss,
1509 struct cgroup *cgroup)
1510{
1511 struct cgroup *parent, *child;
1512 struct cpuset *cs, *parent_cs;
1513
1514 parent = cgroup->parent;
1515 list_for_each_entry(child, &parent->children, sibling) {
1516 cs = cgroup_cs(child);
1517 if (is_mem_exclusive(cs) || is_cpu_exclusive(cs))
1518 return;
1519 }
1520 cs = cgroup_cs(cgroup);
1521 parent_cs = cgroup_cs(parent);
1522
1523 cs->mems_allowed = parent_cs->mems_allowed;
1524 cs->cpus_allowed = parent_cs->cpus_allowed;
1525 return;
1526}
1527
1528
1529
1530
1531
1532
1533
1534static struct cgroup_subsys_state *cpuset_create(
1535 struct cgroup_subsys *ss,
1536 struct cgroup *cont)
1537{
1538 struct cpuset *cs;
1539 struct cpuset *parent;
1540
1541 if (!cont->parent) {
1542
1543 top_cpuset.mems_generation = cpuset_mems_generation++;
1544 return &top_cpuset.css;
1545 }
1546 parent = cgroup_cs(cont->parent);
1547 cs = kmalloc(sizeof(*cs), GFP_KERNEL);
1548 if (!cs)
1549 return ERR_PTR(-ENOMEM);
1550
1551 cpuset_update_task_memory_state();
1552 cs->flags = 0;
1553 if (is_spread_page(parent))
1554 set_bit(CS_SPREAD_PAGE, &cs->flags);
1555 if (is_spread_slab(parent))
1556 set_bit(CS_SPREAD_SLAB, &cs->flags);
1557 set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
1558 cs->cpus_allowed = CPU_MASK_NONE;
1559 cs->mems_allowed = NODE_MASK_NONE;
1560 cs->mems_generation = cpuset_mems_generation++;
1561 fmeter_init(&cs->fmeter);
1562
1563 cs->parent = parent;
1564 number_of_cpusets++;
1565 return &cs->css ;
1566}
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
1581{
1582 struct cpuset *cs = cgroup_cs(cont);
1583
1584 cpuset_update_task_memory_state();
1585
1586 if (is_sched_load_balance(cs))
1587 update_flag(CS_SCHED_LOAD_BALANCE, cs, "0");
1588
1589 number_of_cpusets--;
1590 kfree(cs);
1591}
1592
/*
 * The cpuset cgroup subsystem descriptor: wires the callbacks defined in
 * this file into the cgroup core and requests early initialisation.
 */
1593struct cgroup_subsys cpuset_subsys = {
1594 .name = "cpuset",
1595 .create = cpuset_create,
1596 .destroy = cpuset_destroy,
1597 .can_attach = cpuset_can_attach,
1598 .attach = cpuset_attach,
1599 .populate = cpuset_populate,
1600 .post_clone = cpuset_post_clone,
1601 .subsys_id = cpuset_subsys_id,
1602 .early_init = 1,
1603};
1604
1605
1606
1607
1608
1609
1610
1611int __init cpuset_init_early(void)
1612{
1613 top_cpuset.mems_generation = cpuset_mems_generation++;
1614 return 0;
1615}
1616
1617
1618
1619
1620
1621
1622
1623
1624int __init cpuset_init(void)
1625{
1626 int err = 0;
1627
1628 top_cpuset.cpus_allowed = CPU_MASK_ALL;
1629 top_cpuset.mems_allowed = NODE_MASK_ALL;
1630
1631 fmeter_init(&top_cpuset.fmeter);
1632 top_cpuset.mems_generation = cpuset_mems_generation++;
1633 set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
1634
1635 err = register_filesystem(&cpuset_fs_type);
1636 if (err < 0)
1637 return err;
1638
1639 number_of_cpusets = 1;
1640 return 0;
1641}
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651void cpuset_do_move_task(struct task_struct *tsk, struct cgroup_scanner *scan)
1652{
1653 struct cpuset_hotplug_scanner *chsp;
1654
1655 chsp = container_of(scan, struct cpuset_hotplug_scanner, scan);
1656 cgroup_attach_task(chsp->to, tsk);
1657}
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to)
1671{
1672 struct cpuset_hotplug_scanner scan;
1673
1674 scan.scan.cg = from->css.cgroup;
1675 scan.scan.test_task = NULL;
1676 scan.scan.process_task = cpuset_do_move_task;
1677 scan.scan.heap = NULL;
1678 scan.to = to->css.cgroup;
1679
1680 if (cgroup_scan_tasks((struct cgroup_scanner *)&scan))
1681 printk(KERN_ERR "move_member_tasks_to_cpuset: "
1682 "cgroup_scan_tasks failed\n");
1683}
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
1696{
1697 struct cpuset *parent;
1698
1699
1700
1701
1702
1703
1704 if (list_empty(&cs->css.cgroup->css_sets))
1705 return;
1706
1707
1708
1709
1710
1711 parent = cs->parent;
1712 while (cpus_empty(parent->cpus_allowed) ||
1713 nodes_empty(parent->mems_allowed))
1714 parent = parent->parent;
1715
1716 move_member_tasks_to_cpuset(cs, parent);
1717}
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734static void scan_for_empty_cpusets(const struct cpuset *root)
1735{
1736 struct cpuset *cp;
1737 struct cpuset *child;
1738 struct list_head queue;
1739 struct cgroup *cont;
1740
1741 INIT_LIST_HEAD(&queue);
1742
1743 list_add_tail((struct list_head *)&root->stack_list, &queue);
1744
1745 while (!list_empty(&queue)) {
1746 cp = container_of(queue.next, struct cpuset, stack_list);
1747 list_del(queue.next);
1748 list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
1749 child = cgroup_cs(cont);
1750 list_add_tail(&child->stack_list, &queue);
1751 }
1752 cont = cp->css.cgroup;
1753
1754
1755 if (cpus_subset(cp->cpus_allowed, cpu_online_map) &&
1756 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
1757 continue;
1758
1759
1760 mutex_lock(&callback_mutex);
1761 cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map);
1762 nodes_and(cp->mems_allowed, cp->mems_allowed,
1763 node_states[N_HIGH_MEMORY]);
1764 mutex_unlock(&callback_mutex);
1765
1766
1767 if (cpus_empty(cp->cpus_allowed) ||
1768 nodes_empty(cp->mems_allowed))
1769 remove_tasks_in_empty_cpuset(cp);
1770 }
1771}
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784static void common_cpu_mem_hotplug_unplug(void)
1785{
1786 cgroup_lock();
1787
1788 top_cpuset.cpus_allowed = cpu_online_map;
1789 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
1790 scan_for_empty_cpusets(&top_cpuset);
1791
1792 cgroup_unlock();
1793}
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805static int cpuset_handle_cpuhp(struct notifier_block *unused_nb,
1806 unsigned long phase, void *unused_cpu)
1807{
1808 if (phase == CPU_DYING || phase == CPU_DYING_FROZEN)
1809 return NOTIFY_DONE;
1810
1811 common_cpu_mem_hotplug_unplug();
1812 return 0;
1813}
1814
1815#ifdef CONFIG_MEMORY_HOTPLUG
1816
1817
1818
1819
1820
1821
1822
/*
 * Memory hotplug entry point (only built with CONFIG_MEMORY_HOTPLUG):
 * resynchronise the cpuset hierarchy via the common hotplug handler.
 */
void cpuset_track_online_nodes(void)
{
	common_cpu_mem_hotplug_unplug();
}
1827#endif
1828
1829
1830
1831
1832
1833
1834
1835void __init cpuset_init_smp(void)
1836{
1837 top_cpuset.cpus_allowed = cpu_online_map;
1838 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
1839
1840 hotcpu_notifier(cpuset_handle_cpuhp, 0);
1841}
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854cpumask_t cpuset_cpus_allowed(struct task_struct *tsk)
1855{
1856 cpumask_t mask;
1857
1858 mutex_lock(&callback_mutex);
1859 mask = cpuset_cpus_allowed_locked(tsk);
1860 mutex_unlock(&callback_mutex);
1861
1862 return mask;
1863}
1864
1865
1866
1867
1868
1869cpumask_t cpuset_cpus_allowed_locked(struct task_struct *tsk)
1870{
1871 cpumask_t mask;
1872
1873 task_lock(tsk);
1874 guarantee_online_cpus(task_cs(tsk), &mask);
1875 task_unlock(tsk);
1876
1877 return mask;
1878}
1879
1880void cpuset_init_current_mems_allowed(void)
1881{
1882 current->mems_allowed = NODE_MASK_ALL;
1883}
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
1896{
1897 nodemask_t mask;
1898
1899 mutex_lock(&callback_mutex);
1900 task_lock(tsk);
1901 guarantee_online_mems(task_cs(tsk), &mask);
1902 task_unlock(tsk);
1903 mutex_unlock(&callback_mutex);
1904
1905 return mask;
1906}
1907
1908
1909
1910
1911
1912
1913
1914int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
1915{
1916 int i;
1917
1918 for (i = 0; zl->zones[i]; i++) {
1919 int nid = zone_to_nid(zl->zones[i]);
1920
1921 if (node_isset(nid, current->mems_allowed))
1922 return 1;
1923 }
1924 return 0;
1925}
1926
1927
1928
1929
1930
1931
1932
1933static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
1934{
1935 while (!is_mem_exclusive(cs) && cs->parent)
1936 cs = cs->parent;
1937 return cs;
1938}
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
2005{
2006 int node;
2007 const struct cpuset *cs;
2008 int allowed;
2009
2010 if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
2011 return 1;
2012 node = zone_to_nid(z);
2013 might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
2014 if (node_isset(node, current->mems_allowed))
2015 return 1;
2016
2017
2018
2019
2020 if (unlikely(test_thread_flag(TIF_MEMDIE)))
2021 return 1;
2022 if (gfp_mask & __GFP_HARDWALL)
2023 return 0;
2024
2025 if (current->flags & PF_EXITING)
2026 return 1;
2027
2028
2029 mutex_lock(&callback_mutex);
2030
2031 task_lock(current);
2032 cs = nearest_exclusive_ancestor(task_cs(current));
2033 task_unlock(current);
2034
2035 allowed = node_isset(node, cs->mems_allowed);
2036 mutex_unlock(&callback_mutex);
2037 return allowed;
2038}
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
2065{
2066 int node;
2067
2068 if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
2069 return 1;
2070 node = zone_to_nid(z);
2071 if (node_isset(node, current->mems_allowed))
2072 return 1;
2073
2074
2075
2076
2077 if (unlikely(test_thread_flag(TIF_MEMDIE)))
2078 return 1;
2079 return 0;
2080}
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093void cpuset_lock(void)
2094{
2095 mutex_lock(&callback_mutex);
2096}
2097
2098
2099
2100
2101
2102
2103
2104void cpuset_unlock(void)
2105{
2106 mutex_unlock(&callback_mutex);
2107}
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135int cpuset_mem_spread_node(void)
2136{
2137 int node;
2138
2139 node = next_node(current->cpuset_mem_spread_rotor, current->mems_allowed);
2140 if (node == MAX_NUMNODES)
2141 node = first_node(current->mems_allowed);
2142 current->cpuset_mem_spread_rotor = node;
2143 return node;
2144}
2145EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
2159 const struct task_struct *tsk2)
2160{
2161 return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
2162}
2163
2164
2165
2166
2167
2168
2169
2170int cpuset_memory_pressure_enabled __read_mostly;
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190void __cpuset_memory_pressure_bump(void)
2191{
2192 task_lock(current);
2193 fmeter_markevent(&task_cs(current)->fmeter);
2194 task_unlock(current);
2195}
2196
2197#ifdef CONFIG_PROC_PID_CPUSET
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207static int proc_cpuset_show(struct seq_file *m, void *unused_v)
2208{
2209 struct pid *pid;
2210 struct task_struct *tsk;
2211 char *buf;
2212 struct cgroup_subsys_state *css;
2213 int retval;
2214
2215 retval = -ENOMEM;
2216 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
2217 if (!buf)
2218 goto out;
2219
2220 retval = -ESRCH;
2221 pid = m->private;
2222 tsk = get_pid_task(pid, PIDTYPE_PID);
2223 if (!tsk)
2224 goto out_free;
2225
2226 retval = -EINVAL;
2227 cgroup_lock();
2228 css = task_subsys_state(tsk, cpuset_subsys_id);
2229 retval = cgroup_path(css->cgroup, buf, PAGE_SIZE);
2230 if (retval < 0)
2231 goto out_unlock;
2232 seq_puts(m, buf);
2233 seq_putc(m, '\n');
2234out_unlock:
2235 cgroup_unlock();
2236 put_task_struct(tsk);
2237out_free:
2238 kfree(buf);
2239out:
2240 return retval;
2241}
2242
2243static int cpuset_open(struct inode *inode, struct file *file)
2244{
2245 struct pid *pid = PROC_I(inode)->pid;
2246 return single_open(file, proc_cpuset_show, pid);
2247}
2248
2249const struct file_operations proc_cpuset_operations = {
2250 .open = cpuset_open,
2251 .read = seq_read,
2252 .llseek = seq_lseek,
2253 .release = single_release,
2254};
2255#endif
2256
2257
2258void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
2259{
2260 seq_printf(m, "Cpus_allowed:\t");
2261 m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count,
2262 task->cpus_allowed);
2263 seq_printf(m, "\n");
2264 seq_printf(m, "Mems_allowed:\t");
2265 m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count,
2266 task->mems_allowed);
2267 seq_printf(m, "\n");
2268}
2269