1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23#include <linux/cpu.h>
24#include <linux/cpumask.h>
25#include <linux/cpuset.h>
26#include <linux/err.h>
27#include <linux/errno.h>
28#include <linux/file.h>
29#include <linux/fs.h>
30#include <linux/init.h>
31#include <linux/interrupt.h>
32#include <linux/kernel.h>
33#include <linux/kmod.h>
34#include <linux/list.h>
35#include <linux/mempolicy.h>
36#include <linux/mm.h>
37#include <linux/module.h>
38#include <linux/mount.h>
39#include <linux/namei.h>
40#include <linux/pagemap.h>
41#include <linux/proc_fs.h>
42#include <linux/rcupdate.h>
43#include <linux/sched.h>
44#include <linux/seq_file.h>
45#include <linux/security.h>
46#include <linux/slab.h>
47#include <linux/spinlock.h>
48#include <linux/stat.h>
49#include <linux/string.h>
50#include <linux/time.h>
51#include <linux/backing-dev.h>
52#include <linux/sort.h>
53
54#include <asm/uaccess.h>
55#include <asm/atomic.h>
56#include <linux/mutex.h>
57#include <linux/kfifo.h>
58#include <linux/workqueue.h>
59#include <linux/cgroup.h>
60
61
62
63
64
65
/*
 * Number of cpusets currently allocated.  cpuset_init() sets it to 1 for
 * top_cpuset, cpuset_create() increments it and cpuset_destroy()
 * decrements it; rebuild_sched_domains() uses it to size its work arrays.
 */
int number_of_cpusets __read_mostly;

/*
 * Forward declarations: cpuset_subsys receives its initializer further
 * down in this file, struct cpuset is defined just below.
 */
struct cgroup_subsys cpuset_subsys;
struct cpuset;
71
72
73
/*
 * Event-frequency meter: a decaying event rate maintained by
 * fmeter_update(), fed by fmeter_markevent() and read by fmeter_getrate().
 */
struct fmeter {
	int cnt;		/* events (each counted as FM_SCALE) not yet folded into val */
	int val;		/* decayed rate as of 'time' */
	time_t time;		/* get_seconds() value when val was last recomputed */
	spinlock_t lock;	/* taken by fmeter_markevent() and fmeter_getrate() */
};
80
/*
 * Per-cgroup cpuset state.  cgroup_cs() and task_cs() recover this structure
 * from the embedded cgroup_subsys_state via container_of().
 */
struct cpuset {
	struct cgroup_subsys_state css;	/* embedded cgroup state */

	unsigned long flags;		/* bit numbers come from cpuset_flagbits_t */
	cpumask_t cpus_allowed;		/* CPUs configured for this cpuset */
	nodemask_t mems_allowed;	/* memory nodes configured for this cpuset */

	struct cpuset *parent;		/* parent cpuset; set in cpuset_create() */

	/*
	 * Stamped from the global cpuset_mems_generation counter whenever
	 * mems_allowed or a memory-spread flag changes; compared against
	 * task->cpuset_mems_generation in cpuset_update_task_memory_state().
	 */
	int mems_generation;

	struct fmeter fmeter;		/* backs the "memory_pressure" file */

	/* partition number; scratch state for rebuild_sched_domains() */
	int pn;

	/* backs "sched_relax_domain_level"; initialised to -1 */
	int relax_domain_level;

	/* queue linkage used by scan_for_empty_cpusets() */
	struct list_head stack_list;
};
107
108
109static inline struct cpuset *cgroup_cs(struct cgroup *cont)
110{
111 return container_of(cgroup_subsys_state(cont, cpuset_subsys_id),
112 struct cpuset, css);
113}
114
115
116static inline struct cpuset *task_cs(struct task_struct *task)
117{
118 return container_of(task_subsys_state(task, cpuset_subsys_id),
119 struct cpuset, css);
120}
/*
 * A cgroup_scanner extended with a destination cgroup.
 * move_member_tasks_to_cpuset() fills one in and cpuset_do_move_task()
 * recovers it from the embedded scanner via container_of().
 */
struct cpuset_hotplug_scanner {
	struct cgroup_scanner scan;	/* must stay first: also passed as a plain scanner */
	struct cgroup *to;		/* cgroup that scanned tasks are attached to */
};
125
126
/* Bit numbers within cpuset.flags; tested by the is_*() helpers below. */
typedef enum {
	CS_CPU_EXCLUSIVE,	/* see is_cpu_exclusive() */
	CS_MEM_EXCLUSIVE,	/* see is_mem_exclusive() */
	CS_MEM_HARDWALL,	/* see is_mem_hardwall() */
	CS_MEMORY_MIGRATE,	/* see is_memory_migrate() */
	CS_SCHED_LOAD_BALANCE,	/* see is_sched_load_balance() */
	CS_SPREAD_PAGE,		/* see is_spread_page() */
	CS_SPREAD_SLAB,		/* see is_spread_slab() */
} cpuset_flagbits_t;
136
137
138static inline int is_cpu_exclusive(const struct cpuset *cs)
139{
140 return test_bit(CS_CPU_EXCLUSIVE, &cs->flags);
141}
142
143static inline int is_mem_exclusive(const struct cpuset *cs)
144{
145 return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
146}
147
148static inline int is_mem_hardwall(const struct cpuset *cs)
149{
150 return test_bit(CS_MEM_HARDWALL, &cs->flags);
151}
152
153static inline int is_sched_load_balance(const struct cpuset *cs)
154{
155 return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
156}
157
158static inline int is_memory_migrate(const struct cpuset *cs)
159{
160 return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
161}
162
163static inline int is_spread_page(const struct cpuset *cs)
164{
165 return test_bit(CS_SPREAD_PAGE, &cs->flags);
166}
167
168static inline int is_spread_slab(const struct cpuset *cs)
169{
170 return test_bit(CS_SPREAD_SLAB, &cs->flags);
171}
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
/*
 * Global generation counter.  Each time a cpuset's memory placement or
 * memory-spread flags change, the cpuset's mems_generation is stamped with
 * the current value and the counter is post-incremented.
 */
static int cpuset_mems_generation;
193
/*
 * The root cpuset.  It starts out CPU- and memory-exclusive with every CPU
 * and node allowed; cpuset_init() later sets CS_SCHED_LOAD_BALANCE and
 * fills in the remaining fields.
 */
static struct cpuset top_cpuset = {
	.flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
	.cpus_allowed = CPU_MASK_ALL,
	.mems_allowed = NODE_MASK_ALL,
};
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
/*
 * Mutex held in this file while a cpuset's cpus_allowed, mems_allowed or
 * flags are committed (update_cpumask(), update_nodemask(), update_flag()),
 * while they are snapshotted for reading (cpuset_sprintf_*()), and around
 * the guarantee_online_*() calls that derive a task's effective masks.
 */
static DEFINE_MUTEX(callback_mutex);
243
244
245
246
247static int cpuset_get_sb(struct file_system_type *fs_type,
248 int flags, const char *unused_dev_name,
249 void *data, struct vfsmount *mnt)
250{
251 struct file_system_type *cgroup_fs = get_fs_type("cgroup");
252 int ret = -ENODEV;
253 if (cgroup_fs) {
254 char mountopts[] =
255 "cpuset,noprefix,"
256 "release_agent=/sbin/cpuset_release_agent";
257 ret = cgroup_fs->get_sb(cgroup_fs, flags,
258 unused_dev_name, mountopts, mnt);
259 put_filesystem(cgroup_fs);
260 }
261 return ret;
262}
263
/*
 * The "cpuset" filesystem type, registered by cpuset_init(); mounting it
 * is redirected to the cgroup filesystem by cpuset_get_sb().
 */
static struct file_system_type cpuset_fs_type = {
	.name = "cpuset",
	.get_sb = cpuset_get_sb,
};
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask)
284{
285 while (cs && !cpus_intersects(cs->cpus_allowed, cpu_online_map))
286 cs = cs->parent;
287 if (cs)
288 cpus_and(*pmask, cs->cpus_allowed, cpu_online_map);
289 else
290 *pmask = cpu_online_map;
291 BUG_ON(!cpus_intersects(*pmask, cpu_online_map));
292}
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
308{
309 while (cs && !nodes_intersects(cs->mems_allowed,
310 node_states[N_HIGH_MEMORY]))
311 cs = cs->parent;
312 if (cs)
313 nodes_and(*pmask, cs->mems_allowed,
314 node_states[N_HIGH_MEMORY]);
315 else
316 *pmask = node_states[N_HIGH_MEMORY];
317 BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
318}
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361void cpuset_update_task_memory_state(void)
362{
363 int my_cpusets_mem_gen;
364 struct task_struct *tsk = current;
365 struct cpuset *cs;
366
367 if (task_cs(tsk) == &top_cpuset) {
368
369 my_cpusets_mem_gen = top_cpuset.mems_generation;
370 } else {
371 rcu_read_lock();
372 my_cpusets_mem_gen = task_cs(current)->mems_generation;
373 rcu_read_unlock();
374 }
375
376 if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
377 mutex_lock(&callback_mutex);
378 task_lock(tsk);
379 cs = task_cs(tsk);
380 guarantee_online_mems(cs, &tsk->mems_allowed);
381 tsk->cpuset_mems_generation = cs->mems_generation;
382 if (is_spread_page(cs))
383 tsk->flags |= PF_SPREAD_PAGE;
384 else
385 tsk->flags &= ~PF_SPREAD_PAGE;
386 if (is_spread_slab(cs))
387 tsk->flags |= PF_SPREAD_SLAB;
388 else
389 tsk->flags &= ~PF_SPREAD_SLAB;
390 task_unlock(tsk);
391 mutex_unlock(&callback_mutex);
392 mpol_rebind_task(tsk, &tsk->mems_allowed);
393 }
394}
395
396
397
398
399
400
401
402
403
404static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
405{
406 return cpus_subset(p->cpus_allowed, q->cpus_allowed) &&
407 nodes_subset(p->mems_allowed, q->mems_allowed) &&
408 is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
409 is_mem_exclusive(p) <= is_mem_exclusive(q);
410}
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
433{
434 struct cgroup *cont;
435 struct cpuset *c, *par;
436
437
438 list_for_each_entry(cont, &cur->css.cgroup->children, sibling) {
439 if (!is_cpuset_subset(cgroup_cs(cont), trial))
440 return -EBUSY;
441 }
442
443
444 if (cur == &top_cpuset)
445 return 0;
446
447 par = cur->parent;
448
449
450 if (!is_cpuset_subset(trial, par))
451 return -EACCES;
452
453
454
455
456
457 list_for_each_entry(cont, &par->css.cgroup->children, sibling) {
458 c = cgroup_cs(cont);
459 if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
460 c != cur &&
461 cpus_intersects(trial->cpus_allowed, c->cpus_allowed))
462 return -EINVAL;
463 if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
464 c != cur &&
465 nodes_intersects(trial->mems_allowed, c->mems_allowed))
466 return -EINVAL;
467 }
468
469
470 if (cgroup_task_count(cur->css.cgroup)) {
471 if (cpus_empty(trial->cpus_allowed) ||
472 nodes_empty(trial->mems_allowed)) {
473 return -ENOSPC;
474 }
475 }
476
477 return 0;
478}
479
480
481
482
483
484
485static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
486{
487 return cpus_intersects(a->cpus_allowed, b->cpus_allowed);
488}
489
490static void
491update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
492{
493 if (!dattr)
494 return;
495 if (dattr->relax_domain_level < c->relax_domain_level)
496 dattr->relax_domain_level = c->relax_domain_level;
497 return;
498}
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567static void rebuild_sched_domains(void)
568{
569 struct kfifo *q;
570 struct cpuset *cp;
571 struct cpuset **csa;
572 int csn;
573 int i, j, k;
574 cpumask_t *doms;
575 struct sched_domain_attr *dattr;
576 int ndoms;
577 int nslot;
578
579 q = NULL;
580 csa = NULL;
581 doms = NULL;
582 dattr = NULL;
583
584
585 if (is_sched_load_balance(&top_cpuset)) {
586 ndoms = 1;
587 doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
588 if (!doms)
589 goto rebuild;
590 dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
591 if (dattr) {
592 *dattr = SD_ATTR_INIT;
593 update_domain_attr(dattr, &top_cpuset);
594 }
595 *doms = top_cpuset.cpus_allowed;
596 goto rebuild;
597 }
598
599 q = kfifo_alloc(number_of_cpusets * sizeof(cp), GFP_KERNEL, NULL);
600 if (IS_ERR(q))
601 goto done;
602 csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL);
603 if (!csa)
604 goto done;
605 csn = 0;
606
607 cp = &top_cpuset;
608 __kfifo_put(q, (void *)&cp, sizeof(cp));
609 while (__kfifo_get(q, (void *)&cp, sizeof(cp))) {
610 struct cgroup *cont;
611 struct cpuset *child;
612 if (is_sched_load_balance(cp))
613 csa[csn++] = cp;
614 list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
615 child = cgroup_cs(cont);
616 __kfifo_put(q, (void *)&child, sizeof(cp));
617 }
618 }
619
620 for (i = 0; i < csn; i++)
621 csa[i]->pn = i;
622 ndoms = csn;
623
624restart:
625
626 for (i = 0; i < csn; i++) {
627 struct cpuset *a = csa[i];
628 int apn = a->pn;
629
630 for (j = 0; j < csn; j++) {
631 struct cpuset *b = csa[j];
632 int bpn = b->pn;
633
634 if (apn != bpn && cpusets_overlap(a, b)) {
635 for (k = 0; k < csn; k++) {
636 struct cpuset *c = csa[k];
637
638 if (c->pn == bpn)
639 c->pn = apn;
640 }
641 ndoms--;
642 goto restart;
643 }
644 }
645 }
646
647
648 doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
649 if (!doms)
650 goto rebuild;
651 dattr = kmalloc(ndoms * sizeof(struct sched_domain_attr), GFP_KERNEL);
652
653 for (nslot = 0, i = 0; i < csn; i++) {
654 struct cpuset *a = csa[i];
655 int apn = a->pn;
656
657 if (apn >= 0) {
658 cpumask_t *dp = doms + nslot;
659
660 if (nslot == ndoms) {
661 static int warnings = 10;
662 if (warnings) {
663 printk(KERN_WARNING
664 "rebuild_sched_domains confused:"
665 " nslot %d, ndoms %d, csn %d, i %d,"
666 " apn %d\n",
667 nslot, ndoms, csn, i, apn);
668 warnings--;
669 }
670 continue;
671 }
672
673 cpus_clear(*dp);
674 if (dattr)
675 *(dattr + nslot) = SD_ATTR_INIT;
676 for (j = i; j < csn; j++) {
677 struct cpuset *b = csa[j];
678
679 if (apn == b->pn) {
680 cpus_or(*dp, *dp, b->cpus_allowed);
681 b->pn = -1;
682 if (dattr)
683 update_domain_attr(dattr
684 + nslot, b);
685 }
686 }
687 nslot++;
688 }
689 }
690 BUG_ON(nslot != ndoms);
691
692rebuild:
693
694 get_online_cpus();
695 partition_sched_domains(ndoms, doms, dattr);
696 put_online_cpus();
697
698done:
699 if (q && !IS_ERR(q))
700 kfifo_free(q);
701 kfree(csa);
702
703
704}
705
706static inline int started_after_time(struct task_struct *t1,
707 struct timespec *time,
708 struct task_struct *t2)
709{
710 int start_diff = timespec_compare(&t1->start_time, time);
711 if (start_diff > 0) {
712 return 1;
713 } else if (start_diff < 0) {
714 return 0;
715 } else {
716
717
718
719
720
721
722
723
724
725 return t1 > t2;
726 }
727}
728
729static inline int started_after(void *p1, void *p2)
730{
731 struct task_struct *t1 = p1;
732 struct task_struct *t2 = p2;
733 return started_after_time(t1, &t2->start_time, t2);
734}
735
736
737
738
739
740
741
742
743
744
745
746static int cpuset_test_cpumask(struct task_struct *tsk,
747 struct cgroup_scanner *scan)
748{
749 return !cpus_equal(tsk->cpus_allowed,
750 (cgroup_cs(scan->cg))->cpus_allowed);
751}
752
753
754
755
756
757
758
759
760
761
762
763
764static void cpuset_change_cpumask(struct task_struct *tsk,
765 struct cgroup_scanner *scan)
766{
767 set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed));
768}
769
770
771
772
773
774
775static int update_cpumask(struct cpuset *cs, char *buf)
776{
777 struct cpuset trialcs;
778 struct cgroup_scanner scan;
779 struct ptr_heap heap;
780 int retval;
781 int is_load_balanced;
782
783
784 if (cs == &top_cpuset)
785 return -EACCES;
786
787 trialcs = *cs;
788
789
790
791
792
793
794
795 buf = strstrip(buf);
796 if (!*buf) {
797 cpus_clear(trialcs.cpus_allowed);
798 } else {
799 retval = cpulist_parse(buf, trialcs.cpus_allowed);
800 if (retval < 0)
801 return retval;
802
803 if (!cpus_subset(trialcs.cpus_allowed, cpu_online_map))
804 return -EINVAL;
805 }
806 retval = validate_change(cs, &trialcs);
807 if (retval < 0)
808 return retval;
809
810
811 if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
812 return 0;
813
814 retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after);
815 if (retval)
816 return retval;
817
818 is_load_balanced = is_sched_load_balance(&trialcs);
819
820 mutex_lock(&callback_mutex);
821 cs->cpus_allowed = trialcs.cpus_allowed;
822 mutex_unlock(&callback_mutex);
823
824
825
826
827
828 scan.cg = cs->css.cgroup;
829 scan.test_task = cpuset_test_cpumask;
830 scan.process_task = cpuset_change_cpumask;
831 scan.heap = &heap;
832 cgroup_scan_tasks(&scan);
833 heap_free(&heap);
834
835 if (is_load_balanced)
836 rebuild_sched_domains();
837 return 0;
838}
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
872 const nodemask_t *to)
873{
874 struct task_struct *tsk = current;
875
876 cpuset_update_task_memory_state();
877
878 mutex_lock(&callback_mutex);
879 tsk->mems_allowed = *to;
880 mutex_unlock(&callback_mutex);
881
882 do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
883
884 mutex_lock(&callback_mutex);
885 guarantee_online_mems(task_cs(tsk),&tsk->mems_allowed);
886 mutex_unlock(&callback_mutex);
887}
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
/*
 * update_nodemask() points this at the cpuset whose member mms it is
 * rebinding and sets it back to NULL when that pass completes;
 * current_cpuset_is_being_rebound() compares it with the caller's cpuset.
 */
static void *cpuset_being_rebound;
904
905static int update_nodemask(struct cpuset *cs, char *buf)
906{
907 struct cpuset trialcs;
908 nodemask_t oldmem;
909 struct task_struct *p;
910 struct mm_struct **mmarray;
911 int i, n, ntasks;
912 int migrate;
913 int fudge;
914 int retval;
915 struct cgroup_iter it;
916
917
918
919
920
921 if (cs == &top_cpuset)
922 return -EACCES;
923
924 trialcs = *cs;
925
926
927
928
929
930
931
932 buf = strstrip(buf);
933 if (!*buf) {
934 nodes_clear(trialcs.mems_allowed);
935 } else {
936 retval = nodelist_parse(buf, trialcs.mems_allowed);
937 if (retval < 0)
938 goto done;
939
940 if (!nodes_subset(trialcs.mems_allowed,
941 node_states[N_HIGH_MEMORY]))
942 return -EINVAL;
943 }
944 oldmem = cs->mems_allowed;
945 if (nodes_equal(oldmem, trialcs.mems_allowed)) {
946 retval = 0;
947 goto done;
948 }
949 retval = validate_change(cs, &trialcs);
950 if (retval < 0)
951 goto done;
952
953 mutex_lock(&callback_mutex);
954 cs->mems_allowed = trialcs.mems_allowed;
955 cs->mems_generation = cpuset_mems_generation++;
956 mutex_unlock(&callback_mutex);
957
958 cpuset_being_rebound = cs;
959
960 fudge = 10;
961 fudge += cpus_weight(cs->cpus_allowed);
962 retval = -ENOMEM;
963
964
965
966
967
968
969
970
971 while (1) {
972 ntasks = cgroup_task_count(cs->css.cgroup);
973 ntasks += fudge;
974 mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
975 if (!mmarray)
976 goto done;
977 read_lock(&tasklist_lock);
978 if (cgroup_task_count(cs->css.cgroup) <= ntasks)
979 break;
980 read_unlock(&tasklist_lock);
981 kfree(mmarray);
982 }
983
984 n = 0;
985
986
987 cgroup_iter_start(cs->css.cgroup, &it);
988 while ((p = cgroup_iter_next(cs->css.cgroup, &it))) {
989 struct mm_struct *mm;
990
991 if (n >= ntasks) {
992 printk(KERN_WARNING
993 "Cpuset mempolicy rebind incomplete.\n");
994 break;
995 }
996 mm = get_task_mm(p);
997 if (!mm)
998 continue;
999 mmarray[n++] = mm;
1000 }
1001 cgroup_iter_end(cs->css.cgroup, &it);
1002 read_unlock(&tasklist_lock);
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017 migrate = is_memory_migrate(cs);
1018 for (i = 0; i < n; i++) {
1019 struct mm_struct *mm = mmarray[i];
1020
1021 mpol_rebind_mm(mm, &cs->mems_allowed);
1022 if (migrate)
1023 cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed);
1024 mmput(mm);
1025 }
1026
1027
1028 kfree(mmarray);
1029 cpuset_being_rebound = NULL;
1030 retval = 0;
1031done:
1032 return retval;
1033}
1034
1035int current_cpuset_is_being_rebound(void)
1036{
1037 return task_cs(current) == cpuset_being_rebound;
1038}
1039
1040static int update_relax_domain_level(struct cpuset *cs, s64 val)
1041{
1042 if (val < -1 || val >= SD_LV_MAX)
1043 return -EINVAL;
1044
1045 if (val != cs->relax_domain_level) {
1046 cs->relax_domain_level = val;
1047 rebuild_sched_domains();
1048 }
1049
1050 return 0;
1051}
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
1063 int turning_on)
1064{
1065 struct cpuset trialcs;
1066 int err;
1067 int cpus_nonempty, balance_flag_changed;
1068
1069 trialcs = *cs;
1070 if (turning_on)
1071 set_bit(bit, &trialcs.flags);
1072 else
1073 clear_bit(bit, &trialcs.flags);
1074
1075 err = validate_change(cs, &trialcs);
1076 if (err < 0)
1077 return err;
1078
1079 cpus_nonempty = !cpus_empty(trialcs.cpus_allowed);
1080 balance_flag_changed = (is_sched_load_balance(cs) !=
1081 is_sched_load_balance(&trialcs));
1082
1083 mutex_lock(&callback_mutex);
1084 cs->flags = trialcs.flags;
1085 mutex_unlock(&callback_mutex);
1086
1087 if (cpus_nonempty && balance_flag_changed)
1088 rebuild_sched_domains();
1089
1090 return 0;
1091}
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
/* Tuning constants for the fmeter frequency meter (see fmeter_update()). */
#define FM_COEF 933		/* per-tick decay factor, in 1/FM_SCALE units (0.933^10 is about 0.5) */
#define FM_MAXTICKS ((time_t)99)	/* cap on decay steps applied in one update */
#define FM_MAXCNT 1000000	/* cap on fmeter.cnt, applied in fmeter_markevent() */
#define FM_SCALE 1000		/* fixed-point scale: one event adds FM_SCALE to cnt */
1142
1143
1144static void fmeter_init(struct fmeter *fmp)
1145{
1146 fmp->cnt = 0;
1147 fmp->val = 0;
1148 fmp->time = 0;
1149 spin_lock_init(&fmp->lock);
1150}
1151
1152
1153static void fmeter_update(struct fmeter *fmp)
1154{
1155 time_t now = get_seconds();
1156 time_t ticks = now - fmp->time;
1157
1158 if (ticks == 0)
1159 return;
1160
1161 ticks = min(FM_MAXTICKS, ticks);
1162 while (ticks-- > 0)
1163 fmp->val = (FM_COEF * fmp->val) / FM_SCALE;
1164 fmp->time = now;
1165
1166 fmp->val += ((FM_SCALE - FM_COEF) * fmp->cnt) / FM_SCALE;
1167 fmp->cnt = 0;
1168}
1169
1170
1171static void fmeter_markevent(struct fmeter *fmp)
1172{
1173 spin_lock(&fmp->lock);
1174 fmeter_update(fmp);
1175 fmp->cnt = min(FM_MAXCNT, fmp->cnt + FM_SCALE);
1176 spin_unlock(&fmp->lock);
1177}
1178
1179
1180static int fmeter_getrate(struct fmeter *fmp)
1181{
1182 int val;
1183
1184 spin_lock(&fmp->lock);
1185 fmeter_update(fmp);
1186 val = fmp->val;
1187 spin_unlock(&fmp->lock);
1188 return val;
1189}
1190
1191
1192static int cpuset_can_attach(struct cgroup_subsys *ss,
1193 struct cgroup *cont, struct task_struct *tsk)
1194{
1195 struct cpuset *cs = cgroup_cs(cont);
1196
1197 if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
1198 return -ENOSPC;
1199
1200 return security_task_setscheduler(tsk, 0, NULL);
1201}
1202
1203static void cpuset_attach(struct cgroup_subsys *ss,
1204 struct cgroup *cont, struct cgroup *oldcont,
1205 struct task_struct *tsk)
1206{
1207 cpumask_t cpus;
1208 nodemask_t from, to;
1209 struct mm_struct *mm;
1210 struct cpuset *cs = cgroup_cs(cont);
1211 struct cpuset *oldcs = cgroup_cs(oldcont);
1212
1213 mutex_lock(&callback_mutex);
1214 guarantee_online_cpus(cs, &cpus);
1215 set_cpus_allowed_ptr(tsk, &cpus);
1216 mutex_unlock(&callback_mutex);
1217
1218 from = oldcs->mems_allowed;
1219 to = cs->mems_allowed;
1220 mm = get_task_mm(tsk);
1221 if (mm) {
1222 mpol_rebind_mm(mm, &to);
1223 if (is_memory_migrate(cs))
1224 cpuset_migrate_mm(mm, &from, &to);
1225 mmput(mm);
1226 }
1227
1228}
1229
1230
1231
/*
 * Identifies which cpuset control file a cftype represents; stored in
 * cftype.private and switched on by the read/write handlers below.
 */
typedef enum {
	FILE_MEMORY_MIGRATE,		/* "memory_migrate" */
	FILE_CPULIST,			/* "cpus" */
	FILE_MEMLIST,			/* "mems" */
	FILE_CPU_EXCLUSIVE,		/* "cpu_exclusive" */
	FILE_MEM_EXCLUSIVE,		/* "mem_exclusive" */
	FILE_MEM_HARDWALL,		/* "mem_hardwall" */
	FILE_SCHED_LOAD_BALANCE,	/* "sched_load_balance" */
	FILE_SCHED_RELAX_DOMAIN_LEVEL,	/* "sched_relax_domain_level" */
	FILE_MEMORY_PRESSURE_ENABLED,	/* "memory_pressure_enabled" */
	FILE_MEMORY_PRESSURE,		/* "memory_pressure" */
	FILE_SPREAD_PAGE,		/* "memory_spread_page" */
	FILE_SPREAD_SLAB,		/* "memory_spread_slab" */
} cpuset_filetype_t;
1246
1247static ssize_t cpuset_common_file_write(struct cgroup *cont,
1248 struct cftype *cft,
1249 struct file *file,
1250 const char __user *userbuf,
1251 size_t nbytes, loff_t *unused_ppos)
1252{
1253 struct cpuset *cs = cgroup_cs(cont);
1254 cpuset_filetype_t type = cft->private;
1255 char *buffer;
1256 int retval = 0;
1257
1258
1259 if (nbytes > 100U + 6 * max(NR_CPUS, MAX_NUMNODES))
1260 return -E2BIG;
1261
1262
1263 buffer = kmalloc(nbytes + 1, GFP_KERNEL);
1264 if (!buffer)
1265 return -ENOMEM;
1266
1267 if (copy_from_user(buffer, userbuf, nbytes)) {
1268 retval = -EFAULT;
1269 goto out1;
1270 }
1271 buffer[nbytes] = 0;
1272
1273 cgroup_lock();
1274
1275 if (cgroup_is_removed(cont)) {
1276 retval = -ENODEV;
1277 goto out2;
1278 }
1279
1280 switch (type) {
1281 case FILE_CPULIST:
1282 retval = update_cpumask(cs, buffer);
1283 break;
1284 case FILE_MEMLIST:
1285 retval = update_nodemask(cs, buffer);
1286 break;
1287 default:
1288 retval = -EINVAL;
1289 goto out2;
1290 }
1291
1292 if (retval == 0)
1293 retval = nbytes;
1294out2:
1295 cgroup_unlock();
1296out1:
1297 kfree(buffer);
1298 return retval;
1299}
1300
1301static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
1302{
1303 int retval = 0;
1304 struct cpuset *cs = cgroup_cs(cgrp);
1305 cpuset_filetype_t type = cft->private;
1306
1307 cgroup_lock();
1308
1309 if (cgroup_is_removed(cgrp)) {
1310 cgroup_unlock();
1311 return -ENODEV;
1312 }
1313
1314 switch (type) {
1315 case FILE_CPU_EXCLUSIVE:
1316 retval = update_flag(CS_CPU_EXCLUSIVE, cs, val);
1317 break;
1318 case FILE_MEM_EXCLUSIVE:
1319 retval = update_flag(CS_MEM_EXCLUSIVE, cs, val);
1320 break;
1321 case FILE_MEM_HARDWALL:
1322 retval = update_flag(CS_MEM_HARDWALL, cs, val);
1323 break;
1324 case FILE_SCHED_LOAD_BALANCE:
1325 retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val);
1326 break;
1327 case FILE_MEMORY_MIGRATE:
1328 retval = update_flag(CS_MEMORY_MIGRATE, cs, val);
1329 break;
1330 case FILE_MEMORY_PRESSURE_ENABLED:
1331 cpuset_memory_pressure_enabled = !!val;
1332 break;
1333 case FILE_MEMORY_PRESSURE:
1334 retval = -EACCES;
1335 break;
1336 case FILE_SPREAD_PAGE:
1337 retval = update_flag(CS_SPREAD_PAGE, cs, val);
1338 cs->mems_generation = cpuset_mems_generation++;
1339 break;
1340 case FILE_SPREAD_SLAB:
1341 retval = update_flag(CS_SPREAD_SLAB, cs, val);
1342 cs->mems_generation = cpuset_mems_generation++;
1343 break;
1344 default:
1345 retval = -EINVAL;
1346 break;
1347 }
1348 cgroup_unlock();
1349 return retval;
1350}
1351
1352static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
1353{
1354 int retval = 0;
1355 struct cpuset *cs = cgroup_cs(cgrp);
1356 cpuset_filetype_t type = cft->private;
1357
1358 cgroup_lock();
1359
1360 if (cgroup_is_removed(cgrp)) {
1361 cgroup_unlock();
1362 return -ENODEV;
1363 }
1364 switch (type) {
1365 case FILE_SCHED_RELAX_DOMAIN_LEVEL:
1366 retval = update_relax_domain_level(cs, val);
1367 break;
1368 default:
1369 retval = -EINVAL;
1370 break;
1371 }
1372 cgroup_unlock();
1373 return retval;
1374}
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
1389{
1390 cpumask_t mask;
1391
1392 mutex_lock(&callback_mutex);
1393 mask = cs->cpus_allowed;
1394 mutex_unlock(&callback_mutex);
1395
1396 return cpulist_scnprintf(page, PAGE_SIZE, mask);
1397}
1398
1399static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
1400{
1401 nodemask_t mask;
1402
1403 mutex_lock(&callback_mutex);
1404 mask = cs->mems_allowed;
1405 mutex_unlock(&callback_mutex);
1406
1407 return nodelist_scnprintf(page, PAGE_SIZE, mask);
1408}
1409
1410static ssize_t cpuset_common_file_read(struct cgroup *cont,
1411 struct cftype *cft,
1412 struct file *file,
1413 char __user *buf,
1414 size_t nbytes, loff_t *ppos)
1415{
1416 struct cpuset *cs = cgroup_cs(cont);
1417 cpuset_filetype_t type = cft->private;
1418 char *page;
1419 ssize_t retval = 0;
1420 char *s;
1421
1422 if (!(page = (char *)__get_free_page(GFP_TEMPORARY)))
1423 return -ENOMEM;
1424
1425 s = page;
1426
1427 switch (type) {
1428 case FILE_CPULIST:
1429 s += cpuset_sprintf_cpulist(s, cs);
1430 break;
1431 case FILE_MEMLIST:
1432 s += cpuset_sprintf_memlist(s, cs);
1433 break;
1434 default:
1435 retval = -EINVAL;
1436 goto out;
1437 }
1438 *s++ = '\n';
1439
1440 retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
1441out:
1442 free_page((unsigned long)page);
1443 return retval;
1444}
1445
1446static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft)
1447{
1448 struct cpuset *cs = cgroup_cs(cont);
1449 cpuset_filetype_t type = cft->private;
1450 switch (type) {
1451 case FILE_CPU_EXCLUSIVE:
1452 return is_cpu_exclusive(cs);
1453 case FILE_MEM_EXCLUSIVE:
1454 return is_mem_exclusive(cs);
1455 case FILE_MEM_HARDWALL:
1456 return is_mem_hardwall(cs);
1457 case FILE_SCHED_LOAD_BALANCE:
1458 return is_sched_load_balance(cs);
1459 case FILE_MEMORY_MIGRATE:
1460 return is_memory_migrate(cs);
1461 case FILE_MEMORY_PRESSURE_ENABLED:
1462 return cpuset_memory_pressure_enabled;
1463 case FILE_MEMORY_PRESSURE:
1464 return fmeter_getrate(&cs->fmeter);
1465 case FILE_SPREAD_PAGE:
1466 return is_spread_page(cs);
1467 case FILE_SPREAD_SLAB:
1468 return is_spread_slab(cs);
1469 default:
1470 BUG();
1471 }
1472}
1473
1474static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft)
1475{
1476 struct cpuset *cs = cgroup_cs(cont);
1477 cpuset_filetype_t type = cft->private;
1478 switch (type) {
1479 case FILE_SCHED_RELAX_DOMAIN_LEVEL:
1480 return cs->relax_domain_level;
1481 default:
1482 BUG();
1483 }
1484}
1485
1486
1487
1488
1489
1490
/*
 * Control files added to every cpuset directory by cpuset_populate().
 * .private carries the cpuset_filetype_t that the shared handlers switch on.
 */
static struct cftype files[] = {
	/* list-valued files, handled as strings */
	{
		.name = "cpus",
		.read = cpuset_common_file_read,
		.write = cpuset_common_file_write,
		.private = FILE_CPULIST,
	},

	{
		.name = "mems",
		.read = cpuset_common_file_read,
		.write = cpuset_common_file_write,
		.private = FILE_MEMLIST,
	},

	/* flag files, handled as unsigned integers */
	{
		.name = "cpu_exclusive",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_CPU_EXCLUSIVE,
	},

	{
		.name = "mem_exclusive",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_MEM_EXCLUSIVE,
	},

	{
		.name = "mem_hardwall",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_MEM_HARDWALL,
	},

	{
		.name = "sched_load_balance",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_SCHED_LOAD_BALANCE,
	},

	/* the only signed file: -1 is a valid value */
	{
		.name = "sched_relax_domain_level",
		.read_s64 = cpuset_read_s64,
		.write_s64 = cpuset_write_s64,
		.private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
	},

	{
		.name = "memory_migrate",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_MEMORY_MIGRATE,
	},

	/* writes to this file are rejected with -EACCES by cpuset_write_u64() */
	{
		.name = "memory_pressure",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_MEMORY_PRESSURE,
	},

	{
		.name = "memory_spread_page",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_SPREAD_PAGE,
	},

	{
		.name = "memory_spread_slab",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_SPREAD_SLAB,
	},
};
1569
/*
 * Control file for the global memory-pressure switch; cpuset_populate()
 * adds it only to a cgroup that has no parent.
 */
static struct cftype cft_memory_pressure_enabled = {
	.name = "memory_pressure_enabled",
	.read_u64 = cpuset_read_u64,
	.write_u64 = cpuset_write_u64,
	.private = FILE_MEMORY_PRESSURE_ENABLED,
};
1576
1577static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
1578{
1579 int err;
1580
1581 err = cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
1582 if (err)
1583 return err;
1584
1585 if (!cont->parent)
1586 err = cgroup_add_file(cont, ss,
1587 &cft_memory_pressure_enabled);
1588 return err;
1589}
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608static void cpuset_post_clone(struct cgroup_subsys *ss,
1609 struct cgroup *cgroup)
1610{
1611 struct cgroup *parent, *child;
1612 struct cpuset *cs, *parent_cs;
1613
1614 parent = cgroup->parent;
1615 list_for_each_entry(child, &parent->children, sibling) {
1616 cs = cgroup_cs(child);
1617 if (is_mem_exclusive(cs) || is_cpu_exclusive(cs))
1618 return;
1619 }
1620 cs = cgroup_cs(cgroup);
1621 parent_cs = cgroup_cs(parent);
1622
1623 cs->mems_allowed = parent_cs->mems_allowed;
1624 cs->cpus_allowed = parent_cs->cpus_allowed;
1625 return;
1626}
1627
1628
1629
1630
1631
1632
1633
1634static struct cgroup_subsys_state *cpuset_create(
1635 struct cgroup_subsys *ss,
1636 struct cgroup *cont)
1637{
1638 struct cpuset *cs;
1639 struct cpuset *parent;
1640
1641 if (!cont->parent) {
1642
1643 top_cpuset.mems_generation = cpuset_mems_generation++;
1644 return &top_cpuset.css;
1645 }
1646 parent = cgroup_cs(cont->parent);
1647 cs = kmalloc(sizeof(*cs), GFP_KERNEL);
1648 if (!cs)
1649 return ERR_PTR(-ENOMEM);
1650
1651 cpuset_update_task_memory_state();
1652 cs->flags = 0;
1653 if (is_spread_page(parent))
1654 set_bit(CS_SPREAD_PAGE, &cs->flags);
1655 if (is_spread_slab(parent))
1656 set_bit(CS_SPREAD_SLAB, &cs->flags);
1657 set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
1658 cpus_clear(cs->cpus_allowed);
1659 nodes_clear(cs->mems_allowed);
1660 cs->mems_generation = cpuset_mems_generation++;
1661 fmeter_init(&cs->fmeter);
1662 cs->relax_domain_level = -1;
1663
1664 cs->parent = parent;
1665 number_of_cpusets++;
1666 return &cs->css ;
1667}
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
1682{
1683 struct cpuset *cs = cgroup_cs(cont);
1684
1685 cpuset_update_task_memory_state();
1686
1687 if (is_sched_load_balance(cs))
1688 update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
1689
1690 number_of_cpusets--;
1691 kfree(cs);
1692}
1693
/* cgroup subsystem descriptor wiring the cpuset callbacks defined above. */
struct cgroup_subsys cpuset_subsys = {
	.name = "cpuset",
	.create = cpuset_create,
	.destroy = cpuset_destroy,
	.can_attach = cpuset_can_attach,
	.attach = cpuset_attach,
	.populate = cpuset_populate,
	.post_clone = cpuset_post_clone,
	.subsys_id = cpuset_subsys_id,
	.early_init = 1,
};
1705
1706
1707
1708
1709
1710
1711
1712int __init cpuset_init_early(void)
1713{
1714 top_cpuset.mems_generation = cpuset_mems_generation++;
1715 return 0;
1716}
1717
1718
1719
1720
1721
1722
1723
1724
1725int __init cpuset_init(void)
1726{
1727 int err = 0;
1728
1729 cpus_setall(top_cpuset.cpus_allowed);
1730 nodes_setall(top_cpuset.mems_allowed);
1731
1732 fmeter_init(&top_cpuset.fmeter);
1733 top_cpuset.mems_generation = cpuset_mems_generation++;
1734 set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
1735 top_cpuset.relax_domain_level = -1;
1736
1737 err = register_filesystem(&cpuset_fs_type);
1738 if (err < 0)
1739 return err;
1740
1741 number_of_cpusets = 1;
1742 return 0;
1743}
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753static void cpuset_do_move_task(struct task_struct *tsk,
1754 struct cgroup_scanner *scan)
1755{
1756 struct cpuset_hotplug_scanner *chsp;
1757
1758 chsp = container_of(scan, struct cpuset_hotplug_scanner, scan);
1759 cgroup_attach_task(chsp->to, tsk);
1760}
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to)
1774{
1775 struct cpuset_hotplug_scanner scan;
1776
1777 scan.scan.cg = from->css.cgroup;
1778 scan.scan.test_task = NULL;
1779 scan.scan.process_task = cpuset_do_move_task;
1780 scan.scan.heap = NULL;
1781 scan.to = to->css.cgroup;
1782
1783 if (cgroup_scan_tasks((struct cgroup_scanner *)&scan))
1784 printk(KERN_ERR "move_member_tasks_to_cpuset: "
1785 "cgroup_scan_tasks failed\n");
1786}
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
1799{
1800 struct cpuset *parent;
1801
1802
1803
1804
1805
1806
1807 if (list_empty(&cs->css.cgroup->css_sets))
1808 return;
1809
1810
1811
1812
1813
1814 parent = cs->parent;
1815 while (cpus_empty(parent->cpus_allowed) ||
1816 nodes_empty(parent->mems_allowed))
1817 parent = parent->parent;
1818
1819 move_member_tasks_to_cpuset(cs, parent);
1820}
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837static void scan_for_empty_cpusets(const struct cpuset *root)
1838{
1839 struct cpuset *cp;
1840 struct cpuset *child;
1841 struct list_head queue;
1842 struct cgroup *cont;
1843
1844 INIT_LIST_HEAD(&queue);
1845
1846 list_add_tail((struct list_head *)&root->stack_list, &queue);
1847
1848 while (!list_empty(&queue)) {
1849 cp = container_of(queue.next, struct cpuset, stack_list);
1850 list_del(queue.next);
1851 list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
1852 child = cgroup_cs(cont);
1853 list_add_tail(&child->stack_list, &queue);
1854 }
1855 cont = cp->css.cgroup;
1856
1857
1858 if (cpus_subset(cp->cpus_allowed, cpu_online_map) &&
1859 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
1860 continue;
1861
1862
1863 mutex_lock(&callback_mutex);
1864 cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map);
1865 nodes_and(cp->mems_allowed, cp->mems_allowed,
1866 node_states[N_HIGH_MEMORY]);
1867 mutex_unlock(&callback_mutex);
1868
1869
1870 if (cpus_empty(cp->cpus_allowed) ||
1871 nodes_empty(cp->mems_allowed))
1872 remove_tasks_in_empty_cpuset(cp);
1873 }
1874}
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887static void common_cpu_mem_hotplug_unplug(int rebuild_sd)
1888{
1889 cgroup_lock();
1890
1891 top_cpuset.cpus_allowed = cpu_online_map;
1892 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
1893 scan_for_empty_cpusets(&top_cpuset);
1894
1895
1896
1897
1898
1899 if (rebuild_sd)
1900 rebuild_sched_domains();
1901
1902 cgroup_unlock();
1903}
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915static int cpuset_handle_cpuhp(struct notifier_block *unused_nb,
1916 unsigned long phase, void *unused_cpu)
1917{
1918 switch (phase) {
1919 case CPU_UP_CANCELED:
1920 case CPU_UP_CANCELED_FROZEN:
1921 case CPU_DOWN_FAILED:
1922 case CPU_DOWN_FAILED_FROZEN:
1923 case CPU_ONLINE:
1924 case CPU_ONLINE_FROZEN:
1925 case CPU_DEAD:
1926 case CPU_DEAD_FROZEN:
1927 common_cpu_mem_hotplug_unplug(1);
1928 break;
1929 default:
1930 return NOTIFY_DONE;
1931 }
1932
1933 return NOTIFY_OK;
1934}
1935
1936#ifdef CONFIG_MEMORY_HOTPLUG
1937
1938
1939
1940
1941
1942
1943
/*
 * cpuset_track_online_nodes - resync cpusets with the current memory nodes.
 *
 * Runs the common hotplug resync without rebuilding scheduler domains.
 * Only built when CONFIG_MEMORY_HOTPLUG is enabled.
 */
void cpuset_track_online_nodes(void)
{
	const int rebuild_sched_domains_too = 0;

	common_cpu_mem_hotplug_unplug(rebuild_sched_domains_too);
}
1948#endif
1949
1950
1951
1952
1953
1954
1955
1956void __init cpuset_init_smp(void)
1957{
1958 top_cpuset.cpus_allowed = cpu_online_map;
1959 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
1960
1961 hotcpu_notifier(cpuset_handle_cpuhp, 0);
1962}
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask)
1977{
1978 mutex_lock(&callback_mutex);
1979 cpuset_cpus_allowed_locked(tsk, pmask);
1980 mutex_unlock(&callback_mutex);
1981}
1982
1983
1984
1985
1986
1987void cpuset_cpus_allowed_locked(struct task_struct *tsk, cpumask_t *pmask)
1988{
1989 task_lock(tsk);
1990 guarantee_online_cpus(task_cs(tsk), pmask);
1991 task_unlock(tsk);
1992}
1993
1994void cpuset_init_current_mems_allowed(void)
1995{
1996 nodes_setall(current->mems_allowed);
1997}
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
2010{
2011 nodemask_t mask;
2012
2013 mutex_lock(&callback_mutex);
2014 task_lock(tsk);
2015 guarantee_online_mems(task_cs(tsk), &mask);
2016 task_unlock(tsk);
2017 mutex_unlock(&callback_mutex);
2018
2019 return mask;
2020}
2021
2022
2023
2024
2025
2026
2027
2028int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
2029{
2030 return nodes_intersects(*nodemask, current->mems_allowed);
2031}
2032
2033
2034
2035
2036
2037
2038
2039static const struct cpuset *nearest_hardwall_ancestor(const struct cpuset *cs)
2040{
2041 while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && cs->parent)
2042 cs = cs->parent;
2043 return cs;
2044}
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
2111{
2112 int node;
2113 const struct cpuset *cs;
2114 int allowed;
2115
2116 if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
2117 return 1;
2118 node = zone_to_nid(z);
2119 might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
2120 if (node_isset(node, current->mems_allowed))
2121 return 1;
2122
2123
2124
2125
2126 if (unlikely(test_thread_flag(TIF_MEMDIE)))
2127 return 1;
2128 if (gfp_mask & __GFP_HARDWALL)
2129 return 0;
2130
2131 if (current->flags & PF_EXITING)
2132 return 1;
2133
2134
2135 mutex_lock(&callback_mutex);
2136
2137 task_lock(current);
2138 cs = nearest_hardwall_ancestor(task_cs(current));
2139 task_unlock(current);
2140
2141 allowed = node_isset(node, cs->mems_allowed);
2142 mutex_unlock(&callback_mutex);
2143 return allowed;
2144}
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
2171{
2172 int node;
2173
2174 if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
2175 return 1;
2176 node = zone_to_nid(z);
2177 if (node_isset(node, current->mems_allowed))
2178 return 1;
2179
2180
2181
2182
2183 if (unlikely(test_thread_flag(TIF_MEMDIE)))
2184 return 1;
2185 return 0;
2186}
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199void cpuset_lock(void)
2200{
2201 mutex_lock(&callback_mutex);
2202}
2203
2204
2205
2206
2207
2208
2209
2210void cpuset_unlock(void)
2211{
2212 mutex_unlock(&callback_mutex);
2213}
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241int cpuset_mem_spread_node(void)
2242{
2243 int node;
2244
2245 node = next_node(current->cpuset_mem_spread_rotor, current->mems_allowed);
2246 if (node == MAX_NUMNODES)
2247 node = first_node(current->mems_allowed);
2248 current->cpuset_mem_spread_rotor = node;
2249 return node;
2250}
2251EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
2265 const struct task_struct *tsk2)
2266{
2267 return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
2268}
2269
2270
2271
2272
2273
2274
2275
2276int cpuset_memory_pressure_enabled __read_mostly;
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296void __cpuset_memory_pressure_bump(void)
2297{
2298 task_lock(current);
2299 fmeter_markevent(&task_cs(current)->fmeter);
2300 task_unlock(current);
2301}
2302
2303#ifdef CONFIG_PROC_PID_CPUSET
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313static int proc_cpuset_show(struct seq_file *m, void *unused_v)
2314{
2315 struct pid *pid;
2316 struct task_struct *tsk;
2317 char *buf;
2318 struct cgroup_subsys_state *css;
2319 int retval;
2320
2321 retval = -ENOMEM;
2322 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
2323 if (!buf)
2324 goto out;
2325
2326 retval = -ESRCH;
2327 pid = m->private;
2328 tsk = get_pid_task(pid, PIDTYPE_PID);
2329 if (!tsk)
2330 goto out_free;
2331
2332 retval = -EINVAL;
2333 cgroup_lock();
2334 css = task_subsys_state(tsk, cpuset_subsys_id);
2335 retval = cgroup_path(css->cgroup, buf, PAGE_SIZE);
2336 if (retval < 0)
2337 goto out_unlock;
2338 seq_puts(m, buf);
2339 seq_putc(m, '\n');
2340out_unlock:
2341 cgroup_unlock();
2342 put_task_struct(tsk);
2343out_free:
2344 kfree(buf);
2345out:
2346 return retval;
2347}
2348
2349static int cpuset_open(struct inode *inode, struct file *file)
2350{
2351 struct pid *pid = PROC_I(inode)->pid;
2352 return single_open(file, proc_cpuset_show, pid);
2353}
2354
2355const struct file_operations proc_cpuset_operations = {
2356 .open = cpuset_open,
2357 .read = seq_read,
2358 .llseek = seq_lseek,
2359 .release = single_release,
2360};
2361#endif
2362
2363
2364void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
2365{
2366 seq_printf(m, "Cpus_allowed:\t");
2367 m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count,
2368 task->cpus_allowed);
2369 seq_printf(m, "\n");
2370 seq_printf(m, "Cpus_allowed_list:\t");
2371 m->count += cpulist_scnprintf(m->buf + m->count, m->size - m->count,
2372 task->cpus_allowed);
2373 seq_printf(m, "\n");
2374 seq_printf(m, "Mems_allowed:\t");
2375 m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count,
2376 task->mems_allowed);
2377 seq_printf(m, "\n");
2378 seq_printf(m, "Mems_allowed_list:\t");
2379 m->count += nodelist_scnprintf(m->buf + m->count, m->size - m->count,
2380 task->mems_allowed);
2381 seq_printf(m, "\n");
2382}
2383