1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20#include <linux/oom.h>
21#include <linux/mm.h>
22#include <linux/err.h>
23#include <linux/gfp.h>
24#include <linux/sched.h>
25#include <linux/swap.h>
26#include <linux/timex.h>
27#include <linux/jiffies.h>
28#include <linux/cpuset.h>
29#include <linux/export.h>
30#include <linux/notifier.h>
31#include <linux/memcontrol.h>
32#include <linux/mempolicy.h>
33#include <linux/security.h>
34#include <linux/ptrace.h>
35#include <linux/freezer.h>
36
/*
 * Tunables consulted by the OOM killer in this file.
 *
 * sysctl_panic_on_oom: 0 never panics; 2 always panics on OOM; any other
 * non-zero value panics only when the allocation is unconstrained
 * (CONSTRAINT_NONE) -- see check_panic_on_oom().
 */
int sysctl_panic_on_oom;
/* When non-zero, out_of_memory() first tries to kill the allocating task. */
int sysctl_oom_kill_allocating_task;
/* When non-zero, dump_header() also prints the per-task table. */
int sysctl_oom_dump_tasks = 1;
/* Serializes testing, setting and clearing of ZONE_OOM_LOCKED on zones. */
static DEFINE_SPINLOCK(zone_scan_lock);
41
42
43
44
45
46
47
48
49
50
51void compare_swap_oom_score_adj(int old_val, int new_val)
52{
53 struct sighand_struct *sighand = current->sighand;
54
55 spin_lock_irq(&sighand->siglock);
56 if (current->signal->oom_score_adj == old_val)
57 current->signal->oom_score_adj = new_val;
58 spin_unlock_irq(&sighand->siglock);
59}
60
61
62
63
64
65
66
67
68
69int test_set_oom_score_adj(int new_val)
70{
71 struct sighand_struct *sighand = current->sighand;
72 int old_val;
73
74 spin_lock_irq(&sighand->siglock);
75 old_val = current->signal->oom_score_adj;
76 current->signal->oom_score_adj = new_val;
77 spin_unlock_irq(&sighand->siglock);
78
79 return old_val;
80}
81
82#ifdef CONFIG_NUMA
83
84
85
86
87
88
89
90
91
92static bool has_intersects_mems_allowed(struct task_struct *tsk,
93 const nodemask_t *mask)
94{
95 struct task_struct *start = tsk;
96
97 do {
98 if (mask) {
99
100
101
102
103
104
105 if (mempolicy_nodemask_intersects(tsk, mask))
106 return true;
107 } else {
108
109
110
111
112 if (cpuset_mems_allowed_intersects(current, tsk))
113 return true;
114 }
115 } while_each_thread(start, tsk);
116
117 return false;
118}
119#else
120static bool has_intersects_mems_allowed(struct task_struct *tsk,
121 const nodemask_t *mask)
122{
123 return true;
124}
125#endif
126
127
128
129
130
131
132
133struct task_struct *find_lock_task_mm(struct task_struct *p)
134{
135 struct task_struct *t = p;
136
137 do {
138 task_lock(t);
139 if (likely(t->mm))
140 return t;
141 task_unlock(t);
142 } while_each_thread(p, t);
143
144 return NULL;
145}
146
147
148static bool oom_unkillable_task(struct task_struct *p,
149 const struct mem_cgroup *mem, const nodemask_t *nodemask)
150{
151 if (is_global_init(p))
152 return true;
153 if (p->flags & PF_KTHREAD)
154 return true;
155
156
157 if (mem && !task_in_mem_cgroup(p, mem))
158 return true;
159
160
161 if (!has_intersects_mems_allowed(p, nodemask))
162 return true;
163
164 return false;
165}
166
167
168
169
170
171
172
173
174
175
176unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
177 const nodemask_t *nodemask, unsigned long totalpages)
178{
179 long points;
180
181 if (oom_unkillable_task(p, mem, nodemask))
182 return 0;
183
184 p = find_lock_task_mm(p);
185 if (!p)
186 return 0;
187
188 if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
189 task_unlock(p);
190 return 0;
191 }
192
193
194
195
196
197 if (!totalpages)
198 totalpages = 1;
199
200
201
202
203
204 points = get_mm_rss(p->mm) + p->mm->nr_ptes;
205 points += get_mm_counter(p->mm, MM_SWAPENTS);
206
207 points *= 1000;
208 points /= totalpages;
209 task_unlock(p);
210
211
212
213
214
215 if (has_capability_noaudit(p, CAP_SYS_ADMIN))
216 points -= 30;
217
218
219
220
221
222
223 points += p->signal->oom_score_adj;
224
225
226
227
228
229
230 if (points <= 0)
231 return 1;
232 return (points < 1000) ? points : 1000;
233}
234
235
236
237
238#ifdef CONFIG_NUMA
239static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
240 gfp_t gfp_mask, nodemask_t *nodemask,
241 unsigned long *totalpages)
242{
243 struct zone *zone;
244 struct zoneref *z;
245 enum zone_type high_zoneidx = gfp_zone(gfp_mask);
246 bool cpuset_limited = false;
247 int nid;
248
249
250 *totalpages = totalram_pages + total_swap_pages;
251
252 if (!zonelist)
253 return CONSTRAINT_NONE;
254
255
256
257
258
259 if (gfp_mask & __GFP_THISNODE)
260 return CONSTRAINT_NONE;
261
262
263
264
265
266
267 if (nodemask && !nodes_subset(node_states[N_HIGH_MEMORY], *nodemask)) {
268 *totalpages = total_swap_pages;
269 for_each_node_mask(nid, *nodemask)
270 *totalpages += node_spanned_pages(nid);
271 return CONSTRAINT_MEMORY_POLICY;
272 }
273
274
275 for_each_zone_zonelist_nodemask(zone, z, zonelist,
276 high_zoneidx, nodemask)
277 if (!cpuset_zone_allowed_softwall(zone, gfp_mask))
278 cpuset_limited = true;
279
280 if (cpuset_limited) {
281 *totalpages = total_swap_pages;
282 for_each_node_mask(nid, cpuset_current_mems_allowed)
283 *totalpages += node_spanned_pages(nid);
284 return CONSTRAINT_CPUSET;
285 }
286 return CONSTRAINT_NONE;
287}
288#else
289static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
290 gfp_t gfp_mask, nodemask_t *nodemask,
291 unsigned long *totalpages)
292{
293 *totalpages = totalram_pages + total_swap_pages;
294 return CONSTRAINT_NONE;
295}
296#endif
297
298
299
300
301
302
303
304static struct task_struct *select_bad_process(unsigned int *ppoints,
305 unsigned long totalpages, struct mem_cgroup *mem,
306 const nodemask_t *nodemask)
307{
308 struct task_struct *g, *p;
309 struct task_struct *chosen = NULL;
310 *ppoints = 0;
311
312 do_each_thread(g, p) {
313 unsigned int points;
314
315 if (p->exit_state)
316 continue;
317 if (oom_unkillable_task(p, mem, nodemask))
318 continue;
319
320
321
322
323
324
325
326
327
328
329 if (test_tsk_thread_flag(p, TIF_MEMDIE)) {
330 if (unlikely(frozen(p)))
331 thaw_process(p);
332 return ERR_PTR(-1UL);
333 }
334 if (!p->mm)
335 continue;
336
337 if (p->flags & PF_EXITING) {
338
339
340
341
342
343
344
345
346
347 if (p == current) {
348 chosen = p;
349 *ppoints = 1000;
350 } else {
351
352
353
354
355
356 if (!(p->group_leader->ptrace & PT_TRACE_EXIT))
357 return ERR_PTR(-1UL);
358 }
359 }
360
361 points = oom_badness(p, mem, nodemask, totalpages);
362 if (points > *ppoints) {
363 chosen = p;
364 *ppoints = points;
365 }
366 } while_each_thread(g, p);
367
368 return chosen;
369}
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384static void dump_tasks(const struct mem_cgroup *mem, const nodemask_t *nodemask)
385{
386 struct task_struct *p;
387 struct task_struct *task;
388
389 pr_info("[ pid ] uid tgid total_vm rss cpu oom_adj oom_score_adj name\n");
390 for_each_process(p) {
391 if (oom_unkillable_task(p, mem, nodemask))
392 continue;
393
394 task = find_lock_task_mm(p);
395 if (!task) {
396
397
398
399
400
401 continue;
402 }
403
404 pr_info("[%5d] %5d %5d %8lu %8lu %3u %3d %5d %s\n",
405 task->pid, task_uid(task), task->tgid,
406 task->mm->total_vm, get_mm_rss(task->mm),
407 task_cpu(task), task->signal->oom_adj,
408 task->signal->oom_score_adj, task->comm);
409 task_unlock(task);
410 }
411}
412
413static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
414 struct mem_cgroup *mem, const nodemask_t *nodemask)
415{
416 task_lock(current);
417 pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
418 "oom_adj=%d, oom_score_adj=%d\n",
419 current->comm, gfp_mask, order, current->signal->oom_adj,
420 current->signal->oom_score_adj);
421 cpuset_print_task_mems_allowed(current);
422 task_unlock(current);
423 dump_stack();
424 mem_cgroup_print_oom_info(mem, p);
425 show_mem(SHOW_MEM_FILTER_NODES);
426 if (sysctl_oom_dump_tasks)
427 dump_tasks(mem, nodemask);
428}
429
430#define K(x) ((x) << (PAGE_SHIFT-10))
431static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem)
432{
433 struct task_struct *q;
434 struct mm_struct *mm;
435
436 p = find_lock_task_mm(p);
437 if (!p)
438 return 1;
439
440
441 mm = p->mm;
442
443 pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
444 task_pid_nr(p), p->comm, K(p->mm->total_vm),
445 K(get_mm_counter(p->mm, MM_ANONPAGES)),
446 K(get_mm_counter(p->mm, MM_FILEPAGES)));
447 task_unlock(p);
448
449
450
451
452
453
454
455
456
457
458
459 for_each_process(q)
460 if (q->mm == mm && !same_thread_group(q, p) &&
461 !(q->flags & PF_KTHREAD)) {
462 if (q->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
463 continue;
464
465 task_lock(q);
466 pr_err("Kill process %d (%s) sharing same memory\n",
467 task_pid_nr(q), q->comm);
468 task_unlock(q);
469 force_sig(SIGKILL, q);
470 }
471
472 set_tsk_thread_flag(p, TIF_MEMDIE);
473 force_sig(SIGKILL, p);
474
475 return 0;
476}
477#undef K
478
479static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
480 unsigned int points, unsigned long totalpages,
481 struct mem_cgroup *mem, nodemask_t *nodemask,
482 const char *message)
483{
484 struct task_struct *victim = p;
485 struct task_struct *child;
486 struct task_struct *t = p;
487 unsigned int victim_points = 0;
488
489 if (printk_ratelimit())
490 dump_header(p, gfp_mask, order, mem, nodemask);
491
492
493
494
495
496 if (p->flags & PF_EXITING) {
497 set_tsk_thread_flag(p, TIF_MEMDIE);
498 return 0;
499 }
500
501 task_lock(p);
502 pr_err("%s: Kill process %d (%s) score %d or sacrifice child\n",
503 message, task_pid_nr(p), p->comm, points);
504 task_unlock(p);
505
506
507
508
509
510
511
512 do {
513 list_for_each_entry(child, &t->children, sibling) {
514 unsigned int child_points;
515
516 if (child->mm == p->mm)
517 continue;
518
519
520
521 child_points = oom_badness(child, mem, nodemask,
522 totalpages);
523 if (child_points > victim_points) {
524 victim = child;
525 victim_points = child_points;
526 }
527 }
528 } while_each_thread(p, t);
529
530 return oom_kill_task(victim, mem);
531}
532
533
534
535
536static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
537 int order, const nodemask_t *nodemask)
538{
539 if (likely(!sysctl_panic_on_oom))
540 return;
541 if (sysctl_panic_on_oom != 2) {
542
543
544
545
546
547 if (constraint != CONSTRAINT_NONE)
548 return;
549 }
550 read_lock(&tasklist_lock);
551 dump_header(NULL, gfp_mask, order, NULL, nodemask);
552 read_unlock(&tasklist_lock);
553 panic("Out of memory: %s panic_on_oom is enabled\n",
554 sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
555}
556
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
/**
 * mem_cgroup_out_of_memory() - OOM-kill a task inside a memory cgroup
 * @mem: memory cgroup that hit its limit
 * @gfp_mask: gfp mask of the failing charge (used for the report)
 *
 * If current already has a fatal signal pending it is marked TIF_MEMDIE
 * and nothing else is done.  Otherwise a victim is selected among the
 * tasks of @mem, scored against the cgroup's limit in pages; selection is
 * repeated for as long as oom_kill_process() reports failure.
 */
void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
{
	struct task_struct *victim;
	unsigned int points = 0;
	unsigned long limit;

	/* current is about to die anyway: mark it and skip the kill */
	if (fatal_signal_pending(current)) {
		set_thread_flag(TIF_MEMDIE);
		return;
	}

	check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, 0, NULL);
	limit = mem_cgroup_get_limit(mem) >> PAGE_SHIFT;

	read_lock(&tasklist_lock);
	do {
		victim = select_bad_process(&points, limit, mem, NULL);
		if (!victim || PTR_ERR(victim) == -1UL)
			break;
		/* a non-zero result means nothing was killed: pick again */
	} while (oom_kill_process(victim, gfp_mask, 0, points, limit, mem,
				  NULL, "Memory cgroup out of memory"));
	read_unlock(&tasklist_lock);
}
#endif
589
/*
 * Notifier chain called at the start of out_of_memory().  Callbacks receive
 * a pointer to an unsigned long "freed" counter; if it is non-zero after the
 * chain has run, out_of_memory() returns without killing anything.
 */
static BLOCKING_NOTIFIER_HEAD(oom_notify_list);

/*
 * register_oom_notifier() - add @nb to the OOM notifier chain.
 * Returns the result of blocking_notifier_chain_register().
 */
int register_oom_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(register_oom_notifier);

/*
 * unregister_oom_notifier() - remove @nb from the OOM notifier chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
int unregister_oom_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(unregister_oom_notifier);
603
604
605
606
607
608
609int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
610{
611 struct zoneref *z;
612 struct zone *zone;
613 int ret = 1;
614
615 spin_lock(&zone_scan_lock);
616 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
617 if (zone_is_oom_locked(zone)) {
618 ret = 0;
619 goto out;
620 }
621 }
622
623 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
624
625
626
627
628
629 zone_set_flag(zone, ZONE_OOM_LOCKED);
630 }
631
632out:
633 spin_unlock(&zone_scan_lock);
634 return ret;
635}
636
637
638
639
640
641
642void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
643{
644 struct zoneref *z;
645 struct zone *zone;
646
647 spin_lock(&zone_scan_lock);
648 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
649 zone_clear_flag(zone, ZONE_OOM_LOCKED);
650 }
651 spin_unlock(&zone_scan_lock);
652}
653
654
655
656
657
658
659static int try_set_system_oom(void)
660{
661 struct zone *zone;
662 int ret = 1;
663
664 spin_lock(&zone_scan_lock);
665 for_each_populated_zone(zone)
666 if (zone_is_oom_locked(zone)) {
667 ret = 0;
668 goto out;
669 }
670 for_each_populated_zone(zone)
671 zone_set_flag(zone, ZONE_OOM_LOCKED);
672out:
673 spin_unlock(&zone_scan_lock);
674 return ret;
675}
676
677
678
679
680
681static void clear_system_oom(void)
682{
683 struct zone *zone;
684
685 spin_lock(&zone_scan_lock);
686 for_each_populated_zone(zone)
687 zone_clear_flag(zone, ZONE_OOM_LOCKED);
688 spin_unlock(&zone_scan_lock);
689}
690
691
692
693
694
695
696
697
698
699
700
701
702
703void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
704 int order, nodemask_t *nodemask)
705{
706 const nodemask_t *mpol_mask;
707 struct task_struct *p;
708 unsigned long totalpages;
709 unsigned long freed = 0;
710 unsigned int points;
711 enum oom_constraint constraint = CONSTRAINT_NONE;
712 int killed = 0;
713
714 blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
715 if (freed > 0)
716
717 return;
718
719
720
721
722
723
724 if (fatal_signal_pending(current)) {
725 set_thread_flag(TIF_MEMDIE);
726 return;
727 }
728
729
730
731
732
733 constraint = constrained_alloc(zonelist, gfp_mask, nodemask,
734 &totalpages);
735 mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL;
736 check_panic_on_oom(constraint, gfp_mask, order, mpol_mask);
737
738 read_lock(&tasklist_lock);
739 if (sysctl_oom_kill_allocating_task &&
740 !oom_unkillable_task(current, NULL, nodemask) &&
741 current->mm) {
742
743
744
745
746
747 if (!oom_kill_process(current, gfp_mask, order, 0, totalpages,
748 NULL, nodemask,
749 "Out of memory (oom_kill_allocating_task)"))
750 goto out;
751 }
752
753retry:
754 p = select_bad_process(&points, totalpages, NULL, mpol_mask);
755 if (PTR_ERR(p) == -1UL)
756 goto out;
757
758
759 if (!p) {
760 dump_header(NULL, gfp_mask, order, NULL, mpol_mask);
761 read_unlock(&tasklist_lock);
762 panic("Out of memory and no killable processes...\n");
763 }
764
765 if (oom_kill_process(p, gfp_mask, order, points, totalpages, NULL,
766 nodemask, "Out of memory"))
767 goto retry;
768 killed = 1;
769out:
770 read_unlock(&tasklist_lock);
771
772
773
774
775
776 if (killed && !test_thread_flag(TIF_MEMDIE))
777 schedule_timeout_uninterruptible(1);
778}
779
780
781
782
783
784
785
786void pagefault_out_of_memory(void)
787{
788 if (try_set_system_oom()) {
789 out_of_memory(NULL, 0, 0, NULL);
790 clear_system_oom();
791 }
792 if (!test_thread_flag(TIF_MEMDIE))
793 schedule_timeout_uninterruptible(1);
794}
795