1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18#include <linux/oom.h>
19#include <linux/mm.h>
20#include <linux/err.h>
21#include <linux/sched.h>
22#include <linux/swap.h>
23#include <linux/timex.h>
24#include <linux/jiffies.h>
25#include <linux/cpuset.h>
26#include <linux/module.h>
27#include <linux/notifier.h>
28#include <linux/memcontrol.h>
29#include <linux/security.h>
30
/*
 * Tunables consulted by the OOM paths in this file.  Where they are written
 * is not visible here (NOTE(review): presumably exported as sysctls, going
 * by the names -- confirm).
 *
 * sysctl_panic_on_oom: when non-zero, out_of_memory() panics for an
 * unconstrained allocation and pagefault_out_of_memory() panics instead of
 * killing; the value 2 makes out_of_memory() panic before the allocation
 * constraint is even evaluated.
 */
int sysctl_panic_on_oom;
/* When non-zero, __out_of_memory() first tries to kill the allocating task. */
int sysctl_oom_kill_allocating_task;
/* When non-zero, the (rate-limited) OOM report also calls dump_tasks(). */
int sysctl_oom_dump_tasks;
/* Taken by try_set_zone_oom() and clear_zonelist_oom() around their zone scans. */
static DEFINE_SPINLOCK(zone_scan_lock);
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56unsigned long badness(struct task_struct *p, unsigned long uptime)
57{
58 unsigned long points, cpu_time, run_time;
59 struct mm_struct *mm;
60 struct task_struct *child;
61
62 task_lock(p);
63 mm = p->mm;
64 if (!mm) {
65 task_unlock(p);
66 return 0;
67 }
68
69
70
71
72 points = mm->total_vm;
73
74
75
76
77 task_unlock(p);
78
79
80
81
82 if (p->flags & PF_SWAPOFF)
83 return ULONG_MAX;
84
85
86
87
88
89
90
91
92
93 list_for_each_entry(child, &p->children, sibling) {
94 task_lock(child);
95 if (child->mm != mm && child->mm)
96 points += child->mm->total_vm/2 + 1;
97 task_unlock(child);
98 }
99
100
101
102
103
104
105 cpu_time = (cputime_to_jiffies(p->utime) + cputime_to_jiffies(p->stime))
106 >> (SHIFT_HZ + 3);
107
108 if (uptime >= p->start_time.tv_sec)
109 run_time = (uptime - p->start_time.tv_sec) >> 10;
110 else
111 run_time = 0;
112
113 if (cpu_time)
114 points /= int_sqrt(cpu_time);
115 if (run_time)
116 points /= int_sqrt(int_sqrt(run_time));
117
118
119
120
121
122 if (task_nice(p) > 0)
123 points *= 2;
124
125
126
127
128
129 if (has_capability_noaudit(p, CAP_SYS_ADMIN) ||
130 has_capability_noaudit(p, CAP_SYS_RESOURCE))
131 points /= 4;
132
133
134
135
136
137
138
139 if (has_capability_noaudit(p, CAP_SYS_RAWIO))
140 points /= 4;
141
142
143
144
145
146
147 if (!cpuset_mems_allowed_intersects(current, p))
148 points /= 8;
149
150
151
152
153 if (p->oomkilladj) {
154 if (p->oomkilladj > 0) {
155 if (!points)
156 points = 1;
157 points <<= p->oomkilladj;
158 } else
159 points >>= -(p->oomkilladj);
160 }
161
162#ifdef DEBUG
163 printk(KERN_DEBUG "OOMkill: task %d (%s) got %lu points\n",
164 p->pid, p->comm, points);
165#endif
166 return points;
167}
168
169
170
171
172static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
173 gfp_t gfp_mask)
174{
175#ifdef CONFIG_NUMA
176 struct zone *zone;
177 struct zoneref *z;
178 enum zone_type high_zoneidx = gfp_zone(gfp_mask);
179 nodemask_t nodes = node_states[N_HIGH_MEMORY];
180
181 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
182 if (cpuset_zone_allowed_softwall(zone, gfp_mask))
183 node_clear(zone_to_nid(zone), nodes);
184 else
185 return CONSTRAINT_CPUSET;
186
187 if (!nodes_empty(nodes))
188 return CONSTRAINT_MEMORY_POLICY;
189#endif
190
191 return CONSTRAINT_NONE;
192}
193
194
195
196
197
198
199
200static struct task_struct *select_bad_process(unsigned long *ppoints,
201 struct mem_cgroup *mem)
202{
203 struct task_struct *g, *p;
204 struct task_struct *chosen = NULL;
205 struct timespec uptime;
206 *ppoints = 0;
207
208 do_posix_clock_monotonic_gettime(&uptime);
209 do_each_thread(g, p) {
210 unsigned long points;
211
212
213
214
215
216 if (!p->mm)
217 continue;
218
219 if (is_global_init(p))
220 continue;
221 if (mem && !task_in_mem_cgroup(p, mem))
222 continue;
223
224
225
226
227
228
229
230
231
232
233 if (test_tsk_thread_flag(p, TIF_MEMDIE))
234 return ERR_PTR(-1UL);
235
236
237
238
239
240
241
242
243
244
245
246 if (p->flags & PF_EXITING) {
247 if (p != current)
248 return ERR_PTR(-1UL);
249
250 chosen = p;
251 *ppoints = ULONG_MAX;
252 }
253
254 if (p->oomkilladj == OOM_DISABLE)
255 continue;
256
257 points = badness(p, uptime.tv_sec);
258 if (points > *ppoints || !chosen) {
259 chosen = p;
260 *ppoints = points;
261 }
262 } while_each_thread(g, p);
263
264 return chosen;
265}
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280static void dump_tasks(const struct mem_cgroup *mem)
281{
282 struct task_struct *g, *p;
283
284 printk(KERN_INFO "[ pid ] uid tgid total_vm rss cpu oom_adj "
285 "name\n");
286 do_each_thread(g, p) {
287 struct mm_struct *mm;
288
289 if (mem && !task_in_mem_cgroup(p, mem))
290 continue;
291 if (!thread_group_leader(p))
292 continue;
293
294 task_lock(p);
295 mm = p->mm;
296 if (!mm) {
297
298
299
300
301
302 task_unlock(p);
303 continue;
304 }
305 printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n",
306 p->pid, __task_cred(p)->uid, p->tgid, mm->total_vm,
307 get_mm_rss(mm), (int)task_cpu(p), p->oomkilladj,
308 p->comm);
309 task_unlock(p);
310 } while_each_thread(g, p);
311}
312
313
314
315
316
317
318static void __oom_kill_task(struct task_struct *p, int verbose)
319{
320 if (is_global_init(p)) {
321 WARN_ON(1);
322 printk(KERN_WARNING "tried to kill init!\n");
323 return;
324 }
325
326 if (!p->mm) {
327 WARN_ON(1);
328 printk(KERN_WARNING "tried to kill an mm-less task!\n");
329 return;
330 }
331
332 if (verbose)
333 printk(KERN_ERR "Killed process %d (%s)\n",
334 task_pid_nr(p), p->comm);
335
336
337
338
339
340
341 p->rt.time_slice = HZ;
342 set_tsk_thread_flag(p, TIF_MEMDIE);
343
344 force_sig(SIGKILL, p);
345}
346
347static int oom_kill_task(struct task_struct *p)
348{
349 struct mm_struct *mm;
350 struct task_struct *g, *q;
351
352 mm = p->mm;
353
354
355
356
357
358
359
360
361
362
363 if (mm == NULL)
364 return 1;
365
366
367
368
369 do_each_thread(g, q) {
370 if (q->mm == mm && q->oomkilladj == OOM_DISABLE)
371 return 1;
372 } while_each_thread(g, q);
373
374 __oom_kill_task(p, 1);
375
376
377
378
379
380
381 do_each_thread(g, q) {
382 if (q->mm == mm && !same_thread_group(q, p))
383 force_sig(SIGKILL, q);
384 } while_each_thread(g, q);
385
386 return 0;
387}
388
389static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
390 unsigned long points, struct mem_cgroup *mem,
391 const char *message)
392{
393 struct task_struct *c;
394
395 if (printk_ratelimit()) {
396 printk(KERN_WARNING "%s invoked oom-killer: "
397 "gfp_mask=0x%x, order=%d, oomkilladj=%d\n",
398 current->comm, gfp_mask, order, current->oomkilladj);
399 task_lock(current);
400 cpuset_print_task_mems_allowed(current);
401 task_unlock(current);
402 dump_stack();
403 mem_cgroup_print_oom_info(mem, current);
404 show_mem();
405 if (sysctl_oom_dump_tasks)
406 dump_tasks(mem);
407 }
408
409
410
411
412
413 if (p->flags & PF_EXITING) {
414 __oom_kill_task(p, 0);
415 return 0;
416 }
417
418 printk(KERN_ERR "%s: kill process %d (%s) score %li or a child\n",
419 message, task_pid_nr(p), p->comm, points);
420
421
422 list_for_each_entry(c, &p->children, sibling) {
423 if (c->mm == p->mm)
424 continue;
425 if (!oom_kill_task(c))
426 return 0;
427 }
428 return oom_kill_task(p);
429}
430
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
/*
 * OOM handling scoped to one memory cgroup: under tasklist_lock, keep
 * selecting a victim inside @mem (falling back to current when none is
 * found) until a kill succeeds or the selection asks to back off.
 */
void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
{
	unsigned long score = 0;
	struct task_struct *victim;

	read_lock(&tasklist_lock);
	for (;;) {
		victim = select_bad_process(&score, mem);
		if (PTR_ERR(victim) == -1UL)
			break;

		if (!victim)
			victim = current;

		/* A non-zero result means nothing was killed: select again. */
		if (!oom_kill_process(victim, gfp_mask, 0, score, mem,
				      "Memory cgroup out of memory"))
			break;
	}
	read_unlock(&tasklist_lock);
}
#endif
453
/*
 * Notifier chain called by out_of_memory() and pagefault_out_of_memory()
 * before any task is killed; a callback reports progress through the
 * "freed" counter it is handed.
 */
static BLOCKING_NOTIFIER_HEAD(oom_notify_list);
455
456int register_oom_notifier(struct notifier_block *nb)
457{
458 return blocking_notifier_chain_register(&oom_notify_list, nb);
459}
460EXPORT_SYMBOL_GPL(register_oom_notifier);
461
462int unregister_oom_notifier(struct notifier_block *nb)
463{
464 return blocking_notifier_chain_unregister(&oom_notify_list, nb);
465}
466EXPORT_SYMBOL_GPL(unregister_oom_notifier);
467
468
469
470
471
472
473int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_mask)
474{
475 struct zoneref *z;
476 struct zone *zone;
477 int ret = 1;
478
479 spin_lock(&zone_scan_lock);
480 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
481 if (zone_is_oom_locked(zone)) {
482 ret = 0;
483 goto out;
484 }
485 }
486
487 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
488
489
490
491
492
493 zone_set_flag(zone, ZONE_OOM_LOCKED);
494 }
495
496out:
497 spin_unlock(&zone_scan_lock);
498 return ret;
499}
500
501
502
503
504
505
506void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
507{
508 struct zoneref *z;
509 struct zone *zone;
510
511 spin_lock(&zone_scan_lock);
512 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
513 zone_clear_flag(zone, ZONE_OOM_LOCKED);
514 }
515 spin_unlock(&zone_scan_lock);
516}
517
518
519
520
521static void __out_of_memory(gfp_t gfp_mask, int order)
522{
523 struct task_struct *p;
524 unsigned long points;
525
526 if (sysctl_oom_kill_allocating_task)
527 if (!oom_kill_process(current, gfp_mask, order, 0, NULL,
528 "Out of memory (oom_kill_allocating_task)"))
529 return;
530retry:
531
532
533
534
535 p = select_bad_process(&points, NULL);
536
537 if (PTR_ERR(p) == -1UL)
538 return;
539
540
541 if (!p) {
542 read_unlock(&tasklist_lock);
543 panic("Out of memory and no killable processes...\n");
544 }
545
546 if (oom_kill_process(p, gfp_mask, order, points, NULL,
547 "Out of memory"))
548 goto retry;
549}
550
551
552
553
554
555void pagefault_out_of_memory(void)
556{
557 unsigned long freed = 0;
558
559 blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
560 if (freed > 0)
561
562 return;
563
564
565
566
567
568 if (mem_cgroup_oom_called(current))
569 goto rest_and_return;
570
571 if (sysctl_panic_on_oom)
572 panic("out of memory from page fault. panic_on_oom is selected.\n");
573
574 read_lock(&tasklist_lock);
575 __out_of_memory(0, 0);
576 read_unlock(&tasklist_lock);
577
578
579
580
581
582rest_and_return:
583 if (!test_thread_flag(TIF_MEMDIE))
584 schedule_timeout_uninterruptible(1);
585}
586
587
588
589
590
591
592
593
594
595
596
597
598void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
599{
600 unsigned long freed = 0;
601 enum oom_constraint constraint;
602
603 blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
604 if (freed > 0)
605
606 return;
607
608 if (sysctl_panic_on_oom == 2)
609 panic("out of memory. Compulsory panic_on_oom is selected.\n");
610
611
612
613
614
615 constraint = constrained_alloc(zonelist, gfp_mask);
616 read_lock(&tasklist_lock);
617
618 switch (constraint) {
619 case CONSTRAINT_MEMORY_POLICY:
620 oom_kill_process(current, gfp_mask, order, 0, NULL,
621 "No available memory (MPOL_BIND)");
622 break;
623
624 case CONSTRAINT_NONE:
625 if (sysctl_panic_on_oom)
626 panic("out of memory. panic_on_oom is selected\n");
627
628 case CONSTRAINT_CPUSET:
629 __out_of_memory(gfp_mask, order);
630 break;
631 }
632
633 read_unlock(&tasklist_lock);
634
635
636
637
638
639 if (!test_thread_flag(TIF_MEMDIE))
640 schedule_timeout_uninterruptible(1);
641}
642