linux-bk/mm/oom_kill.c
<<
>>
Prefs
   1/*
   2 *  linux/mm/oom_kill.c
   3 * 
   4 *  Copyright (C)  1998,2000  Rik van Riel
   5 *      Thanks go out to Claus Fischer for some serious inspiration and
   6 *      for goading me into coding this file...
   7 *
   8 *  The routines in this file are used to kill a process when
   9 *  we're seriously out of memory. This gets called from kswapd()
  10 *  in linux/mm/vmscan.c when we really run out of memory.
  11 *
  12 *  Since we won't call these routines often (on a well-configured
  13 *  machine) this file will double as a 'coding guide' and a signpost
  14 *  for newbie kernel hackers. It features several pointers to major
  15 *  kernel subsystems and hints as to where to find out what things do.
  16 */
  17
  18#include <linux/mm.h>
  19#include <linux/sched.h>
  20#include <linux/swap.h>
  21#include <linux/timex.h>
  22
  23/* #define DEBUG */
  24
  25/**
  26 * int_sqrt - oom_kill.c internal function, rough approximation to sqrt
  27 * @x: integer of which to calculate the sqrt
  28 * 
  29 * A very rough approximation to the sqrt() function.
  30 */
  31static unsigned int int_sqrt(unsigned int x)
  32{
  33        unsigned int out = x;
  34        while (x & ~(unsigned int)1) x >>=2, out >>=1;
  35        if (x) out -= out >> 2;
  36        return (out ? out : 1);
  37}       
  38
  39/**
  40 * oom_badness - calculate a numeric value for how bad this task has been
  41 * @p: task struct of which task we should calculate
  42 *
  43 * The formula used is relatively simple and documented inline in the
  44 * function. The main rationale is that we want to select a good task
  45 * to kill when we run out of memory.
  46 *
  47 * Good in this context means that:
  48 * 1) we lose the minimum amount of work done
  49 * 2) we recover a large amount of memory
  50 * 3) we don't kill anything innocent of eating tons of memory
  51 * 4) we want to kill the minimum amount of processes (one)
  52 * 5) we try to kill the process the user expects us to kill, this
  53 *    algorithm has been meticulously tuned to meet the priniciple
  54 *    of least surprise ... (be careful when you change it)
  55 */
  56
  57static int badness(struct task_struct *p)
  58{
  59        int points, cpu_time, run_time;
  60
  61        if (!p->mm)
  62                return 0;
  63        /*
  64         * The memory size of the process is the basis for the badness.
  65         */
  66        points = p->mm->total_vm;
  67
  68        /*
  69         * CPU time is in seconds and run time is in minutes. There is no
  70         * particular reason for this other than that it turned out to work
  71         * very well in practice. This is not safe against jiffie wraps
  72         * but we don't care _that_ much...
  73         */
  74        cpu_time = (p->utime + p->stime) >> (SHIFT_HZ + 3);
  75        run_time = (jiffies - p->start_time) >> (SHIFT_HZ + 10);
  76
  77        points /= int_sqrt(cpu_time);
  78        points /= int_sqrt(int_sqrt(run_time));
  79
  80        /*
  81         * Niced processes are most likely less important, so double
  82         * their badness points.
  83         */
  84        if (task_nice(p) > 0)
  85                points *= 2;
  86
  87        /*
  88         * Superuser processes are usually more important, so we make it
  89         * less likely that we kill those.
  90         */
  91        if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_ADMIN) ||
  92                                p->uid == 0 || p->euid == 0)
  93                points /= 4;
  94
  95        /*
  96         * We don't want to kill a process with direct hardware access.
  97         * Not only could that mess up the hardware, but usually users
  98         * tend to only have this flag set on applications they think
  99         * of as important.
 100         */
 101        if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO))
 102                points /= 4;
 103#ifdef DEBUG
 104        printk(KERN_DEBUG "OOMkill: task %d (%s) got %d points\n",
 105        p->pid, p->comm, points);
 106#endif
 107        return points;
 108}
 109
 110/*
 111 * Simple selection loop. We chose the process with the highest
 112 * number of 'points'. We expect the caller will lock the tasklist.
 113 *
 114 * (not docbooked, we don't want this one cluttering up the manual)
 115 */
 116static struct task_struct * select_bad_process(void)
 117{
 118        int maxpoints = 0;
 119        struct task_struct *g, *p;
 120        struct task_struct *chosen = NULL;
 121
 122        do_each_thread(g, p)
 123                if (p->pid) {
 124                        int points = badness(p);
 125                        if (points > maxpoints) {
 126                                chosen = p;
 127                                maxpoints = points;
 128                        }
 129                }
 130        while_each_thread(g, p);
 131        return chosen;
 132}
 133
 134/**
 135 * We must be careful though to never send SIGKILL a process with
 136 * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that
 137 * we select a process with CAP_SYS_RAW_IO set).
 138 */
 139void oom_kill_task(struct task_struct *p)
 140{
 141        printk(KERN_ERR "Out of Memory: Killed process %d (%s).\n", p->pid, p->comm);
 142
 143        /*
 144         * We give our sacrificial lamb high priority and access to
 145         * all the memory it needs. That way it should be able to
 146         * exit() and clear out its resources quickly...
 147         */
 148        p->time_slice = HZ;
 149        p->flags |= PF_MEMALLOC | PF_MEMDIE;
 150
 151        /* This process has hardware access, be more careful. */
 152        if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) {
 153                force_sig(SIGTERM, p);
 154        } else {
 155                force_sig(SIGKILL, p);
 156        }
 157}
 158
 159/**
 160 * oom_kill - kill the "best" process when we run out of memory
 161 *
 162 * If we run out of memory, we have the choice between either
 163 * killing a random task (bad), letting the system crash (worse)
 164 * OR try to be smart about which process to kill. Note that we
 165 * don't have to be perfect here, we just have to be good.
 166 */
 167static void oom_kill(void)
 168{
 169        struct task_struct *g, *p, *q;
 170        
 171        read_lock(&tasklist_lock);
 172        p = select_bad_process();
 173
 174        /* Found nothing?!?! Either we hang forever, or we panic. */
 175        if (p == NULL)
 176                panic("Out of memory and no killable processes...\n");
 177
 178        /* kill all processes that share the ->mm (i.e. all threads) */
 179        do_each_thread(g, q)
 180                if (q->mm == p->mm)
 181                        oom_kill_task(q);
 182        while_each_thread(g, q);
 183
 184        read_unlock(&tasklist_lock);
 185
 186        /*
 187         * Make kswapd go out of the way, so "p" has a good chance of
 188         * killing itself before someone else gets the chance to ask
 189         * for more memory.
 190         */
 191        yield();
 192        return;
 193}
 194
 195/**
 196 * out_of_memory - is the system out of memory?
 197 */
 198void out_of_memory(void)
 199{
 200        static unsigned long first, last, count, lastkill;
 201        unsigned long now, since;
 202
 203        /*
 204         * Enough swap space left?  Not OOM.
 205         */
 206        if (nr_swap_pages > 0)
 207                return;
 208
 209        now = jiffies;
 210        since = now - last;
 211        last = now;
 212
 213        /*
 214         * If it's been a long time since last failure,
 215         * we're not oom.
 216         */
 217        last = now;
 218        if (since > 5*HZ)
 219                goto reset;
 220
 221        /*
 222         * If we haven't tried for at least one second,
 223         * we're not really oom.
 224         */
 225        since = now - first;
 226        if (since < HZ)
 227                return;
 228
 229        /*
 230         * If we have gotten only a few failures,
 231         * we're not really oom. 
 232         */
 233        if (++count < 10)
 234                return;
 235
 236        /*
 237         * If we just killed a process, wait a while
 238         * to give that task a chance to exit. This
 239         * avoids killing multiple processes needlessly.
 240         */
 241        since = now - lastkill;
 242        if (since < HZ*5)
 243                return;
 244
 245        /*
 246         * Ok, really out of memory. Kill something.
 247         */
 248        lastkill = now;
 249        oom_kill();
 250
 251reset:
 252        first = now;
 253        count = 0;
 254}
 255
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.