linux-bk/kernel/pid.c
/*
 * Generic pidhash and scalable, time-bounded PID allocator
 *
 * (C) 2002-2003 William Irwin, IBM
 * (C) 2004 William Irwin, Oracle
 * (C) 2002-2004 Ingo Molnar, Red Hat
 *
 * pid-structures are backing objects for tasks sharing a given ID to chain
 * against. There is very little to them aside from hashing them and
 * parking tasks using given IDs on a list.
 *
 * The hash is always changed with the tasklist_lock write-acquired,
 * and the hash is only accessed with the tasklist_lock at least
 * read-acquired, so there's no additional SMP locking needed here.
 *
 * We have a list of bitmap pages, whose bitmaps represent the PID space.
 * Allocating and freeing PIDs is completely lockless. In the worst case,
 * when all but one of roughly 1 million possible PIDs are already
 * allocated, an allocation scans 32 list entries and at most PAGE_SIZE
 * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
 */
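
/*
 * Illustrative arithmetic for the numbers above (assuming 4 KiB pages,
 * i.e. BITS_PER_PAGE == 32768): the default pid_max of PID_MAX_DEFAULT
 * (0x8000 == 32768) fits in a single bitmap page; a pid_max of about
 * 1 million covers 1048576/32768 == 32 bitmap pages (the "32 list
 * entries" mentioned above); and the PID_MAX_LIMIT of 4*1024*1024 used
 * on 64-bit builds needs 128 bitmap pages in total.
 */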

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/hash.h>

#define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
static struct hlist_head *pid_hash[PIDTYPE_MAX];
static int pidhash_shift;

int pid_max = PID_MAX_DEFAULT;
int last_pid;

#define RESERVED_PIDS           300

int pid_max_min = RESERVED_PIDS + 1;
int pid_max_max = PID_MAX_LIMIT;

#define PIDMAP_ENTRIES          ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
#define BITS_PER_PAGE           (PAGE_SIZE*8)
#define BITS_PER_PAGE_MASK      (BITS_PER_PAGE-1)
#define mk_pid(map, off)        (((map) - pidmap_array)*BITS_PER_PAGE + (off))
#define find_next_offset(map, off)                                      \
                find_next_zero_bit((map)->page, BITS_PER_PAGE, off)

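/*
 * For illustration (again assuming 4 KiB pages): pid 40000 lives in
 * pidmap_array[40000 / 32768] == pidmap_array[1] at bit offset
 * 40000 & 32767 == 7232, and mk_pid(&pidmap_array[1], 7232) maps the
 * pair back to 1*32768 + 7232 == 40000.
 */
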
/*
 * PID-map pages start out as NULL; they get allocated upon
 * first use and are never deallocated. This way a low pid_max
 * value does not cause lots of bitmaps to be allocated, but
 * the scheme scales up to 4 million PIDs at runtime.
 */
typedef struct pidmap {
        atomic_t nr_free;
        void *page;
} pidmap_t;

static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
         { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };

static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);

fastcall void free_pidmap(int pid)
{
        pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
        int offset = pid & BITS_PER_PAGE_MASK;

        clear_bit(offset, map->page);
        atomic_inc(&map->nr_free);
}

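/*
 * alloc_pidmap() hands out the next free PID after last_pid, wrapping
 * around to RESERVED_PIDS rather than reusing the low PIDs, and returns
 * -1 if no PID can be allocated.  A caller would use it roughly like
 * this (an illustrative sketch only, not the actual fork path):
 *
 *	int nr = alloc_pidmap();
 *	if (nr < 0)
 *		return -EAGAIN;
 *	attach_pid(p, PIDTYPE_PID, nr);
 *
 * If the attach never happens, the bit must be released again with
 * free_pidmap(nr).
 */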
int alloc_pidmap(void)
{
        int i, offset, max_scan, pid, last = last_pid;
        pidmap_t *map;

        pid = last + 1;
        if (pid >= pid_max)
                pid = RESERVED_PIDS;
        offset = pid & BITS_PER_PAGE_MASK;
        map = &pidmap_array[pid/BITS_PER_PAGE];
        max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
        for (i = 0; i <= max_scan; ++i) {
                if (unlikely(!map->page)) {
                        unsigned long page = get_zeroed_page(GFP_KERNEL);
                        /*
                         * Free the page if someone raced with us
                         * installing it:
                         */
                        spin_lock(&pidmap_lock);
                        if (map->page)
                                free_page(page);
                        else
                                map->page = (void *)page;
                        spin_unlock(&pidmap_lock);
                        if (unlikely(!map->page))
                                break;
                }
                if (likely(atomic_read(&map->nr_free))) {
                        do {
                                if (!test_and_set_bit(offset, map->page)) {
                                        atomic_dec(&map->nr_free);
                                        last_pid = pid;
                                        return pid;
                                }
                                offset = find_next_offset(map, offset);
                                pid = mk_pid(map, offset);
                        /*
                         * find_next_offset() found a bit, the pid from it
                         * is in-bounds, and if we fell back to the last
                         * bitmap block and the final block was the same
                         * as the starting point, pid is before last_pid.
                         */
                        } while (offset < BITS_PER_PAGE && pid < pid_max &&
                                        (i != max_scan || pid < last ||
                                            !((last+1) & BITS_PER_PAGE_MASK)));
                }
                if (map < &pidmap_array[(pid_max-1)/BITS_PER_PAGE]) {
                        ++map;
                        offset = 0;
                } else {
                        map = &pidmap_array[0];
                        offset = RESERVED_PIDS;
                        if (unlikely(last == offset))
                                break;
                }
                pid = mk_pid(map, offset);
        }
        return -1;
}

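/*
 * Each PID type has its own hash table; find_pid() walks the collision
 * chain for bucket pid_hashfn(nr) in pid_hash[type] and returns the
 * struct pid whose ->nr matches, or NULL if the number is not in use
 * for that type.
 */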
struct pid * fastcall find_pid(enum pid_type type, int nr)
{
        struct hlist_node *elem;
        struct pid *pid;

        hlist_for_each_entry(pid, elem,
                        &pid_hash[type][pid_hashfn(nr)], pid_chain) {
                if (pid->nr == nr)
                        return pid;
        }
        return NULL;
}

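/*
 * The first task attached under a given (type, nr) pair becomes the
 * hashed struct pid for that number; any further tasks sharing the same
 * ID (e.g. members of a process group or session) are chained onto its
 * pid_list instead of being hashed themselves.
 */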
int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
{
        struct pid *pid, *task_pid;

        task_pid = &task->pids[type];
        pid = find_pid(type, nr);
        if (pid == NULL) {
                hlist_add_head(&task_pid->pid_chain,
                                &pid_hash[type][pid_hashfn(nr)]);
                INIT_LIST_HEAD(&task_pid->pid_list);
        } else {
                INIT_HLIST_NODE(&task_pid->pid_chain);
                list_add_tail(&task_pid->pid_list, &pid->pid_list);
        }
        task_pid->nr = nr;

        return 0;
}

static fastcall int __detach_pid(task_t *task, enum pid_type type)
{
        struct pid *pid, *pid_next;
        int nr = 0;

        pid = &task->pids[type];
        if (!hlist_unhashed(&pid->pid_chain)) {
                hlist_del(&pid->pid_chain);

                if (list_empty(&pid->pid_list))
                        nr = pid->nr;
                else {
                        pid_next = list_entry(pid->pid_list.next,
                                                struct pid, pid_list);
                        /* insert the next pid from pid_list into the hash */
                        hlist_add_head(&pid_next->pid_chain,
                                &pid_hash[type][pid_hashfn(pid_next->nr)]);
                }
        }

        list_del(&pid->pid_list);
        pid->nr = 0;

        return nr;
}

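/*
 * detach_pid() unhashes the task's ID and releases the pidmap bit, but
 * only once no other PID type still references the same number: e.g. a
 * number that is no longer any task's PID may still be in use as a
 * process group or session ID, in which case the bit must stay set.
 */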
void fastcall detach_pid(task_t *task, enum pid_type type)
{
        int tmp, nr;

        nr = __detach_pid(task, type);
        if (!nr)
                return;

        for (tmp = PIDTYPE_MAX; --tmp >= 0; )
                if (tmp != type && find_pid(tmp, nr))
                        return;

        free_pidmap(nr);
}

task_t *find_task_by_pid_type(int type, int nr)
{
        struct pid *pid;

        pid = find_pid(type, nr);
        if (!pid)
                return NULL;

        return pid_task(&pid->pid_list, type);
}

EXPORT_SYMBOL(find_task_by_pid_type);

/*
 * This function switches the PIDs if a non-leader thread calls
 * sys_execve() - this must be done without releasing the PID
 * (which a detach_pid() would eventually do).
 */
void switch_exec_pids(task_t *leader, task_t *thread)
{
        __detach_pid(leader, PIDTYPE_PID);
        __detach_pid(leader, PIDTYPE_TGID);
        __detach_pid(leader, PIDTYPE_PGID);
        __detach_pid(leader, PIDTYPE_SID);

        __detach_pid(thread, PIDTYPE_PID);
        __detach_pid(thread, PIDTYPE_TGID);

        leader->pid = leader->tgid = thread->pid;
        thread->pid = thread->tgid;

        attach_pid(thread, PIDTYPE_PID, thread->pid);
        attach_pid(thread, PIDTYPE_TGID, thread->tgid);
        attach_pid(thread, PIDTYPE_PGID, thread->signal->pgrp);
        attach_pid(thread, PIDTYPE_SID, thread->signal->session);
        list_add_tail(&thread->tasks, &init_task.tasks);

        attach_pid(leader, PIDTYPE_PID, leader->pid);
        attach_pid(leader, PIDTYPE_TGID, leader->tgid);
        attach_pid(leader, PIDTYPE_PGID, leader->signal->pgrp);
        attach_pid(leader, PIDTYPE_SID, leader->signal->session);
}

/*
 * The pid hash table is scaled according to the amount of memory in the
 * machine.  It ranges from a minimum of 16 slots up to 4096 slots at one
 * gigabyte of memory or more.
 */
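/*
 * For example (assuming 4 KiB pages): a machine with roughly 1 GiB of
 * kernel pages has megabytes == 1024, so fls(megabytes * 4) ==
 * fls(4096) == 13, which the min() below clamps to the maximum shift
 * of 12, giving 1 << 12 == 4096 hash slots per PID type.
 */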
void __init pidhash_init(void)
{
        int i, j, pidhash_size;
        unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT);

        pidhash_shift = max(4, fls(megabytes * 4));
        pidhash_shift = min(12, pidhash_shift);
        pidhash_size = 1 << pidhash_shift;

        printk("PID hash table entries: %d (order: %d, %Zd bytes)\n",
                pidhash_size, pidhash_shift,
                PIDTYPE_MAX * pidhash_size * sizeof(struct hlist_head));

        for (i = 0; i < PIDTYPE_MAX; i++) {
                pid_hash[i] = alloc_bootmem(pidhash_size *
                                        sizeof(*(pid_hash[i])));
                if (!pid_hash[i])
                        panic("Could not alloc pidhash!\n");
                for (j = 0; j < pidhash_size; j++)
                        INIT_HLIST_HEAD(&pid_hash[i][j]);
        }
}

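/*
 * pidhash_init() above runs early in boot, before the page allocator is
 * available, hence alloc_bootmem(); pidmap_init() is invoked later in
 * the boot path, once get_zeroed_page() works, and seeds the bitmap by
 * marking PID 0 as taken and attaching it to the running task (current
 * at this point is the boot/idle task).
 */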
void __init pidmap_init(void)
{
        int i;

        pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL);
        set_bit(0, pidmap_array->page);
        atomic_dec(&pidmap_array->nr_free);

        /*
         * Allocate PID 0, and hash it via all PID types:
         */

        for (i = 0; i < PIDTYPE_MAX; i++)
                attach_pid(current, i, 0);
}