linux-old/mm/slab.c
<<
>>
Prefs
   1/*
   2 * linux/mm/slab.c
   3 * Written by Mark Hemment, 1996/97.
   4 * (markhe@nextd.demon.co.uk)
   5 *
   6 * 11 April '97.  Started multi-threading - markhe
   7 *      The global cache-chain is protected by the semaphore 'cache_chain_sem'.
   8 *      The sem is only needed when accessing/extending the cache-chain, which
   9 *      can never happen inside an interrupt (kmem_cache_create(),
  10 *      kmem_cache_shrink() and kmem_cache_reap()).
  11 *      This is a medium-term exclusion lock.
  12 *
  13 *      Each cache has its own lock; 'c_spinlock'.  This lock is needed only
  14 *      when accessing non-constant members of a cache-struct.
  15 *      Note: 'constant members' are assigned a value in kmem_cache_create() before
  16 *      the cache is linked into the cache-chain.  The values never change, so not
  17 *      even a multi-reader lock is needed for these members.
  18 *      The c_spinlock is only ever held for a few cycles.
  19 *
  20 *      To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which
  21 *      may be sleeping and therefore not holding the semaphore/lock), the
  22 *      c_growing field is used.  This also prevents reaping from a cache.
  23 *
  24 *      Note, caches can _never_ be destroyed.  When a sub-system (eg module) has
  25 *      finished with a cache, it can only be shrunk.  This leaves the cache empty,
  26 *      but already enabled for re-use, eg. during a module re-load.
  27 *
  28 *      Notes:
  29 *              o Constructors/deconstructors are called while the cache-lock
  30 *                is _not_ held.  Therefore they _must_ be threaded.
  31 *              o Constructors must not attempt to allocate memory from the
  32 *                same cache that they are a constructor for - infinite loop!
  33 *                (There is no easy way to trap this.)
  34 *              o The per-cache locks must be obtained with local-interrupts disabled.
  35 *              o When compiled with debug support, and an object-verify (upon release)
  36 *                is requested for a cache, the verify-function is called with the cache
  37 *                lock held.  This helps debugging.
  38 *              o The functions called from try_to_free_page() must not attempt
  39 *                to allocate memory from a cache which is being grown.
  40 *                The buffer sub-system might try to allocate memory, via buffer_cachep.
  41 *                As this pri is passed to the SLAB, and then (if necessary) onto the
  42 *                gfp() funcs (which avoid calling try_to_free_page()), no deadlock
  43 *                should happen.
  44 *
  45 *      The positioning of the per-cache lock is tricky.  If the lock is
  46 *      placed on the same h/w cache line as commonly accessed members
  47 *      the number of L1 cache-line faults is reduced.  However, this can
  48 *      lead to the cache-line ping-ponging between processors when the
  49 *      lock is in contention (and the common members are being accessed).
  50 *      Decided to keep it away from common members.
  51 *
  52 *      More fine-graining is possible, with per-slab locks...but this might be
  53 *      taking fine graining too far, but would have the advantage;
  54 *              During most allocs/frees no writes occur to the cache-struct.
  55 *              Therefore a multi-reader/one writer lock could be used (the writer
  56 *              needed when the slab chain is being link/unlinked).
  57 *              As we would not have an exclusion lock for the cache-structure, one
  58 *              would be needed per-slab (for updating s_free ptr, and/or the contents
  59 *              of s_index).
  60 *      The above locking would allow parallel operations to different slabs within
  61 *      the same cache with reduced spinning.
  62 *
  63 *      Per-engine slab caches, backed by a global cache (as in Mach's Zone allocator),
  64 *      would allow most allocations from the same cache to execute in parallel.
  65 *
  66 *      At present, each engine can be growing a cache.  This should be blocked.
  67 *
  68 *      It is not currently 100% safe to examine the page_struct outside of a kernel
  69 *      or global cli lock.  The risk is v. small, and non-fatal.
  70 *
  71 *      Calls to printk() are not 100% safe (the function is not threaded).  However,
  72 *      printk() is only used under an error condition, and the risk is v. small (not
  73 *      sure if the console write functions 'enjoy' executing multiple contexts in
  74 *      parallel.  I guess they don't...).
  75 *      Note, for most calls to printk() any held cache-lock is dropped.  This is not
  76 *      always done for text size reasons - having *_unlock() everywhere is bloat.
  77 */
  78
  79/*
  80 * An implementation of the Slab Allocator as described in outline in;
  81 *      UNIX Internals: The New Frontiers by Uresh Vahalia
  82 *      Pub: Prentice Hall      ISBN 0-13-101908-2
  83 * or with a little more detail in;
  84 *      The Slab Allocator: An Object-Caching Kernel Memory Allocator
  85 *      Jeff Bonwick (Sun Microsystems).
  86 *      Presented at: USENIX Summer 1994 Technical Conference
  87 */
  88
  89/*
  90 * This implementation deviates from Bonwick's paper as it
  91 * does not use a hash-table for large objects, but rather a per slab
  92 * index to hold the bufctls.  This allows the bufctl structure to
  93 * be small (one word), but limits the number of objects a slab (not
  94 * a cache) can contain when off-slab bufctls are used.  The limit is the
  95 * size of the largest general cache that does not use off-slab bufctls,
  96 * divided by the size of a bufctl.  For 32bit archs, this is 256/4 = 64.
  97 * This is not serious, as it is only for large objects, when it is unwise
  98 * to have too many per slab.
  99 * Note: This limit can be raised by introducing a general cache whose size
 100 * is less than 512 (PAGE_SIZE>>3), but greater than 256.
 101 */
 102
 103#include        <linux/config.h>
 104#include        <linux/slab.h>
 105#include        <linux/interrupt.h>
 106#include        <linux/init.h>
 107
 108/* If there is a different PAGE_SIZE around, and it works with this allocator,
 109 * then change the following.
 110 */
 111#if     (PAGE_SIZE != 8192 && PAGE_SIZE != 4096)
 112#error  Your page size is probably not correctly supported - please check
 113#endif
 114
 115/* SLAB_MGMT_CHECKS     - 1 to enable extra checks in kmem_cache_create().
 116 *                        0 if you wish to reduce memory usage.
 117 *
 118 * SLAB_DEBUG_SUPPORT   - 1 for kmem_cache_create() to honour; SLAB_DEBUG_FREE,
 119 *                        SLAB_DEBUG_INITIAL, SLAB_RED_ZONE & SLAB_POISON.
 120 *                        0 for faster, smaller, code (especially in the critical paths).
 121 *
 122 * SLAB_STATS           - 1 to collect stats for /proc/slabinfo.
 123 *                        0 for faster, smaller, code (especially in the critical paths).
 124 *
 125 * SLAB_SELFTEST        - 1 to perform a few tests, mainly for development.
 126 */
 127#define         SLAB_MGMT_CHECKS        1
 128#define         SLAB_DEBUG_SUPPORT      0
 129#define         SLAB_STATS              0
 130#define         SLAB_SELFTEST           0
 131
 132/* Shouldn't this be in a header file somewhere? */
 133#define BYTES_PER_WORD          sizeof(void *)
 134
 135/* Legal flag mask for kmem_cache_create(). */
 136#if     SLAB_DEBUG_SUPPORT
 137#if     0
 138#define SLAB_C_MASK             (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE| \
 139                                 SLAB_POISON|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP| \
 140                                 SLAB_HIGH_PACK)
 141#endif
 142#define SLAB_C_MASK             (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE| \
 143                                 SLAB_POISON|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP)
 144#else
 145#if     0
 146#define SLAB_C_MASK             (SLAB_HWCACHE_ALIGN|SLAB_NO_REAP|SLAB_HIGH_PACK)
 147#endif
 148#define SLAB_C_MASK             (SLAB_HWCACHE_ALIGN|SLAB_NO_REAP)
 149#endif  /* SLAB_DEBUG_SUPPORT */
 150
 151/* Slab management struct.
 152 * Manages the objs in a slab.  Placed either at the end of mem allocated
 153 * for a slab, or from an internal obj cache (cache_slabp).
 154 * Slabs are chained into a partially ordered list; fully used first, partial
 155 * next, and then fully free slabs.
 156 * The first 4 members are referenced during an alloc/free operation, and
 157 * should always appear on the same cache line.
 158 * Note: The offset between some members _must_ match offsets within
 159 * the kmem_cache_t - see kmem_cache_init() for the checks. */
 160
 /* Width of the s_offset bit-field (could be made larger for 64bit archs). */
 #define SLAB_OFFSET_BITS        16

 /* Per-slab management record.
  * Member order is load-bearing: kmem_cache_init() panics when the offsets
  * of s_magic, s_inuse, s_nextp and s_prevp stop matching their counterparts
  * in kmem_cache_t.
  */
 typedef struct kmem_slab_s {
         /* Referenced during an alloc/free operation. */
         struct kmem_bufctl_s    *s_freep;       /* first inactive obj in this slab */
         struct kmem_bufctl_s    *s_index;       /* per-slab bufctl index; when NULL a
                                                  * bufctl follows each obj instead */
         unsigned long            s_magic;       /* SLAB_MAGIC_ALLOC or SLAB_MAGIC_DESTROYED */
         unsigned long            s_inuse;       /* count of active objs in this slab */

         /* Chain linkage and placement of the objs. */
         struct kmem_slab_s      *s_nextp;
         struct kmem_slab_s      *s_prevp;
         void                    *s_mem;         /* address of the first obj */
         unsigned long            s_offset:SLAB_OFFSET_BITS;
                                                 /* s_mem - s_offset is the address
                                                  * given back to kmem_freepages() */
         unsigned long            s_dma:1;       /* presumably: slab memory is DMAable -
                                                  * TODO confirm against the grow path */
 } kmem_slab_t;
 175
 176/* When the slab management is on-slab, this gives the size to use. */
 177#define slab_align_size         (L1_CACHE_ALIGN(sizeof(kmem_slab_t)))
 178
 179/* Test for end of slab chain. */
 180#define kmem_slab_end(x)        ((kmem_slab_t*)&((x)->c_offset))
 181
 182/* s_magic */
 183#define SLAB_MAGIC_ALLOC        0xA5C32F2BUL    /* slab is alive */
 184#define SLAB_MAGIC_DESTROYED    0xB2F23C5AUL    /* slab has been destroyed */
 185
 186/* Bufctl's are used for linking objs within a slab, identifying what slab an obj
 187 * is in, and the address of the associated obj (for sanity checking with off-slab
 188 * bufctls).  What a bufctl contains depends upon the state of the obj and
 189 * the organisation of the cache.
 190 */
 191typedef struct kmem_bufctl_s {
 192        union {
 193                struct kmem_bufctl_s    *buf_nextp;
 194                kmem_slab_t             *buf_slabp;     /* slab for obj */
 195                void *                   buf_objp;
 196        } u;
 197} kmem_bufctl_t;
 198
 199/* ...shorthand... */
 200#define buf_nextp       u.buf_nextp
 201#define buf_slabp       u.buf_slabp
 202#define buf_objp        u.buf_objp
 203
 204#if     SLAB_DEBUG_SUPPORT
 205/* Magic nums for obj red zoning.
 206 * Placed in the first word before and the first word after an obj.
 207 */
 208#define SLAB_RED_MAGIC1         0x5A2CF071UL    /* when obj is active */
 209#define SLAB_RED_MAGIC2         0x170FC2A5UL    /* when obj is inactive */
 210
 211/* ...and for poisoning */
 212#define SLAB_POISON_BYTE        0x5a            /* byte value for poisoning */
 213#define SLAB_POISON_END 0xa5            /* end-byte of poisoning */
 214
 215#endif  /* SLAB_DEBUG_SUPPORT */
 216
 217/* Cache struct - manages a cache.
 218 * First four members are commonly referenced during an alloc/free operation.
 219 */
 220struct kmem_cache_s {
 221        kmem_slab_t              *c_freep;      /* first slab w. free objs */
 222        unsigned long             c_flags;      /* constant flags */
 223        unsigned long             c_offset;
 224        unsigned long             c_num;        /* # of objs per slab */
 225
 226        unsigned long             c_magic;
 227        unsigned long             c_inuse;      /* kept at zero */
 228        kmem_slab_t              *c_firstp;     /* first slab in chain */
 229        kmem_slab_t              *c_lastp;      /* last slab in chain */
 230
 231        spinlock_t                c_spinlock;
 232        unsigned long             c_growing;
 233        unsigned long             c_dflags;     /* dynamic flags */
 234        size_t                    c_org_size;
 235        unsigned long             c_gfporder;   /* order of pgs per slab (2^n) */
 236        void (*c_ctor)(void *, kmem_cache_t *, unsigned long); /* constructor func */
 237        void (*c_dtor)(void *, kmem_cache_t *, unsigned long); /* de-constructor func */
 238        unsigned long             c_align;      /* alignment of objs */
 239        size_t                    c_colour;     /* cache colouring range */
 240        size_t                    c_colour_next;/* cache colouring */
 241        unsigned long             c_failures;
 242        const char               *c_name;
 243        struct kmem_cache_s      *c_nextp;
 244        kmem_cache_t             *c_index_cachep;
 245#if     SLAB_STATS
 246        unsigned long             c_num_active;
 247        unsigned long             c_num_allocations;
 248        unsigned long             c_high_mark;
 249        unsigned long             c_grown;
 250        unsigned long             c_reaped;
 251        atomic_t                  c_errors;
 252#endif  /* SLAB_STATS */
 253};
 254
 255/* internal c_flags */
 256#define SLAB_CFLGS_OFF_SLAB     0x010000UL      /* slab management in own cache */
 257#define SLAB_CFLGS_BUFCTL       0x020000UL      /* bufctls in own cache */
 258#define SLAB_CFLGS_GENERAL      0x080000UL      /* a general cache */
 259
 260/* c_dflags (dynamic flags).  Need to hold the spinlock to access this member */
 261#define SLAB_CFLGS_GROWN        0x000002UL      /* don't reap a recently grown */
 262
 263#define SLAB_OFF_SLAB(x)        ((x) & SLAB_CFLGS_OFF_SLAB)
 264#define SLAB_BUFCTL(x)          ((x) & SLAB_CFLGS_BUFCTL)
 265#define SLAB_GROWN(x)           ((x) & SLAB_CFLGS_GROWN)
 266
 267#if     SLAB_STATS
 268#define SLAB_STATS_INC_ACTIVE(x)        ((x)->c_num_active++)
 269#define SLAB_STATS_DEC_ACTIVE(x)        ((x)->c_num_active--)
 270#define SLAB_STATS_INC_ALLOCED(x)       ((x)->c_num_allocations++)
 271#define SLAB_STATS_INC_GROWN(x)         ((x)->c_grown++)
 272#define SLAB_STATS_INC_REAPED(x)        ((x)->c_reaped++)
 273#define SLAB_STATS_SET_HIGH(x)          do { if ((x)->c_num_active > (x)->c_high_mark) \
 274                                                (x)->c_high_mark = (x)->c_num_active; \
 275                                        } while (0)
 276#define SLAB_STATS_INC_ERR(x)           (atomic_inc(&(x)->c_errors))
 277#else
 278#define SLAB_STATS_INC_ACTIVE(x)
 279#define SLAB_STATS_DEC_ACTIVE(x)
 280#define SLAB_STATS_INC_ALLOCED(x)
 281#define SLAB_STATS_INC_GROWN(x)
 282#define SLAB_STATS_INC_REAPED(x)
 283#define SLAB_STATS_SET_HIGH(x)
 284#define SLAB_STATS_INC_ERR(x)
 285#endif  /* SLAB_STATS */
 286
 287#if     SLAB_SELFTEST
 288#if     !SLAB_DEBUG_SUPPORT
 289#error  Debug support needed for self-test
 290#endif
 291static void kmem_self_test(void);
 292#endif  /* SLAB_SELFTEST */
 293
 294/* c_magic - used to detect 'out of slabs' in __kmem_cache_alloc() */
 295#define SLAB_C_MAGIC            0x4F17A36DUL
 296
 297/* maximum size of an obj (in 2^order pages) */
 298#define SLAB_OBJ_MAX_ORDER      5       /* 32 pages */
 299
 300/* maximum num of pages for a slab (prevents large requests to the VM layer) */
 301#define SLAB_MAX_GFP_ORDER      5       /* 32 pages */
 302
 303/* the 'preferred' minimum num of objs per slab - maybe less for large objs */
 304#define SLAB_MIN_OBJS_PER_SLAB  4
 305
 306/* If the num of objs per slab is <= SLAB_MIN_OBJS_PER_SLAB,
 307 * then the page order must be less than this before trying the next order.
 308 */
 309#define SLAB_BREAK_GFP_ORDER_HI 2
 310#define SLAB_BREAK_GFP_ORDER_LO 1
 311static int slab_break_gfp_order = SLAB_BREAK_GFP_ORDER_LO;
 312
 313/* Macros for storing/retrieving the cachep and or slab from the
 314 * global 'mem_map'.  With off-slab bufctls, these are used to find the
 315 * slab an obj belongs to.  With kmalloc(), and kfree(), these are used
 316 * to find the cache which an obj belongs to.
 317 */
 318#define SLAB_SET_PAGE_CACHE(pg, x)      ((pg)->next = (struct page *)(x))
 319#define SLAB_GET_PAGE_CACHE(pg)         ((kmem_cache_t *)(pg)->next)
 320#define SLAB_SET_PAGE_SLAB(pg, x)       ((pg)->prev = (struct page *)(x))
 321#define SLAB_GET_PAGE_SLAB(pg)          ((kmem_slab_t *)(pg)->prev)
 322
 323/* Size description struct for general caches. */
 324typedef struct cache_sizes {
 325        size_t           cs_size;
 326        kmem_cache_t    *cs_cachep;
 327} cache_sizes_t;
 328
 329static cache_sizes_t cache_sizes[] = {
 330#if     PAGE_SIZE == 4096
 331        {  32,          NULL},
 332#endif
 333        {  64,          NULL},
 334        { 128,          NULL},
 335        { 256,          NULL},
 336        { 512,          NULL},
 337        {1024,          NULL},
 338        {2048,          NULL},
 339        {4096,          NULL},
 340        {8192,          NULL},
 341        {16384,         NULL},
 342        {32768,         NULL},
 343        {65536,         NULL},
 344        {131072,        NULL},
 345        {0,             NULL}
 346};
 347
 /* Names for the general caches, one per cache_sizes[] entry and in the same
  * order.  They are deliberately kept out of the sizes struct: the string ptr
  * is not needed while searching in kmalloc(), and would 'get-in-the-way' in
  * the h/w cache.
  */
 static char *cache_sizes_name[] = {
 #if     PAGE_SIZE == 4096
         "size-32",
 #endif
         "size-64",      "size-128",     "size-256",
         "size-512",     "size-1024",    "size-2048",
         "size-4096",    "size-8192",    "size-16384",
         "size-32768",   "size-65536",   "size-131072"
 };
 369
 370/* internal cache of cache description objs */
 371static  kmem_cache_t    cache_cache = {
 372/* freep, flags */              kmem_slab_end(&cache_cache), SLAB_NO_REAP,
 373/* offset, num */               sizeof(kmem_cache_t),   0,
 374/* c_magic, c_inuse */          SLAB_C_MAGIC, 0,
 375/* firstp, lastp */             kmem_slab_end(&cache_cache), kmem_slab_end(&cache_cache),
 376/* spinlock */                  SPIN_LOCK_UNLOCKED,
 377/* growing */                   0,
 378/* dflags */                    0,
 379/* org_size, gfp */             0, 0,
 380/* ctor, dtor, align */         NULL, NULL, L1_CACHE_BYTES,
 381/* colour, colour_next */       0, 0,
 382/* failures */                  0,
 383/* name */                      "kmem_cache",
 384/* nextp */                     &cache_cache,
 385/* index */                     NULL,
 386};
 387
 388/* Guard access to the cache-chain. */
 389static struct semaphore cache_chain_sem;
 390
 391/* Place maintainer for reaping. */
 392static  kmem_cache_t    *clock_searchp = &cache_cache;
 393
 394/* Internal slab management cache, for when slab management is off-slab. */
 395static kmem_cache_t     *cache_slabp = NULL;
 396
 397/* Max number of objs-per-slab for caches which use bufctl's.
 398 * Needed to avoid a possible looping condition in kmem_cache_grow().
 399 */
 400static unsigned long bufctl_limit = 0;
 401
 402/* Initialisation - setup the `cache' cache. */
 403long __init kmem_cache_init(long start, long end)
 404{
 405        size_t size, i;
 406
 407#define kmem_slab_offset(x)  ((unsigned long)&((kmem_slab_t *)0)->x)
 408#define kmem_slab_diff(a,b)  (kmem_slab_offset(a) - kmem_slab_offset(b))
 409#define kmem_cache_offset(x) ((unsigned long)&((kmem_cache_t *)0)->x)
 410#define kmem_cache_diff(a,b) (kmem_cache_offset(a) - kmem_cache_offset(b))
 411
 412        /* Sanity checks... */
 413        if (kmem_cache_diff(c_firstp, c_magic) != kmem_slab_diff(s_nextp, s_magic) ||
 414            kmem_cache_diff(c_firstp, c_inuse) != kmem_slab_diff(s_nextp, s_inuse) ||
 415            ((kmem_cache_offset(c_lastp) -
 416              ((unsigned long) kmem_slab_end((kmem_cache_t*)NULL))) !=
 417             kmem_slab_offset(s_prevp)) ||
 418            kmem_cache_diff(c_lastp, c_firstp) != kmem_slab_diff(s_prevp, s_nextp)) {
 419                /* Offsets to the magic are incorrect, either the structures have
 420                 * been incorrectly changed, or adjustments are needed for your
 421                 * architecture.
 422                 */
 423                panic("kmem_cache_init(): Offsets are wrong - I've been messed with!");
 424                /* NOTREACHED */
 425        }
 426#undef  kmem_cache_offset
 427#undef  kmem_cache_diff
 428#undef  kmem_slab_offset
 429#undef  kmem_slab_diff
 430
 431        cache_chain_sem = MUTEX;
 432
 433        size = cache_cache.c_offset + sizeof(kmem_bufctl_t);
 434        size += (L1_CACHE_BYTES-1);
 435        size &= ~(L1_CACHE_BYTES-1);
 436        cache_cache.c_offset = size-sizeof(kmem_bufctl_t);
 437        
 438        i = (PAGE_SIZE<<cache_cache.c_gfporder)-slab_align_size;
 439        cache_cache.c_num = i / size;   /* num of objs per slab */
 440
 441        /* Cache colouring. */
 442        cache_cache.c_colour = (i-(cache_cache.c_num*size))/L1_CACHE_BYTES;
 443        cache_cache.c_colour_next = cache_cache.c_colour;
 444
 445        /*
 446         * Fragmentation resistance on low memory - only use bigger
 447         * page orders on machines with more than 32MB of memory.
 448         */
 449        if (num_physpages > (32 << 20) >> PAGE_SHIFT)
 450                slab_break_gfp_order = SLAB_BREAK_GFP_ORDER_HI;
 451        return start;
 452}
 453
 454/* Initialisation - setup remaining internal and general caches.
 455 * Called after the gfp() functions have been enabled, and before smp_init().
 456 */
 457void __init kmem_cache_sizes_init(void)
 458{
 459        unsigned int    found = 0;
 460
 461        cache_slabp = kmem_cache_create("slab_cache", sizeof(kmem_slab_t),
 462                                        0, SLAB_HWCACHE_ALIGN, NULL, NULL);
 463        if (cache_slabp) {
 464                char **names = cache_sizes_name;
 465                cache_sizes_t *sizes = cache_sizes;
 466                do {
 467                        /* For performance, all the general caches are L1 aligned.
 468                         * This should be particularly beneficial on SMP boxes, as it
 469                         * eliminates "false sharing".
 470                         * Note for systems short on memory removing the alignment will
 471                         * allow tighter packing of the smaller caches. */
 472                        if (!(sizes->cs_cachep =
 473                              kmem_cache_create(*names++, sizes->cs_size,
 474                                                0, SLAB_HWCACHE_ALIGN, NULL, NULL)))
 475                                goto panic_time;
 476                        if (!found) {
 477                                /* Inc off-slab bufctl limit until the ceiling is hit. */
 478                                if (SLAB_BUFCTL(sizes->cs_cachep->c_flags))
 479                                        found++;
 480                                else
 481                                        bufctl_limit =
 482                                                (sizes->cs_size/sizeof(kmem_bufctl_t));
 483                        }
 484                        sizes->cs_cachep->c_flags |= SLAB_CFLGS_GENERAL;
 485                        sizes++;
 486                } while (sizes->cs_size);
 487#if     SLAB_SELFTEST
 488                kmem_self_test();
 489#endif  /* SLAB_SELFTEST */
 490                return;
 491        }
 492panic_time:
 493        panic("kmem_cache_sizes_init: Error creating caches");
 494        /* NOTREACHED */
 495}
 496
 497/* Interface to system's page allocator.  Dma pts to non-zero if all
 498 * of memory is DMAable. No need to hold the cache-lock.
 499 */
 500static inline void *
 501kmem_getpages(kmem_cache_t *cachep, unsigned long flags, unsigned int *dma)
 502{
 503        void    *addr;
 504
 505        *dma = flags & SLAB_DMA;
 506        addr = (void*) __get_free_pages(flags, cachep->c_gfporder);
 507        /* Assume that now we have the pages no one else can legally
 508         * messes with the 'struct page's.
 509         * However vm_scan() might try to test the structure to see if
 510         * it is a named-page or buffer-page.  The members it tests are
 511         * of no interest here.....
 512         */
 513        if (!*dma && addr) {
 514                /* Need to check if can dma. */
 515                struct page *page = mem_map + MAP_NR(addr);
 516                *dma = 1<<cachep->c_gfporder;
 517                while ((*dma)--) {
 518                        if (!PageDMA(page)) {
 519                                *dma = 0;
 520                                break;
 521                        }
 522                        page++;
 523                }
 524        }
 525        return addr;
 526}
 527
 528/* Interface to system's page release. */
 529static inline void
 530kmem_freepages(kmem_cache_t *cachep, void *addr)
 531{
 532        unsigned long i = (1<<cachep->c_gfporder);
 533        struct page *page = &mem_map[MAP_NR(addr)];
 534
 535        /* free_pages() does not clear the type bit - we do that.
 536         * The pages have been unlinked from their cache-slab,
 537         * but their 'struct page's might be accessed in
 538         * vm_scan(). Shouldn't be a worry.
 539         */
 540        while (i--) {
 541                PageClearSlab(page);
 542                page++;
 543        }
 544        free_pages((unsigned long)addr, cachep->c_gfporder); 
 545}
 546
 #if     SLAB_DEBUG_SUPPORT
 /* Fill the c_org_size bytes at 'addr' with the poison byte, and mark the
  * final byte with the end-of-poison value.
  */
 static inline void
 kmem_poison_obj(kmem_cache_t *cachep, void *addr)
 {
         unsigned char *bytes = addr;
         size_t len = cachep->c_org_size;

         memset(bytes, SLAB_POISON_BYTE, len);
         bytes[len-1] = SLAB_POISON_END;
 }

 /* Returns 0 when the first end-of-poison byte within the obj is its final
  * byte, and 1 otherwise (found earlier, or not found at all).
  */
 static inline int
 kmem_check_poison_obj(kmem_cache_t *cachep, void *addr)
 {
         const unsigned char *last =
                 (const unsigned char *)addr + cachep->c_org_size - 1;

         return memchr(addr, SLAB_POISON_END, cachep->c_org_size) !=
                (const void *)last;
 }
 #endif  /* SLAB_DEBUG_SUPPORT */
 565
 566/* Three slab chain funcs - all called with ints disabled and the appropriate
 567 * cache-lock held.
 568 */
 569static inline void
 570kmem_slab_unlink(kmem_slab_t *slabp)
 571{
 572        kmem_slab_t     *prevp = slabp->s_prevp;
 573        kmem_slab_t     *nextp = slabp->s_nextp;
 574        prevp->s_nextp = nextp;
 575        nextp->s_prevp = prevp;
 576}
 577
 578static inline void 
 579kmem_slab_link_end(kmem_cache_t *cachep, kmem_slab_t *slabp)
 580{
 581        kmem_slab_t     *lastp = cachep->c_lastp;
 582        slabp->s_nextp = kmem_slab_end(cachep);
 583        slabp->s_prevp = lastp;
 584        cachep->c_lastp = slabp;
 585        lastp->s_nextp = slabp;
 586}
 587
 588static inline void
 589kmem_slab_link_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
 590{
 591        kmem_slab_t     *nextp = cachep->c_freep;
 592        kmem_slab_t     *prevp = nextp->s_prevp;
 593        slabp->s_nextp = nextp;
 594        slabp->s_prevp = prevp;
 595        nextp->s_prevp = slabp;
 596        slabp->s_prevp->s_nextp = slabp;
 597}
 598
 599/* Destroy all the objs in a slab, and release the mem back to the system.
 600 * Before calling the slab must have been unlinked from the cache.
 601 * The cache-lock is not held/needed.
 602 */
 603static void
 604kmem_slab_destroy(kmem_cache_t *cachep, kmem_slab_t *slabp)
 605{
 606        if (cachep->c_dtor
 607#if     SLAB_DEBUG_SUPPORT
 608                || cachep->c_flags & (SLAB_POISON | SLAB_RED_ZONE)
 609#endif  /*SLAB_DEBUG_SUPPORT*/
 610        ) {
 611                /* Doesn't use the bufctl ptrs to find objs. */
 612                unsigned long num = cachep->c_num;
 613                void *objp = slabp->s_mem;
 614                do {
 615#if     SLAB_DEBUG_SUPPORT
 616                        if (cachep->c_flags & SLAB_RED_ZONE) {
 617                                if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1)
 618                                        printk(KERN_ERR "kmem_slab_destroy: "
 619                                               "Bad front redzone - %s\n",
 620                                               cachep->c_name);
 621                                objp += BYTES_PER_WORD;
 622                                if (*((unsigned long*)(objp+cachep->c_org_size)) !=
 623                                    SLAB_RED_MAGIC1)
 624                                        printk(KERN_ERR "kmem_slab_destroy: "
 625                                               "Bad rear redzone - %s\n",
 626                                               cachep->c_name);
 627                        }
 628                        if (cachep->c_dtor)
 629#endif  /*SLAB_DEBUG_SUPPORT*/
 630                                (cachep->c_dtor)(objp, cachep, 0);
 631#if     SLAB_DEBUG_SUPPORT
 632                        else if (cachep->c_flags & SLAB_POISON) {
 633                                if (kmem_check_poison_obj(cachep, objp))
 634                                        printk(KERN_ERR "kmem_slab_destroy: "
 635                                               "Bad poison - %s\n", cachep->c_name);
 636                        }
 637                        if (cachep->c_flags & SLAB_RED_ZONE)
 638                                objp -= BYTES_PER_WORD;
 639#endif  /* SLAB_DEBUG_SUPPORT */
 640                        objp += cachep->c_offset;
 641                        if (!slabp->s_index)
 642                                objp += sizeof(kmem_bufctl_t);
 643                } while (--num);
 644        }
 645
 646        slabp->s_magic = SLAB_MAGIC_DESTROYED;
 647        if (slabp->s_index)
 648                kmem_cache_free(cachep->c_index_cachep, slabp->s_index);
 649        kmem_freepages(cachep, slabp->s_mem-slabp->s_offset);
 650        if (SLAB_OFF_SLAB(cachep->c_flags))
 651                kmem_cache_free(cache_slabp, slabp);
 652}
 653
 654/* Cal the num objs, wastage, and bytes left over for a given slab size. */
 655static inline size_t
 656kmem_cache_cal_waste(unsigned long gfporder, size_t size, size_t extra,
 657                     unsigned long flags, size_t *left_over, unsigned long *num)
 658{
 659        size_t wastage = PAGE_SIZE<<gfporder;
 660
 661        if (SLAB_OFF_SLAB(flags))
 662                gfporder = 0;
 663        else
 664                gfporder = slab_align_size;
 665        wastage -= gfporder;
 666        *num = wastage / size;
 667        wastage -= (*num * size);
 668        *left_over = wastage;
 669
 670        return (wastage + gfporder + (extra * *num));
 671}
 672
 673/* Create a cache:
 674 * Returns a ptr to the cache on success, NULL on failure.
 675 * Cannot be called within a int, but can be interrupted.
 676 * NOTE: The 'name' is assumed to be memory that is _not_  going to disappear.
 677 */
 678kmem_cache_t *
 679kmem_cache_create(const char *name, size_t size, size_t offset,
 680        unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
 681        void (*dtor)(void*, kmem_cache_t *, unsigned long))
 682{
 683        const char *func_nm= KERN_ERR "kmem_create: ";
 684        kmem_cache_t    *searchp;
 685        kmem_cache_t    *cachep=NULL;
 686        size_t          extra;
 687        size_t          left_over;
 688        size_t          align;
 689
 690        /* Sanity checks... */
 691#if     SLAB_MGMT_CHECKS
 692        if (!name) {
 693                printk("%sNULL ptr\n", func_nm);
 694                goto opps;
 695        }
 696        if (in_interrupt()) {
 697                printk("%sCalled during int - %s\n", func_nm, name);
 698                goto opps;
 699        }
 700
 701        if (size < BYTES_PER_WORD) {
 702                printk("%sSize too small %d - %s\n", func_nm, (int) size, name);
 703                size = BYTES_PER_WORD;
 704        }
 705
 706        if (size > ((1<<SLAB_OBJ_MAX_ORDER)*PAGE_SIZE)) {
 707                printk("%sSize too large %d - %s\n", func_nm, (int) size, name);
 708                goto opps;
 709        }
 710
 711        if (dtor && !ctor) {
 712                /* Decon, but no con - doesn't make sense */
 713                printk("%sDecon but no con - %s\n", func_nm, name);
 714                goto opps;
 715        }
 716
 717        if (offset < 0 || offset > size) {
 718                printk("%sOffset weird %d - %s\n", func_nm, (int) offset, name);
 719                offset = 0;
 720        }
 721
 722#if     SLAB_DEBUG_SUPPORT
 723        if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
 724                /* No constructor, but inital state check requested */
 725                printk("%sNo con, but init state check requested - %s\n", func_nm, name);
 726                flags &= ~SLAB_DEBUG_INITIAL;
 727        }
 728
 729        if ((flags & SLAB_POISON) && ctor) {
 730                /* request for poisoning, but we can't do that with a constructor */
 731                printk("%sPoisoning requested, but con given - %s\n", func_nm, name);
 732                flags &= ~SLAB_POISON;
 733        }
 734#if     0
 735        if ((flags & SLAB_HIGH_PACK) && ctor) {
 736                printk("%sHigh pack requested, but con given - %s\n", func_nm, name);
 737                flags &= ~SLAB_HIGH_PACK;
 738        }
 739        if ((flags & SLAB_HIGH_PACK) && (flags & (SLAB_POISON|SLAB_RED_ZONE))) {
 740                printk("%sHigh pack requested, but with poisoning/red-zoning - %s\n",
 741                       func_nm, name);
 742                flags &= ~SLAB_HIGH_PACK;
 743        }
 744#endif
 745#endif  /* SLAB_DEBUG_SUPPORT */
 746#endif  /* SLAB_MGMT_CHECKS */
 747
 748        /* Always checks flags, a caller might be expecting debug
 749         * support which isn't available.
 750         */
 751        if (flags & ~SLAB_C_MASK) {
 752                printk("%sIllgl flg %lX - %s\n", func_nm, flags, name);
 753                flags &= SLAB_C_MASK;
 754        }
 755
 756        /* Get cache's description obj. */
 757        cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
 758        if (!cachep)
 759                goto opps;
 760        memset(cachep, 0, sizeof(kmem_cache_t));
 761
 762        /* Check that size is in terms of words.  This is needed to avoid
 763         * unaligned accesses for some archs when redzoning is used, and makes
 764         * sure any on-slab bufctl's are also correctly aligned.
 765         */
 766        if (size & (BYTES_PER_WORD-1)) {
 767                size += (BYTES_PER_WORD-1);
 768                size &= ~(BYTES_PER_WORD-1);
 769                printk("%sForcing size word alignment - %s\n", func_nm, name);
 770        }
 771
 772        cachep->c_org_size = size;
 773#if     SLAB_DEBUG_SUPPORT
 774        if (flags & SLAB_RED_ZONE) {
 775                /* There is no point trying to honour cache alignment when redzoning. */
 776                flags &= ~SLAB_HWCACHE_ALIGN;
 777                size += 2*BYTES_PER_WORD;               /* words for redzone */
 778        }
 779#endif  /* SLAB_DEBUG_SUPPORT */
 780
 781        align = BYTES_PER_WORD;
 782        if (flags & SLAB_HWCACHE_ALIGN)
 783                align = L1_CACHE_BYTES;
 784
 785        /* Determine if the slab management and/or bufclts are 'on' or 'off' slab. */
 786        extra = sizeof(kmem_bufctl_t);
 787        if (size < (PAGE_SIZE>>3)) {
 788                /* Size is small(ish).  Use packing where bufctl size per
 789                 * obj is low, and slab management is on-slab.
 790                 */
 791#if     0
 792                if ((flags & SLAB_HIGH_PACK)) {
 793                        /* Special high packing for small objects
 794                         * (mainly for vm_mapping structs, but
 795                         * others can use it).
 796                         */
 797                        if (size == (L1_CACHE_BYTES/4) || size == (L1_CACHE_BYTES/2) ||
 798                            size == L1_CACHE_BYTES) {
 799                                /* The bufctl is stored with the object. */
 800                                extra = 0;
 801                        } else
 802                                flags &= ~SLAB_HIGH_PACK;
 803                }
 804#endif
 805        } else {
 806                /* Size is large, assume best to place the slab management obj
 807                 * off-slab (should allow better packing of objs).
 808                 */
 809                flags |= SLAB_CFLGS_OFF_SLAB;
 810                if (!(size & ~PAGE_MASK) || size == (PAGE_SIZE/2)
 811                    || size == (PAGE_SIZE/4) || size == (PAGE_SIZE/8)) {
 812                        /* To avoid waste the bufctls are off-slab... */
 813                        flags |= SLAB_CFLGS_BUFCTL;
 814                        extra = 0;
 815                } /* else slab management is off-slab, but freelist pointers are on. */
 816        }
 817        size += extra;
 818
 819        if (flags & SLAB_HWCACHE_ALIGN) {
 820                /* Need to adjust size so that objs are cache aligned. */
 821                if (size > (L1_CACHE_BYTES/2)) {
 822                        size_t words = size % L1_CACHE_BYTES;
 823                        if (words)
 824                                size += (L1_CACHE_BYTES-words);
 825                } else {
 826                        /* Small obj size, can get at least two per cache line. */
 827                        int num_per_line = L1_CACHE_BYTES/size;
 828                        left_over = L1_CACHE_BYTES - (num_per_line*size);
 829                        if (left_over) {
 830                                /* Need to adjust size so objs cache align. */
 831                                if (left_over%num_per_line) {
 832                                        /* Odd num of objs per line - fixup. */
 833                                        num_per_line--;
 834                                        left_over += size;
 835                                }
 836                                size += (left_over/num_per_line);
 837                        }
 838                }
 839        } else if (!(size%L1_CACHE_BYTES)) {
 840                /* Size happens to cache align... */
 841                flags |= SLAB_HWCACHE_ALIGN;
 842                align = L1_CACHE_BYTES;
 843        }
 844
 845        /* Cal size (in pages) of slabs, and the num of objs per slab.
 846         * This could be made much more intelligent.  For now, try to avoid
 847         * using high page-orders for slabs.  When the gfp() funcs are more
 848         * friendly towards high-order requests, this should be changed.
 849         */
 850        do {
 851                size_t wastage;
 852                unsigned int break_flag = 0;
 853cal_wastage:
 854                wastage = kmem_cache_cal_waste(cachep->c_gfporder, size, extra,
 855                                               flags, &left_over, &cachep->c_num);
 856                if (!cachep->c_num)
 857                        goto next;
 858                if (break_flag)
 859                        break;
 860                if (SLAB_BUFCTL(flags) && cachep->c_num > bufctl_limit) {
 861                        /* Oops, this num of objs will cause problems. */
 862                        cachep->c_gfporder--;
 863                        break_flag++;
 864                        goto cal_wastage;
 865                }
 866                if (cachep->c_gfporder == SLAB_MAX_GFP_ORDER)
 867                        break;
 868
 869                /* Large num of objs is good, but v. large slabs are currently
 870                 * bad for the gfp()s.
 871                 */
 872                if (cachep->c_num <= SLAB_MIN_OBJS_PER_SLAB) {
 873                        if (cachep->c_gfporder < slab_break_gfp_order)
 874                                goto next;
 875                }
 876
 877                /* Stop caches with small objs having a large num of pages. */
 878                if (left_over <= slab_align_size)
 879                        break;
 880                if ((wastage*8) <= (PAGE_SIZE<<cachep->c_gfporder))
 881                        break;  /* Acceptable internal fragmentation. */
 882next:
 883                cachep->c_gfporder++;
 884        } while (1);
 885
 886        /* If the slab has been placed off-slab, and we have enough space then
 887         * move it on-slab.  This is at the expense of any extra colouring.
 888         */
 889        if ((flags & SLAB_CFLGS_OFF_SLAB) && !SLAB_BUFCTL(flags) &&
 890            left_over >= slab_align_size) {
 891                flags &= ~SLAB_CFLGS_OFF_SLAB;
 892                left_over -= slab_align_size;
 893        }
 894
 895        /* Offset must be a factor of the alignment. */
 896        offset += (align-1);
 897        offset &= ~(align-1);
 898
 899        /* Mess around with the offset alignment. */
 900        if (!left_over) {
 901                offset = 0;
 902        } else if (left_over < offset) {
 903                offset = align;
 904                if (flags & SLAB_HWCACHE_ALIGN) {
 905                        if (left_over < offset)
 906                                offset = 0;
 907                } else {
 908                        /* Offset is BYTES_PER_WORD, and left_over is at
 909                         * least BYTES_PER_WORD.
 910                         */
 911                        if (left_over >= (BYTES_PER_WORD*2)) {
 912                                offset >>= 1;
 913                                if (left_over >= (BYTES_PER_WORD*4))
 914                                        offset >>= 1;
 915                        }
 916                }
 917        } else if (!offset) {
 918                /* No offset requested, but space enough - give one. */
 919                offset = left_over/align;
 920                if (flags & SLAB_HWCACHE_ALIGN) {
 921                        if (offset >= 8) {
 922                                /* A large number of colours - use a larger alignment. */
 923                                align <<= 1;
 924                        }
 925                } else {
 926                        if (offset >= 10) {
 927                                align <<= 1;
 928                                if (offset >= 16)
 929                                        align <<= 1;
 930                        }
 931                }
 932                offset = align;
 933        }
 934
 935#if     0
 936printk("%s: Left_over:%d Align:%d Size:%d\n", name, left_over, offset, size);
 937#endif
 938
 939        if ((cachep->c_align = (unsigned long) offset))
 940                cachep->c_colour = (left_over/offset);
 941        cachep->c_colour_next = cachep->c_colour;
 942
 943        /* If the bufctl's are on-slab, c_offset does not include the size of bufctl. */
 944        if (!SLAB_BUFCTL(flags))
 945                size -= sizeof(kmem_bufctl_t);
 946        else
 947                cachep->c_index_cachep =
 948                        kmem_find_general_cachep(cachep->c_num*sizeof(kmem_bufctl_t));
 949        cachep->c_offset = (unsigned long) size;
 950        cachep->c_freep = kmem_slab_end(cachep);
 951        cachep->c_firstp = kmem_slab_end(cachep);
 952        cachep->c_lastp = kmem_slab_end(cachep);
 953        cachep->c_flags = flags;
 954        cachep->c_ctor = ctor;
 955        cachep->c_dtor = dtor;
 956        cachep->c_magic = SLAB_C_MAGIC;
 957        cachep->c_name = name;          /* Simply point to the name. */
 958        spin_lock_init(&cachep->c_spinlock);
 959
 960        /* Need the semaphore to access the chain. */
 961        down(&cache_chain_sem);
 962        searchp = &cache_cache;
 963        do {
 964                /* The name field is constant - no lock needed. */
 965                if (!strcmp(searchp->c_name, name)) {
 966                        printk("%sDup name - %s\n", func_nm, name);
 967                        break;
 968                }
 969                searchp = searchp->c_nextp;
 970        } while (searchp != &cache_cache);
 971
 972        /* There is no reason to lock our new cache before we
 973         * link it in - no one knows about it yet...
 974         */
 975        cachep->c_nextp = cache_cache.c_nextp;
 976        cache_cache.c_nextp = cachep;
 977        up(&cache_chain_sem);
 978opps:
 979        return cachep;
 980}
 981
 982/* Shrink a cache.  Releases as many slabs as possible for a cache.
 983 * It is expected this function will be called by a module when it is
 984 * unloaded.  The cache is _not_ removed, this creates too many problems and
 985 * the cache-structure does not take up much room.  A module should keep its
 986 * cache pointer(s) in unloaded memory, so when reloaded it knows the cache
 987 * is available.  To help debugging, a zero exit status indicates all slabs
 988 * were released.
 989 */
 990
 991static int __kmem_cache_shrink(kmem_cache_t *cachep, int validated)
 992{
 993        kmem_cache_t    *searchp;
 994        kmem_slab_t     *slabp;
 995        int     ret;
 996
 997        if (!cachep) {
 998                printk(KERN_ERR "kmem_shrink: NULL ptr\n");
 999                return 2;
1000        }
1001        if (in_interrupt()) {
1002                printk(KERN_ERR "kmem_shrink: Called during int - %s\n", cachep->c_name);
1003                return 2;
1004        }
1005
1006        if(validated==0)
1007        {
1008                /* Find the cache in the chain of caches. */
1009                down(&cache_chain_sem);         /* Semaphore is needed. */
1010                searchp = &cache_cache;
1011                for (;searchp->c_nextp != &cache_cache; searchp = searchp->c_nextp) {
1012                        if (searchp->c_nextp != cachep)
1013                                continue;
1014
1015                        /* Accessing clock_searchp is safe - we hold the mutex. */
1016                        if (cachep == clock_searchp)
1017                                clock_searchp = cachep->c_nextp;
1018                        goto found;
1019                }
1020                up(&cache_chain_sem);
1021                printk(KERN_ERR "kmem_shrink: Invalid cache addr %p\n", cachep);
1022                return 2;
1023found:
1024                /* Release the semaphore before getting the cache-lock.  This could
1025                 * mean multiple engines are shrinking the cache, but so what.
1026                 */
1027                up(&cache_chain_sem);
1028        }
1029        spin_lock_irq(&cachep->c_spinlock);
1030
1031        /* If the cache is growing, stop shrinking. */
1032        while (!cachep->c_growing) {
1033                slabp = cachep->c_lastp;
1034                if (slabp->s_inuse || slabp == kmem_slab_end(cachep))
1035                        break;
1036                kmem_slab_unlink(slabp);
1037                spin_unlock_irq(&cachep->c_spinlock);
1038                kmem_slab_destroy(cachep, slabp);
1039                spin_lock_irq(&cachep->c_spinlock);
1040        }
1041        ret = 1;
1042        if (cachep->c_lastp == kmem_slab_end(cachep))
1043                ret--;          /* Cache is empty. */
1044        spin_unlock_irq(&cachep->c_spinlock);
1045        return ret;
1046}
1047
1048int kmem_cache_shrink(kmem_cache_t *cachep)
1049{
1050        return __kmem_cache_shrink(cachep,0);
1051}
1052
1053/*
1054 * Remove a kmem_cache_t object from the slab cache. When returns 0 it
1055 * completed succesfully. -arca
1056 */
1057int kmem_cache_destroy(kmem_cache_t * cachep)
1058{
1059        kmem_cache_t * prev;
1060        int ret;
1061
1062        if (!cachep) {
1063                printk(KERN_ERR "kmem_destroy: NULL ptr\n");
1064                return 1;
1065        }
1066        if (in_interrupt()) {
1067                printk(KERN_ERR "kmem_destroy: Called during int - %s\n",
1068                       cachep->c_name);
1069                return 1;
1070        }
1071
1072        ret = 0;
1073        /* Find the cache in the chain of caches. */
1074        down(&cache_chain_sem);
1075        for (prev = &cache_cache; prev->c_nextp != &cache_cache;
1076             prev = prev->c_nextp) {
1077                if (prev->c_nextp != cachep)
1078                        continue;
1079
1080                /* Accessing clock_searchp is safe - we hold the mutex. */
1081                if (cachep == clock_searchp)
1082                        clock_searchp = cachep->c_nextp;
1083
1084                /* remove the cachep from the cache_cache list. -arca */
1085                prev->c_nextp = cachep->c_nextp;
1086
1087                ret = 1;
1088                break;
1089        }
1090        up(&cache_chain_sem);
1091
1092        if (!ret) {
1093                printk(KERN_ERR "kmem_destroy: Invalid cache addr %p\n",
1094                       cachep);
1095                return 1;
1096        }
1097
1098        if (__kmem_cache_shrink(cachep, 1)) {
1099                printk(KERN_ERR "kmem_destroy: Can't free all objects %p\n",
1100                       cachep);
1101                down(&cache_chain_sem);
1102                cachep->c_nextp = cache_cache.c_nextp;
1103                cache_cache.c_nextp = cachep;
1104                up(&cache_chain_sem);
1105                return 1;
1106        }
1107
1108        kmem_cache_free(&cache_cache, cachep);
1109
1110        return 0;
1111}
1112
1113/* Get the memory for a slab management obj. */
1114static inline kmem_slab_t *
1115kmem_cache_slabmgmt(kmem_cache_t *cachep, void *objp, int local_flags)
1116{
1117        kmem_slab_t     *slabp;
1118
1119        if (SLAB_OFF_SLAB(cachep->c_flags)) {
1120                /* Slab management obj is off-slab. */
1121                slabp = kmem_cache_alloc(cache_slabp, local_flags);
1122        } else {
1123                /* Slab management at end of slab memory, placed so that
1124                 * the position is 'coloured'.
1125                 */
1126                void *end;
1127                end = objp + (cachep->c_num * cachep->c_offset);
1128                if (!SLAB_BUFCTL(cachep->c_flags))
1129                        end += (cachep->c_num * sizeof(kmem_bufctl_t));
1130                slabp = (kmem_slab_t *) L1_CACHE_ALIGN((unsigned long)end);
1131        }
1132
1133        if (slabp) {
1134                slabp->s_inuse = 0;
1135                slabp->s_dma = 0;
1136                slabp->s_index = NULL;
1137        }
1138
1139        return slabp;
1140}
1141
1142static inline void
1143kmem_cache_init_objs(kmem_cache_t * cachep, kmem_slab_t * slabp, void *objp,
1144                                unsigned long ctor_flags)
1145{
1146        kmem_bufctl_t   **bufpp = &slabp->s_freep;
1147        unsigned long   num = cachep->c_num-1;
1148
1149        do {
1150#if     SLAB_DEBUG_SUPPORT
1151                if (cachep->c_flags & SLAB_RED_ZONE) {
1152                        *((unsigned long*)(objp)) = SLAB_RED_MAGIC1;
1153                        objp += BYTES_PER_WORD;
1154                        *((unsigned long*)(objp+cachep->c_org_size)) = SLAB_RED_MAGIC1;
1155                }
1156#endif  /* SLAB_DEBUG_SUPPORT */
1157
1158                /* Constructors are not allowed to allocate memory from the same cache
1159                 * which they are a constructor for.  Otherwise, deadlock.
1160                 * They must also be threaded.
1161                 */
1162                if (cachep->c_ctor)
1163                        cachep->c_ctor(objp, cachep, ctor_flags);
1164#if     SLAB_DEBUG_SUPPORT
1165                else if (cachep->c_flags & SLAB_POISON) {
1166                        /* need to poison the objs */
1167                        kmem_poison_obj(cachep, objp);
1168                }
1169
1170                if (cachep->c_flags & SLAB_RED_ZONE) {
1171                        if (*((unsigned long*)(objp+cachep->c_org_size)) !=
1172                            SLAB_RED_MAGIC1) {
1173                                *((unsigned long*)(objp+cachep->c_org_size)) =
1174                                        SLAB_RED_MAGIC1;
1175                                printk(KERN_ERR "kmem_init_obj: Bad rear redzone "
1176                                       "after constructor - %s\n", cachep->c_name);
1177                        }
1178                        objp -= BYTES_PER_WORD;
1179                        if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1) {
1180                                *((unsigned long*)(objp)) = SLAB_RED_MAGIC1;
1181                                printk(KERN_ERR "kmem_init_obj: Bad front redzone "
1182                                       "after constructor - %s\n", cachep->c_name);
1183                        }
1184                }
1185#endif  /* SLAB_DEBUG_SUPPORT */
1186
1187                objp += cachep->c_offset;
1188                if (!slabp->s_index) {
1189                        *bufpp = objp;
1190                        objp += sizeof(kmem_bufctl_t);
1191                } else
1192                        *bufpp = &slabp->s_index[num];
1193                bufpp = &(*bufpp)->buf_nextp;
1194        } while (num--);
1195
1196        *bufpp = NULL;
1197}
1198
1199/* Grow (by 1) the number of slabs within a cache.  This is called by
1200 * kmem_cache_alloc() when there are no active objs left in a cache.
1201 */
1202static int
1203kmem_cache_grow(kmem_cache_t * cachep, int flags)
1204{
1205        kmem_slab_t     *slabp;
1206        struct page     *page;
1207        void            *objp;
1208        size_t           offset;
1209        unsigned int     dma, local_flags;
1210        unsigned long    ctor_flags;
1211        unsigned long    save_flags;
1212
1213        /* Be lazy and only check for valid flags here,
1214         * keeping it out of the critical path in kmem_cache_alloc().
1215         */
1216        if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW)) {
1217                printk(KERN_WARNING "kmem_grow: Illegal flgs %X (correcting) - %s\n",
1218                       flags, cachep->c_name);
1219                flags &= (SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW);
1220        }
1221
1222        if (flags & SLAB_NO_GROW)
1223                return 0;
1224
1225        /* The test for missing atomic flag is performed here, rather than
1226         * the more obvious place, simply to reduce the critical path length
1227         * in kmem_cache_alloc().  If a caller is slightly mis-behaving they
1228         * will eventually be caught here (where it matters).
1229         */
1230        if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC) {
1231                printk(KERN_ERR "kmem_grow: Called nonatomically from int - %s\n",
1232                       cachep->c_name);
1233                flags &= ~SLAB_LEVEL_MASK;
1234                flags |= SLAB_ATOMIC;
1235        }
1236        ctor_flags = SLAB_CTOR_CONSTRUCTOR;
1237        local_flags = (flags & SLAB_LEVEL_MASK);
1238        if (local_flags == SLAB_ATOMIC) {
1239                /* Not allowed to sleep.  Need to tell a constructor about
1240                 * this - it might need to know...
1241                 */
1242                ctor_flags |= SLAB_CTOR_ATOMIC;
1243        }
1244
1245        /* About to mess with non-constant members - lock. */
1246        spin_lock_irqsave(&cachep->c_spinlock, save_flags);
1247
1248        /* Get colour for the slab, and cal the next value. */
1249        if (!(offset = cachep->c_colour_next--))
1250                cachep->c_colour_next = cachep->c_colour;
1251        offset *= cachep->c_align;
1252        cachep->c_dflags = SLAB_CFLGS_GROWN;
1253
1254        cachep->c_growing++;
1255        spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1256
1257        /* A series of memory allocations for a new slab.
1258         * Neither the cache-chain semaphore, or cache-lock, are
1259         * held, but the incrementing c_growing prevents this
1260         * cache from being reaped or shrunk.
1261         * Note: The cache could be selected in for reaping in
1262         * kmem_cache_reap(), but when the final test is made the
1263         * growing value will be seen.
1264         */
1265
1266        /* Get mem for the objs. */
1267        if (!(objp = kmem_getpages(cachep, flags, &dma)))
1268                goto failed;
1269
1270        /* Get slab management. */
1271        if (!(slabp = kmem_cache_slabmgmt(cachep, objp+offset, local_flags)))
1272                goto opps1;
1273        if (dma)
1274                slabp->s_dma = 1;
1275        if (SLAB_BUFCTL(cachep->c_flags)) {
1276                slabp->s_index = kmem_cache_alloc(cachep->c_index_cachep, local_flags);
1277                if (!slabp->s_index)
1278                        goto opps2;
1279        }
1280
1281        /* Nasty!!!!!!  I hope this is OK. */
1282        dma = 1 << cachep->c_gfporder;
1283        page = &mem_map[MAP_NR(objp)];
1284        do {
1285                SLAB_SET_PAGE_CACHE(page, cachep);
1286                SLAB_SET_PAGE_SLAB(page, slabp);
1287                PageSetSlab(page);
1288                page++;
1289        } while (--dma);
1290
1291        slabp->s_offset = offset;       /* It will fit... */
1292        objp += offset;         /* Address of first object. */
1293        slabp->s_mem = objp;
1294
1295        /* For on-slab bufctls, c_offset is the distance between the start of
1296         * an obj and its related bufctl.  For off-slab bufctls, c_offset is
1297         * the distance between objs in the slab.
1298         */
1299        kmem_cache_init_objs(cachep, slabp, objp, ctor_flags);
1300
1301        spin_lock_irq(&cachep->c_spinlock);
1302
1303        /* Make slab active. */
1304        slabp->s_magic = SLAB_MAGIC_ALLOC;
1305        kmem_slab_link_end(cachep, slabp);
1306        if (cachep->c_freep == kmem_slab_end(cachep))
1307                cachep->c_freep = slabp;
1308        SLAB_STATS_INC_GROWN(cachep);
1309        cachep->c_failures = 0;
1310        cachep->c_growing--;
1311
1312        spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1313        return 1;
1314opps2:
1315        if (SLAB_OFF_SLAB(cachep->c_flags))
1316                kmem_cache_free(cache_slabp, slabp);
1317opps1:
1318        kmem_freepages(cachep, objp); 
1319failed:
1320        spin_lock_irq(&cachep->c_spinlock);
1321        cachep->c_growing--;
1322        spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1323        return 0;
1324}
1325
1326static void
1327kmem_report_alloc_err(const char *str, kmem_cache_t * cachep)
1328{
1329        if (cachep)
1330                SLAB_STATS_INC_ERR(cachep);     /* this is atomic */
1331        printk(KERN_ERR "kmem_alloc: %s (name=%s)\n",
1332               str, cachep ? cachep->c_name : "unknown");
1333}
1334
1335static void
1336kmem_report_free_err(const char *str, const void *objp, kmem_cache_t * cachep)
1337{
1338        if (cachep)
1339                SLAB_STATS_INC_ERR(cachep);
1340        printk(KERN_ERR "kmem_free: %s (objp=%p, name=%s)\n",
1341               str, objp, cachep ? cachep->c_name : "unknown");
1342}
1343
1344/* Search for a slab whose objs are suitable for DMA.
1345 * Note: since testing the first free slab (in __kmem_cache_alloc()),
1346 * ints must not have been enabled, or the cache-lock released!
1347 */
1348static inline kmem_slab_t *
1349kmem_cache_search_dma(kmem_cache_t * cachep)
1350{
1351        kmem_slab_t     *slabp = cachep->c_freep->s_nextp;
1352
1353        for (; slabp != kmem_slab_end(cachep); slabp = slabp->s_nextp) {
1354                if (!(slabp->s_dma))
1355                        continue;
1356                kmem_slab_unlink(slabp);
1357                kmem_slab_link_free(cachep, slabp);
1358                cachep->c_freep = slabp;
1359                break;
1360        }
1361        return slabp;
1362}
1363
#if     SLAB_DEBUG_SUPPORT
/* Extra checking when an obj is freed.  Only caches whose bufctls live
 * within the slab are checked here; those using bufctls from the internal
 * cache already get a reasonable check when the address is searched for.
 * Called with the cache-lock held.
 *
 * Walks the free list starting at 'search_bufp'.  Returns NULL if 'bufp'
 * is already on it, otherwise returns 'objp' unchanged.
 */
static void *
kmem_extra_free_checks(kmem_cache_t * cachep, kmem_bufctl_t *search_bufp,
                       kmem_bufctl_t *bufp, void * objp)
{
        if (!SLAB_BUFCTL(cachep->c_flags)) {
                /* Is this obj's bufctl already on the slab's freelist? */
                while (search_bufp) {
                        if (search_bufp == bufp)
                                return NULL;
                        search_bufp = search_bufp->buf_nextp;
                }
        }
        return objp;
}
#endif  /* SLAB_DEBUG_SUPPORT */
1386
1387/* Called with cache lock held. */
1388static inline void
1389kmem_cache_full_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
1390{
1391        if (slabp->s_nextp->s_inuse) {
1392                /* Not at correct position. */
1393                if (cachep->c_freep == slabp)
1394                        cachep->c_freep = slabp->s_nextp;
1395                kmem_slab_unlink(slabp);
1396                kmem_slab_link_end(cachep, slabp);
1397        }
1398}
1399
1400/* Called with cache lock held. */
1401static inline void
1402kmem_cache_one_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
1403{
1404        if (slabp->s_nextp->s_inuse == cachep->c_num) {
1405                kmem_slab_unlink(slabp);
1406                kmem_slab_link_free(cachep, slabp);
1407        }
1408        cachep->c_freep = slabp;
1409}
1410
1411/* Returns a ptr to an obj in the given cache. */
1412static inline void *
1413__kmem_cache_alloc(kmem_cache_t *cachep, int flags)
1414{
1415        kmem_slab_t     *slabp;
1416        kmem_bufctl_t   *bufp;
1417        void            *objp;
1418        unsigned long   save_flags;
1419
1420        /* Sanity check. */
1421        if (!cachep)
1422                goto nul_ptr;
1423        spin_lock_irqsave(&cachep->c_spinlock, save_flags);
1424try_again:
1425        /* Get slab alloc is to come from. */
1426        slabp = cachep->c_freep;
1427
1428        /* Magic is a sanity check _and_ says if we need a new slab. */
1429        if (slabp->s_magic != SLAB_MAGIC_ALLOC)
1430                goto alloc_new_slab;
1431        /* DMA requests are 'rare' - keep out of the critical path. */
1432        if (flags & SLAB_DMA)
1433                goto search_dma;
1434try_again_dma:
1435        SLAB_STATS_INC_ALLOCED(cachep);
1436        SLAB_STATS_INC_ACTIVE(cachep);
1437        SLAB_STATS_SET_HIGH(cachep);
1438        slabp->s_inuse++;
1439        bufp = slabp->s_freep;
1440        slabp->s_freep = bufp->buf_nextp;
1441        if (slabp->s_freep) {
1442ret_obj:
1443                if (!slabp->s_index) {
1444                        bufp->buf_slabp = slabp;
1445                        objp = ((void*)bufp) - cachep->c_offset;
1446finished:
1447                        /* The lock is not needed by the red-zone or poison ops, and the
1448                         * obj has been removed from the slab.  Should be safe to drop
1449                         * the lock here.
1450                         */
1451                        spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1452#if     SLAB_DEBUG_SUPPORT
1453                        if (cachep->c_flags & SLAB_RED_ZONE)
1454                                goto red_zone;
1455ret_red:
1456                        if ((cachep->c_flags & SLAB_POISON) && kmem_check_poison_obj(cachep, objp))
1457                                kmem_report_alloc_err("Bad poison", cachep);
1458#endif  /* SLAB_DEBUG_SUPPORT */
1459                        return objp;
1460                }
1461                /* Update index ptr. */
1462                objp = ((bufp-slabp->s_index)*cachep->c_offset) + slabp->s_mem;
1463                bufp->buf_objp = objp;
1464                goto finished;
1465        }
1466        cachep->c_freep = slabp->s_nextp;
1467        goto ret_obj;
1468
1469#if     SLAB_DEBUG_SUPPORT
1470red_zone:
1471        /* Set alloc red-zone, and check old one. */
1472        if (xchg((unsigned long *)objp, SLAB_RED_MAGIC2) != SLAB_RED_MAGIC1)
1473                kmem_report_alloc_err("Bad front redzone", cachep);
1474        objp += BYTES_PER_WORD;
1475        if (xchg((unsigned long *)(objp+cachep->c_org_size), SLAB_RED_MAGIC2) != SLAB_RED_MAGIC1)
1476                kmem_report_alloc_err("Bad rear redzone", cachep);
1477        goto ret_red;
1478#endif  /* SLAB_DEBUG_SUPPORT */
1479
1480search_dma:
1481        if (slabp->s_dma || (slabp = kmem_cache_search_dma(cachep))!=kmem_slab_end(cachep))
1482                goto try_again_dma;
1483alloc_new_slab:
1484        /* Either out of slabs, or magic number corruption. */
1485        if (slabp == kmem_slab_end(cachep)) {
1486                /* Need a new slab.  Release the lock before calling kmem_cache_grow().
1487                 * This allows objs to be released back into the cache while growing.
1488                 */
1489                spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1490                if (kmem_cache_grow(cachep, flags)) {
1491                        /* Someone may have stolen our objs.  Doesn't matter, we'll
1492                         * just come back here again.
1493                         */
1494                        spin_lock_irq(&cachep->c_spinlock);
1495                        goto try_again;
1496                }
1497                /* Couldn't grow, but some objs may have been freed. */
1498                spin_lock_irq(&cachep->c_spinlock);
1499                if (cachep->c_freep != kmem_slab_end(cachep)) {
1500                        if ((flags & SLAB_ATOMIC) == 0) 
1501                                goto try_again;
1502                }
1503        } else {
1504                /* Very serious error - maybe panic() here? */
1505                kmem_report_alloc_err("Bad slab magic (corrupt)", cachep);
1506        }
1507        spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1508err_exit:
1509        return NULL;
1510nul_ptr:
1511        kmem_report_alloc_err("NULL ptr", NULL);
1512        goto err_exit;
1513}
1514
1515/* Release an obj back to its cache.  If the obj has a constructed state,
1516 * it should be in this state _before_ it is released.
1517 */
1518static inline void
1519__kmem_cache_free(kmem_cache_t *cachep, const void *objp)
1520{
1521        kmem_slab_t     *slabp;
1522        kmem_bufctl_t   *bufp;
1523        unsigned long   save_flags;
1524
1525        /* Basic sanity checks. */
1526        if (!cachep || !objp)
1527                goto null_addr;
1528
1529#if     SLAB_DEBUG_SUPPORT
1530        /* A verify func is called without the cache-lock held. */
1531        if (cachep->c_flags & SLAB_DEBUG_INITIAL)
1532                goto init_state_check;
1533finished_initial:
1534
1535        if (cachep->c_flags & SLAB_RED_ZONE)
1536                goto red_zone;
1537return_red:
1538#endif  /* SLAB_DEBUG_SUPPORT */
1539
1540        spin_lock_irqsave(&cachep->c_spinlock, save_flags);
1541
1542        if (SLAB_BUFCTL(cachep->c_flags))
1543                goto bufctl;
1544        bufp = (kmem_bufctl_t *)(objp+cachep->c_offset);
1545
1546        /* Get slab for the object. */
1547#if     0
1548        /* _NASTY_IF/ELSE_, but avoids a 'distant' memory ref for some objects.
1549         * Is this worth while? XXX
1550         */
1551        if (cachep->c_flags & SLAB_HIGH_PACK)
1552                slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(bufp)]);
1553        else
1554#endif
1555                slabp = bufp->buf_slabp;
1556
1557check_magic:
1558        if (slabp->s_magic != SLAB_MAGIC_ALLOC)         /* Sanity check. */
1559                goto bad_slab;
1560
1561#if     SLAB_DEBUG_SUPPORT
1562        if (cachep->c_flags & SLAB_DEBUG_FREE)
1563                goto extra_checks;
1564passed_extra:
1565#endif  /* SLAB_DEBUG_SUPPORT */
1566
1567        if (slabp->s_inuse) {           /* Sanity check. */
1568                SLAB_STATS_DEC_ACTIVE(cachep);
1569                slabp->s_inuse--;
1570                bufp->buf_nextp = slabp->s_freep;
1571                slabp->s_freep = bufp;
1572                if (bufp->buf_nextp) {
1573                        if (slabp->s_inuse) {
1574                                /* (hopefully) The most common case. */
1575finished:
1576#if     SLAB_DEBUG_SUPPORT
1577                                if (cachep->c_flags & SLAB_POISON) {
1578                                        if (cachep->c_flags & SLAB_RED_ZONE)
1579                                                objp += BYTES_PER_WORD;
1580                                        kmem_poison_obj(cachep, objp);
1581                                }
1582#endif  /* SLAB_DEBUG_SUPPORT */
1583                                spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1584                                return;
1585                        }
1586                        kmem_cache_full_free(cachep, slabp);
1587                        goto finished;
1588                }
1589                kmem_cache_one_free(cachep, slabp);
1590                goto finished;
1591        }
1592
1593        /* Don't add to freelist. */
1594        spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1595        kmem_report_free_err("free with no active objs", objp, cachep);
1596        return;
1597bufctl:
1598        /* No 'extra' checks are performed for objs stored this way, finding
1599         * the obj is check enough.
1600         */
1601        slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(objp)]);
1602        bufp =  &slabp->s_index[(objp - slabp->s_mem)/cachep->c_offset];
1603        if (bufp->buf_objp == objp)
1604                goto check_magic;
1605        spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1606        kmem_report_free_err("Either bad obj addr or double free", objp, cachep);
1607        return;
1608#if     SLAB_DEBUG_SUPPORT
1609init_state_check:
1610        /* Need to call the slab's constructor so the
1611         * caller can perform a verify of its state (debugging).
1612         */
1613        cachep->c_ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
1614        goto finished_initial;
1615extra_checks:
1616        if (!kmem_extra_free_checks(cachep, slabp->s_freep, bufp, objp)) {
1617                spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1618                kmem_report_free_err("Double free detected during checks", objp, cachep);
1619                return;
1620        }
1621        goto passed_extra;
1622red_zone:
1623        /* We do not hold the cache-lock while checking the red-zone.
1624         */
1625        objp -= BYTES_PER_WORD;
1626        if (xchg((unsigned long *)objp, SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) {
1627                /* Either write before start of obj, or a double free. */
1628                kmem_report_free_err("Bad front redzone", objp, cachep);
1629        }
1630        if (xchg((unsigned long *)(objp+cachep->c_org_size+BYTES_PER_WORD), SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) {
1631                /* Either write past end of obj, or a double free. */
1632                kmem_report_free_err("Bad rear redzone", objp, cachep);
1633        }
1634        goto return_red;
1635#endif  /* SLAB_DEBUG_SUPPORT */
1636
1637bad_slab:
1638        /* Slab doesn't contain the correct magic num. */
1639        if (slabp->s_magic == SLAB_MAGIC_DESTROYED) {
1640                /* Magic num says this is a destroyed slab. */
1641                kmem_report_free_err("free from inactive slab", objp, cachep);
1642        } else
1643                kmem_report_free_err("Bad obj addr", objp, cachep);
1644        spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1645
1646#if 1
1647/* FORCE A KERNEL DUMP WHEN THIS HAPPENS. SPEAK IN ALL CAPS. GET THE CALL CHAIN. */
1648*(int *) 0 = 0;
1649#endif
1650
1651        return;
1652null_addr:
1653        kmem_report_free_err("NULL ptr", objp, cachep);
1654        return;
1655}
1656
1657void *
1658kmem_cache_alloc(kmem_cache_t *cachep, int flags)
1659{
1660        return __kmem_cache_alloc(cachep, flags);
1661}
1662
1663void
1664kmem_cache_free(kmem_cache_t *cachep, void *objp)
1665{
1666        __kmem_cache_free(cachep, objp);
1667}
1668
1669void *
1670kmalloc(size_t size, int flags)
1671{
1672        cache_sizes_t   *csizep = cache_sizes;
1673
1674        for (; csizep->cs_size; csizep++) {
1675                if (size > csizep->cs_size)
1676                        continue;
1677                return __kmem_cache_alloc(csizep->cs_cachep, flags);
1678        }
1679        printk(KERN_ERR "kmalloc: Size (%lu) too large\n", (unsigned long) size);
1680        return NULL;
1681}
1682
1683void
1684kfree(const void *objp)
1685{
1686        struct page *page;
1687        int     nr;
1688
1689        if (!objp)
1690                goto null_ptr;
1691        nr = MAP_NR(objp);
1692        if (nr >= max_mapnr)
1693                goto bad_ptr;
1694
1695        /* Assume we own the page structure - hence no locking.
1696         * If someone is misbehaving (for example, calling us with a bad
1697         * address), then access to the page structure can race with the
1698         * kmem_slab_destroy() code.  Need to add a spin_lock to each page
1699         * structure, which would be useful in threading the gfp() functions....
1700         */
1701        page = &mem_map[nr];
1702        if (PageSlab(page)) {
1703                kmem_cache_t    *cachep;
1704
1705                /* Here, we again assume the obj address is good.
1706                 * If it isn't, and happens to map onto another
1707                 * general cache page which has no active objs, then
1708                 * we race.
1709                 */
1710                cachep = SLAB_GET_PAGE_CACHE(page);
1711                if (cachep && (cachep->c_flags & SLAB_CFLGS_GENERAL)) {
1712                        __kmem_cache_free(cachep, objp);
1713                        return;
1714                }
1715        }
1716bad_ptr:
1717        printk(KERN_ERR "kfree: Bad obj %p\n", objp);
1718
1719#if 1
1720/* FORCE A KERNEL DUMP WHEN THIS HAPPENS. SPEAK IN ALL CAPS. GET THE CALL CHAIN. */
1721*(int *) 0 = 0;
1722#endif
1723
1724null_ptr:
1725        return;
1726}
1727
1728void
1729kfree_s(const void *objp, size_t size)
1730{
1731        struct page *page;
1732        int     nr;
1733
1734        if (!objp)
1735                goto null_ptr;
1736        nr = MAP_NR(objp);
1737        if (nr >= max_mapnr)
1738                goto null_ptr;
1739        /* See comment in kfree() */
1740        page = &mem_map[nr];
1741        if (PageSlab(page)) {
1742                kmem_cache_t    *cachep;
1743                /* See comment in kfree() */
1744                cachep = SLAB_GET_PAGE_CACHE(page);
1745                if (cachep && cachep->c_flags & SLAB_CFLGS_GENERAL) {
1746                        if (size <= cachep->c_org_size) {       /* XXX better check */
1747                                __kmem_cache_free(cachep, objp);
1748                                return;
1749                        }
1750                }
1751        }
1752null_ptr:
1753        printk(KERN_ERR "kfree_s: Bad obj %p\n", objp);
1754        return;
1755}
1756
1757kmem_cache_t *
1758kmem_find_general_cachep(size_t size)
1759{
1760        cache_sizes_t   *csizep = cache_sizes;
1761
1762        /* This function could be moved to the header file, and
1763         * made inline so consumers can quickly determine what
1764         * cache pointer they require.
1765         */
1766        for (; csizep->cs_size; csizep++) {
1767                if (size > csizep->cs_size)
1768                        continue;
1769                break;
1770        }
1771        return csizep->cs_cachep;
1772}
1773
1774
1775/* Called from try_to_free_page().
1776 * This function _cannot_ be called within a int, but it
1777 * can be interrupted.
1778 */
1779void
1780kmem_cache_reap(int gfp_mask)
1781{
1782        kmem_slab_t     *slabp;
1783        kmem_cache_t    *searchp;
1784        kmem_cache_t    *best_cachep;
1785        unsigned int     scan;
1786        unsigned int     reap_level;
1787
1788        if (in_interrupt()) {
1789                printk("kmem_cache_reap() called within int!\n");
1790                return;
1791        }
1792
1793        /* We really need a test semaphore op so we can avoid sleeping when
1794         * !wait is true.
1795         */
1796        down(&cache_chain_sem);
1797
1798        scan = 10;
1799        reap_level = 0;
1800
1801        best_cachep = NULL;
1802        searchp = clock_searchp;
1803        do {
1804                unsigned int    full_free;
1805                unsigned int    dma_flag;
1806
1807                /* It's safe to test this without holding the cache-lock. */
1808                if (searchp->c_flags & SLAB_NO_REAP)
1809                        goto next;
1810                spin_lock_irq(&searchp->c_spinlock);
1811                if (searchp->c_growing)
1812                        goto next_unlock;
1813                if (searchp->c_dflags & SLAB_CFLGS_GROWN) {
1814                        searchp->c_dflags &= ~SLAB_CFLGS_GROWN;
1815                        goto next_unlock;
1816                }
1817                /* Sanity check for corruption of static values. */
1818                if (searchp->c_inuse || searchp->c_magic != SLAB_C_MAGIC) {
1819                        spin_unlock_irq(&searchp->c_spinlock);
1820                        printk(KERN_ERR "kmem_reap: Corrupted cache struct for %s\n", searchp->c_name);
1821                        goto next;
1822                }
1823                dma_flag = 0;
1824                full_free = 0;
1825
1826                /* Count the fully free slabs.  There should not be not many,
1827                 * since we are holding the cache lock.
1828                 */
1829                slabp = searchp->c_lastp;
1830                while (!slabp->s_inuse && slabp != kmem_slab_end(searchp)) {
1831                        slabp = slabp->s_prevp;
1832                        full_free++;
1833                        if (slabp->s_dma)
1834                                dma_flag++;
1835                }
1836                spin_unlock_irq(&searchp->c_spinlock);
1837
1838                if ((gfp_mask & GFP_DMA) && !dma_flag)
1839                        goto next;
1840
1841                if (full_free) {
1842                        if (full_free >= 10) {
1843                                best_cachep = searchp;
1844                                break;
1845                        }
1846
1847                        /* Try to avoid slabs with constructors and/or
1848                         * more than one page per slab (as it can be difficult
1849                         * to get high orders from gfp()).
1850                         */
1851                        if (full_free >= reap_level) {
1852                                reap_level = full_free;
1853                                best_cachep = searchp;
1854                        }
1855                }
1856                goto next;
1857next_unlock:
1858                spin_unlock_irq(&searchp->c_spinlock);
1859next:
1860                searchp = searchp->c_nextp;
1861        } while (--scan && searchp != clock_searchp);
1862
1863        clock_searchp = searchp;
1864        up(&cache_chain_sem);
1865
1866        if (!best_cachep) {
1867                /* couldn't find anything to reap */
1868                return;
1869        }
1870
1871        spin_lock_irq(&best_cachep->c_spinlock);
1872        while (!best_cachep->c_growing &&
1873               !(slabp = best_cachep->c_lastp)->s_inuse &&
1874               slabp != kmem_slab_end(best_cachep)) {
1875                if (gfp_mask & GFP_DMA) {
1876                        do {
1877                                if (slabp->s_dma)
1878                                        goto good_dma;
1879                                slabp = slabp->s_prevp;
1880                        } while (!slabp->s_inuse && slabp != kmem_slab_end(best_cachep));
1881
1882                        /* Didn't found a DMA slab (there was a free one -
1883                         * must have been become active).
1884                         */
1885                        goto dma_fail;
1886good_dma:
1887                }
1888                if (slabp == best_cachep->c_freep)
1889                        best_cachep->c_freep = slabp->s_nextp;
1890                kmem_slab_unlink(slabp);
1891                SLAB_STATS_INC_REAPED(best_cachep);
1892
1893                /* Safe to drop the lock.  The slab is no longer linked to the
1894                 * cache.
1895                 */
1896                spin_unlock_irq(&best_cachep->c_spinlock);
1897                kmem_slab_destroy(best_cachep, slabp);
1898                spin_lock_irq(&best_cachep->c_spinlock);
1899        }
1900dma_fail:
1901        spin_unlock_irq(&best_cachep->c_spinlock);
1902        return;
1903}
1904
#if     SLAB_SELFTEST
/* A few v. simple tests.
 *
 * Creates a small cache with red-zoning and poisoning enabled, then
 * deliberately corrupts an obj's red-zones and a freed obj's poison so
 * that the debug checks have something to report.  The writes to freed
 * objs below are intentional.
 */
static void
kmem_self_test(void)
{
        /* Obj size of the test cache; also the offset of the first byte
         * past the end of an obj.
         */
        enum { SELFTEST_OBJ_SIZE = 16 };

        kmem_cache_t    *test_cachep;
        char            *objp;

        printk(KERN_INFO "kmem_test() - start\n");
        test_cachep = kmem_cache_create("test-cachep", SELFTEST_OBJ_SIZE, 0, SLAB_RED_ZONE|SLAB_POISON, NULL, NULL);
        if (!test_cachep)
                goto out;

        objp = kmem_cache_alloc(test_cachep, SLAB_KERNEL);
        if (!objp)
                goto out;

        /* Write in front and past end, red-zone test. */
        *(objp-1) = 1;
        *(objp+SELFTEST_OBJ_SIZE) = 1;
        kmem_cache_free(test_cachep, objp);

        /* Mess up poisoning. */
        *objp = 10;
        objp = kmem_cache_alloc(test_cachep, SLAB_KERNEL);
        if (objp) {
                /* Only free and scribble on the obj if the allocation
                 * succeeded; a failed allocation must not be dereferenced.
                 */
                kmem_cache_free(test_cachep, objp);

                /* Mess up poisoning (again). */
                *objp = 10;
        }
        kmem_cache_shrink(test_cachep);
out:
        printk(KERN_INFO "kmem_test() - finished\n");
}
#endif  /* SLAB_SELFTEST */
1935
#if     defined(CONFIG_PROC_FS)
/* /proc/slabinfo
 *
 * Writes a version header followed by one line per cache into 'buf' and
 * returns the number of characters written.
 *
 * Line format without SLAB_STATS:
 *   cache-name num-active-objs total-objs
 * Line format with SLAB_STATS:
 *   cache-name num-active-objs total-objs num-active-slabs total-slabs
 *   num-pages high-mark num-allocations grown reaped errors
 * where num-pages is (1 << c_gfporder) * total-slabs.
 *
 * The cache chain is walked with cache_chain_sem held; each cache's slab
 * list is walked with that cache's c_spinlock held, and the lock is
 * dropped before the line is formatted.
 *
 * NOTE(review): output is produced with sprintf() and no length limit, so
 * 'buf' must be large enough for the header plus one line per cache -
 * confirm against the caller's buffer size.
 */
int
get_slabinfo(char *buf)
{
        kmem_cache_t    *cachep;
        kmem_slab_t     *slabp;
        unsigned long   active_objs;
        unsigned long   save_flags;
        unsigned long   num_slabs;
        unsigned long   num_objs;
        int             len;
#if     SLAB_STATS
        unsigned long   active_slabs;
#endif  /* SLAB_STATS */

        /* Output format version, so at least we can change it without _too_
         * many complaints.
         */
#if     SLAB_STATS
        len = sprintf(buf, "slabinfo - version: 1.0 (statistics)\n");
#else
        len = sprintf(buf, "slabinfo - version: 1.0\n");
#endif  /* SLAB_STATS */
        down(&cache_chain_sem);
        cachep = &cache_cache;
        do {
#if     SLAB_STATS
                active_slabs = 0;
#endif  /* SLAB_STATS */
                num_slabs = active_objs = 0;
                /* Save and restore the interrupt state as a matched pair
                 * around each cache, rather than restoring flags that were
                 * saved once outside the loop.
                 */
                spin_lock_irqsave(&cachep->c_spinlock, save_flags);
                for (slabp = cachep->c_firstp; slabp != kmem_slab_end(cachep); slabp = slabp->s_nextp) {
                        active_objs += slabp->s_inuse;
                        num_slabs++;
#if     SLAB_STATS
                        if (slabp->s_inuse)
                                active_slabs++;
#endif  /* SLAB_STATS */
                }
                num_objs = cachep->c_num*num_slabs;
#if     SLAB_STATS
                {
                /* Copy the counters while the lock is held, print afterwards. */
                unsigned long errors;
                unsigned long high = cachep->c_high_mark;
                unsigned long grown = cachep->c_grown;
                unsigned long reaped = cachep->c_reaped;
                unsigned long allocs = cachep->c_num_allocations;
                errors = (unsigned long) atomic_read(&cachep->c_errors);
                spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
                len += sprintf(buf+len, "%-16s %6lu %6lu %4lu %4lu %4lu %6lu %7lu %5lu %4lu %4lu\n",
                                cachep->c_name, active_objs, num_objs, active_slabs, num_slabs,
                                (1<<cachep->c_gfporder)*num_slabs,
                                high, allocs, grown, reaped, errors);
                }
#else
                spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
                len += sprintf(buf+len, "%-17s %6lu %6lu\n", cachep->c_name, active_objs, num_objs);
#endif  /* SLAB_STATS */
        } while ((cachep = cachep->c_nextp) != &cache_cache);
        up(&cache_chain_sem);

        return len;
}
#endif  /* CONFIG_PROC_FS */
2005
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.