linux-old/mm/slab.c
<<
>>
Prefs
   1/*
   2 * linux/mm/slab.c
   3 * Written by Mark Hemment, 1996/97.
   4 * (markhe@nextd.demon.co.uk)
   5 *
   6 * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
   7 *
   8 * Major cleanup, different bufctl logic, per-cpu arrays
   9 *      (c) 2000 Manfred Spraul
  10 *
  11 * An implementation of the Slab Allocator as described in outline in;
  12 *      UNIX Internals: The New Frontiers by Uresh Vahalia
  13 *      Pub: Prentice Hall      ISBN 0-13-101908-2
  14 * or with a little more detail in;
  15 *      The Slab Allocator: An Object-Caching Kernel Memory Allocator
  16 *      Jeff Bonwick (Sun Microsystems).
  17 *      Presented at: USENIX Summer 1994 Technical Conference
  18 *
  19 *
  20 * The memory is organized in caches, one cache for each object type.
  21 * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
  22 * Each cache consists of many slabs (they are small (usually one
  23 * page long) and always contiguous), and each slab contains multiple
  24 * initialized objects.
  25 *
  26 * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
  27 * normal). If you need a special memory type, then you must create a new
  28 * cache for that memory type.
  29 *
  30 * In order to reduce fragmentation, the slabs are sorted in 3 groups:
  31 *   full slabs with 0 free objects
  32 *   partial slabs
  33 *   empty slabs with no allocated objects
  34 *
  35 * If partial slabs exist, then new allocations come from these slabs,
  36 * otherwise from empty slabs or new slabs are allocated.
  37 *
  38 * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
  39 * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
  40 *
  41 * On SMP systems, each cache has a short per-cpu head array, most allocs
  42 * and frees go into that array, and if that array overflows, then 1/2
  43 * of the entries in the array are given back into the global cache.
  44 * This reduces the number of spinlock operations.
  45 *
  46 * The c_cpuarray may not be read with enabled local interrupts.
  47 *
  48 * SMP synchronization:
  49 *  constructors and destructors are called without any locking.
  50 *  Several members in kmem_cache_t and slab_t never change, they
  51 *      are accessed without any locking.
  52 *  The per-cpu arrays are never accessed from the wrong cpu, no locking.
  53 *  The non-constant members are protected with a per-cache irq spinlock.
  54 *
  55 * Further notes from the original documentation:
  56 *
  57 * 11 April '97.  Started multi-threading - markhe
  58 *      The global cache-chain is protected by the semaphore 'cache_chain_sem'.
  59 *      The sem is only needed when accessing/extending the cache-chain, which
  60 *      can never happen inside an interrupt (kmem_cache_create(),
  61 *      kmem_cache_shrink() and kmem_cache_reap()).
  62 *
  63 *      To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which
  64 *      may be sleeping and therefore not holding the semaphore/lock), the
  65 *      growing field is used.  This also prevents reaping from a cache.
  66 *
  67 *      At present, each engine can be growing a cache.  This should be blocked.
  68 *
  69 */
  70
  71#include        <linux/config.h>
  72#include        <linux/slab.h>
  73#include        <linux/interrupt.h>
  74#include        <linux/init.h>
  75#include        <linux/compiler.h>
  76#include        <linux/seq_file.h>
  77#include        <asm/uaccess.h>
  78
  79/*
  80 * DEBUG        - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
  81 *                SLAB_RED_ZONE & SLAB_POISON.
  82 *                0 for faster, smaller code (especially in the critical paths).
  83 *
  84 * STATS        - 1 to collect stats for /proc/slabinfo.
  85 *                0 for faster, smaller code (especially in the critical paths).
  86 *
  87 * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
  88 */
  89
  90#ifdef CONFIG_DEBUG_SLAB
  91#define DEBUG           1
  92#define STATS           1
  93#define FORCED_DEBUG    1
  94#else /* !CONFIG_DEBUG_SLAB */
  95#define DEBUG           0
  96#define STATS           0
  97#define FORCED_DEBUG    0
  98#endif /* CONFIG_DEBUG_SLAB */
  99
 100/*
 101 * Parameters for kmem_cache_reap
 102 */
 103#define REAP_SCANLEN    10
 104#define REAP_PERFECT    10
 105
 106/* Size of a machine word (a pointer). Shouldn't this be in a header file somewhere? */
 107#define BYTES_PER_WORD          sizeof(void *)
 108
 109/* Legal flag mask for kmem_cache_create(); the debug flags are only legal when DEBUG is set. */
 110#if DEBUG
 111# define CREATE_MASK    (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
 112                         SLAB_POISON | SLAB_HWCACHE_ALIGN | \
 113                         SLAB_NO_REAP | SLAB_CACHE_DMA | \
 114                         SLAB_MUST_HWCACHE_ALIGN)
 115#else
 116# define CREATE_MASK    (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \
 117                         SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN)
 118#endif /* DEBUG */
 119
 120/*
 121 * kmem_bufctl_t:
 122 *
 123 * Bufctl's are used for linking objs within a slab
 124 * linked offsets.
 125 *
 126 * This implementation relies on "struct page" for locating the cache &
 127 * slab an object belongs to.
 128 * This allows the bufctl structure to be small (one int), but limits
 129 * the number of objects a slab (not a cache) can contain when off-slab
 130 * bufctls are used. The limit is the size of the largest general cache
 131 * that does not use off-slab slabs.
 132 * For 32bit archs with 4 kB pages, this is 56.
 133 * This is not serious, as it is only for large objects, when it is unwise
 134 * to have too many per slab.
 135 * Note: This limit can be raised by introducing a general cache whose size
 136 * is less than 512 (PAGE_SIZE<<3), but greater than 256.
 137 */
 138
 139#define BUFCTL_END 0xffffFFFF       /* NOTE(review): presumably the end-of-chain marker; no user in this view */
 140#define SLAB_LIMIT 0xffffFFFE       /* kmem_cache_estimate() caps the objs-per-slab count at this value */
 141typedef unsigned int kmem_bufctl_t;
 142
 143/* Max number of objs-per-slab for caches which use off-slab slabs.
 144 * Needed to avoid a possible looping condition in kmem_cache_grow().
 145 * Set by kmem_cache_sizes_init() from the sizes of the on-slab general caches. */
 146static unsigned long offslab_limit;
 147
 148/*
 149 * slab_t
 150 *
 151 * Manages the objs in a slab. Placed either at the beginning of mem allocated
 152 * for a slab, or allocated from a general cache.
 153 * Slabs are chained into three lists: fully used, partial, fully free slabs.
 154 */
 155typedef struct slab_s {
 156        struct list_head        list;           /* chains the slab into one of the cache's slab lists */
 157        unsigned long           colouroff;      /* kmem_slab_destroy() frees the pages at s_mem - colouroff */
 158        void                    *s_mem;         /* including colour offset */
 159        unsigned int            inuse;          /* num of objs active in slab */
 160        kmem_bufctl_t           free;
 161} slab_t;
 162
 163#define slab_bufctl(slabp) \
 164        ((kmem_bufctl_t *)(((slab_t*)slabp)+1)) /* kmem_bufctl_t array starting right behind the slab_t */
 165
 166/*
 167 * cpucache_t
 168 *
 169 * Per cpu structures
 170 * The limit is stored in the per-cpu structure to reduce the data cache
 171 * footprint.
 172 */
 173typedef struct cpucache_s {
 174        unsigned int avail;
 175        unsigned int limit;
 176} cpucache_t;
 177
 178#define cc_entry(cpucache) \
 179        ((void **)(((cpucache_t*)(cpucache))+1))        /* void* array starting right behind the cpucache_t */
 180#define cc_data(cachep) \
 181        ((cachep)->cpudata[smp_processor_id()])         /* the executing CPU's slot in cachep->cpudata[] */
 182/*
 183 * kmem_cache_t
 184 *
 185 * manages a cache.
 186 */
 187
 188#define CACHE_NAMELEN   20      /* max name length for a slab cache */
 189
 190struct kmem_cache_s {
 191/* 1) each alloc & free */
 192        /* full, partial first, then free */
 193        struct list_head        slabs_full;
 194        struct list_head        slabs_partial;
 195        struct list_head        slabs_free;
 196        unsigned int            objsize;        /* obj size as adjusted by kmem_cache_create() (alignment, red zone) */
 197        unsigned int            flags;  /* constant flags */
 198        unsigned int            num;    /* # of objs per slab */
 199        spinlock_t              spinlock;       /* per-cache irq spinlock for the non-constant members */
 200#ifdef CONFIG_SMP
 201        unsigned int            batchcount;
 202#endif
 203
 204/* 2) slab additions /removals */
 205        /* order of pgs per slab (2^n) */
 206        unsigned int            gfporder;
 207
 208        /* force GFP flags, e.g. GFP_DMA */
 209        unsigned int            gfpflags;
 210
 211        size_t                  colour;         /* cache colouring range */
 212        unsigned int            colour_off;     /* colour offset */
 213        unsigned int            colour_next;    /* cache colouring */
 214        kmem_cache_t            *slabp_cache;   /* general cache that holds the slab_t's of an off-slab cache */
 215        unsigned int            growing;        /* see file header: keeps a growing cache from being shrunk/reaped */
 216        unsigned int            dflags;         /* dynamic flags */
 217
 218        /* constructor func */
 219        void (*ctor)(void *, kmem_cache_t *, unsigned long);
 220
 221        /* de-constructor func */
 222        void (*dtor)(void *, kmem_cache_t *, unsigned long);
 223
 224        unsigned long           failures;
 225
 226/* 3) cache creation/removal */
 227        char                    name[CACHE_NAMELEN];    /* private copy made by kmem_cache_create() */
 228        struct list_head        next;                   /* link in the global cache chain */
 229#ifdef CONFIG_SMP
 230/* 4) per-cpu data */
 231        cpucache_t              *cpudata[NR_CPUS];      /* one pointer per CPU, indexed via cc_data() */
 232#endif
 233#if STATS
 234        unsigned long           num_active;
 235        unsigned long           num_allocations;
 236        unsigned long           high_mark;
 237        unsigned long           grown;
 238        unsigned long           reaped;
 239        unsigned long           errors;
 240#ifdef CONFIG_SMP
 241        atomic_t                allochit;
 242        atomic_t                allocmiss;
 243        atomic_t                freehit;
 244        atomic_t                freemiss;
 245#endif
 246#endif
 247};
 248
 249/* internal c_flags (stored in kmem_cache_t.flags together with the SLAB_* creation flags) */
 250#define CFLGS_OFF_SLAB  0x010000UL      /* slab management in own cache */
 251#define CFLGS_OPTIMIZE  0x020000UL      /* optimized slab lookup */
 252
 253/* c_dflags (dynamic flags, kmem_cache_t.dflags). Need to hold the spinlock to access this member */
 254#define DFLGS_GROWN     0x000001UL      /* don't reap a recently grown */
 255
 256#define OFF_SLAB(x)     ((x)->flags & CFLGS_OFF_SLAB)   /* non-zero if the slab_t is kept off-slab */
 257#define OPTIMIZE(x)     ((x)->flags & CFLGS_OPTIMIZE)   /* non-zero if CFLGS_OPTIMIZE is set */
 258#define GROWN(x)        ((x)->dflags & DFLGS_GROWN)     /* tests the dynamic flags; member is 'dflags' (was misspelt 'dlags') */
 259
 260#if STATS /* statistics hooks: real counters when STATS is set, empty statements otherwise */
 261#define STATS_INC_ACTIVE(x)     ((x)->num_active++)
 262#define STATS_DEC_ACTIVE(x)     ((x)->num_active--)
 263#define STATS_INC_ALLOCED(x)    ((x)->num_allocations++)
 264#define STATS_INC_GROWN(x)      ((x)->grown++)
 265#define STATS_INC_REAPED(x)     ((x)->reaped++)
 266#define STATS_SET_HIGH(x)       do { if ((x)->num_active > (x)->high_mark) \
 267                                        (x)->high_mark = (x)->num_active; \
 268                                } while (0)
 269#define STATS_INC_ERR(x)        ((x)->errors++)
 270#else
 271#define STATS_INC_ACTIVE(x)     do { } while (0)
 272#define STATS_DEC_ACTIVE(x)     do { } while (0)
 273#define STATS_INC_ALLOCED(x)    do { } while (0)
 274#define STATS_INC_GROWN(x)      do { } while (0)
 275#define STATS_INC_REAPED(x)     do { } while (0)
 276#define STATS_SET_HIGH(x)       do { } while (0)
 277#define STATS_INC_ERR(x)        do { } while (0)
 278#endif /* STATS */
 279
 280#if STATS && defined(CONFIG_SMP) /* atomic hit/miss counters, only present on SMP builds with STATS */
 281#define STATS_INC_ALLOCHIT(x)   atomic_inc(&(x)->allochit)
 282#define STATS_INC_ALLOCMISS(x)  atomic_inc(&(x)->allocmiss)
 283#define STATS_INC_FREEHIT(x)    atomic_inc(&(x)->freehit)
 284#define STATS_INC_FREEMISS(x)   atomic_inc(&(x)->freemiss)
 285#else
 286#define STATS_INC_ALLOCHIT(x)   do { } while (0)
 287#define STATS_INC_ALLOCMISS(x)  do { } while (0)
 288#define STATS_INC_FREEHIT(x)    do { } while (0)
 289#define STATS_INC_FREEMISS(x)   do { } while (0)
 290#endif /* STATS && CONFIG_SMP */
 291
 292#if DEBUG
 293/* Magic nums for obj red zoning.
 294 * Placed in the first word before and the first word after an obj.
 295 * NOTE(review): kmem_slab_destroy() expects RED_MAGIC1 on every obj of a slab it releases - confirm the active/inactive labels below. */
 296#define RED_MAGIC1      0x5A2CF071UL    /* when obj is active */
 297#define RED_MAGIC2      0x170FC2A5UL    /* when obj is inactive */
 298
 299/* ...and for poisoning */
 300#define POISON_BYTE     0x5a            /* byte value for poisoning */
 301#define POISON_END      0xa5            /* end-byte of poisoning */
 302
 303#endif /* DEBUG */
 304
 305/* maximum size of an obj (in 2^order pages) */
 306#define MAX_OBJ_ORDER   5       /* 32 pages */
 307
 308/*
 309 * Do not go above this order unless 0 objects fit into the slab.
 310 */
 311#define BREAK_GFP_ORDER_HI      2
 312#define BREAK_GFP_ORDER_LO      1
 313static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;  /* raised to _HI by kmem_cache_sizes_init() above 32MB */
 314
 315/*
 316 * Absolute limit for the gfp order
 317 */
 318#define MAX_GFP_ORDER   5       /* 32 pages */
 319
 320
 321/* Macros for storing/retrieving the cachep and/or slab from the
 322 * global 'mem_map'. These are used to find the slab an obj belongs to.
 323 * With kfree(), these are used to find the cache which an obj belongs to.
 324 * The pointers are stashed in the page's list.next (cache) and list.prev (slab). */
 325#define SET_PAGE_CACHE(pg,x)  ((pg)->list.next = (struct list_head *)(x))
 326#define GET_PAGE_CACHE(pg)    ((kmem_cache_t *)(pg)->list.next)
 327#define SET_PAGE_SLAB(pg,x)   ((pg)->list.prev = (struct list_head *)(x))
 328#define GET_PAGE_SLAB(pg)     ((slab_t *)(pg)->list.prev)
 329
 330/* Size description struct for general caches. */
 331typedef struct cache_sizes {
 332        size_t           cs_size;       /* obj size of this general cache; 0 ends the table */
 333        kmem_cache_t    *cs_cachep;     /* "size-N" cache, created in kmem_cache_sizes_init() */
 334        kmem_cache_t    *cs_dmacachep;  /* "size-N(DMA)" cache, created with SLAB_CACHE_DMA */
 335} cache_sizes_t;
 336
 337static cache_sizes_t cache_sizes[] = {
 338#if PAGE_SIZE == 4096
 339        {    32,        NULL, NULL},    /* 32-byte cache only when PAGE_SIZE is 4096 */
 340#endif
 341        {    64,        NULL, NULL},
 342        {   128,        NULL, NULL},
 343        {   256,        NULL, NULL},
 344        {   512,        NULL, NULL},
 345        {  1024,        NULL, NULL},
 346        {  2048,        NULL, NULL},
 347        {  4096,        NULL, NULL},
 348        {  8192,        NULL, NULL},
 349        { 16384,        NULL, NULL},
 350        { 32768,        NULL, NULL},
 351        { 65536,        NULL, NULL},
 352        {131072,        NULL, NULL},
 353        {     0,        NULL, NULL}     /* terminator: kmem_cache_sizes_init() stops at cs_size == 0 */
 354};
 355
 356/* internal cache of cache description objs (kmem_cache_t); num and colour are filled in by kmem_cache_init() */
 357static kmem_cache_t cache_cache = {
 358        slabs_full:     LIST_HEAD_INIT(cache_cache.slabs_full),
 359        slabs_partial:  LIST_HEAD_INIT(cache_cache.slabs_partial),
 360        slabs_free:     LIST_HEAD_INIT(cache_cache.slabs_free),
 361        objsize:        sizeof(kmem_cache_t),
 362        flags:          SLAB_NO_REAP,
 363        spinlock:       SPIN_LOCK_UNLOCKED,
 364        colour_off:     L1_CACHE_BYTES,
 365        name:           "kmem_cache",
 366};
 367
 368/* Guard access to the cache-chain. */
 369static struct semaphore cache_chain_sem;
 370
 371/* Place maintainer for reaping. */
 372static kmem_cache_t *clock_searchp = &cache_cache;
 373
 374#define cache_chain (cache_cache.next) /* the chain's list head is cache_cache's own 'next' member */
 375
 376#ifdef CONFIG_SMP
 377/*
 378 * chicken and egg problem: delay the per-cpu array allocation
 379 * until the general caches are up.
 380 */
 381static int g_cpucache_up;      /* set to 1 by kmem_cpucache_init() */
 382
 383static void enable_cpucache (kmem_cache_t *cachep);
 384static void enable_all_cpucaches (void);
 385#endif /* CONFIG_SMP */
 386
 387/* Calculate the number of objs (*num) and the bytes left over (*left_over) for a slab of PAGE_SIZE<<gfporder bytes. */
 388static void kmem_cache_estimate (unsigned long gfporder, size_t size,
 389                 int flags, size_t *left_over, unsigned int *num)
 390{
 391        int i;
 392        size_t wastage = PAGE_SIZE<<gfporder;   /* starts as the whole slab, ends as the unused remainder */
 393        size_t extra = 0;       /* management bytes needed per obj */
 394        size_t base = 0;        /* fixed management bytes per slab */
 395
 396        if (!(flags & CFLGS_OFF_SLAB)) {        /* on-slab: the slab_t and one bufctl per obj share the slab */
 397                base = sizeof(slab_t);
 398                extra = sizeof(kmem_bufctl_t);
 399        }
 400        i = 0;
 401        while (i*size + L1_CACHE_ALIGN(base+i*extra) <= wastage)        /* stops at the first count that does not fit */
 402                i++;
 403        if (i > 0)
 404                i--;    /* step back to the last count that did fit */
 405
 406        if (i > SLAB_LIMIT)     /* NOTE(review): signed int compared against an unsigned constant */
 407                i = SLAB_LIMIT;
 408
 409        *num = i;
 410        wastage -= i*size;
 411        wastage -= L1_CACHE_ALIGN(base+i*extra);
 412        *left_over = wastage;
 413}
 414
 415/* Initialisation - setup the `cache' cache: init the chain semaphore and list, then size cache_cache's slabs. */
 416void __init kmem_cache_init(void)
 417{
 418        size_t left_over;
 419
 420        init_MUTEX(&cache_chain_sem);
 421        INIT_LIST_HEAD(&cache_chain);
 422
 423        kmem_cache_estimate(0, cache_cache.objsize, 0,  /* order 0, flags 0 => on-slab management */
 424                        &left_over, &cache_cache.num);
 425        if (!cache_cache.num)   /* not even one kmem_cache_t fits into a page */
 426                BUG();
 427
 428        cache_cache.colour = left_over/cache_cache.colour_off;
 429        cache_cache.colour_next = 0;
 430}
 431
 432
 433/* Initialisation - setup remaining internal and general caches.
 434 * Called after the gfp() functions have been enabled, and before smp_init().
 435 * Creates a normal and a DMA cache per cache_sizes[] entry; BUG()s if a creation fails. */
 436void __init kmem_cache_sizes_init(void)
 437{
 438        cache_sizes_t *sizes = cache_sizes;
 439        char name[20];
 440        /*
 441         * Fragmentation resistance on low memory - only use bigger
 442         * page orders on machines with more than 32MB of memory.
 443         */
 444        if (num_physpages > (32 << 20) >> PAGE_SHIFT)   /* right-hand side is 32MB expressed in pages */
 445                slab_break_gfp_order = BREAK_GFP_ORDER_HI;
 446        do {
 447                /* For performance, all the general caches are L1 aligned.
 448                 * This should be particularly beneficial on SMP boxes, as it
 449                 * eliminates "false sharing".
 450                 * Note for systems short on memory removing the alignment will
 451                 * allow tighter packing of the smaller caches. */
 452                snprintf(name, sizeof(name), "size-%Zd",sizes->cs_size);
 453                if (!(sizes->cs_cachep =
 454                        kmem_cache_create(name, sizes->cs_size,
 455                                        0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
 456                        BUG();
 457                }
 458
 459                /* Inc off-slab bufctl limit until the ceiling is hit: the number of bufctls that fit behind a slab_t in one obj of this on-slab cache. */
 460                if (!(OFF_SLAB(sizes->cs_cachep))) {
 461                        offslab_limit = sizes->cs_size-sizeof(slab_t);
 462                        offslab_limit /= sizeof(kmem_bufctl_t); /* bytes -> bufctl count (was a hard-coded 2) */
 463                }
 464                snprintf(name, sizeof(name), "size-%Zd(DMA)",sizes->cs_size);
 465                sizes->cs_dmacachep = kmem_cache_create(name, sizes->cs_size, 0,
 466                              SLAB_CACHE_DMA|SLAB_HWCACHE_ALIGN, NULL, NULL);
 467                if (!sizes->cs_dmacachep)
 468                        BUG();
 469                sizes++;
 470        } while (sizes->cs_size);       /* the table ends with a cs_size of 0 */
 471}
 472
 473int __init kmem_cpucache_init(void)    /* initcall; always returns 0 */
 474{
 475#ifdef CONFIG_SMP
 476        g_cpucache_up = 1;      /* from now on kmem_cache_create() calls enable_cpucache() itself */
 477        enable_all_cpucaches();
 478#endif
 479        return 0;
 480}
 481
 482__initcall(kmem_cpucache_init);
 483
 484/* Interface to system's page allocator. No need to hold the cache-lock.
 485 * Requests 2^gfporder pages with the caller's flags OR'ed with the cache's gfpflags; returns what __get_free_pages() gave. */
 486static inline void * kmem_getpages (kmem_cache_t *cachep, unsigned long flags)
 487{
 488        void    *addr;
 489
 490        /*
 491         * If we requested dmaable memory, we will get it. Even if we
 492         * did not request dmaable memory, we might get it, but that
 493         * would be relatively rare and ignorable.
 494         */
 495        flags |= cachep->gfpflags;
 496        addr = (void*) __get_free_pages(flags, cachep->gfporder);
 497        /* Assume that now we have the pages no one else can legally
 498         * mess with the 'struct page's.
 499         * However vm_scan() might try to test the structure to see if
 500         * it is a named-page or buffer-page.  The members it tests are
 501         * of no interest here.....
 502         */
 503        return addr;
 504}
 505
 506/* Interface to system's page release: calls PageClearSlab() on every page of the slab, then free_pages(). */
 507static inline void kmem_freepages (kmem_cache_t *cachep, void *addr)
 508{
 509        unsigned long i = (1<<cachep->gfporder);        /* number of pages in the slab */
 510        struct page *page = virt_to_page(addr);
 511
 512        /* free_pages() does not clear the type bit - we do that.
 513         * The pages have been unlinked from their cache-slab,
 514         * but their 'struct page's might be accessed in
 515         * vm_scan(). Shouldn't be a worry.
 516         */
 517        while (i--) {
 518                PageClearSlab(page);
 519                page++;
 520        }
 521        free_pages((unsigned long)addr, cachep->gfporder);
 522}
 523
 524#if DEBUG
 525static inline void kmem_poison_obj (kmem_cache_t *cachep, void *addr)  /* fill the obj with POISON_BYTE, last byte POISON_END */
 526{
 527        int size = cachep->objsize;
 528        if (cachep->flags & SLAB_RED_ZONE) {    /* leave the red zone word at each end untouched */
 529                addr += BYTES_PER_WORD;
 530                size -= 2*BYTES_PER_WORD;
 531        }
 532        memset(addr, POISON_BYTE, size);
 533        *(unsigned char *)(addr+size-1) = POISON_END;
 534}
 535
 536static inline int kmem_check_poison_obj (kmem_cache_t *cachep, void *addr)     /* returns 0 if the poison end-marker is in place, 1 otherwise */
 537{
 538        int size = cachep->objsize;
 539        void *end;
 540        if (cachep->flags & SLAB_RED_ZONE) {    /* same payload window as kmem_poison_obj() */
 541                addr += BYTES_PER_WORD;
 542                size -= 2*BYTES_PER_WORD;
 543        }
 544        end = memchr(addr, POISON_END, size);   /* first POISON_END byte in the payload, or NULL */
 545        if (end != (addr+size-1))       /* it must be exactly the last payload byte */
 546                return 1;
 547        return 0;
 548}
 549#endif /* DEBUG */
 550
 551/* Destroy all the objs in a slab, and release the mem back to the system.
 552 * Before calling the slab must have been unlinked from the cache.
 553 * The cache-lock is not held/needed.
 554 */
 555static void kmem_slab_destroy (kmem_cache_t *cachep, slab_t *slabp)
 556{
 557        if (cachep->dtor        /* the per-obj walk is only needed for a dtor or for the debug checks */
 558#if DEBUG
 559                || cachep->flags & (SLAB_POISON | SLAB_RED_ZONE)
 560#endif
 561        ) {
 562                int i;
 563                for (i = 0; i < cachep->num; i++) {
 564                        void* objp = slabp->s_mem+cachep->objsize*i;    /* start of the i-th obj slot */
 565#if DEBUG
 566                        if (cachep->flags & SLAB_RED_ZONE) {
 567                                if (*((unsigned long*)(objp)) != RED_MAGIC1)    /* leading red zone word */
 568                                        BUG();
 569                                if (*((unsigned long*)(objp + cachep->objsize
 570                                                -BYTES_PER_WORD)) != RED_MAGIC1)        /* trailing red zone word */
 571                                        BUG();
 572                                objp += BYTES_PER_WORD; /* the dtor gets the payload address */
 573                        }
 574#endif
 575                        if (cachep->dtor)
 576                                (cachep->dtor)(objp, cachep, 0);
 577#if DEBUG
 578                        if (cachep->flags & SLAB_RED_ZONE) {
 579                                objp -= BYTES_PER_WORD; /* back to the slot start for the poison check */
 580                        }       
 581                        if ((cachep->flags & SLAB_POISON)  &&
 582                                kmem_check_poison_obj(cachep, objp))
 583                                BUG();
 584#endif
 585                }
 586        }
 587
 588        kmem_freepages(cachep, slabp->s_mem-slabp->colouroff); /* undo the colour offset before freeing */
 589        if (OFF_SLAB(cachep))   /* an off-slab slab_t goes back to the cache it was taken from */
 590                kmem_cache_free(cachep->slabp_cache, slabp);
 591}
 592
 593/**
 594 * kmem_cache_create - Create a cache.
 595 * @name: A string which is used in /proc/slabinfo to identify this cache.
 596 * @size: The size of objects to be created in this cache.
 597 * @offset: The offset to use within the page.
 598 * @flags: SLAB flags
 599 * @ctor: A constructor for the objects.
 600 * @dtor: A destructor for the objects.
 601 *
 602 * Returns a ptr to the cache on success, NULL on failure. Usage errors and duplicate names BUG().
 603 * Cannot be called from interrupt context (BUG()s if in_interrupt()), but can be interrupted.
 604 * The @ctor is run when new pages are allocated by the cache
 605 * and the @dtor is run before the pages are handed back.
 606 * The flags are
 607 *
 608 * %SLAB_POISON - Poison the slab with a known test pattern (0x5a bytes, ended by 0xa5)
 609 * to catch references to uninitialised memory.
 610 *
 611 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 612 * for buffer overruns.
 613 *
 614 * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
 615 * memory pressure.
 616 *
 617 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 618 * cacheline.  This can be beneficial if you're counting cycles as closely
 619 * as davem.
 620 */
 621kmem_cache_t *
 622kmem_cache_create (const char *name, size_t size, size_t offset,
 623        unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
 624        void (*dtor)(void*, kmem_cache_t *, unsigned long))
 625{
 626        const char *func_nm = KERN_ERR "kmem_create: ";
 627        size_t left_over, align, slab_size;
 628        kmem_cache_t *cachep = NULL;
 629
 630        /*
 631         * Sanity checks... these are all serious usage bugs.
 632         */
 633        if ((!name) ||
 634                ((strlen(name) >= CACHE_NAMELEN - 1)) ||
 635                in_interrupt() ||
 636                (size < BYTES_PER_WORD) ||
 637                (size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) ||
 638                (dtor && !ctor) ||
 639                (offset < 0 || offset > size))  /* offset is a size_t, so the "< 0" half can never be true */
 640                        BUG();
 641
 642#if DEBUG
 643        if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
 644                /* No constructor, but initial state check requested */
 645                printk("%sNo con, but init state check requested - %s\n", func_nm, name);
 646                flags &= ~SLAB_DEBUG_INITIAL;
 647        }
 648
 649        if ((flags & SLAB_POISON) && ctor) {
 650                /* request for poisoning, but we can't do that with a constructor */
 651                printk("%sPoisoning requested, but con given - %s\n", func_nm, name);
 652                flags &= ~SLAB_POISON;
 653        }
 654#if FORCED_DEBUG
 655        if ((size < (PAGE_SIZE>>3)) && !(flags & SLAB_MUST_HWCACHE_ALIGN))
 656                /*
 657                 * do not red zone large object, causes severe
 658                 * fragmentation.
 659                 */
 660                flags |= SLAB_RED_ZONE;
 661        if (!ctor)
 662                flags |= SLAB_POISON;
 663#endif
 664#endif
 665
 666        /*
 667         * Always checks flags, a caller might be expecting debug
 668         * support which isn't available.
 669         */
 670        BUG_ON(flags & ~CREATE_MASK);
 671
 672        /* Get cache's description obj. */
 673        cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
 674        if (!cachep)
 675                goto opps;
 676        memset(cachep, 0, sizeof(kmem_cache_t));        /* so gfporder and num start out as 0 */
 677
 678        /* Check that size is in terms of words.  This is needed to avoid
 679         * unaligned accesses for some archs when redzoning is used, and makes
 680         * sure any on-slab bufctl's are also correctly aligned.
 681         */
 682        if (size & (BYTES_PER_WORD-1)) {
 683                size += (BYTES_PER_WORD-1);
 684                size &= ~(BYTES_PER_WORD-1);
 685                printk("%sForcing size word alignment - %s\n", func_nm, name);
 686        }
 687        
 688#if DEBUG
 689        if (flags & SLAB_RED_ZONE) {
 690                /*
 691                 * There is no point trying to honour cache alignment
 692                 * when redzoning.
 693                 */
 694                flags &= ~SLAB_HWCACHE_ALIGN;
 695                size += 2*BYTES_PER_WORD;       /* words for redzone */
 696        }
 697#endif
 698        align = BYTES_PER_WORD;
 699        if (flags & SLAB_HWCACHE_ALIGN)
 700                align = L1_CACHE_BYTES;
 701
 702        /* Determine if the slab management is 'on' or 'off' slab. */
 703        if (size >= (PAGE_SIZE>>3))
 704                /*
 705                 * Size is large, assume best to place the slab management obj
 706                 * off-slab (should allow better packing of objs).
 707                 */
 708                flags |= CFLGS_OFF_SLAB;
 709
 710        if (flags & SLAB_HWCACHE_ALIGN) {
 711                /* Need to adjust size so that objs are cache aligned. */
 712                /* Small obj size, can get at least two per cache line. */
 713                /* FIXME: only power of 2 supported, was better */
 714                while (size < align/2)
 715                        align /= 2;
 716                size = (size+align-1)&(~(align-1));
 717        }
 718
 719        /* Calculate the size (in pages) of slabs, and the num of objs per slab.
 720         * This could be made much more intelligent.  For now, try to avoid
 721         * using high page-orders for slabs.  When the gfp() funcs are more
 722         * friendly towards high-order requests, this should be changed.
 723         */
 724        do {
 725                unsigned int break_flag = 0;
 726cal_wastage:
 727                kmem_cache_estimate(cachep->gfporder, size, flags,
 728                                                &left_over, &cachep->num);
 729                if (break_flag)
 730                        break;
 731                if (cachep->gfporder >= MAX_GFP_ORDER)
 732                        break;
 733                if (!cachep->num)
 734                        goto next;
 735                if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) {
 736                        /* Oops, this num of objs will cause problems. */
 737                        cachep->gfporder--;     /* NOTE(review): gfporder is unsigned and would wrap if it is still 0 here - confirm that cannot happen */
 738                        break_flag++;
 739                        goto cal_wastage;
 740                }
 741
 742                /*
 743                 * Large num of objs is good, but v. large slabs are currently
 744                 * bad for the gfp()s.
 745                 */
 746                if (cachep->gfporder >= slab_break_gfp_order)
 747                        break;
 748
 749                if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder))
 750                        break;  /* Acceptable internal fragmentation. */
 751next:
 752                cachep->gfporder++;
 753        } while (1);
 754
 755        if (!cachep->num) {
 756                printk("kmem_cache_create: couldn't create cache %s.\n", name);
 757                kmem_cache_free(&cache_cache, cachep);
 758                cachep = NULL;
 759                goto opps;
 760        }
 761        slab_size = L1_CACHE_ALIGN(cachep->num*sizeof(kmem_bufctl_t)+sizeof(slab_t));  /* slab_t plus one bufctl per obj */
 762
 763        /*
 764         * If the slab has been placed off-slab, and we have enough space then
 765         * move it on-slab. This is at the expense of any extra colouring.
 766         */
 767        if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
 768                flags &= ~CFLGS_OFF_SLAB;
 769                left_over -= slab_size;
 770        }
 771
 772        /* Offset must be a multiple of the alignment. */
 773        offset += (align-1);
 774        offset &= ~(align-1);
 775        if (!offset)
 776                offset = L1_CACHE_BYTES;
 777        cachep->colour_off = offset;
 778        cachep->colour = left_over/offset;
 779
 780        /* init remaining fields */
 781        if (!cachep->gfporder && !(flags & CFLGS_OFF_SLAB))
 782                flags |= CFLGS_OPTIMIZE;
 783
 784        cachep->flags = flags;
 785        cachep->gfpflags = 0;
 786        if (flags & SLAB_CACHE_DMA)
 787                cachep->gfpflags |= GFP_DMA;
 788        spin_lock_init(&cachep->spinlock);
 789        cachep->objsize = size;
 790        INIT_LIST_HEAD(&cachep->slabs_full);
 791        INIT_LIST_HEAD(&cachep->slabs_partial);
 792        INIT_LIST_HEAD(&cachep->slabs_free);
 793
 794        if (flags & CFLGS_OFF_SLAB)
 795                cachep->slabp_cache = kmem_find_general_cachep(slab_size,0);
 796        cachep->ctor = ctor;
 797        cachep->dtor = dtor;
 798        /* Copy name over so we don't have problems with unloaded modules */
 799        strcpy(cachep->name, name);     /* fits: the sanity checks require strlen(name) < CACHE_NAMELEN - 1 */
 800
 801#ifdef CONFIG_SMP
 802        if (g_cpucache_up)
 803                enable_cpucache(cachep);
 804#endif
 805        /* Need the semaphore to access the chain. */
 806        down(&cache_chain_sem);
 807        {
 808                struct list_head *p;
 809
 810                list_for_each(p, &cache_chain) {
 811                        kmem_cache_t *pc = list_entry(p, kmem_cache_t, next);
 812
 813                        /* The name field is constant - no lock needed. */
 814                        if (!strcmp(pc->name, name))    /* a second cache with the same name is a usage bug */
 815                                BUG();
 816                }
 817        }
 818
 819        /* There is no reason to lock our new cache before we
 820         * link it in - no one knows about it yet...
 821         */
 822        list_add(&cachep->next, &cache_chain);
 823        up(&cache_chain_sem);
 824opps:
 825        return cachep;
 826}
 827
 828
 829#if DEBUG
 830/*
 831 * This check if the kmem_cache_t pointer is chained in the cache_cache
 832 * list. -arca
 833 */
 834static int is_chained_kmem_cache(kmem_cache_t * cachep)
 835{
 836        struct list_head *p;
 837        int ret = 0;
 838
 839        /* Find the cache in the chain of caches. */
 840        down(&cache_chain_sem);
 841        list_for_each(p, &cache_chain) {
 842                if (p == &cachep->next) {
 843                        ret = 1;
 844                        break;
 845                }
 846        }
 847        up(&cache_chain_sem);
 848
 849        return ret;
 850}
 851#else
 852#define is_chained_kmem_cache(x) 1
 853#endif
 854
 855#ifdef CONFIG_SMP
 856/*
 857 * Waits for all CPUs to execute func().
 858 */
 859static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg)
 860{
 861        local_irq_disable();
 862        func(arg);
 863        local_irq_enable();
 864
 865        if (smp_call_function(func, arg, 1, 1))
 866                BUG();
 867}
 868typedef struct ccupdate_struct_s
 869{
 870        kmem_cache_t *cachep;
 871        cpucache_t *new[NR_CPUS];
 872} ccupdate_struct_t;
 873
 874static void do_ccupdate_local(void *info)
 875{
 876        ccupdate_struct_t *new = (ccupdate_struct_t *)info;
 877        cpucache_t *old = cc_data(new->cachep);
 878        
 879        cc_data(new->cachep) = new->new[smp_processor_id()];
 880        new->new[smp_processor_id()] = old;
 881}
 882
 883static void free_block (kmem_cache_t* cachep, void** objpp, int len);
 884
 885static void drain_cpu_caches(kmem_cache_t *cachep)
 886{
 887        ccupdate_struct_t new;
 888        int i;
 889
 890        memset(&new.new,0,sizeof(new.new));
 891
 892        new.cachep = cachep;
 893
 894        down(&cache_chain_sem);
 895        smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
 896
 897        for (i = 0; i < smp_num_cpus; i++) {
 898                cpucache_t* ccold = new.new[cpu_logical_map(i)];
 899                if (!ccold || (ccold->avail == 0))
 900                        continue;
 901                local_irq_disable();
 902                free_block(cachep, cc_entry(ccold), ccold->avail);
 903                local_irq_enable();
 904                ccold->avail = 0;
 905        }
 906        smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
 907        up(&cache_chain_sem);
 908}
 909
 910#else
 911#define drain_cpu_caches(cachep)        do { } while (0)
 912#endif
 913
 914/*
 915 * Called with the &cachep->spinlock held, returns number of slabs released
 916 */
 917static int __kmem_cache_shrink_locked(kmem_cache_t *cachep)
 918{
 919        slab_t *slabp;
 920        int ret = 0;
 921
 922        /* If the cache is growing, stop shrinking. */
 923        while (!cachep->growing) {
 924                struct list_head *p;
 925
 926                p = cachep->slabs_free.prev;
 927                if (p == &cachep->slabs_free)
 928                        break;
 929
 930                slabp = list_entry(cachep->slabs_free.prev, slab_t, list);
 931#if DEBUG
 932                if (slabp->inuse)
 933                        BUG();
 934#endif
 935                list_del(&slabp->list);
 936
 937                spin_unlock_irq(&cachep->spinlock);
 938                kmem_slab_destroy(cachep, slabp);
 939                ret++;
 940                spin_lock_irq(&cachep->spinlock);
 941        }
 942        return ret;
 943}
 944
 945static int __kmem_cache_shrink(kmem_cache_t *cachep)
 946{
 947        int ret;
 948
 949        drain_cpu_caches(cachep);
 950
 951        spin_lock_irq(&cachep->spinlock);
 952        __kmem_cache_shrink_locked(cachep);
 953        ret = !list_empty(&cachep->slabs_full) ||
 954                !list_empty(&cachep->slabs_partial);
 955        spin_unlock_irq(&cachep->spinlock);
 956        return ret;
 957}
 958
 959/**
 960 * kmem_cache_shrink - Shrink a cache.
 961 * @cachep: The cache to shrink.
 962 *
 963 * Releases as many slabs as possible for a cache.
 964 * Returns number of pages released.
 965 */
 966int kmem_cache_shrink(kmem_cache_t *cachep)
 967{
 968        int ret;
 969
 970        if (!cachep || in_interrupt() || !is_chained_kmem_cache(cachep))
 971                BUG();
 972
 973        spin_lock_irq(&cachep->spinlock);
 974        ret = __kmem_cache_shrink_locked(cachep);
 975        spin_unlock_irq(&cachep->spinlock);
 976
 977        return ret << cachep->gfporder;
 978}
 979
 980/**
 981 * kmem_cache_destroy - delete a cache
 982 * @cachep: the cache to destroy
 983 *
 984 * Remove a kmem_cache_t object from the slab cache.
 985 * Returns 0 on success.
 986 *
 987 * It is expected this function will be called by a module when it is
 988 * unloaded.  This will remove the cache completely, and avoid a duplicate
 989 * cache being allocated each time a module is loaded and unloaded, if the
 990 * module doesn't have persistent in-kernel storage across loads and unloads.
 991 *
 992 * The cache must be empty before calling this function.
 993 *
 994 * The caller must guarantee that noone will allocate memory from the cache
 995 * during the kmem_cache_destroy().
 996 */
 997int kmem_cache_destroy (kmem_cache_t * cachep)
 998{
 999        if (!cachep || in_interrupt() || cachep->growing)
1000                BUG();
1001
1002        /* Find the cache in the chain of caches. */
1003        down(&cache_chain_sem);
1004        /* the chain is never empty, cache_cache is never destroyed */
1005        if (clock_searchp == cachep)
1006                clock_searchp = list_entry(cachep->next.next,
1007                                                kmem_cache_t, next);
1008        list_del(&cachep->next);
1009        up(&cache_chain_sem);
1010
1011        if (__kmem_cache_shrink(cachep)) {
1012                printk(KERN_ERR "kmem_cache_destroy: Can't free all objects %p\n",
1013                       cachep);
1014                down(&cache_chain_sem);
1015                list_add(&cachep->next,&cache_chain);
1016                up(&cache_chain_sem);
1017                return 1;
1018        }
1019#ifdef CONFIG_SMP
1020        {
1021                int i;
1022                for (i = 0; i < NR_CPUS; i++)
1023                        kfree(cachep->cpudata[i]);
1024        }
1025#endif
1026        kmem_cache_free(&cache_cache, cachep);
1027
1028        return 0;
1029}
1030
1031/* Get the memory for a slab management obj. */
1032static inline slab_t * kmem_cache_slabmgmt (kmem_cache_t *cachep,
1033                        void *objp, int colour_off, int local_flags)
1034{
1035        slab_t *slabp;
1036        
1037        if (OFF_SLAB(cachep)) {
1038                /* Slab management obj is off-slab. */
1039                slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
1040                if (!slabp)
1041                        return NULL;
1042        } else {
1043                /* FIXME: change to
1044                        slabp = objp
1045                 * if you enable OPTIMIZE
1046                 */
1047                slabp = objp+colour_off;
1048                colour_off += L1_CACHE_ALIGN(cachep->num *
1049                                sizeof(kmem_bufctl_t) + sizeof(slab_t));
1050        }
1051        slabp->inuse = 0;
1052        slabp->colouroff = colour_off;
1053        slabp->s_mem = objp+colour_off;
1054
1055        return slabp;
1056}
1057
1058static inline void kmem_cache_init_objs (kmem_cache_t * cachep,
1059                        slab_t * slabp, unsigned long ctor_flags)
1060{
1061        int i;
1062
1063        for (i = 0; i < cachep->num; i++) {
1064                void* objp = slabp->s_mem+cachep->objsize*i;
1065#if DEBUG
1066                if (cachep->flags & SLAB_RED_ZONE) {
1067                        *((unsigned long*)(objp)) = RED_MAGIC1;
1068                        *((unsigned long*)(objp + cachep->objsize -
1069                                        BYTES_PER_WORD)) = RED_MAGIC1;
1070                        objp += BYTES_PER_WORD;
1071                }
1072#endif
1073
1074                /*
1075                 * Constructors are not allowed to allocate memory from
1076                 * the same cache which they are a constructor for.
1077                 * Otherwise, deadlock. They must also be threaded.
1078                 */
1079                if (cachep->ctor)
1080                        cachep->ctor(objp, cachep, ctor_flags);
1081#if DEBUG
1082                if (cachep->flags & SLAB_RED_ZONE)
1083                        objp -= BYTES_PER_WORD;
1084                if (cachep->flags & SLAB_POISON)
1085                        /* need to poison the objs */
1086                        kmem_poison_obj(cachep, objp);
1087                if (cachep->flags & SLAB_RED_ZONE) {
1088                        if (*((unsigned long*)(objp)) != RED_MAGIC1)
1089                                BUG();
1090                        if (*((unsigned long*)(objp + cachep->objsize -
1091                                        BYTES_PER_WORD)) != RED_MAGIC1)
1092                                BUG();
1093                }
1094#endif
1095                slab_bufctl(slabp)[i] = i+1;
1096        }
1097        slab_bufctl(slabp)[i-1] = BUFCTL_END;
1098        slabp->free = 0;
1099}
1100
1101/*
1102 * Grow (by 1) the number of slabs within a cache.  This is called by
1103 * kmem_cache_alloc() when there are no active objs left in a cache.
1104 */
1105static int kmem_cache_grow (kmem_cache_t * cachep, int flags)
1106{
1107        slab_t  *slabp;
1108        struct page     *page;
1109        void            *objp;
1110        size_t           offset;
1111        unsigned int     i, local_flags;
1112        unsigned long    ctor_flags;
1113        unsigned long    save_flags;
1114
1115        /* Be lazy and only check for valid flags here,
1116         * keeping it out of the critical path in kmem_cache_alloc().
1117         */
1118        if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW))
1119                BUG();
1120        if (flags & SLAB_NO_GROW)
1121                return 0;
1122
1123        /*
1124         * The test for missing atomic flag is performed here, rather than
1125         * the more obvious place, simply to reduce the critical path length
1126         * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
1127         * will eventually be caught here (where it matters).
1128         */
1129        if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC)
1130                BUG();
1131
1132        ctor_flags = SLAB_CTOR_CONSTRUCTOR;
1133        local_flags = (flags & SLAB_LEVEL_MASK);
1134        if (local_flags == SLAB_ATOMIC)
1135                /*
1136                 * Not allowed to sleep.  Need to tell a constructor about
1137                 * this - it might need to know...
1138                 */
1139                ctor_flags |= SLAB_CTOR_ATOMIC;
1140
1141        /* About to mess with non-constant members - lock. */
1142        spin_lock_irqsave(&cachep->spinlock, save_flags);
1143
1144        /* Get colour for the slab, and cal the next value. */
1145        offset = cachep->colour_next;
1146        cachep->colour_next++;
1147        if (cachep->colour_next >= cachep->colour)
1148                cachep->colour_next = 0;
1149        offset *= cachep->colour_off;
1150        cachep->dflags |= DFLGS_GROWN;
1151
1152        cachep->growing++;
1153        spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1154
1155        /* A series of memory allocations for a new slab.
1156         * Neither the cache-chain semaphore, or cache-lock, are
1157         * held, but the incrementing c_growing prevents this
1158         * cache from being reaped or shrunk.
1159         * Note: The cache could be selected in for reaping in
1160         * kmem_cache_reap(), but when the final test is made the
1161         * growing value will be seen.
1162         */
1163
1164        /* Get mem for the objs. */
1165        if (!(objp = kmem_getpages(cachep, flags)))
1166                goto failed;
1167
1168        /* Get slab management. */
1169        if (!(slabp = kmem_cache_slabmgmt(cachep, objp, offset, local_flags)))
1170                goto opps1;
1171
1172        /* Nasty!!!!!! I hope this is OK. */
1173        i = 1 << cachep->gfporder;
1174        page = virt_to_page(objp);
1175        do {
1176                SET_PAGE_CACHE(page, cachep);
1177                SET_PAGE_SLAB(page, slabp);
1178                PageSetSlab(page);
1179                page++;
1180        } while (--i);
1181
1182        kmem_cache_init_objs(cachep, slabp, ctor_flags);
1183
1184        spin_lock_irqsave(&cachep->spinlock, save_flags);
1185        cachep->growing--;
1186
1187        /* Make slab active. */
1188        list_add_tail(&slabp->list, &cachep->slabs_free);
1189        STATS_INC_GROWN(cachep);
1190        cachep->failures = 0;
1191
1192        spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1193        return 1;
1194opps1:
1195        kmem_freepages(cachep, objp);
1196failed:
1197        spin_lock_irqsave(&cachep->spinlock, save_flags);
1198        cachep->growing--;
1199        spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1200        return 0;
1201}
1202
/*
 * Extra sanity checks performed when an object is freed (DEBUG only):
 * - the pointer must map to a valid object index of the slab,
 * - the pointer must sit exactly on an object boundary,
 * - the object must not already be on the slab's free list (double free).
 * Any violation is a BUG().  Always returns 0.
 * Called with the cache-lock held.
 */

#if DEBUG
static int kmem_extra_free_checks (kmem_cache_t * cachep,
			slab_t *slabp, void * objp)
{
	unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
	int cursor;

	if (objnr >= cachep->num)
		BUG();
	if (objp != slabp->s_mem + objnr*cachep->objsize)
		BUG();

	/* Walk the slab's free list looking for this object. */
	cursor = slabp->free;
	while (cursor != BUFCTL_END) {
		if (cursor == objnr)
			BUG();
		cursor = slab_bufctl(slabp)[cursor];
	}
	return 0;
}
#endif
1230
1231static inline void kmem_cache_alloc_head(kmem_cache_t *cachep, int flags)
1232{
1233        if (flags & SLAB_DMA) {
1234                if (!(cachep->gfpflags & GFP_DMA))
1235                        BUG();
1236        } else {
1237                if (cachep->gfpflags & GFP_DMA)
1238                        BUG();
1239        }
1240}
1241
1242static inline void * kmem_cache_alloc_one_tail (kmem_cache_t *cachep,
1243                                                slab_t *slabp)
1244{
1245        void *objp;
1246
1247        STATS_INC_ALLOCED(cachep);
1248        STATS_INC_ACTIVE(cachep);
1249        STATS_SET_HIGH(cachep);
1250
1251        /* get obj pointer */
1252        slabp->inuse++;
1253        objp = slabp->s_mem + slabp->free*cachep->objsize;
1254        slabp->free=slab_bufctl(slabp)[slabp->free];
1255
1256        if (unlikely(slabp->free == BUFCTL_END)) {
1257                list_del(&slabp->list);
1258                list_add(&slabp->list, &cachep->slabs_full);
1259        }
1260#if DEBUG
1261        if (cachep->flags & SLAB_POISON)
1262                if (kmem_check_poison_obj(cachep, objp))
1263                        BUG();
1264        if (cachep->flags & SLAB_RED_ZONE) {
1265                /* Set alloc red-zone, and check old one. */
1266                if (xchg((unsigned long *)objp, RED_MAGIC2) !=
1267                                                         RED_MAGIC1)
1268                        BUG();
1269                if (xchg((unsigned long *)(objp+cachep->objsize -
1270                          BYTES_PER_WORD), RED_MAGIC2) != RED_MAGIC1)
1271                        BUG();
1272                objp += BYTES_PER_WORD;
1273        }
1274#endif
1275        return objp;
1276}
1277
/*
 * Evaluates to a pointer to an object taken from the given cache.
 *
 * The first partial slab is preferred; if there is none, the first free
 * slab is moved onto the partial list and used.  If there is no free slab
 * either, control jumps to the label `alloc_new_slab`, which the
 * enclosing function must provide.
 *
 * The caller must guarantee synchronization.
 * This is a #define (rather than a function) for the goto optimization 8-)
 */
#define kmem_cache_alloc_one(cachep)					\
({									\
	struct list_head *partial_head = &(cachep)->slabs_partial;	\
	struct list_head *node = partial_head->next;			\
									\
	if (unlikely(node == partial_head)) {				\
		struct list_head *free_head = &(cachep)->slabs_free;	\
									\
		node = free_head->next;					\
		if (unlikely(node == free_head))			\
			goto alloc_new_slab;				\
		list_del(node);						\
		list_add(node, partial_head);				\
	}								\
									\
	kmem_cache_alloc_one_tail(cachep,				\
				  list_entry(node, slab_t, list));	\
})
1303
#ifdef CONFIG_SMP
/*
 * Refill the per-cpu array @cc with up to cachep->batchcount objects
 * taken from the cache's partial/free slabs (under the cache spinlock),
 * then hand out one of them.
 *
 * Stops early when neither a partial nor a free slab is left.
 * Returns an object popped from @cc, or NULL if @cc is still empty.
 * @flags is accepted but not used here.
 */
void* kmem_cache_alloc_batch(kmem_cache_t* cachep, cpucache_t* cc, int flags)
{
	struct list_head *partial_head = &cachep->slabs_partial;
	struct list_head *free_head = &cachep->slabs_free;
	int todo;

	spin_lock(&cachep->spinlock);
	for (todo = cachep->batchcount; todo != 0; todo--) {
		struct list_head *node = partial_head->next;
		void *obj;

		/* Get slab alloc is to come from. */
		if (unlikely(node == partial_head)) {
			node = free_head->next;
			if (unlikely(node == free_head))
				break;
			list_del(node);
			list_add(node, partial_head);
		}

		obj = kmem_cache_alloc_one_tail(cachep,
					list_entry(node, slab_t, list));
		cc_entry(cc)[cc->avail++] = obj;
	}
	spin_unlock(&cachep->spinlock);

	if (!cc->avail)
		return NULL;
	return cc_entry(cc)[--cc->avail];
}
#endif
1337
1338static inline void * __kmem_cache_alloc (kmem_cache_t *cachep, int flags)
1339{
1340        unsigned long save_flags;
1341        void* objp;
1342
1343        kmem_cache_alloc_head(cachep, flags);
1344try_again:
1345        local_irq_save(save_flags);
1346#ifdef CONFIG_SMP
1347        {
1348                cpucache_t *cc = cc_data(cachep);
1349
1350                if (cc) {
1351                        if (cc->avail) {
1352                                STATS_INC_ALLOCHIT(cachep);
1353                                objp = cc_entry(cc)[--cc->avail];
1354                        } else {
1355                                STATS_INC_ALLOCMISS(cachep);
1356                                objp = kmem_cache_alloc_batch(cachep,cc,flags);
1357                                if (!objp)
1358                                        goto alloc_new_slab_nolock;
1359                        }
1360                } else {
1361                        spin_lock(&cachep->spinlock);
1362                        objp = kmem_cache_alloc_one(cachep);
1363                        spin_unlock(&cachep->spinlock);
1364                }
1365        }
1366#else
1367        objp = kmem_cache_alloc_one(cachep);
1368#endif
1369        local_irq_restore(save_flags);
1370        return objp;
1371alloc_new_slab:
1372#ifdef CONFIG_SMP
1373        spin_unlock(&cachep->spinlock);
1374alloc_new_slab_nolock:
1375#endif
1376        local_irq_restore(save_flags);
1377        if (kmem_cache_grow(cachep, flags))
1378                /* Someone may have stolen our objs.  Doesn't matter, we'll
1379                 * just come back here again.
1380                 */
1381                goto try_again;
1382        return NULL;
1383}
1384
1385/*
1386 * Release an obj back to its cache. If the obj has a constructed
1387 * state, it should be in this state _before_ it is released.
1388 * - caller is responsible for the synchronization
1389 */
1390
#if DEBUG
/*
 * Pointer sanity checks for the free paths (DEBUG builds only).
 *
 * CHECK_NR(pg):   BUG() unless VALID_PAGE(pg) holds.
 * CHECK_PAGE(pg): CHECK_NR(pg), then BUG() unless PageSlab(pg) holds.
 *
 * Note: both macros print `objp`, which is NOT a macro parameter - a
 * variable of that name must be in scope wherever they are used.
 */
# define CHECK_NR(pg)						\
	do {							\
		if (VALID_PAGE(pg))				\
			break;					\
		printk(KERN_ERR "kfree: out of range ptr %lxh.\n", \
			(unsigned long)objp);			\
		BUG();						\
	} while (0)
# define CHECK_PAGE(pg)						\
	do {							\
		CHECK_NR(pg);					\
		if (PageSlab(pg))				\
			break;					\
		printk(KERN_ERR "kfree: bad ptr %lxh.\n",	\
			(unsigned long)objp);			\
		BUG();						\
	} while (0)

#else
/* Checks compiled out. */
# define CHECK_PAGE(pg) do { } while (0)
#endif
1413
1414static inline void kmem_cache_free_one(kmem_cache_t *cachep, void *objp)
1415{
1416        slab_t* slabp;
1417
1418        CHECK_PAGE(virt_to_page(objp));
1419        /* reduces memory footprint
1420         *
1421        if (OPTIMIZE(cachep))
1422                slabp = (void*)((unsigned long)objp&(~(PAGE_SIZE-1)));
1423         else
1424         */
1425        slabp = GET_PAGE_SLAB(virt_to_page(objp));
1426
1427#if DEBUG
1428        if (cachep->flags & SLAB_DEBUG_INITIAL)
1429                /* Need to call the slab's constructor so the
1430                 * caller can perform a verify of its state (debugging).
1431                 * Called without the cache-lock held.
1432                 */
1433                cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
1434
1435        if (cachep->flags & SLAB_RED_ZONE) {
1436                objp -= BYTES_PER_WORD;
1437                if (xchg((unsigned long *)objp, RED_MAGIC1) != RED_MAGIC2)
1438                        /* Either write before start, or a double free. */
1439                        BUG();
1440                if (xchg((unsigned long *)(objp+cachep->objsize -
1441                                BYTES_PER_WORD), RED_MAGIC1) != RED_MAGIC2)
1442                        /* Either write past end, or a double free. */
1443                        BUG();
1444        }
1445        if (cachep->flags & SLAB_POISON)
1446                kmem_poison_obj(cachep, objp);
1447        if (kmem_extra_free_checks(cachep, slabp, objp))
1448                return;
1449#endif
1450        {
1451                unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
1452
1453                slab_bufctl(slabp)[objnr] = slabp->free;
1454                slabp->free = objnr;
1455        }
1456        STATS_DEC_ACTIVE(cachep);
1457        
1458        /* fixup slab chains */
1459        {
1460                int inuse = slabp->inuse;
1461                if (unlikely(!--slabp->inuse)) {
1462                        /* Was partial or full, now empty. */
1463                        list_del(&slabp->list);
1464                        list_add(&slabp->list, &cachep->slabs_free);
1465                } else if (unlikely(inuse == cachep->num)) {
1466                        /* Was full. */
1467                        list_del(&slabp->list);
1468                        list_add(&slabp->list, &cachep->slabs_partial);
1469                }
1470        }
1471}
1472
#ifdef CONFIG_SMP
/*
 * Return @len objects from the array @objpp to their slabs.
 * No locking is done here; see free_block().
 */
static inline void __free_block (kmem_cache_t* cachep,
							void** objpp, int len)
{
	int i;

	for (i = 0; i < len; i++)
		kmem_cache_free_one(cachep, objpp[i]);
}

/*
 * Locked variant of __free_block(): takes cachep->spinlock around the
 * release.  The interrupt state is not changed here.
 */
static void free_block (kmem_cache_t* cachep, void** objpp, int len)
{
	spin_lock(&cachep->spinlock);
	__free_block(cachep, objpp, len);
	spin_unlock(&cachep->spinlock);
}
#endif
1488
1489/*
1490 * __kmem_cache_free
1491 * called with disabled ints
1492 */
1493static inline void __kmem_cache_free (kmem_cache_t *cachep, void* objp)
1494{
1495#ifdef CONFIG_SMP
1496        cpucache_t *cc = cc_data(cachep);
1497
1498        CHECK_PAGE(virt_to_page(objp));
1499        if (cc) {
1500                int batchcount;
1501                if (cc->avail < cc->limit) {
1502                        STATS_INC_FREEHIT(cachep);
1503                        cc_entry(cc)[cc->avail++] = objp;
1504                        return;
1505                }
1506                STATS_INC_FREEMISS(cachep);
1507                batchcount = cachep->batchcount;
1508                cc->avail -= batchcount;
1509                free_block(cachep,
1510                                        &cc_entry(cc)[cc->avail],batchcount);
1511                cc_entry(cc)[cc->avail++] = objp;
1512                return;
1513        } else {
1514                free_block(cachep, &objp, 1);
1515        }
1516#else
1517        kmem_cache_free_one(cachep, objp);
1518#endif
1519}
1520
1521/**
1522 * kmem_cache_alloc - Allocate an object
1523 * @cachep: The cache to allocate from.
1524 * @flags: See kmalloc().
1525 *
1526 * Allocate an object from this cache.  The flags are only relevant
1527 * if the cache has no available objects.
1528 */
1529void * kmem_cache_alloc (kmem_cache_t *cachep, int flags)
1530{
1531        return __kmem_cache_alloc(cachep, flags);
1532}
1533
1534/**
1535 * kmalloc - allocate memory
1536 * @size: how many bytes of memory are required.
1537 * @flags: the type of memory to allocate.
1538 *
1539 * kmalloc is the normal method of allocating memory
1540 * in the kernel.
1541 *
1542 * The @flags argument may be one of:
1543 *
1544 * %GFP_USER - Allocate memory on behalf of user.  May sleep.
1545 *
1546 * %GFP_KERNEL - Allocate normal kernel ram.  May sleep.
1547 *
1548 * %GFP_ATOMIC - Allocation will not sleep.  Use inside interrupt handlers.
1549 *
1550 * Additionally, the %GFP_DMA flag may be set to indicate the memory
1551 * must be suitable for DMA.  This can mean different things on different
1552 * platforms.  For example, on i386, it means that the memory must come
1553 * from the first 16MB.
1554 */
1555void * kmalloc (size_t size, int flags)
1556{
1557        cache_sizes_t *csizep = cache_sizes;
1558
1559        for (; csizep->cs_size; csizep++) {
1560                if (size > csizep->cs_size)
1561                        continue;
1562                return __kmem_cache_alloc(flags & GFP_DMA ?
1563                         csizep->cs_dmacachep : csizep->cs_cachep, flags);
1564        }
1565        return NULL;
1566}
1567
1568/**
1569 * kmem_cache_free - Deallocate an object
1570 * @cachep: The cache the allocation was from.
1571 * @objp: The previously allocated object.
1572 *
1573 * Free an object which was previously allocated from this
1574 * cache.
1575 */
1576void kmem_cache_free (kmem_cache_t *cachep, void *objp)
1577{
1578        unsigned long flags;
1579#if DEBUG
1580        CHECK_PAGE(virt_to_page(objp));
1581        if (cachep != GET_PAGE_CACHE(virt_to_page(objp)))
1582                BUG();
1583#endif
1584
1585        local_irq_save(flags);
1586        __kmem_cache_free(cachep, objp);
1587        local_irq_restore(flags);
1588}
1589
1590/**
1591 * kfree - free previously allocated memory
1592 * @objp: pointer returned by kmalloc.
1593 *
1594 * Don't free memory not originally allocated by kmalloc()
1595 * or you will run into trouble.
1596 */
1597void kfree (const void *objp)
1598{
1599        kmem_cache_t *c;
1600        unsigned long flags;
1601
1602        if (!objp)
1603                return;
1604        local_irq_save(flags);
1605        CHECK_PAGE(virt_to_page(objp));
1606        c = GET_PAGE_CACHE(virt_to_page(objp));
1607        __kmem_cache_free(c, (void*)objp);
1608        local_irq_restore(flags);
1609}
1610
1611unsigned int kmem_cache_size(kmem_cache_t *cachep)
1612{
1613#if DEBUG
1614        if (cachep->flags & SLAB_RED_ZONE)
1615                return (cachep->objsize - 2*BYTES_PER_WORD);
1616#endif
1617        return cachep->objsize;
1618}
1619
1620kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
1621{
1622        cache_sizes_t *csizep = cache_sizes;
1623
1624        /* This function could be moved to the header file, and
1625         * made inline so consumers can quickly determine what
1626         * cache pointer they require.
1627         */
1628        for ( ; csizep->cs_size; csizep++) {
1629                if (size > csizep->cs_size)
1630                        continue;
1631                break;
1632        }
1633        return (gfpflags & GFP_DMA) ? csizep->cs_dmacachep : csizep->cs_cachep;
1634}
1635
1636#ifdef CONFIG_SMP
1637
1638/* called with cache_chain_sem acquired.  */
1639static int kmem_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount)
1640{
1641        ccupdate_struct_t new;
1642        int i;
1643
1644        /*
1645         * These are admin-provided, so we are more graceful.
1646         */
1647        if (limit < 0)
1648                return -EINVAL;
1649        if (batchcount < 0)
1650                return -EINVAL;
1651        if (batchcount > limit)
1652                return -EINVAL;
1653        if (limit != 0 && !batchcount)
1654                return -EINVAL;
1655
1656        memset(&new.new,0,sizeof(new.new));
1657        if (limit) {
1658                for (i = 0; i< smp_num_cpus; i++) {
1659                        cpucache_t* ccnew;
1660
1661                        ccnew = kmalloc(sizeof(void*)*limit+
1662                                        sizeof(cpucache_t), GFP_KERNEL);
1663                        if (!ccnew)
1664                                goto oom;
1665                        ccnew->limit = limit;
1666                        ccnew->avail = 0;
1667                        new.new[cpu_logical_map(i)] = ccnew;
1668                }
1669        }
1670        new.cachep = cachep;
1671        spin_lock_irq(&cachep->spinlock);
1672        cachep->batchcount = batchcount;
1673        spin_unlock_irq(&cachep->spinlock);
1674
1675        smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
1676
1677        for (i = 0; i < smp_num_cpus; i++) {
1678                cpucache_t* ccold = new.new[cpu_logical_map(i)];
1679                if (!ccold)
1680                        continue;
1681                local_irq_disable();
1682                free_block(cachep, cc_entry(ccold), ccold->avail);
1683                local_irq_enable();
1684                kfree(ccold);
1685        }
1686        return 0;
1687oom:
1688        for (i--; i >= 0; i--)
1689                kfree(new.new[cpu_logical_map(i)]);
1690        return -ENOMEM;
1691}
1692
1693static void enable_cpucache (kmem_cache_t *cachep)
1694{
1695        int err;
1696        int limit;
1697
1698        /* FIXME: optimize */
1699        if (cachep->objsize > PAGE_SIZE)
1700                return;
1701        if (cachep->objsize > 1024)
1702                limit = 60;
1703        else if (cachep->objsize > 256)
1704                limit = 124;
1705        else
1706                limit = 252;
1707
1708        err = kmem_tune_cpucache(cachep, limit, limit/2);
1709        if (err)
1710                printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
1711                                        cachep->name, -err);
1712}
1713
1714static void enable_all_cpucaches (void)
1715{
1716        struct list_head* p;
1717
1718        down(&cache_chain_sem);
1719
1720        p = &cache_cache.next;
1721        do {
1722                kmem_cache_t* cachep = list_entry(p, kmem_cache_t, next);
1723
1724                enable_cpucache(cachep);
1725                p = cachep->next.next;
1726        } while (p != &cache_cache.next);
1727
1728        up(&cache_chain_sem);
1729}
1730#endif
1731
1732/**
1733 * kmem_cache_reap - Reclaim memory from caches.
1734 * @gfp_mask: the type of memory required.
1735 *
1736 * Called from do_try_to_free_pages() and __alloc_pages()
1737 */
1738int fastcall kmem_cache_reap (int gfp_mask)
1739{
1740        slab_t *slabp;
1741        kmem_cache_t *searchp;
1742        kmem_cache_t *best_cachep;
1743        unsigned int best_pages;
1744        unsigned int best_len;
1745        unsigned int scan;
1746        int ret = 0;
1747
1748        if (gfp_mask & __GFP_WAIT)
1749                down(&cache_chain_sem);
1750        else
1751                if (down_trylock(&cache_chain_sem))
1752                        return 0;
1753
1754        scan = REAP_SCANLEN;
1755        best_len = 0;
1756        best_pages = 0;
1757        best_cachep = NULL;
1758        searchp = clock_searchp;
1759        do {
1760                unsigned int pages;
1761                struct list_head* p;
1762                unsigned int full_free;
1763
1764                /* It's safe to test this without holding the cache-lock. */
1765                if (searchp->flags & SLAB_NO_REAP)
1766                        goto next;
1767                spin_lock_irq(&searchp->spinlock);
1768                if (searchp->growing)
1769                        goto next_unlock;
1770                if (searchp->dflags & DFLGS_GROWN) {
1771                        searchp->dflags &= ~DFLGS_GROWN;
1772                        goto next_unlock;
1773                }
1774#ifdef CONFIG_SMP
1775                {
1776                        cpucache_t *cc = cc_data(searchp);
1777                        if (cc && cc->avail) {
1778                                __free_block(searchp, cc_entry(cc), cc->avail);
1779                                cc->avail = 0;
1780                        }
1781                }
1782#endif
1783
1784                full_free = 0;
1785                p = searchp->slabs_free.next;
1786                while (p != &searchp->slabs_free) {
1787#if DEBUG
1788                        slabp = list_entry(p, slab_t, list);
1789
1790                        if (slabp->inuse)
1791                                BUG();
1792#endif
1793                        full_free++;
1794                        p = p->next;
1795                }
1796
1797                /*
1798                 * Try to avoid slabs with constructors and/or
1799                 * more than one page per slab (as it can be difficult
1800                 * to get high orders from gfp()).
1801                 */
1802                pages = full_free * (1<<searchp->gfporder);
1803                if (searchp->ctor)
1804                        pages = (pages*4+1)/5;
1805                if (searchp->gfporder)
1806                        pages = (pages*4+1)/5;
1807                if (pages > best_pages) {
1808                        best_cachep = searchp;
1809                        best_len = full_free;
1810                        best_pages = pages;
1811                        if (pages >= REAP_PERFECT) {
1812                                clock_searchp = list_entry(searchp->next.next,
1813                                                        kmem_cache_t,next);
1814                                goto perfect;
1815                        }
1816                }
1817next_unlock:
1818                spin_unlock_irq(&searchp->spinlock);
1819next:
1820                searchp = list_entry(searchp->next.next,kmem_cache_t,next);
1821        } while (--scan && searchp != clock_searchp);
1822
1823        clock_searchp = searchp;
1824
1825        if (!best_cachep)
1826                /* couldn't find anything to reap */
1827                goto out;
1828
1829        spin_lock_irq(&best_cachep->spinlock);
1830perfect:
1831        /* free only 50% of the free slabs */
1832        best_len = (best_len + 1)/2;
1833        for (scan = 0; scan < best_len; scan++) {
1834                struct list_head *p;
1835
1836                if (best_cachep->growing)
1837                        break;
1838                p = best_cachep->slabs_free.prev;
1839                if (p == &best_cachep->slabs_free)
1840                        break;
1841                slabp = list_entry(p,slab_t,list);
1842#if DEBUG
1843                if (slabp->inuse)
1844                        BUG();
1845#endif
1846                list_del(&slabp->list);
1847                STATS_INC_REAPED(best_cachep);
1848
1849                /* Safe to drop the lock. The slab is no longer linked to the
1850                 * cache.
1851                 */
1852                spin_unlock_irq(&best_cachep->spinlock);
1853                kmem_slab_destroy(best_cachep, slabp);
1854                spin_lock_irq(&best_cachep->spinlock);
1855        }
1856        spin_unlock_irq(&best_cachep->spinlock);
1857        ret = scan * (1 << best_cachep->gfporder);
1858out:
1859        up(&cache_chain_sem);
1860        return ret;
1861}
1862
1863#ifdef CONFIG_PROC_FS
1864
1865static void *s_start(struct seq_file *m, loff_t *pos)
1866{
1867        loff_t n = *pos;
1868        struct list_head *p;
1869
1870        down(&cache_chain_sem);
1871        if (!n)
1872                return (void *)1;
1873        p = &cache_cache.next;
1874        while (--n) {
1875                p = p->next;
1876                if (p == &cache_cache.next)
1877                        return NULL;
1878        }
1879        return list_entry(p, kmem_cache_t, next);
1880}
1881
1882static void *s_next(struct seq_file *m, void *p, loff_t *pos)
1883{
1884        kmem_cache_t *cachep = p;
1885        ++*pos;
1886        if (p == (void *)1)
1887                return &cache_cache;
1888        cachep = list_entry(cachep->next.next, kmem_cache_t, next);
1889        return cachep == &cache_cache ? NULL : cachep;
1890}
1891
1892static void s_stop(struct seq_file *m, void *p)
1893{
1894        up(&cache_chain_sem);
1895}
1896
1897static int s_show(struct seq_file *m, void *p)
1898{
1899        kmem_cache_t *cachep = p;
1900        struct list_head *q;
1901        slab_t          *slabp;
1902        unsigned long   active_objs;
1903        unsigned long   num_objs;
1904        unsigned long   active_slabs = 0;
1905        unsigned long   num_slabs;
1906        const char *name; 
1907
1908        if (p == (void*)1) {
1909                /*
1910                 * Output format version, so at least we can change it
1911                 * without _too_ many complaints.
1912                 */
1913                seq_puts(m, "slabinfo - version: 1.1"
1914#if STATS
1915                                " (statistics)"
1916#endif
1917#ifdef CONFIG_SMP
1918                                " (SMP)"
1919#endif
1920                                "\n");
1921                return 0;
1922        }
1923
1924        spin_lock_irq(&cachep->spinlock);
1925        active_objs = 0;
1926        num_slabs = 0;
1927        list_for_each(q,&cachep->slabs_full) {
1928                slabp = list_entry(q, slab_t, list);
1929                if (slabp->inuse != cachep->num)
1930                        BUG();
1931                active_objs += cachep->num;
1932                active_slabs++;
1933        }
1934        list_for_each(q,&cachep->slabs_partial) {
1935                slabp = list_entry(q, slab_t, list);
1936                if (slabp->inuse == cachep->num || !slabp->inuse)
1937                        BUG();
1938                active_objs += slabp->inuse;
1939                active_slabs++;
1940        }
1941        list_for_each(q,&cachep->slabs_free) {
1942                slabp = list_entry(q, slab_t, list);
1943                if (slabp->inuse)
1944                        BUG();
1945                num_slabs++;
1946        }
1947        num_slabs+=active_slabs;
1948        num_objs = num_slabs*cachep->num;
1949
1950        name = cachep->name; 
1951        {
1952        char tmp; 
1953        mm_segment_t    old_fs;
1954        old_fs = get_fs();
1955        set_fs(KERNEL_DS);
1956        if (__get_user(tmp, name)) 
1957                name = "broken"; 
1958        set_fs(old_fs);
1959        }       
1960
1961        seq_printf(m, "%-17s %6lu %6lu %6u %4lu %4lu %4u",
1962                name, active_objs, num_objs, cachep->objsize,
1963                active_slabs, num_slabs, (1<<cachep->gfporder));
1964
1965#if STATS
1966        {
1967                unsigned long errors = cachep->errors;
1968                unsigned long high = cachep->high_mark;
1969                unsigned long grown = cachep->grown;
1970                unsigned long reaped = cachep->reaped;
1971                unsigned long allocs = cachep->num_allocations;
1972
1973                seq_printf(m, " : %6lu %7lu %5lu %4lu %4lu",
1974                                high, allocs, grown, reaped, errors);
1975        }
1976#endif
1977#ifdef CONFIG_SMP
1978        {
1979                cpucache_t *cc = cc_data(cachep);
1980                unsigned int batchcount = cachep->batchcount;
1981                unsigned int limit;
1982
1983                if (cc)
1984                        limit = cc->limit;
1985                else
1986                        limit = 0;
1987                seq_printf(m, " : %4u %4u",
1988                                limit, batchcount);
1989        }
1990#endif
1991#if STATS && defined(CONFIG_SMP)
1992        {
1993                unsigned long allochit = atomic_read(&cachep->allochit);
1994                unsigned long allocmiss = atomic_read(&cachep->allocmiss);
1995                unsigned long freehit = atomic_read(&cachep->freehit);
1996                unsigned long freemiss = atomic_read(&cachep->freemiss);
1997                seq_printf(m, " : %6lu %6lu %6lu %6lu",
1998                                allochit, allocmiss, freehit, freemiss);
1999        }
2000#endif
2001        spin_unlock_irq(&cachep->spinlock);
2002        seq_putc(m, '\n');
2003        return 0;
2004}
2005
2006/**
2007 * slabinfo_op - iterator that generates /proc/slabinfo
2008 *
2009 * Output layout:
2010 * cache-name
2011 * num-active-objs
2012 * total-objs
2013 * object size
2014 * num-active-slabs
2015 * total-slabs
2016 * num-pages-per-slab
2017 * + further values on SMP and with statistics enabled
2018 */
2019
2020struct seq_operations slabinfo_op = {
2021        start:  s_start,
2022        next:   s_next,
2023        stop:   s_stop,
2024        show:   s_show
2025};
2026
2027#define MAX_SLABINFO_WRITE 128
2028/**
2029 * slabinfo_write - SMP tuning for the slab allocator
2030 * @file: unused
2031 * @buffer: user buffer
2032 * @count: data len
2033 * @data: unused
2034 */
2035ssize_t slabinfo_write(struct file *file, const char *buffer,
2036                                size_t count, loff_t *ppos)
2037{
2038#ifdef CONFIG_SMP
2039        char kbuf[MAX_SLABINFO_WRITE+1], *tmp;
2040        int limit, batchcount, res;
2041        struct list_head *p;
2042        
2043        if (count > MAX_SLABINFO_WRITE)
2044                return -EINVAL;
2045        if (copy_from_user(&kbuf, buffer, count))
2046                return -EFAULT;
2047        kbuf[MAX_SLABINFO_WRITE] = '\0'; 
2048
2049        tmp = strchr(kbuf, ' ');
2050        if (!tmp)
2051                return -EINVAL;
2052        *tmp = '\0';
2053        tmp++;
2054        limit = simple_strtol(tmp, &tmp, 10);
2055        while (*tmp == ' ')
2056                tmp++;
2057        batchcount = simple_strtol(tmp, &tmp, 10);
2058
2059        /* Find the cache in the chain of caches. */
2060        down(&cache_chain_sem);
2061        res = -EINVAL;
2062        list_for_each(p,&cache_chain) {
2063                kmem_cache_t *cachep = list_entry(p, kmem_cache_t, next);
2064
2065                if (!strcmp(cachep->name, kbuf)) {
2066                        res = kmem_tune_cpucache(cachep, limit, batchcount);
2067                        break;
2068                }
2069        }
2070        up(&cache_chain_sem);
2071        if (res >= 0)
2072                res = count;
2073        return res;
2074#else
2075        return -EINVAL;
2076#endif
2077}
2078#endif
2079
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.