linux-bk/mm/slab.c
<<
>>
Prefs
   1/*
   2 * linux/mm/slab.c
   3 * Written by Mark Hemment, 1996/97.
   4 * (markhe@nextd.demon.co.uk)
   5 *
   6 * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
   7 *
   8 * Major cleanup, different bufctl logic, per-cpu arrays
   9 *      (c) 2000 Manfred Spraul
  10 *
  11 * An implementation of the Slab Allocator as described in outline in;
  12 *      UNIX Internals: The New Frontiers by Uresh Vahalia
  13 *      Pub: Prentice Hall      ISBN 0-13-101908-2
  14 * or with a little more detail in;
  15 *      The Slab Allocator: An Object-Caching Kernel Memory Allocator
  16 *      Jeff Bonwick (Sun Microsystems).
  17 *      Presented at: USENIX Summer 1994 Technical Conference
  18 *
  19 *
  20 * The memory is organized in caches, one cache for each object type.
  21 * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
  22 * Each cache consists of many slabs (they are small (usually one
  23 * page long) and always contiguous), and each slab contains multiple
  24 * initialized objects.
  25 *
  26 * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
  27 * normal). If you need a special memory type, then you must create a new
  28 * cache for that memory type.
  29 *
  30 * In order to reduce fragmentation, the slabs are sorted in 3 groups:
  31 *   full slabs with 0 free objects
  32 *   partial slabs
  33 *   empty slabs with no allocated objects
  34 *
  35 * If partial slabs exist, then new allocations come from these slabs,
  36 * otherwise from empty slabs or new slabs are allocated.
  37 *
  38 * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
  39 * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
  40 *
  41 * On SMP systems, each cache has a short per-cpu head array, most allocs
  42 * and frees go into that array, and if that array overflows, then 1/2
  43 * of the entries in the array are given back into the global cache.
  44 * This reduces the number of spinlock operations.
  45 *
  46 * The c_cpuarray may not be read with enabled local interrupts.
  47 *
  48 * SMP synchronization:
  49 *  constructors and destructors are called without any locking.
  50 *  Several members in kmem_cache_t and slab_t never change, they
  51 *      are accessed without any locking.
  52 *  The per-cpu arrays are never accessed from the wrong cpu, no locking,
  53 *      and local interrupts are disabled so slab code is preempt-safe.
  54 *  The non-constant members are protected with a per-cache irq spinlock.
  55 *
  56 * Further notes from the original documentation:
  57 *
  58 * 11 April '97.  Started multi-threading - markhe
  59 *      The global cache-chain is protected by the semaphore 'cache_chain_sem'.
  60 *      The sem is only needed when accessing/extending the cache-chain, which
  61 *      can never happen inside an interrupt (kmem_cache_create(),
  62 *      kmem_cache_shrink() and kmem_cache_reap()).
  63 *
  64 *      To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which
  65 *      may be sleeping and therefore not holding the semaphore/lock), the
  66 *      growing field is used.  This also prevents reaping from a cache.
  67 *
  68 *      At present, each engine can be growing a cache.  This should be blocked.
  69 *
  70 */
  71
  72#include        <linux/config.h>
  73#include        <linux/slab.h>
  74#include        <linux/mm.h>
  75#include        <linux/cache.h>
  76#include        <linux/interrupt.h>
  77#include        <linux/init.h>
  78#include        <linux/compiler.h>
  79#include        <linux/seq_file.h>
  80#include        <asm/uaccess.h>
  81
  82/*
  83 * DEBUG        - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
  84 *                SLAB_RED_ZONE & SLAB_POISON.
  85 *                0 for faster, smaller code (especially in the critical paths).
  86 *
  87 * STATS        - 1 to collect stats for /proc/slabinfo.
  88 *                0 for faster, smaller code (especially in the critical paths).
  89 *
  90 * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
  91 */
  92
  93#ifdef CONFIG_DEBUG_SLAB
  94#define DEBUG           1
  95#define STATS           1
  96#define FORCED_DEBUG    1
  97#else
  98#define DEBUG           0
  99#define STATS           0
 100#define FORCED_DEBUG    0
 101#endif
 102
 103/*
 104 * Parameters for kmem_cache_reap
 105 */
 106#define REAP_SCANLEN    10
 107#define REAP_PERFECT    10
 108
 109/* Shouldn't this be in a header file somewhere? */
 110#define BYTES_PER_WORD          sizeof(void *)
 111
 112/* Legal flag mask for kmem_cache_create(). */
 113#if DEBUG
 114# define CREATE_MASK    (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
 115                         SLAB_POISON | SLAB_HWCACHE_ALIGN | \
 116                         SLAB_NO_REAP | SLAB_CACHE_DMA | \
 117                         SLAB_MUST_HWCACHE_ALIGN)
 118#else
 119# define CREATE_MASK    (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \
 120                         SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN)
 121#endif
 122
 123/*
 124 * kmem_bufctl_t:
 125 *
 126 * Bufctl's are used for linking objs within a slab,
 127 * using linked offsets.
 128 *
 129 * This implementation relies on "struct page" for locating the cache &
 130 * slab an object belongs to.
 131 * This allows the bufctl structure to be small (one int), but limits
 132 * the number of objects a slab (not a cache) can contain when off-slab
 133 * bufctls are used. The limit is the size of the largest general cache
 134 * that does not use off-slab slabs.
 135 * For 32bit archs with 4 kB pages, this is 56.
 136 * This is not serious, as it is only for large objects, when it is unwise
 137 * to have too many per slab.
 138 * Note: This limit can be raised by introducing a general cache whose size
 139 * is less than 512 (PAGE_SIZE<<3), but greater than 256.
 140 */
 141
 142#define BUFCTL_END 0xffffFFFF
 143#define SLAB_LIMIT 0xffffFFFE
 144typedef unsigned int kmem_bufctl_t;
 145
 146/* Max number of objs-per-slab for caches which use off-slab slabs.
 147 * Needed to avoid a possible looping condition in kmem_cache_grow().
 148 */
 149static unsigned long offslab_limit;
 150
 151/*
 152 * slab_t
 153 *
 154 * Manages the objs in a slab. Placed either at the beginning of mem allocated
 155 * for a slab, or allocated from an general cache.
 156 * Slabs are chained into three list: fully used, partial, fully free slabs.
 157 */
 158typedef struct slab_s {
 159        struct list_head        list;
 160        unsigned long           colouroff;
 161        void                    *s_mem;         /* including colour offset */
 162        unsigned int            inuse;          /* num of objs active in slab */
 163        kmem_bufctl_t           free;
 164} slab_t;
 165
 166#define slab_bufctl(slabp) \
 167        ((kmem_bufctl_t *)(((slab_t*)slabp)+1))
 168
 169/*
 170 * cpucache_t
 171 *
 172 * Per cpu structures
 173 * The limit is stored in the per-cpu structure to reduce the data cache
 174 * footprint.
 175 */
 176typedef struct cpucache_s {
 177        unsigned int avail;
 178        unsigned int limit;
 179} cpucache_t;
 180
 181#define cc_entry(cpucache) \
 182        ((void **)(((cpucache_t*)(cpucache))+1))
 183#define cc_data(cachep) \
 184        ((cachep)->cpudata[smp_processor_id()])
 185/*
 186 * kmem_cache_t
 187 *
 188 * manages a cache.
 189 */
 190
 191struct kmem_cache_s {
 192/* 1) each alloc & free */
 193        /* full, partial first, then free */
 194        struct list_head        slabs_full;
 195        struct list_head        slabs_partial;
 196        struct list_head        slabs_free;
 197        unsigned int            objsize;
 198        unsigned int            flags;  /* constant flags */
 199        unsigned int            num;    /* # of objs per slab */
 200        spinlock_t              spinlock;
 201#ifdef CONFIG_SMP
 202        unsigned int            batchcount;
 203#endif
 204
 205/* 2) slab additions /removals */
 206        /* order of pgs per slab (2^n) */
 207        unsigned int            gfporder;
 208
 209        /* force GFP flags, e.g. GFP_DMA */
 210        unsigned int            gfpflags;
 211
 212        size_t                  colour;         /* cache colouring range */
 213        unsigned int            colour_off;     /* colour offset */
 214        unsigned int            colour_next;    /* cache colouring */
 215        kmem_cache_t            *slabp_cache;
 216        unsigned int            growing;
 217        unsigned int            dflags;         /* dynamic flags */
 218
 219        /* constructor func */
 220        void (*ctor)(void *, kmem_cache_t *, unsigned long);
 221
 222        /* de-constructor func */
 223        void (*dtor)(void *, kmem_cache_t *, unsigned long);
 224
 225        unsigned long           failures;
 226
 227/* 3) cache creation/removal */
 228        const char              *name;
 229        struct list_head        next;
 230#ifdef CONFIG_SMP
 231/* 4) per-cpu data */
 232        cpucache_t              *cpudata[NR_CPUS];
 233#endif
 234#if STATS
 235        unsigned long           num_active;
 236        unsigned long           num_allocations;
 237        unsigned long           high_mark;
 238        unsigned long           grown;
 239        unsigned long           reaped;
 240        unsigned long           errors;
 241#ifdef CONFIG_SMP
 242        atomic_t                allochit;
 243        atomic_t                allocmiss;
 244        atomic_t                freehit;
 245        atomic_t                freemiss;
 246#endif
 247#endif
 248};
 249
 250/* internal c_flags */
 251#define CFLGS_OFF_SLAB  0x010000UL      /* slab management in own cache */
 252#define CFLGS_OPTIMIZE  0x020000UL      /* optimized slab lookup */
 253
 254/* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
 255#define DFLGS_GROWN     0x000001UL      /* don't reap a recently grown */
 256
 257#define OFF_SLAB(x)     ((x)->flags & CFLGS_OFF_SLAB)
 258#define OPTIMIZE(x)     ((x)->flags & CFLGS_OPTIMIZE)
 259#define GROWN(x)        ((x)->dlags & DFLGS_GROWN)
 260
 261#if STATS
 262#define STATS_INC_ACTIVE(x)     ((x)->num_active++)
 263#define STATS_DEC_ACTIVE(x)     ((x)->num_active--)
 264#define STATS_INC_ALLOCED(x)    ((x)->num_allocations++)
 265#define STATS_INC_GROWN(x)      ((x)->grown++)
 266#define STATS_INC_REAPED(x)     ((x)->reaped++)
 267#define STATS_SET_HIGH(x)       do { if ((x)->num_active > (x)->high_mark) \
 268                                        (x)->high_mark = (x)->num_active; \
 269                                } while (0)
 270#define STATS_INC_ERR(x)        ((x)->errors++)
 271#else
 272#define STATS_INC_ACTIVE(x)     do { } while (0)
 273#define STATS_DEC_ACTIVE(x)     do { } while (0)
 274#define STATS_INC_ALLOCED(x)    do { } while (0)
 275#define STATS_INC_GROWN(x)      do { } while (0)
 276#define STATS_INC_REAPED(x)     do { } while (0)
 277#define STATS_SET_HIGH(x)       do { } while (0)
 278#define STATS_INC_ERR(x)        do { } while (0)
 279#endif
 280
 281#if STATS && defined(CONFIG_SMP)
 282#define STATS_INC_ALLOCHIT(x)   atomic_inc(&(x)->allochit)
 283#define STATS_INC_ALLOCMISS(x)  atomic_inc(&(x)->allocmiss)
 284#define STATS_INC_FREEHIT(x)    atomic_inc(&(x)->freehit)
 285#define STATS_INC_FREEMISS(x)   atomic_inc(&(x)->freemiss)
 286#else
 287#define STATS_INC_ALLOCHIT(x)   do { } while (0)
 288#define STATS_INC_ALLOCMISS(x)  do { } while (0)
 289#define STATS_INC_FREEHIT(x)    do { } while (0)
 290#define STATS_INC_FREEMISS(x)   do { } while (0)
 291#endif
 292
 293#if DEBUG
 294/* Magic nums for obj red zoning.
 295 * Placed in the first word before and the first word after an obj.
 296 */
 297#define RED_MAGIC1      0x5A2CF071UL    /* when obj is active */
 298#define RED_MAGIC2      0x170FC2A5UL    /* when obj is inactive */
 299
 300/* ...and for poisoning */
 301#define POISON_BYTE     0x5a            /* byte value for poisoning */
 302#define POISON_END      0xa5            /* end-byte of poisoning */
 303
 304#endif
 305
 306/* maximum size of an obj (in 2^order pages) */
 307#define MAX_OBJ_ORDER   5       /* 32 pages */
 308
 309/*
 310 * Do not go above this order unless 0 objects fit into the slab.
 311 */
 312#define BREAK_GFP_ORDER_HI      2
 313#define BREAK_GFP_ORDER_LO      1
 314static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
 315
 316/*
 317 * Absolute limit for the gfp order
 318 */
 319#define MAX_GFP_ORDER   5       /* 32 pages */
 320
 321
 322/* Macros for storing/retrieving the cachep and or slab from the
 323 * global 'mem_map'. These are used to find the slab an obj belongs to.
 324 * With kfree(), these are used to find the cache which an obj belongs to.
 325 */
 326#define SET_PAGE_CACHE(pg,x)  ((pg)->list.next = (struct list_head *)(x))
 327#define GET_PAGE_CACHE(pg)    ((kmem_cache_t *)(pg)->list.next)
 328#define SET_PAGE_SLAB(pg,x)   ((pg)->list.prev = (struct list_head *)(x))
 329#define GET_PAGE_SLAB(pg)     ((slab_t *)(pg)->list.prev)
 330
 331/* Size description struct for general caches. */
 332typedef struct cache_sizes {
 333        size_t           cs_size;
 334        kmem_cache_t    *cs_cachep;
 335        kmem_cache_t    *cs_dmacachep;
 336} cache_sizes_t;
 337
 338/* These are the default caches for kmalloc. Custom caches can have other sizes. */
 339static cache_sizes_t cache_sizes[] = {
 340#if PAGE_SIZE == 4096
 341        {    32,        NULL, NULL},
 342#endif
 343        {    64,        NULL, NULL},
 344        {    96,        NULL, NULL},
 345        {   128,        NULL, NULL},
 346        {   192,        NULL, NULL},
 347        {   256,        NULL, NULL},
 348        {   512,        NULL, NULL},
 349        {  1024,        NULL, NULL},
 350        {  2048,        NULL, NULL},
 351        {  4096,        NULL, NULL},
 352        {  8192,        NULL, NULL},
 353        { 16384,        NULL, NULL},
 354        { 32768,        NULL, NULL},
 355        { 65536,        NULL, NULL},
 356        {131072,        NULL, NULL},
 357        {     0,        NULL, NULL}
 358};
 359/* Must match cache_sizes above. Out of line to keep cache footprint low. */
 360#define CN(x) { x, x " (DMA)" }
 361static struct { 
 362        char *name; 
 363        char *name_dma;
 364} cache_names[] = { 
 365#if PAGE_SIZE == 4096
 366        CN("size-32"),
 367#endif
 368        CN("size-64"),
 369        CN("size-96"),
 370        CN("size-128"),
 371        CN("size-192"),
 372        CN("size-256"),
 373        CN("size-512"),
 374        CN("size-1024"),
 375        CN("size-2048"),
 376        CN("size-4096"),
 377        CN("size-8192"),
 378        CN("size-16384"),
 379        CN("size-32768"),
 380        CN("size-65536"),
 381        CN("size-131072")
 382}; 
 383#undef CN
 384
 385/* internal cache of cache description objs */
 386static kmem_cache_t cache_cache = {
 387        .slabs_full     = LIST_HEAD_INIT(cache_cache.slabs_full),
 388        .slabs_partial  = LIST_HEAD_INIT(cache_cache.slabs_partial),
 389        .slabs_free     = LIST_HEAD_INIT(cache_cache.slabs_free),
 390        .objsize        = sizeof(kmem_cache_t),
 391        .flags          = SLAB_NO_REAP,
 392        .spinlock       = SPIN_LOCK_UNLOCKED,
 393        .colour_off     = L1_CACHE_BYTES,
 394        .name           = "kmem_cache",
 395};
 396
 397/* Guard access to the cache-chain. */
 398static struct semaphore cache_chain_sem;
 399
 400/* Place maintainer for reaping. */
 401static kmem_cache_t *clock_searchp = &cache_cache;
 402
 403#define cache_chain (cache_cache.next)
 404
 405#ifdef CONFIG_SMP
 406/*
 407 * chicken and egg problem: delay the per-cpu array allocation
 408 * until the general caches are up.
 409 */
 410static int g_cpucache_up;
 411
 412static void enable_cpucache (kmem_cache_t *cachep);
 413static void enable_all_cpucaches (void);
 414#endif
 415
 416/* Cal the num objs, wastage, and bytes left over for a given slab size. */
 417static void kmem_cache_estimate (unsigned long gfporder, size_t size,
 418                 int flags, size_t *left_over, unsigned int *num)
 419{
 420        int i;
 421        size_t wastage = PAGE_SIZE<<gfporder;
 422        size_t extra = 0;
 423        size_t base = 0;
 424
 425        if (!(flags & CFLGS_OFF_SLAB)) {
 426                base = sizeof(slab_t);
 427                extra = sizeof(kmem_bufctl_t);
 428        }
 429        i = 0;
 430        while (i*size + L1_CACHE_ALIGN(base+i*extra) <= wastage)
 431                i++;
 432        if (i > 0)
 433                i--;
 434
 435        if (i > SLAB_LIMIT)
 436                i = SLAB_LIMIT;
 437
 438        *num = i;
 439        wastage -= i*size;
 440        wastage -= L1_CACHE_ALIGN(base+i*extra);
 441        *left_over = wastage;
 442}
 443
 444/* Initialisation - setup the `cache' cache. */
 445void __init kmem_cache_init(void)
 446{
 447        size_t left_over;
 448
 449        init_MUTEX(&cache_chain_sem);
 450        INIT_LIST_HEAD(&cache_chain);
 451
 452        kmem_cache_estimate(0, cache_cache.objsize, 0,
 453                        &left_over, &cache_cache.num);
 454        if (!cache_cache.num)
 455                BUG();
 456
 457        cache_cache.colour = left_over/cache_cache.colour_off;
 458        cache_cache.colour_next = 0;
 459}
 460
 461
 462/* Initialisation - setup remaining internal and general caches.
 463 * Called after the gfp() functions have been enabled, and before smp_init().
 464 */
 465void __init kmem_cache_sizes_init(void)
 466{
 467        cache_sizes_t *sizes = cache_sizes;
 468        /*
 469         * Fragmentation resistance on low memory - only use bigger
 470         * page orders on machines with more than 32MB of memory.
 471         */
 472        if (num_physpages > (32 << 20) >> PAGE_SHIFT)
 473                slab_break_gfp_order = BREAK_GFP_ORDER_HI;
 474        do {
 475                /* For performance, all the general caches are L1 aligned.
 476                 * This should be particularly beneficial on SMP boxes, as it
 477                 * eliminates "false sharing".
 478                 * Note for systems short on memory removing the alignment will
 479                 * allow tighter packing of the smaller caches. */
 480                if (!(sizes->cs_cachep =
 481                        kmem_cache_create(cache_names[sizes-cache_sizes].name, 
 482                                          sizes->cs_size,
 483                                        0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
 484                        BUG();
 485                }
 486
 487                /* Inc off-slab bufctl limit until the ceiling is hit. */
 488                if (!(OFF_SLAB(sizes->cs_cachep))) {
 489                        offslab_limit = sizes->cs_size-sizeof(slab_t);
 490                        offslab_limit /= sizeof(kmem_bufctl_t);
 491                }
 492                sizes->cs_dmacachep = kmem_cache_create(
 493                    cache_names[sizes-cache_sizes].name_dma, 
 494                        sizes->cs_size, 0,
 495                        SLAB_CACHE_DMA|SLAB_HWCACHE_ALIGN, NULL, NULL);
 496                if (!sizes->cs_dmacachep)
 497                        BUG();
 498                sizes++;
 499        } while (sizes->cs_size);
 500}
 501
 502int __init kmem_cpucache_init(void)
 503{
 504#ifdef CONFIG_SMP
 505        g_cpucache_up = 1;
 506        enable_all_cpucaches();
 507#endif
 508        return 0;
 509}
 510
 511__initcall(kmem_cpucache_init);
 512
 513/* Interface to system's page allocator. No need to hold the cache-lock.
 514 */
 515static inline void * kmem_getpages (kmem_cache_t *cachep, unsigned long flags)
 516{
 517        void    *addr;
 518
 519        /*
 520         * If we requested dmaable memory, we will get it. Even if we
 521         * did not request dmaable memory, we might get it, but that
 522         * would be relatively rare and ignorable.
 523         */
 524        flags |= cachep->gfpflags;
 525        addr = (void*) __get_free_pages(flags, cachep->gfporder);
 526        /* Assume that now we have the pages no one else can legally
 527         * messes with the 'struct page's.
 528         * However vm_scan() might try to test the structure to see if
 529         * it is a named-page or buffer-page.  The members it tests are
 530         * of no interest here.....
 531         */
 532        return addr;
 533}
 534
 535/* Interface to system's page release. */
 536static inline void kmem_freepages (kmem_cache_t *cachep, void *addr)
 537{
 538        unsigned long i = (1<<cachep->gfporder);
 539        struct page *page = virt_to_page(addr);
 540
 541        /* free_pages() does not clear the type bit - we do that.
 542         * The pages have been unlinked from their cache-slab,
 543         * but their 'struct page's might be accessed in
 544         * vm_scan(). Shouldn't be a worry.
 545         */
 546        while (i--) {
 547                ClearPageSlab(page);
 548                dec_page_state(nr_slab);
 549                page++;
 550        }
 551        free_pages((unsigned long)addr, cachep->gfporder);
 552}
 553
 554#if DEBUG
 555static inline void kmem_poison_obj (kmem_cache_t *cachep, void *addr)
 556{
 557        int size = cachep->objsize;
 558        if (cachep->flags & SLAB_RED_ZONE) {
 559                addr += BYTES_PER_WORD;
 560                size -= 2*BYTES_PER_WORD;
 561        }
 562        memset(addr, POISON_BYTE, size);
 563        *(unsigned char *)(addr+size-1) = POISON_END;
 564}
 565
 566static inline int kmem_check_poison_obj (kmem_cache_t *cachep, void *addr)
 567{
 568        int size = cachep->objsize;
 569        void *end;
 570        if (cachep->flags & SLAB_RED_ZONE) {
 571                addr += BYTES_PER_WORD;
 572                size -= 2*BYTES_PER_WORD;
 573        }
 574        end = memchr(addr, POISON_END, size);
 575        if (end != (addr+size-1))
 576                return 1;
 577        return 0;
 578}
 579#endif
 580
 581/* Destroy all the objs in a slab, and release the mem back to the system.
 582 * Before calling the slab must have been unlinked from the cache.
 583 * The cache-lock is not held/needed.
 584 */
 585static void kmem_slab_destroy (kmem_cache_t *cachep, slab_t *slabp)
 586{
 587        if (cachep->dtor
 588#if DEBUG
 589                || cachep->flags & (SLAB_POISON | SLAB_RED_ZONE)
 590#endif
 591        ) {
 592                int i;
 593                for (i = 0; i < cachep->num; i++) {
 594                        void* objp = slabp->s_mem+cachep->objsize*i;
 595#if DEBUG
 596                        if (cachep->flags & SLAB_RED_ZONE) {
 597                                if (*((unsigned long*)(objp)) != RED_MAGIC1)
 598                                        BUG();
 599                                if (*((unsigned long*)(objp + cachep->objsize
 600                                                -BYTES_PER_WORD)) != RED_MAGIC1)
 601                                        BUG();
 602                                objp += BYTES_PER_WORD;
 603                        }
 604#endif
 605                        if (cachep->dtor)
 606                                (cachep->dtor)(objp, cachep, 0);
 607#if DEBUG
 608                        if (cachep->flags & SLAB_RED_ZONE) {
 609                                objp -= BYTES_PER_WORD;
 610                        }       
 611                        if ((cachep->flags & SLAB_POISON)  &&
 612                                kmem_check_poison_obj(cachep, objp))
 613                                BUG();
 614#endif
 615                }
 616        }
 617
 618        kmem_freepages(cachep, slabp->s_mem-slabp->colouroff);
 619        if (OFF_SLAB(cachep))
 620                kmem_cache_free(cachep->slabp_cache, slabp);
 621}
 622
 623/**
 624 * kmem_cache_create - Create a cache.
 625 * @name: A string which is used in /proc/slabinfo to identify this cache.
 626 * @size: The size of objects to be created in this cache.
 627 * @offset: The offset to use within the page.
 628 * @flags: SLAB flags
 629 * @ctor: A constructor for the objects.
 630 * @dtor: A destructor for the objects.
 631 *
 632 * Returns a ptr to the cache on success, NULL on failure.
 633 * Cannot be called within a int, but can be interrupted.
 634 * The @ctor is run when new pages are allocated by the cache
 635 * and the @dtor is run before the pages are handed back.
 636 *
 637 * @name must be valid until the cache is destroyed. This implies that
 638 * the module calling this has to destroy the cache before getting 
 639 * unloaded.
 640 * 
 641 * The flags are
 642 *
 643 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 644 * to catch references to uninitialised memory.
 645 *
 646 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 647 * for buffer overruns.
 648 *
 649 * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
 650 * memory pressure.
 651 *
 652 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 653 * cacheline.  This can be beneficial if you're counting cycles as closely
 654 * as davem.
 655 */
 656kmem_cache_t *
 657kmem_cache_create (const char *name, size_t size, size_t offset,
 658        unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
 659        void (*dtor)(void*, kmem_cache_t *, unsigned long))
 660{
 661        const char *func_nm = KERN_ERR "kmem_create: ";
 662        size_t left_over, align, slab_size;
 663        kmem_cache_t *cachep = NULL;
 664
 665        /*
 666         * Sanity checks... these are all serious usage bugs.
 667         */
 668        if ((!name) ||
 669                in_interrupt() ||
 670                (size < BYTES_PER_WORD) ||
 671                (size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) ||
 672                (dtor && !ctor) ||
 673                (offset < 0 || offset > size))
 674                        BUG();
 675
 676#if DEBUG
 677        if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
 678                /* No constructor, but inital state check requested */
 679                printk("%sNo con, but init state check requested - %s\n", func_nm, name);
 680                flags &= ~SLAB_DEBUG_INITIAL;
 681        }
 682
 683        if ((flags & SLAB_POISON) && ctor) {
 684                /* request for poisoning, but we can't do that with a constructor */
 685                printk("%sPoisoning requested, but con given - %s\n", func_nm, name);
 686                flags &= ~SLAB_POISON;
 687        }
 688#if FORCED_DEBUG
 689        if ((size < (PAGE_SIZE>>3)) && !(flags & SLAB_MUST_HWCACHE_ALIGN))
 690                /*
 691                 * do not red zone large object, causes severe
 692                 * fragmentation.
 693                 */
 694                flags |= SLAB_RED_ZONE;
 695        if (!ctor)
 696                flags |= SLAB_POISON;
 697#endif
 698#endif
 699
 700        /*
 701         * Always checks flags, a caller might be expecting debug
 702         * support which isn't available.
 703         */
 704        if (flags & ~CREATE_MASK)
 705                BUG();
 706
 707        /* Get cache's description obj. */
 708        cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
 709        if (!cachep)
 710                goto opps;
 711        memset(cachep, 0, sizeof(kmem_cache_t));
 712
 713        /* Check that size is in terms of words.  This is needed to avoid
 714         * unaligned accesses for some archs when redzoning is used, and makes
 715         * sure any on-slab bufctl's are also correctly aligned.
 716         */
 717        if (size & (BYTES_PER_WORD-1)) {
 718                size += (BYTES_PER_WORD-1);
 719                size &= ~(BYTES_PER_WORD-1);
 720                printk("%sForcing size word alignment - %s\n", func_nm, name);
 721        }
 722        
 723#if DEBUG
 724        if (flags & SLAB_RED_ZONE) {
 725                /*
 726                 * There is no point trying to honour cache alignment
 727                 * when redzoning.
 728                 */
 729                flags &= ~SLAB_HWCACHE_ALIGN;
 730                size += 2*BYTES_PER_WORD;       /* words for redzone */
 731        }
 732#endif
 733        align = BYTES_PER_WORD;
 734        if (flags & SLAB_HWCACHE_ALIGN)
 735                align = L1_CACHE_BYTES;
 736
 737        /* Determine if the slab management is 'on' or 'off' slab. */
 738        if (size >= (PAGE_SIZE>>3))
 739                /*
 740                 * Size is large, assume best to place the slab management obj
 741                 * off-slab (should allow better packing of objs).
 742                 */
 743                flags |= CFLGS_OFF_SLAB;
 744
 745        if (flags & SLAB_HWCACHE_ALIGN) {
 746                /* Need to adjust size so that objs are cache aligned. */
 747                /* Small obj size, can get at least two per cache line. */
 748                /* FIXME: only power of 2 supported, was better */
 749                while (size < align/2)
 750                        align /= 2;
 751                size = (size+align-1)&(~(align-1));
 752        }
 753
 754        /* Cal size (in pages) of slabs, and the num of objs per slab.
 755         * This could be made much more intelligent.  For now, try to avoid
 756         * using high page-orders for slabs.  When the gfp() funcs are more
 757         * friendly towards high-order requests, this should be changed.
 758         */
 759        do {
 760                unsigned int break_flag = 0;
 761cal_wastage:
 762                kmem_cache_estimate(cachep->gfporder, size, flags,
 763                                                &left_over, &cachep->num);
 764                if (break_flag)
 765                        break;
 766                if (cachep->gfporder >= MAX_GFP_ORDER)
 767                        break;
 768                if (!cachep->num)
 769                        goto next;
 770                if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) {
 771                        /* Oops, this num of objs will cause problems. */
 772                        cachep->gfporder--;
 773                        break_flag++;
 774                        goto cal_wastage;
 775                }
 776
 777                /*
 778                 * Large num of objs is good, but v. large slabs are currently
 779                 * bad for the gfp()s.
 780                 */
 781                if (cachep->gfporder >= slab_break_gfp_order)
 782                        break;
 783
 784                if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder))
 785                        break;  /* Acceptable internal fragmentation. */
 786next:
 787                cachep->gfporder++;
 788        } while (1);
 789
 790        if (!cachep->num) {
 791                printk("kmem_cache_create: couldn't create cache %s.\n", name);
 792                kmem_cache_free(&cache_cache, cachep);
 793                cachep = NULL;
 794                goto opps;
 795        }
 796        slab_size = L1_CACHE_ALIGN(cachep->num*sizeof(kmem_bufctl_t)+sizeof(slab_t));
 797
 798        /*
 799         * If the slab has been placed off-slab, and we have enough space then
 800         * move it on-slab. This is at the expense of any extra colouring.
 801         */
 802        if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
 803                flags &= ~CFLGS_OFF_SLAB;
 804                left_over -= slab_size;
 805        }
 806
 807        /* Offset must be a multiple of the alignment. */
 808        offset += (align-1);
 809        offset &= ~(align-1);
 810        if (!offset)
 811                offset = L1_CACHE_BYTES;
 812        cachep->colour_off = offset;
 813        cachep->colour = left_over/offset;
 814
 815        /* init remaining fields */
 816        if (!cachep->gfporder && !(flags & CFLGS_OFF_SLAB))
 817                flags |= CFLGS_OPTIMIZE;
 818
 819        cachep->flags = flags;
 820        cachep->gfpflags = 0;
 821        if (flags & SLAB_CACHE_DMA)
 822                cachep->gfpflags |= GFP_DMA;
 823        spin_lock_init(&cachep->spinlock);
 824        cachep->objsize = size;
 825        INIT_LIST_HEAD(&cachep->slabs_full);
 826        INIT_LIST_HEAD(&cachep->slabs_partial);
 827        INIT_LIST_HEAD(&cachep->slabs_free);
 828
 829        if (flags & CFLGS_OFF_SLAB)
 830                cachep->slabp_cache = kmem_find_general_cachep(slab_size,0);
 831        cachep->ctor = ctor;
 832        cachep->dtor = dtor;
 833        cachep->name = name;
 834
 835#ifdef CONFIG_SMP
 836        if (g_cpucache_up)
 837                enable_cpucache(cachep);
 838#endif
 839        /* Need the semaphore to access the chain. */
 840        down(&cache_chain_sem);
 841        {
 842                struct list_head *p;
 843                mm_segment_t old_fs;
 844
 845                old_fs = get_fs();
 846                set_fs(KERNEL_DS);
 847                list_for_each(p, &cache_chain) {
 848                        kmem_cache_t *pc = list_entry(p, kmem_cache_t, next);
 849                        char tmp;
 850                        /* This happens when the module gets unloaded and doesn't
 851                           destroy its slab cache and noone else reuses the vmalloc
 852                           area of the module. Print a warning. */
 853                        if (__get_user(tmp,pc->name)) { 
 854                                printk("SLAB: cache with size %d has lost its name\n", 
 855                                        pc->objsize); 
 856                                continue; 
 857                        }       
 858                        if (!strcmp(pc->name,name)) { 
 859                                printk("kmem_cache_create: duplicate cache %s\n",name); 
 860                                up(&cache_chain_sem); 
 861                                BUG(); 
 862                        }       
 863                }
 864                set_fs(old_fs);
 865        }
 866
 867        /* There is no reason to lock our new cache before we
 868         * link it in - no one knows about it yet...
 869         */
 870        list_add(&cachep->next, &cache_chain);
 871        up(&cache_chain_sem);
 872opps:
 873        return cachep;
 874}
 875
 876
 877#if DEBUG
 878/*
 879 * This check if the kmem_cache_t pointer is chained in the cache_cache
 880 * list. -arca
 881 */
 882static int is_chained_kmem_cache(kmem_cache_t * cachep)
 883{
 884        struct list_head *p;
 885        int ret = 0;
 886
 887        /* Find the cache in the chain of caches. */
 888        down(&cache_chain_sem);
 889        list_for_each(p, &cache_chain) {
 890                if (p == &cachep->next) {
 891                        ret = 1;
 892                        break;
 893                }
 894        }
 895        up(&cache_chain_sem);
 896
 897        return ret;
 898}
 899#else
 900#define is_chained_kmem_cache(x) 1
 901#endif
 902
 903#ifdef CONFIG_SMP
 904/*
 905 * Waits for all CPUs to execute func().
 906 */
 907static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg)
 908{
 909        local_irq_disable();
 910        func(arg);
 911        local_irq_enable();
 912
 913        if (smp_call_function(func, arg, 1, 1))
 914                BUG();
 915}
 916typedef struct ccupdate_struct_s
 917{
            /* Cache whose per-CPU array pointers are being exchanged. */
 918        kmem_cache_t *cachep;
            /*
             * One slot per CPU.  do_ccupdate_local() installs new[cpu] as
             * that CPU's array and stores the previous pointer back here.
             */
 919        cpucache_t *new[NR_CPUS];
 920} ccupdate_struct_t;
 921
 922static void do_ccupdate_local(void *info)
 923{
 924        ccupdate_struct_t *new = (ccupdate_struct_t *)info;
 925        cpucache_t *old = cc_data(new->cachep);
 926        
 927        cc_data(new->cachep) = new->new[smp_processor_id()];
 928        new->new[smp_processor_id()] = old;
 929}
 930
 931static void free_block (kmem_cache_t* cachep, void** objpp, int len);
 932
 933static void drain_cpu_caches(kmem_cache_t *cachep)
 934{
 935        ccupdate_struct_t new;
 936        int i;
 937
 938        memset(&new.new,0,sizeof(new.new));
 939
 940        new.cachep = cachep;
 941
 942        down(&cache_chain_sem);
 943        smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
 944
 945        for (i = 0; i < NR_CPUS; i++) {
 946                cpucache_t* ccold = new.new[i];
 947                if (!ccold || (ccold->avail == 0))
 948                        continue;
 949                local_irq_disable();
 950                free_block(cachep, cc_entry(ccold), ccold->avail);
 951                local_irq_enable();
 952                ccold->avail = 0;
 953        }
 954        smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
 955        up(&cache_chain_sem);
 956}
 957
 958#else
 959#define drain_cpu_caches(cachep)        do { } while (0)
 960#endif
 961
 962static int __kmem_cache_shrink(kmem_cache_t *cachep)
 963{
            /*
             * Drain the per-CPU arrays, then destroy every slab on the
             * slabs_free list of @cachep, stopping early if the cache is
             * growing.  Returns non-zero when full or partial slabs remain
             * afterwards, zero when neither list has any entries.
             */
 964        slab_t *slabp;
 965        int ret;
 966
 967        drain_cpu_caches(cachep);
 968
 969        spin_lock_irq(&cachep->spinlock);
 970
 971        /* If the cache is growing, stop shrinking. */
 972        while (!cachep->growing) {
 973                struct list_head *p;
 974
                    /* Take slabs from the tail; an empty list ends the loop. */
 975                p = cachep->slabs_free.prev;
 976                if (p == &cachep->slabs_free)
 977                        break;
 978
 979                slabp = list_entry(cachep->slabs_free.prev, slab_t, list);
 980#if DEBUG
 981                if (slabp->inuse)
 982                        BUG();
 983#endif
 984                list_del(&slabp->list);
 985
                    /*
                     * The slab is already unlinked, so the cache lock is
                     * dropped while it is destroyed and re-taken afterwards.
                     */
 986                spin_unlock_irq(&cachep->spinlock);
 987                kmem_slab_destroy(cachep, slabp);
 988                spin_lock_irq(&cachep->spinlock);
 989        }
 990        ret = !list_empty(&cachep->slabs_full) || !list_empty(&cachep->slabs_partial);
 991        spin_unlock_irq(&cachep->spinlock);
 992        return ret;
 993}
 994
 995/**
 996 * kmem_cache_shrink - Shrink a cache.
 997 * @cachep: The cache to shrink.
 998 *
 999 * Releases as many slabs as possible for a cache.
1000 * To help debugging, a zero exit status indicates all slabs were released.
1001 */
1002int kmem_cache_shrink(kmem_cache_t *cachep)
1003{
1004        if (!cachep || in_interrupt() || !is_chained_kmem_cache(cachep))
1005                BUG();
1006
1007        return __kmem_cache_shrink(cachep);
1008}
1009
1010/**
1011 * kmem_cache_destroy - delete a cache
1012 * @cachep: the cache to destroy
1013 *
1014 * Remove a kmem_cache_t object from the slab cache.
1015 * Returns 0 on success.
     * Returns 1 if the cache still has full or partial slabs; in that case
     * the cache is put back on the cache chain and is not freed.
1016 *
1017 * It is expected this function will be called by a module when it is
1018 * unloaded.  This will remove the cache completely, and avoid a duplicate
1019 * cache being allocated each time a module is loaded and unloaded, if the
1020 * module doesn't have persistent in-kernel storage across loads and unloads.
1021 *
1022 * The caller must guarantee that no one will allocate memory from the cache
1023 * during the kmem_cache_destroy().
1024 */
1025int kmem_cache_destroy (kmem_cache_t * cachep)
1026{
1027        if (!cachep || in_interrupt() || cachep->growing)
1028                BUG();
1029
1030        /* Find the cache in the chain of caches. */
1031        down(&cache_chain_sem);
1032        /* the chain is never empty, cache_cache is never destroyed */
            /* Move clock_searchp on if it points at the cache being removed. */
1033        if (clock_searchp == cachep)
1034                clock_searchp = list_entry(cachep->next.next,
1035                                                kmem_cache_t, next);
1036        list_del(&cachep->next);
1037        up(&cache_chain_sem);
1038
            /* Non-zero means slabs with live objects remain: undo the unlink. */
1039        if (__kmem_cache_shrink(cachep)) {
1040                printk(KERN_ERR "kmem_cache_destroy: Can't free all objects %p\n",
1041                       cachep);
1042                down(&cache_chain_sem);
1043                list_add(&cachep->next,&cache_chain);
1044                up(&cache_chain_sem);
1045                return 1;
1046        }
1047#ifdef CONFIG_SMP
1048        {
                    /* Release the per-CPU array of every CPU slot. */
1049                int i;
1050                for (i = 0; i < NR_CPUS; i++)
1051                        kfree(cachep->cpudata[i]);
1052        }
1053#endif
            /* The descriptor itself was allocated from cache_cache. */
1054        kmem_cache_free(&cache_cache, cachep);
1055
1056        return 0;
1057}
1058
1059/* Get the memory for a slab management obj. */
1060static inline slab_t * kmem_cache_slabmgmt (kmem_cache_t *cachep,
1061                        void *objp, int colour_off, int local_flags)
1062{
1063        slab_t *slabp;
1064        
1065        if (OFF_SLAB(cachep)) {
1066                /* Slab management obj is off-slab. */
1067                slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
1068                if (!slabp)
1069                        return NULL;
1070        } else {
1071                /* FIXME: change to
1072                        slabp = objp
1073                 * if you enable OPTIMIZE
1074                 */
1075                slabp = objp+colour_off;
1076                colour_off += L1_CACHE_ALIGN(cachep->num *
1077                                sizeof(kmem_bufctl_t) + sizeof(slab_t));
1078        }
1079        slabp->inuse = 0;
1080        slabp->colouroff = colour_off;
1081        slabp->s_mem = objp+colour_off;
1082
1083        return slabp;
1084}
1085
1086static inline void kmem_cache_init_objs (kmem_cache_t * cachep,
1087                        slab_t * slabp, unsigned long ctor_flags)
1088{
1089        int i;
1090
1091        for (i = 0; i < cachep->num; i++) {
1092                void* objp = slabp->s_mem+cachep->objsize*i;
1093#if DEBUG
1094                if (cachep->flags & SLAB_RED_ZONE) {
1095                        *((unsigned long*)(objp)) = RED_MAGIC1;
1096                        *((unsigned long*)(objp + cachep->objsize -
1097                                        BYTES_PER_WORD)) = RED_MAGIC1;
1098                        objp += BYTES_PER_WORD;
1099                }
1100#endif
1101
1102                /*
1103                 * Constructors are not allowed to allocate memory from
1104                 * the same cache which they are a constructor for.
1105                 * Otherwise, deadlock. They must also be threaded.
1106                 */
1107                if (cachep->ctor)
1108                        cachep->ctor(objp, cachep, ctor_flags);
1109#if DEBUG
1110                if (cachep->flags & SLAB_RED_ZONE)
1111                        objp -= BYTES_PER_WORD;
1112                if (cachep->flags & SLAB_POISON)
1113                        /* need to poison the objs */
1114                        kmem_poison_obj(cachep, objp);
1115                if (cachep->flags & SLAB_RED_ZONE) {
1116                        if (*((unsigned long*)(objp)) != RED_MAGIC1)
1117                                BUG();
1118                        if (*((unsigned long*)(objp + cachep->objsize -
1119                                        BYTES_PER_WORD)) != RED_MAGIC1)
1120                                BUG();
1121                }
1122#endif
1123                slab_bufctl(slabp)[i] = i+1;
1124        }
1125        slab_bufctl(slabp)[i-1] = BUFCTL_END;
1126        slabp->free = 0;
1127}
1128
1129/*
1130 * Grow (by 1) the number of slabs within a cache.  This is called by
1131 * kmem_cache_alloc() when there are no active objs left in a cache.
     *
     * Returns 1 when a new slab was added to the cache's slabs_free list,
     * 0 when SLAB_NO_GROW was passed or one of the allocations failed.
1132 */
1133static int kmem_cache_grow (kmem_cache_t * cachep, int flags)
1134{
1135        slab_t  *slabp;
1136        struct page     *page;
1137        void            *objp;
1138        size_t           offset;
1139        unsigned int     i, local_flags;
1140        unsigned long    ctor_flags;
1141        unsigned long    save_flags;
1142
1143        /* Be lazy and only check for valid flags here,
1144         * keeping it out of the critical path in kmem_cache_alloc().
1145         */
1146        if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW))
1147                BUG();
1148        if (flags & SLAB_NO_GROW)
1149                return 0;
1150
1151        /*
1152         * The test for missing atomic flag is performed here, rather than
1153         * the more obvious place, simply to reduce the critical path length
1154         * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
1155         * will eventually be caught here (where it matters).
1156         */
1157        if (in_interrupt() && (flags & __GFP_WAIT))
1158                BUG();
1159
1160        ctor_flags = SLAB_CTOR_CONSTRUCTOR;
1161        local_flags = (flags & SLAB_LEVEL_MASK);
1162        if (!(local_flags & __GFP_WAIT))
1163                /*
1164                 * Not allowed to sleep.  Need to tell a constructor about
1165                 * this - it might need to know...
1166                 */
1167                ctor_flags |= SLAB_CTOR_ATOMIC;
1168
1169        /* About to mess with non-constant members - lock. */
1170        spin_lock_irqsave(&cachep->spinlock, save_flags);
1171
1172        /* Get colour for the slab, and cal the next value. */
1173        offset = cachep->colour_next;
1174        cachep->colour_next++;
1175        if (cachep->colour_next >= cachep->colour)
1176                cachep->colour_next = 0;
            /* Turn the colour index into a byte offset. */
1177        offset *= cachep->colour_off;
1178        cachep->dflags |= DFLGS_GROWN;
1179
1180        cachep->growing++;
1181        spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1182
1183        /* A series of memory allocations for a new slab.
1184         * Neither the cache-chain semaphore, or cache-lock, are
1185         * held, but the incrementing c_growing prevents this
1186         * cache from being reaped or shrunk.
1187         * Note: The cache could be selected in for reaping in
1188         * kmem_cache_reap(), but when the final test is made the
1189         * growing value will be seen.
1190         */
1191
1192        /* Get mem for the objs. */
1193        if (!(objp = kmem_getpages(cachep, flags)))
1194                goto failed;
1195
1196        /* Get slab management. */
1197        if (!(slabp = kmem_cache_slabmgmt(cachep, objp, offset, local_flags)))
1198                goto opps1;
1199
1200        /* Nasty!!!!!! I hope this is OK. */
            /* Tag each of the 1 << gfporder pages with its cache and slab. */
1201        i = 1 << cachep->gfporder;
1202        page = virt_to_page(objp);
1203        do {
1204                SET_PAGE_CACHE(page, cachep);
1205                SET_PAGE_SLAB(page, slabp);
1206                SetPageSlab(page);
1207                inc_page_state(nr_slab);
1208                page++;
1209        } while (--i);
1210
1211        kmem_cache_init_objs(cachep, slabp, ctor_flags);
1212
1213        spin_lock_irqsave(&cachep->spinlock, save_flags);
1214        cachep->growing--;
1215
1216        /* Make slab active. */
1217        list_add_tail(&slabp->list, &cachep->slabs_free);
1218        STATS_INC_GROWN(cachep);
1219        cachep->failures = 0;
1220
1221        spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1222        return 1;
1223opps1:
            /* Slab management allocation failed: give the pages back. */
1224        kmem_freepages(cachep, objp);
1225failed:
            /* Undo the growing++ done above before reporting failure. */
1226        spin_lock_irqsave(&cachep->spinlock, save_flags);
1227        cachep->growing--;
1228        spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1229        return 0;
1230}
1231
1232/*
1233 * Perform extra freeing checks:
1234 * - detect double free
1235 * - detect bad pointers.
1236 * Called with the cache-lock held.
1237 */
1238
1239#if DEBUG
1240static int kmem_extra_free_checks (kmem_cache_t * cachep,
1241                        slab_t *slabp, void * objp)
1242{
1243        int i;
1244        unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
1245
1246        if (objnr >= cachep->num)
1247                BUG();
1248        if (objp != slabp->s_mem + objnr*cachep->objsize)
1249                BUG();
1250
1251        /* Check slab's freelist to see if this obj is there. */
1252        for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
1253                if (i == objnr)
1254                        BUG();
1255        }
1256        return 0;
1257}
1258#endif
1259
1260static inline void kmem_cache_alloc_head(kmem_cache_t *cachep, int flags)
1261{
1262        if (flags & SLAB_DMA) {
1263                if (!(cachep->gfpflags & GFP_DMA))
1264                        BUG();
1265        } else {
1266                if (cachep->gfpflags & GFP_DMA)
1267                        BUG();
1268        }
1269}
1270
1271static inline void * kmem_cache_alloc_one_tail (kmem_cache_t *cachep,
1272                                                slab_t *slabp)
1273{
1274        void *objp;
1275
1276        STATS_INC_ALLOCED(cachep);
1277        STATS_INC_ACTIVE(cachep);
1278        STATS_SET_HIGH(cachep);
1279
1280        /* get obj pointer */
1281        slabp->inuse++;
1282        objp = slabp->s_mem + slabp->free*cachep->objsize;
1283        slabp->free=slab_bufctl(slabp)[slabp->free];
1284
1285        if (unlikely(slabp->free == BUFCTL_END)) {
1286                list_del(&slabp->list);
1287                list_add(&slabp->list, &cachep->slabs_full);
1288        }
1289#if DEBUG
1290        if (cachep->flags & SLAB_POISON)
1291                if (kmem_check_poison_obj(cachep, objp))
1292                        BUG();
1293        if (cachep->flags & SLAB_RED_ZONE) {
1294                /* Set alloc red-zone, and check old one. */
1295                if (xchg((unsigned long *)objp, RED_MAGIC2) !=
1296                                                         RED_MAGIC1)
1297                        BUG();
1298                if (xchg((unsigned long *)(objp+cachep->objsize -
1299                          BYTES_PER_WORD), RED_MAGIC2) != RED_MAGIC1)
1300                        BUG();
1301                objp += BYTES_PER_WORD;
1302        }
1303#endif
1304        return objp;
1305}
1306
1307/*
1308 * Returns a ptr to an obj in the given cache.
1309 * caller must guarantee synchronization
1310 * #define for the goto optimization 8-)
     *
     * Objects are taken from the first partial slab; if there is none, the
     * first free slab is moved onto the partial list and used instead.
     * When both lists are empty the macro jumps to a label named
     * alloc_new_slab, which the calling function has to provide.
1311 */
1312#define kmem_cache_alloc_one(cachep)                            \
1313({                                                              \
1314        struct list_head * slabs_partial, * entry;              \
1315        slab_t *slabp;                                          \
1316                                                                \
1317        slabs_partial = &(cachep)->slabs_partial;               \
1318        entry = slabs_partial->next;                            \
1319        if (unlikely(entry == slabs_partial)) {                 \
1320                struct list_head * slabs_free;                  \
1321                slabs_free = &(cachep)->slabs_free;             \
1322                entry = slabs_free->next;                       \
1323                if (unlikely(entry == slabs_free))              \
1324                        goto alloc_new_slab;                    \
1325                list_del(entry);                                \
1326                list_add(entry, slabs_partial);                 \
1327        }                                                       \
1328                                                                \
1329        slabp = list_entry(entry, slab_t, list);                \
1330        kmem_cache_alloc_one_tail(cachep, slabp);               \
1331})
1332
1333#ifdef CONFIG_SMP
/*
 * Refill the array returned by cc_data(cachep) with up to
 * cachep->batchcount objects, taken from partial slabs first and free
 * slabs second, and return one of them.  Returns NULL when the array is
 * still empty afterwards.  @flags is not used by this function.
 */
1334void* kmem_cache_alloc_batch(kmem_cache_t* cachep, int flags)
1335{
1336        int batchcount = cachep->batchcount;
1337        cpucache_t* cc = cc_data(cachep);
1338
1339        spin_lock(&cachep->spinlock);
1340        while (batchcount--) {
1341                struct list_head * slabs_partial, * entry;
1342                slab_t *slabp;
1343                /* Get slab alloc is to come from. */
1344                slabs_partial = &(cachep)->slabs_partial;
1345                entry = slabs_partial->next;
1346                if (unlikely(entry == slabs_partial)) {
1347                        struct list_head * slabs_free;
1348                        slabs_free = &(cachep)->slabs_free;
1349                        entry = slabs_free->next;
                            /* No partial and no free slab: stop refilling. */
1350                        if (unlikely(entry == slabs_free))
1351                                break;
1352                        list_del(entry);
1353                        list_add(entry, slabs_partial);
1354                }
1355
1356                slabp = list_entry(entry, slab_t, list);
1357                cc_entry(cc)[cc->avail++] =
1358                                kmem_cache_alloc_one_tail(cachep, slabp);
1359        }
1360        /*
1361         * CAREFUL: do not enable preemption yet, the per-CPU
1362         * entries rely on us being atomic.
1363         */
1364        _raw_spin_unlock(&cachep->spinlock);
1365
1366        if (cc->avail)
1367                return cc_entry(cc)[--cc->avail];
1368        return NULL;
1369}
1370#endif
1371
/*
 * Common allocation path behind kmem_cache_alloc() and kmalloc().
 *
 * On SMP an object is taken from the array returned by cc_data() when one
 * is available, otherwise the array is refilled in a batch; caches
 * without such an array, and UP builds, allocate straight from the slab
 * lists.  When no object can be found the cache is grown and the attempt
 * is repeated.  Returns NULL when kmem_cache_grow() fails.
 */
1372static inline void * __kmem_cache_alloc (kmem_cache_t *cachep, int flags)
1373{
1374        unsigned long save_flags;
1375        void* objp;
1376
1377        kmem_cache_alloc_head(cachep, flags);
1378try_again:
1379        local_irq_save(save_flags);
1380#ifdef CONFIG_SMP
1381        {
1382                cpucache_t *cc = cc_data(cachep);
1383
1384                if (cc) {
1385                        if (cc->avail) {
1386                                STATS_INC_ALLOCHIT(cachep);
1387                                objp = cc_entry(cc)[--cc->avail];
1388                        } else {
1389                                STATS_INC_ALLOCMISS(cachep);
1390                                objp = kmem_cache_alloc_batch(cachep,flags);
1391                                local_irq_restore(save_flags);
1392                                /* end of non-preemptible region */
                                    /*
                                     * kmem_cache_alloc_batch() released the
                                     * lock with _raw_spin_unlock(); this is
                                     * the matching preempt_enable().
                                     */
1393                                preempt_enable();
1394                                if (!objp)
1395                                        goto alloc_new_slab_nolock;
1396                                return objp;
1397                        }
1398                } else {
1399                        spin_lock(&cachep->spinlock);
                            /* May jump to alloc_new_slab with the lock held. */
1400                        objp = kmem_cache_alloc_one(cachep);
1401                        spin_unlock(&cachep->spinlock);
1402                }
1403        }
1404#else
1405        objp = kmem_cache_alloc_one(cachep);
1406#endif
1407        local_irq_restore(save_flags);
1408        return objp;
1409alloc_new_slab:
            /* Reached from kmem_cache_alloc_one(): drop lock, restore IRQs. */
1410#ifdef CONFIG_SMP
1411        spin_unlock(&cachep->spinlock);
1412#endif
1413        local_irq_restore(save_flags);
1414#ifdef CONFIG_SMP
1415alloc_new_slab_nolock:
1416#endif
1417        if (kmem_cache_grow(cachep, flags))
1418                /* Someone may have stolen our objs.  Doesn't matter, we'll
1419                 * just come back here again.
1420                 */
1421                goto try_again;
1422        return NULL;
1423}
1424
1425/*
1426 * Release an obj back to its cache. If the obj has a constructed
1427 * state, it should be in this state _before_ it is released.
1428 * - caller is responsible for the synchronization
1429 */
1430
#if DEBUG
/*
 * CHECK_NR(addr): BUG() with a diagnostic if virt_addr_valid() rejects
 * @addr.  The message reports the macro's own argument, so the macro no
 * longer depends on the caller having a variable called objp.
 */
# define CHECK_NR(addr)                                         \
        do {                                                    \
                if (!virt_addr_valid(addr)) {                   \
                        printk(KERN_ERR "kfree: out of range ptr %lxh.\n", \
                                (unsigned long)(addr));         \
                        BUG();                                  \
                }                                               \
        } while (0)
/*
 * CHECK_PAGE(addr): validate @addr with CHECK_NR() first and only then
 * look up its page; BUG() if that page is not marked as a slab page.
 * @addr is evaluated more than once, so pass a plain pointer expression.
 */
# define CHECK_PAGE(addr)                                       \
        do {                                                    \
                CHECK_NR(addr);                                 \
                if (!PageSlab(virt_to_page(addr))) {            \
                        printk(KERN_ERR "kfree: bad ptr %lxh.\n", \
                                (unsigned long)(addr));         \
                        BUG();                                  \
                }                                               \
        } while (0)

#else
/* Without DEBUG the check compiles away and @addr is not evaluated. */
# define CHECK_PAGE(addr) do { } while (0)
#endif
1454
/*
 * Put one object back on the free list of the slab it belongs to and
 * move that slab between the cache's full/partial/free lists as needed.
 * The slab is looked up through the page that contains @objp.
 */
1455static inline void kmem_cache_free_one(kmem_cache_t *cachep, void *objp)
1456{
1457        slab_t* slabp;
1458
1459        CHECK_PAGE(objp);
1460        /* reduces memory footprint
1461         *
1462        if (OPTIMIZE(cachep))
1463                slabp = (void*)((unsigned long)objp&(~(PAGE_SIZE-1)));
1464         else
1465         */
1466        slabp = GET_PAGE_SLAB(virt_to_page(objp));
1467
1468#if DEBUG
1469        if (cachep->flags & SLAB_DEBUG_INITIAL)
1470                /* Need to call the slab's constructor so the
1471                 * caller can perform a verify of its state (debugging).
1472                 * Called without the cache-lock held.
1473                 */
1474                cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
1475
1476        if (cachep->flags & SLAB_RED_ZONE) {
                    /* Step back onto the leading red zone word. */
1477                objp -= BYTES_PER_WORD;
1478                if (xchg((unsigned long *)objp, RED_MAGIC1) != RED_MAGIC2)
1479                        /* Either write before start, or a double free. */
1480                        BUG();
1481                if (xchg((unsigned long *)(objp+cachep->objsize -
1482                                BYTES_PER_WORD), RED_MAGIC1) != RED_MAGIC2)
1483                        /* Either write past end, or a double free. */
1484                        BUG();
1485        }
1486        if (cachep->flags & SLAB_POISON)
1487                kmem_poison_obj(cachep, objp);
1488        if (kmem_extra_free_checks(cachep, slabp, objp))
1489                return;
1490#endif
1491        {
                    /* Push the object's index onto the head of the free list. */
1492                unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
1493
1494                slab_bufctl(slabp)[objnr] = slabp->free;
1495                slabp->free = objnr;
1496        }
1497        STATS_DEC_ACTIVE(cachep);
1498        
1499        /* fixup slab chains */
1500        {
                    /* inuse holds the count from before this free. */
1501                int inuse = slabp->inuse;
1502                if (unlikely(!--slabp->inuse)) {
1503                        /* Was partial or full, now empty. */
1504                        list_del(&slabp->list);
1505                        list_add(&slabp->list, &cachep->slabs_free);
1506                } else if (unlikely(inuse == cachep->num)) {
1507                        /* Was full. */
1508                        list_del(&slabp->list);
1509                        list_add(&slabp->list, &cachep->slabs_partial);
1510                }
1511        }
1512}
1513
1514#ifdef CONFIG_SMP
1515static inline void __free_block (kmem_cache_t* cachep,
1516                                                        void** objpp, int len)
1517{
1518        for ( ; len > 0; len--, objpp++)
1519                kmem_cache_free_one(cachep, *objpp);
1520}
1521
1522static void free_block (kmem_cache_t* cachep, void** objpp, int len)
1523{
1524        spin_lock(&cachep->spinlock);
1525        __free_block(cachep, objpp, len);
1526        spin_unlock(&cachep->spinlock);
1527}
1528#endif
1529
1530/*
1531 * __kmem_cache_free
1532 * called with disabled ints
1533 */
1534static inline void __kmem_cache_free (kmem_cache_t *cachep, void* objp)
1535{
1536#ifdef CONFIG_SMP
1537        cpucache_t *cc = cc_data(cachep);
1538
1539        CHECK_PAGE(objp);
1540        if (cc) {
1541                int batchcount;
1542                if (cc->avail < cc->limit) {
1543                        STATS_INC_FREEHIT(cachep);
1544                        cc_entry(cc)[cc->avail++] = objp;
1545                        return;
1546                }
1547                STATS_INC_FREEMISS(cachep);
1548                batchcount = cachep->batchcount;
1549                cc->avail -= batchcount;
1550                free_block(cachep, &cc_entry(cc)[cc->avail], batchcount);
1551                cc_entry(cc)[cc->avail++] = objp;
1552                return;
1553        } else {
1554                free_block(cachep, &objp, 1);
1555        }
1556#else
1557        kmem_cache_free_one(cachep, objp);
1558#endif
1559}
1560
1561/**
1562 * kmem_cache_alloc - Allocate an object
1563 * @cachep: The cache to allocate from.
1564 * @flags: See kmalloc().
1565 *
1566 * Allocate an object from this cache.  The flags are only relevant
1567 * if the cache has no available objects.
     *
     * Returns whatever __kmem_cache_alloc() returns: a pointer to the
     * object, or NULL when the cache could not be grown.
1568 */
1569void * kmem_cache_alloc (kmem_cache_t *cachep, int flags)
1570{
1571        return __kmem_cache_alloc(cachep, flags);
1572}
1573
1574/**
1575 * kmalloc - allocate memory
1576 * @size: how many bytes of memory are required.
1577 * @flags: the type of memory to allocate.
1578 *
1579 * kmalloc is the normal method of allocating memory
1580 * in the kernel.
1581 *
1582 * The @flags argument may be one of:
1583 *
1584 * %GFP_USER - Allocate memory on behalf of user.  May sleep.
1585 *
1586 * %GFP_KERNEL - Allocate normal kernel ram.  May sleep.
1587 *
1588 * %GFP_ATOMIC - Allocation will not sleep.  Use inside interrupt handlers.
1589 *
1590 * Additionally, the %GFP_DMA flag may be set to indicate the memory
1591 * must be suitable for DMA.  This can mean different things on different
1592 * platforms.  For example, on i386, it means that the memory must come
1593 * from the first 16MB.
1594 */
1595void * kmalloc (size_t size, int flags)
1596{
1597        cache_sizes_t *csizep = cache_sizes;
1598
1599        for (; csizep->cs_size; csizep++) {
1600                if (size > csizep->cs_size)
1601                        continue;
1602                return __kmem_cache_alloc(flags & GFP_DMA ?
1603                         csizep->cs_dmacachep : csizep->cs_cachep, flags);
1604        }
1605        return NULL;
1606}
1607
1608/**
1609 * kmem_cache_free - Deallocate an object
1610 * @cachep: The cache the allocation was from.
1611 * @objp: The previously allocated object.
1612 *
1613 * Free an object which was previously allocated from this
1614 * cache.
1615 */
1616void kmem_cache_free (kmem_cache_t *cachep, void *objp)
1617{
1618        unsigned long flags;
1619#if DEBUG
1620        CHECK_PAGE(objp);
1621        if (cachep != GET_PAGE_CACHE(virt_to_page(objp)))
1622                BUG();
1623#endif
1624
1625        local_irq_save(flags);
1626        __kmem_cache_free(cachep, objp);
1627        local_irq_restore(flags);
1628}
1629
1630/**
1631 * kfree - free previously allocated memory
1632 * @objp: pointer returned by kmalloc.
1633 *
1634 * Don't free memory not originally allocated by kmalloc()
1635 * or you will run into trouble.
1636 */
1637void kfree (const void *objp)
1638{
1639        kmem_cache_t *c;
1640        unsigned long flags;
1641
1642        if (!objp)
1643                return;
1644        local_irq_save(flags);
1645        CHECK_PAGE(objp);
1646        c = GET_PAGE_CACHE(virt_to_page(objp));
1647        __kmem_cache_free(c, (void*)objp);
1648        local_irq_restore(flags);
1649}
1650
1651unsigned int kmem_cache_size(kmem_cache_t *cachep)
1652{
1653#if DEBUG
1654        if (cachep->flags & SLAB_RED_ZONE)
1655                return (cachep->objsize - 2*BYTES_PER_WORD);
1656#endif
1657        return cachep->objsize;
1658}
1659
1660kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
1661{
1662        cache_sizes_t *csizep = cache_sizes;
1663
1664        /* This function could be moved to the header file, and
1665         * made inline so consumers can quickly determine what
1666         * cache pointer they require.
1667         */
1668        for ( ; csizep->cs_size; csizep++) {
1669                if (size > csizep->cs_size)
1670                        continue;
1671                break;
1672        }
1673        return (gfpflags & GFP_DMA) ? csizep->cs_dmacachep : csizep->cs_cachep;
1674}
1675
1676#ifdef CONFIG_SMP
1677
1678/* called with cache_chain_sem acquired.  */
1679static int kmem_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount)
1680{
1681        ccupdate_struct_t new;
1682        int i;
1683
1684        /*
1685         * These are admin-provided, so we are more graceful.
1686         */
1687        if (limit < 0)
1688                return -EINVAL;
1689        if (batchcount < 0)
1690                return -EINVAL;
1691        if (batchcount > limit)
1692                return -EINVAL;
1693        if (limit != 0 && !batchcount)
1694                return -EINVAL;
1695
1696        memset(&new.new,0,sizeof(new.new));
1697        if (limit) {
1698                for (i = 0; i < NR_CPUS; i++) {
1699                        cpucache_t* ccnew;
1700
1701                        ccnew = kmalloc(sizeof(void*)*limit+
1702                                        sizeof(cpucache_t), GFP_KERNEL);
1703                        if (!ccnew) {
1704                                for (i--; i >= 0; i--) kfree(new.new[i]);
1705                                return -ENOMEM;
1706                        }
1707                        ccnew->limit = limit;
1708                        ccnew->avail = 0;
1709                        new.new[i] = ccnew;
1710                }
1711        }
1712        new.cachep = cachep;
1713        spin_lock_irq(&cachep->spinlock);
1714        cachep->batchcount = batchcount;
1715        spin_unlock_irq(&cachep->spinlock);
1716
1717        smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
1718
1719        for (i = 0; i < NR_CPUS; i++) {
1720                cpucache_t* ccold = new.new[i];
1721                if (!ccold)
1722                        continue;
1723                local_irq_disable();
1724                free_block(cachep, cc_entry(ccold), ccold->avail);
1725                local_irq_enable();
1726                kfree(ccold);
1727        }
1728        return 0;
1729}
1730
1731/* 
1732 * If slab debugging is enabled, don't batch slabs
1733 * on the per-cpu lists by defaults.
1734 */
1735static void enable_cpucache (kmem_cache_t *cachep)
1736{
1737#ifndef CONFIG_DEBUG_SLAB
1738        int err;
1739        int limit;
1740
1741        /* FIXME: optimize */
1742        if (cachep->objsize > PAGE_SIZE)
1743                return;
1744        if (cachep->objsize > 1024)
1745                limit = 60;
1746        else if (cachep->objsize > 256)
1747                limit = 124;
1748        else
1749                limit = 252;
1750
1751        err = kmem_tune_cpucache(cachep, limit, limit/2);
1752        if (err)
1753                printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
1754                                        cachep->name, -err);
1755#endif
1756}
1757
1758static void enable_all_cpucaches (void)
1759{
1760        struct list_head* p;
1761
1762        down(&cache_chain_sem);
1763
1764        p = &cache_cache.next;
1765        do {
1766                kmem_cache_t* cachep = list_entry(p, kmem_cache_t, next);
1767
1768                enable_cpucache(cachep);
1769                p = cachep->next.next;
1770        } while (p != &cache_cache.next);
1771
1772        up(&cache_chain_sem);
1773}
1774#endif
1775
1776/**
1777 * kmem_cache_reap - Reclaim memory from caches.
1778 * @gfp_mask: the type of memory required.
1779 *
1780 * Called from do_try_to_free_pages() and __alloc_pages()
1781 */
1782int kmem_cache_reap (int gfp_mask)
1783{
1784        slab_t *slabp;
1785        kmem_cache_t *searchp;
1786        kmem_cache_t *best_cachep;
1787        unsigned int best_pages;
1788        unsigned int best_len;
1789        unsigned int scan;
1790        int ret = 0;
1791
1792        if (gfp_mask & __GFP_WAIT)
1793                down(&cache_chain_sem);
1794        else
1795                if (down_trylock(&cache_chain_sem))
1796                        return 0;
1797
1798        scan = REAP_SCANLEN;
1799        best_len = 0;
1800        best_pages = 0;
1801        best_cachep = NULL;
1802        searchp = clock_searchp;
1803        do {
1804                unsigned int pages;
1805                struct list_head* p;
1806                unsigned int full_free;
1807
1808                /* It's safe to test this without holding the cache-lock. */
1809                if (searchp->flags & SLAB_NO_REAP)
1810                        goto next;
1811                spin_lock_irq(&searchp->spinlock);
1812                if (searchp->growing)
1813                        goto next_unlock;
1814                if (searchp->dflags & DFLGS_GROWN) {
1815                        searchp->dflags &= ~DFLGS_GROWN;
1816                        goto next_unlock;
1817                }
1818#ifdef CONFIG_SMP
1819                {
1820                        cpucache_t *cc = cc_data(searchp);
1821                        if (cc && cc->avail) {
1822                                __free_block(searchp, cc_entry(cc), cc->avail);
1823                                cc->avail = 0;
1824                        }
1825                }
1826#endif
1827
1828                full_free = 0;
1829                p = searchp->slabs_free.next;
1830                while (p != &searchp->slabs_free) {
1831                        slabp = list_entry(p, slab_t, list);
1832#if DEBUG
1833                        if (slabp->inuse)
1834                                BUG();
1835#endif
1836                        full_free++;
1837                        p = p->next;
1838                }
1839
1840                /*
1841                 * Try to avoid slabs with constructors and/or
1842                 * more than one page per slab (as it can be difficult
1843                 * to get high orders from gfp()).
1844                 */
1845                pages = full_free * (1<<searchp->gfporder);
1846                if (searchp->ctor)
1847                        pages = (pages*4+1)/5;
1848                if (searchp->gfporder)
1849                        pages = (pages*4+1)/5;
1850                if (pages > best_pages) {
1851                        best_cachep = searchp;
1852                        best_len = full_free;
1853                        best_pages = pages;
1854                        if (pages >= REAP_PERFECT) {
1855                                clock_searchp = list_entry(searchp->next.next,
1856                                                        kmem_cache_t,next);
1857                                goto perfect;
1858                        }
1859                }
1860next_unlock:
1861                spin_unlock_irq(&searchp->spinlock);
1862next:
1863                searchp = list_entry(searchp->next.next,kmem_cache_t,next);
1864        } while (--scan && searchp != clock_searchp);
1865
1866        clock_searchp = searchp;
1867
1868        if (!best_cachep)
1869                /* couldn't find anything to reap */
1870                goto out;
1871
1872        spin_lock_irq(&best_cachep->spinlock);
1873perfect:
1874        /* free only 50% of the free slabs */
1875        best_len = (best_len + 1)/2;
1876        for (scan = 0; scan < best_len; scan++) {
1877                struct list_head *p;
1878
1879                if (best_cachep->growing)
1880                        break;
1881                p = best_cachep->slabs_free.prev;
1882                if (p == &best_cachep->slabs_free)
1883                        break;
1884                slabp = list_entry(p,slab_t,list);
1885#if DEBUG
1886                if (slabp->inuse)
1887                        BUG();
1888#endif
1889                list_del(&slabp->list);
1890                STATS_INC_REAPED(best_cachep);
1891
1892                /* Safe to drop the lock. The slab is no longer linked to the
1893                 * cache.
1894                 */
1895                spin_unlock_irq(&best_cachep->spinlock);
1896                kmem_slab_destroy(best_cachep, slabp);
1897                spin_lock_irq(&best_cachep->spinlock);
1898        }
1899        spin_unlock_irq(&best_cachep->spinlock);
1900        ret = scan * (1 << best_cachep->gfporder);
1901out:
1902        up(&cache_chain_sem);
1903        return ret;
1904}
1905
1906#ifdef CONFIG_PROC_FS
1907
1908static void *s_start(struct seq_file *m, loff_t *pos)
1909{
1910        loff_t n = *pos;
1911        struct list_head *p;
1912
1913        down(&cache_chain_sem);
1914        if (!n)
1915                return (void *)1;
1916        p = &cache_cache.next;
1917        while (--n) {
1918                p = p->next;
1919                if (p == &cache_cache.next)
1920                        return NULL;
1921        }
1922        return list_entry(p, kmem_cache_t, next);
1923}
1924
1925static void *s_next(struct seq_file *m, void *p, loff_t *pos)
1926{
1927        kmem_cache_t *cachep = p;
1928        ++*pos;
1929        if (p == (void *)1)
1930                return &cache_cache;
1931        cachep = list_entry(cachep->next.next, kmem_cache_t, next);
1932        return cachep == &cache_cache ? NULL : cachep;
1933}
1934
1935static void s_stop(struct seq_file *m, void *p)
1936{
1937        up(&cache_chain_sem);
1938}
1939
1940static int s_show(struct seq_file *m, void *p)
1941{
1942        kmem_cache_t *cachep = p;
1943        struct list_head *q;
1944        slab_t          *slabp;
1945        unsigned long   active_objs;
1946        unsigned long   num_objs;
1947        unsigned long   active_slabs = 0;
1948        unsigned long   num_slabs;
1949        const char *name; 
1950
1951        if (p == (void*)1) {
1952                /*
1953                 * Output format version, so at least we can change it
1954                 * without _too_ many complaints.
1955                 */
1956                seq_puts(m, "slabinfo - version: 1.1"
1957#if STATS
1958                                " (statistics)"
1959#endif
1960#ifdef CONFIG_SMP
1961                                " (SMP)"
1962#endif
1963                                "\n");
1964                return 0;
1965        }
1966
1967        spin_lock_irq(&cachep->spinlock);
1968        active_objs = 0;
1969        num_slabs = 0;
1970        list_for_each(q,&cachep->slabs_full) {
1971                slabp = list_entry(q, slab_t, list);
1972                if (slabp->inuse != cachep->num)
1973                        BUG();
1974                active_objs += cachep->num;
1975                active_slabs++;
1976        }
1977        list_for_each(q,&cachep->slabs_partial) {
1978                slabp = list_entry(q, slab_t, list);
1979                if (slabp->inuse == cachep->num || !slabp->inuse)
1980                        BUG();
1981                active_objs += slabp->inuse;
1982                active_slabs++;
1983        }
1984        list_for_each(q,&cachep->slabs_free) {
1985                slabp = list_entry(q, slab_t, list);
1986                if (slabp->inuse)
1987                        BUG();
1988                num_slabs++;
1989        }
1990        num_slabs+=active_slabs;
1991        num_objs = num_slabs*cachep->num;
1992
1993        name = cachep->name; 
1994        {
1995        char tmp; 
1996        mm_segment_t old_fs;
1997
1998        old_fs = get_fs();
1999        set_fs(KERNEL_DS);
2000        if (__get_user(tmp, name)) 
2001                name = "broken"; 
2002        set_fs(old_fs);
2003        }       
2004
2005        seq_printf(m, "%-17s %6lu %6lu %6u %4lu %4lu %4u",
2006                name, active_objs, num_objs, cachep->objsize,
2007                active_slabs, num_slabs, (1<<cachep->gfporder));
2008
2009#if STATS
2010        {
2011                unsigned long errors = cachep->errors;
2012                unsigned long high = cachep->high_mark;
2013                unsigned long grown = cachep->grown;
2014                unsigned long reaped = cachep->reaped;
2015                unsigned long allocs = cachep->num_allocations;
2016
2017                seq_printf(m, " : %6lu %7lu %5lu %4lu %4lu",
2018                                high, allocs, grown, reaped, errors);
2019        }
2020#endif
2021#ifdef CONFIG_SMP
2022        {
2023                unsigned int batchcount = cachep->batchcount;
2024                unsigned int limit;
2025
2026                if (cc_data(cachep))
2027                        limit = cc_data(cachep)->limit;
2028                 else
2029                        limit = 0;
2030                seq_printf(m, " : %4u %4u", limit, batchcount);
2031        }
2032#endif
2033#if STATS && defined(CONFIG_SMP)
2034        {
2035                unsigned long allochit = atomic_read(&cachep->allochit);
2036                unsigned long allocmiss = atomic_read(&cachep->allocmiss);
2037                unsigned long freehit = atomic_read(&cachep->freehit);
2038                unsigned long freemiss = atomic_read(&cachep->freemiss);
2039                seq_printf(m, " : %6lu %6lu %6lu %6lu",
2040                                allochit, allocmiss, freehit, freemiss);
2041        }
2042#endif
2043        spin_unlock_irq(&cachep->spinlock);
2044        seq_putc(m, '\n');
2045        return 0;
2046}
2047
2048/*
2049 * slabinfo_op - iterator that generates /proc/slabinfo
2050 *
2051 * Output layout:
2052 * cache-name
2053 * num-active-objs
2054 * total-objs
2055 * object size
2056 * num-active-slabs
2057 * total-slabs
2058 * num-pages-per-slab
2059 * + further values on SMP and with statistics enabled
2060 */
2061
2062struct seq_operations slabinfo_op = {
2063        .start  = s_start,
2064        .next   = s_next,
2065        .stop   = s_stop,
2066        .show   = s_show,
2067};
2068
2069#define MAX_SLABINFO_WRITE 128
2070/**
2071 * slabinfo_write - SMP tuning for the slab allocator
2072 * @file: unused
2073 * @buffer: user buffer
2074 * @count: data len
2075 * @data: unused
2076 */
2077ssize_t slabinfo_write(struct file *file, const char *buffer,
2078                                size_t count, loff_t *ppos)
2079{
2080#ifdef CONFIG_SMP
2081        char kbuf[MAX_SLABINFO_WRITE+1], *tmp;
2082        int limit, batchcount, res;
2083        struct list_head *p;
2084        
2085        if (count > MAX_SLABINFO_WRITE)
2086                return -EINVAL;
2087        if (copy_from_user(&kbuf, buffer, count))
2088                return -EFAULT;
2089        kbuf[MAX_SLABINFO_WRITE] = '\0'; 
2090
2091        tmp = strchr(kbuf, ' ');
2092        if (!tmp)
2093                return -EINVAL;
2094        *tmp = '\0';
2095        tmp++;
2096        limit = simple_strtol(tmp, &tmp, 10);
2097        while (*tmp == ' ')
2098                tmp++;
2099        batchcount = simple_strtol(tmp, &tmp, 10);
2100
2101        /* Find the cache in the chain of caches. */
2102        down(&cache_chain_sem);
2103        res = -EINVAL;
2104        list_for_each(p,&cache_chain) {
2105                kmem_cache_t *cachep = list_entry(p, kmem_cache_t, next);
2106
2107                if (!strcmp(cachep->name, kbuf)) {
2108                        res = kmem_tune_cpucache(cachep, limit, batchcount);
2109                        break;
2110                }
2111        }
2112        up(&cache_chain_sem);
2113        if (res >= 0)
2114                res = count;
2115        return res;
2116#else
2117        return -EINVAL;
2118#endif
2119}
2120#endif
2121
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.