linux-old/mm/simp.c
<<
>>
Prefs
   1#define NULL 0
   2/*
   3 * mm/simp.c  -- simple allocator for cached objects
   4 *
   5 * (C) 1997 Thomas Schoebel-Theuer
   6 */
   7
   8#include <linux/simp.h>
   9#include <linux/tasks.h>
  10#include <linux/smp.h>
  11#include <linux/mm.h>
  12#include <asm/spinlock.h>
  13
  14/* The next two defines can be independently enabled for debugging */
  15/*#define DEBUG*/
  16/*#define DEAD_BEEF*/
  17
  18#ifdef DEAD_BEEF
  19#define DEBUG_BEEF 1
  20#else
  21#define DEBUG_BEEF 0
  22#endif
  23
  24#ifdef __SMP__
  25#define NR_PROCESSORS  NR_CPUS
  26#define GLOBAL_SIZE CHUNK_SIZE
  27#else
  28#define NR_PROCESSORS  1
  29#define GLOBAL_SIZE PAGE_SIZE
  30#endif
  31
  32#define POSTBUFFER_SIZE 63
  33#define ORDER 2
  34#define CHUNK_SIZE (PAGE_SIZE*(1<<ORDER))
  35#define CHUNK_BASE(ptr) (struct header*)(((unsigned long)(ptr)) & ~(CHUNK_SIZE-1))
  36#define CHUNK_END(hdr) (void**)((char*)(hdr) + CHUNK_SIZE)
  37
  38#define COLOR_INCREMENT (8*sizeof(void*)) /* should be 1 cache line */
  39#define ALIGN_CACHE(adr) ((((((unsigned long)adr) - 1) / COLOR_INCREMENT) + 1) * COLOR_INCREMENT)
  40#define HEADER_SIZE ALIGN_CACHE(sizeof(struct header))
  41#define ELEM_SIZE ALIGN_CACHE(sizeof(struct elem))
  42#define FILL_TYPE(name,wrongsize) char name[ALIGN_CACHE(wrongsize)-(wrongsize)]
  43
  44#define MAX_SIMPS ((GLOBAL_SIZE / sizeof(struct simp)) - 1)
  45
  46struct header { /* this is at the beginning of each memory region */
  47        /* 1st cache line */
  48        void ** index;
  49        void ** fresh;
  50        struct simp * father;
  51        void ** emptypos;
  52        struct header * next;
  53        structor again_ctor;
  54        structor first_ctor;
  55        void * fill[1];
  56#ifdef DEBUG
  57        /* 2nd cache line */
  58        char magic[32];
  59#endif
  60};
  61
  62struct per_processor {
  63        void ** buffer_pos;
  64        void * postbuffer[POSTBUFFER_SIZE];
  65};
  66
  67struct simp {
  68        /* 1st cache lines */
  69        struct per_processor private[NR_PROCESSORS];
  70        /* next cache line */
  71        struct header * usable_list;
  72        spinlock_t lock;
  73        char fill[sizeof(void*) - sizeof(spinlock_t)];
  74        long real_size;
  75        long max_elems;
  76        structor again_ctor;
  77        structor first_ctor;
  78        structor dtor;
  79        long fill2;
  80        /* next cache line */
  81        long create_offset;
  82        long color;
  83        long max_color;
  84        long size;
  85        long fill3[4];
  86        /* next cache line */
  87        char name[32];
  88};
  89
  90struct global_data {
  91        /* 1st cache line */
  92        long changed_flag;
  93        long nr_simps;
  94        spinlock_t lock;
  95        char fill[(6+8)*sizeof(void*)+sizeof(void*)-sizeof(spinlock_t)];
  96        /* rest */
  97        struct simp simps[MAX_SIMPS];
  98};
  99
 100static struct global_data * global = NULL;
 101
 102#ifdef DEBUG
 103static char global_magic[32] = "SIMP header SdC581oi9rY20051962\n";
 104#endif
 105
 106struct simp * simp_create(char * name, long size,
 107                          structor first_ctor, 
 108                          structor again_ctor, 
 109                          structor dtor)
 110{
 111        struct simp * simp;
 112        long fraction;
 113        long real_size;
 114        int cpu;
 115
 116        if(!global) {
 117#ifdef __SMP__
 118                global = (struct global_data*)__get_free_pages(GFP_KERNEL, ORDER, 0);
 119                memset(global, 0, CHUNK_SIZE);
 120#else
 121                global = (struct global_data*)get_free_page(GFP_KERNEL);
 122#endif
 123                spin_lock_init(&global->lock);
 124        }
 125
 126        spin_lock(&global->lock);
 127        simp = &global->simps[global->nr_simps++];
 128        spin_unlock(&global->lock);
 129
 130        if(global->nr_simps >= MAX_SIMPS) {
 131                printk("SIMP: too many simps allocated\n");
 132                return NULL;
 133        }
 134        memset(simp, 0, sizeof(struct simp));
 135        spin_lock_init(&simp->lock);
 136        strncpy(simp->name, name, 15);
 137        simp->size = size;
 138        simp->real_size = real_size = ALIGN_CACHE(size);
 139        /* allow aggregation of very small objects in 2-power fractions of
 140         * cachelines */
 141        fraction = COLOR_INCREMENT / 2;
 142        while(size <= fraction && fraction >= sizeof(void*)) {
 143                simp->real_size = fraction;
 144                fraction >>= 1;
 145        }
 146        simp->first_ctor = first_ctor;
 147        simp->again_ctor = again_ctor;
 148        simp->dtor = dtor;
 149        
 150        real_size += sizeof(void*);
 151        simp->max_elems = (CHUNK_SIZE - HEADER_SIZE) / real_size;
 152        simp->max_color = (CHUNK_SIZE - HEADER_SIZE) % real_size;
 153        for(cpu = 0; cpu < NR_PROCESSORS; cpu++) {
 154                struct per_processor * private = &simp->private[cpu];
 155                private->buffer_pos = private->postbuffer;
 156        }
 157        return simp;
 158}
 159
 160/* Do *not* inline this, it clobbers too many registers... */
 161static void alloc_header(struct simp * simp)
 162{
 163        struct header * hdr;
 164        char * ptr;
 165        void ** index;
 166        long count;
 167
 168        spin_unlock(&simp->lock);
 169        for(;;) {
 170                hdr = (struct header*)__get_free_pages(GFP_KERNEL, ORDER, 0);
 171                if(hdr)
 172                        break;
 173                if(!simp_garbage())
 174                        return;
 175        }
 176#ifdef DEBUG
 177        if(CHUNK_BASE(hdr) != hdr)
 178                panic("simp: bad kernel page alignment");
 179#endif
 180
 181        memset(hdr, 0, HEADER_SIZE);
 182#ifdef DEBUG
 183        memcpy(hdr->magic, global_magic, sizeof(global_magic));
 184#endif
 185        hdr->father = simp;
 186        hdr->again_ctor = simp->again_ctor;
 187        hdr->first_ctor = simp->first_ctor;
 188        
 189        /* note: races on simp->color don't produce any error :-) */
 190        ptr = ((char*)hdr) + HEADER_SIZE + simp->color;
 191        index = CHUNK_END(hdr);
 192        for(count = 0; count < simp->max_elems; count++) {
 193                *--index = ptr;
 194                ptr += simp->real_size;
 195                /* note: constructors are not called here in bunch but
 196                 * instead at each single simp_alloc(), in order
 197                 * to maximize chances that the cache will be
 198                 * polluted after a simp_alloc() anyway,
 199                 * and not here. */
 200        }
 201        hdr->index = hdr->fresh = hdr->emptypos = index;
 202
 203        spin_lock(&simp->lock);
 204        simp->color += COLOR_INCREMENT;
 205        if(simp->color >= simp->max_color)
 206                simp->color = 0;
 207        hdr->next = simp->usable_list;
 208        simp->usable_list = hdr;
 209}
 210
 211/* current x86 memcpy() is horribly moving around registers for nothing,
 212 * is doing unnecessary work if the size is dividable by a power-of-two,
 213 * and it clobbers way too many registers.
 214 * This results in nearly any other register being transfered to stack.
 215 * Fixing this would be a major win for the whole kernel!
 216 */
 217static void ** bunch_alloc(struct simp * simp, void ** buffer)
 218{
 219        struct header * hdr;
 220        void ** index;
 221        void ** to;
 222        void ** end;
 223        structor todo;
 224        long length;
 225
 226        spin_lock(&simp->lock);
 227        hdr = simp->usable_list;
 228        if(!hdr) {
 229                alloc_header(simp);
 230                hdr = simp->usable_list;
 231                if(!hdr) {
 232                        spin_unlock(&simp->lock);
 233                        *buffer = NULL;
 234                        return buffer+1;
 235                }
 236        }
 237        
 238        index = hdr->index;
 239        end = hdr->fresh;
 240        todo = hdr->again_ctor;
 241        if(index == end) {
 242                end = CHUNK_END(hdr);
 243                todo = hdr->first_ctor;
 244        }
 245        to = index + POSTBUFFER_SIZE/2;
 246        if(to >= end) {
 247                to = end;
 248                if(to == CHUNK_END(hdr)) {
 249                        simp->usable_list = hdr->next;
 250                        hdr->next = NULL;
 251                }
 252        }
 253        if(to > hdr->fresh)
 254                hdr->fresh = to;
 255        hdr->index = to;
 256        length = ((unsigned long)to) - (unsigned long)index;
 257        to = buffer + (length/sizeof(void**));
 258
 259        memcpy(buffer, index, length);
 260
 261        spin_unlock(&simp->lock);
 262
 263        if(todo) {
 264                do {
 265                        todo(*buffer++);
 266                } while(buffer < to);
 267        }
 268        return to;
 269}
 270
 271void * simp_alloc(struct simp * simp)
 272{
 273#ifdef __SMP__
 274        const long cpu = smp_processor_id();
 275        struct per_processor * priv = &simp->private[cpu];
 276#else
 277#define priv (&simp->private[0]) /*fool gcc to use no extra register*/
 278#endif
 279        void ** buffer_pos = priv->buffer_pos;
 280        void * res;
 281
 282        if(buffer_pos == priv->postbuffer) {
 283                buffer_pos = bunch_alloc(simp, buffer_pos);
 284        }
 285        buffer_pos--;
 286        res = *buffer_pos;
 287        priv->buffer_pos = buffer_pos;
 288        return res;
 289}
 290
 291#ifdef DEBUG
 292long check_header(struct header * hdr, void * ptr)
 293{
 294        void ** test;
 295
 296        if(!hdr) {
 297                printk("SIMP: simp_free() with NULL pointer\n");
 298                return 1;
 299        }
 300        if(strncmp(hdr->magic, global_magic, 32)) {
 301                printk("SIMP: simpe_free() with bad ptr %p, or header corruption\n", ptr);
 302                return 1;
 303        }
 304        /* This is brute force, but I don't want to pay for any
 305         * overhead if debugging is not enabled, in particular
 306         * no space overhead for keeping hashtables etc. */
 307        test = hdr->index;
 308        while(test < CHUNK_END(hdr)) {
 309                if(*test++ == ptr) {
 310                        printk("SIMP: trying to simp_free(%p) again\n", ptr);
 311                        return 1;
 312                }
 313        }
 314        return 0;
 315}
 316#endif
 317
 318static void ** bunch_free(struct simp * simp, void ** buffer)
 319{
 320        void ** stop;
 321
 322        stop = buffer - POSTBUFFER_SIZE/3;
 323
 324        spin_lock(&simp->lock);
 325        while(buffer > stop) {
 326                void * elem = buffer[-1];
 327                struct header * hdr = CHUNK_BASE(elem);
 328                void ** index = hdr->index;
 329                index--;
 330                hdr->index = index;
 331                *index = elem;
 332                if(!hdr->next) {
 333                        hdr->next = simp->usable_list;
 334                        simp->usable_list = hdr;
 335                }
 336
 337                buffer -= 2;
 338                elem = *buffer;
 339                hdr = CHUNK_BASE(elem);
 340                index = hdr->index;
 341                index--;
 342                hdr->index = index;
 343                *index = elem;
 344                if(!hdr->next) {
 345                        hdr->next = simp->usable_list;
 346                        simp->usable_list = hdr;
 347                }
 348        }
 349        spin_unlock(&simp->lock);
 350        global->changed_flag = 1;
 351        return buffer;
 352}
 353
 354void simp_free(void * objp)
 355{
 356        struct header * hdr;
 357        void ** buffer_pos;
 358        struct per_processor * private;
 359#ifdef __SMP__
 360        const long cpu = smp_processor_id();
 361#else
 362        const long cpu = 0;
 363#endif
 364
 365        hdr = CHUNK_BASE(objp);
 366#ifdef DEBUG
 367        if(check_header(hdr, objp))
 368                return;
 369#endif
 370
 371        private = &hdr->father->private[cpu];
 372        buffer_pos = private->buffer_pos;
 373        if(buffer_pos >= private->postbuffer+POSTBUFFER_SIZE) {
 374                buffer_pos = bunch_free(hdr->father, buffer_pos);
 375        }
 376
 377        *buffer_pos++ = objp;
 378        private->buffer_pos = buffer_pos;
 379
 380#ifdef DEAD_BEEF
 381        {
 382                unsigned int * ptr = (unsigned int*)objp;
 383                int count = (hdr->father->real_size - ELEM_SIZE) / sizeof(unsigned int);
 384                while(count--)
 385                        *ptr++ = 0xdeadbeef;
 386        }
 387#endif
 388}
 389
 390long simp_garbage(void)
 391{
 392        int i;
 393        int res;
 394
 395        if(!global->changed_flag)
 396                return 0; /* shortcut */
 397        /* Note: costs do not matter here. Any heavy thrashing of
 398         * simp chunks that could be caused by pools stealing each
 399         * other's memory has to be considered a BUG :-)
 400         * Simply avoid memory shortages by conservative allocating
 401         * policies.
 402         */
 403        global->changed_flag = 0;
 404        res = 0;
 405        for(i = 0; i < global->nr_simps; i++) {
 406                struct simp * simp = &global->simps[i];
 407                struct header ** base = &simp->usable_list;
 408                struct header * del;
 409
 410                spin_lock(&simp->lock);
 411                del = *base;
 412                while(del) {
 413                        if(del->index == del->emptypos) {
 414                                if(simp->dtor) {
 415                                        void ** ptr = del->index;
 416                                        while(ptr < CHUNK_END(del)) {
 417                                                simp->dtor(*ptr++);
 418                                        }
 419                                }
 420                                *base = del->next;
 421#ifdef DEBUG
 422                                memset(del, 0, CHUNK_SIZE);
 423#endif
 424                                free_pages((unsigned long)del, ORDER);
 425                                res++;
 426                        } else
 427                                base = &del->next;
 428                        del = *base;
 429                }
 430                spin_unlock(&simp->lock);
 431        }
 432        return res;
 433}
 434
 435
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.