linux/mm/zbud.c
<<
>>
Prefs
   1/*
   2 * zbud.c
   3 *
   4 * Copyright (C) 2013, Seth Jennings, IBM
   5 *
   6 * Concepts based on zcache internal zbud allocator by Dan Magenheimer.
   7 *
   8 * zbud is an special purpose allocator for storing compressed pages.  Contrary
   9 * to what its name may suggest, zbud is not a buddy allocator, but rather an
  10 * allocator that "buddies" two compressed pages together in a single memory
  11 * page.
  12 *
  13 * While this design limits storage density, it has simple and deterministic
  14 * reclaim properties that make it preferable to a higher density approach when
  15 * reclaim will be used.
  16 *
  17 * zbud works by storing compressed pages, or "zpages", together in pairs in a
  18 * single memory page called a "zbud page".  The first buddy is "left
  19 * justifed" at the beginning of the zbud page, and the last buddy is "right
  20 * justified" at the end of the zbud page.  The benefit is that if either
  21 * buddy is freed, the freed buddy space, coalesced with whatever slack space
  22 * that existed between the buddies, results in the largest possible free region
  23 * within the zbud page.
  24 *
  25 * zbud also provides an attractive lower bound on density. The ratio of zpages
  26 * to zbud pages can not be less than 1.  This ensures that zbud can never "do
  27 * harm" by using more pages to store zpages than the uncompressed zpages would
  28 * have used on their own.
  29 *
  30 * zbud pages are divided into "chunks".  The size of the chunks is fixed at
  31 * compile time and determined by NCHUNKS_ORDER below.  Dividing zbud pages
  32 * into chunks allows organizing unbuddied zbud pages into a manageable number
  33 * of unbuddied lists according to the number of free chunks available in the
  34 * zbud page.
  35 *
  36 * The zbud API differs from that of conventional allocators in that the
  37 * allocation function, zbud_alloc(), returns an opaque handle to the user,
  38 * not a dereferenceable pointer.  The user must map the handle using
  39 * zbud_map() in order to get a usable pointer by which to access the
  40 * allocation data and unmap the handle with zbud_unmap() when operations
  41 * on the allocation data are complete.
  42 */
  43
  44#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  45
  46#include <linux/atomic.h>
  47#include <linux/list.h>
  48#include <linux/mm.h>
  49#include <linux/module.h>
  50#include <linux/preempt.h>
  51#include <linux/slab.h>
  52#include <linux/spinlock.h>
  53#include <linux/zbud.h>
  54
  55/*****************
  56 * Structures
  57*****************/
  58/*
  59 * NCHUNKS_ORDER determines the internal allocation granularity, effectively
  60 * adjusting internal fragmentation.  It also determines the number of
  61 * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
  62 * allocation granularity will be in chunks of size PAGE_SIZE/64, and there
  63 * will be 64 freelists per pool.
  64 */
  65#define NCHUNKS_ORDER   6
  66
  67#define CHUNK_SHIFT     (PAGE_SHIFT - NCHUNKS_ORDER)
  68#define CHUNK_SIZE      (1 << CHUNK_SHIFT)
  69#define NCHUNKS         (PAGE_SIZE >> CHUNK_SHIFT)
  70#define ZHDR_SIZE_ALIGNED CHUNK_SIZE
  71
  72/**
  73 * struct zbud_pool - stores metadata for each zbud pool
  74 * @lock:       protects all pool fields and first|last_chunk fields of any
  75 *              zbud page in the pool
  76 * @unbuddied:  array of lists tracking zbud pages that only contain one buddy;
  77 *              the lists each zbud page is added to depends on the size of
  78 *              its free region.
  79 * @buddied:    list tracking the zbud pages that contain two buddies;
  80 *              these zbud pages are full
  81 * @lru:        list tracking the zbud pages in LRU order by most recently
  82 *              added buddy.
  83 * @pages_nr:   number of zbud pages in the pool.
  84 * @ops:        pointer to a structure of user defined operations specified at
  85 *              pool creation time.
  86 *
  87 * This structure is allocated at pool creation time and maintains metadata
  88 * pertaining to a particular zbud pool.
  89 */
  90struct zbud_pool {
  91        spinlock_t lock;
  92        struct list_head unbuddied[NCHUNKS];
  93        struct list_head buddied;
  94        struct list_head lru;
  95        u64 pages_nr;
  96        struct zbud_ops *ops;
  97};
  98
  99/*
 100 * struct zbud_header - zbud page metadata occupying the first chunk of each
 101 *                      zbud page.
 102 * @buddy:      links the zbud page into the unbuddied/buddied lists in the pool
 103 * @lru:        links the zbud page into the lru list in the pool
 104 * @first_chunks:       the size of the first buddy in chunks, 0 if free
 105 * @last_chunks:        the size of the last buddy in chunks, 0 if free
 106 */
 107struct zbud_header {
 108        struct list_head buddy;
 109        struct list_head lru;
 110        unsigned int first_chunks;
 111        unsigned int last_chunks;
 112        bool under_reclaim;
 113};
 114
 115/*****************
 116 * Helpers
 117*****************/
 118/* Just to make the code easier to read */
 119enum buddy {
 120        FIRST,
 121        LAST
 122};
 123
 124/* Converts an allocation size in bytes to size in zbud chunks */
 125static int size_to_chunks(int size)
 126{
 127        return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
 128}
 129
 130#define for_each_unbuddied_list(_iter, _begin) \
 131        for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)
 132
 133/* Initializes the zbud header of a newly allocated zbud page */
 134static struct zbud_header *init_zbud_page(struct page *page)
 135{
 136        struct zbud_header *zhdr = page_address(page);
 137        zhdr->first_chunks = 0;
 138        zhdr->last_chunks = 0;
 139        INIT_LIST_HEAD(&zhdr->buddy);
 140        INIT_LIST_HEAD(&zhdr->lru);
 141        zhdr->under_reclaim = 0;
 142        return zhdr;
 143}
 144
 145/* Resets the struct page fields and frees the page */
 146static void free_zbud_page(struct zbud_header *zhdr)
 147{
 148        __free_page(virt_to_page(zhdr));
 149}
 150
 151/*
 152 * Encodes the handle of a particular buddy within a zbud page
 153 * Pool lock should be held as this function accesses first|last_chunks
 154 */
 155static unsigned long encode_handle(struct zbud_header *zhdr, enum buddy bud)
 156{
 157        unsigned long handle;
 158
 159        /*
 160         * For now, the encoded handle is actually just the pointer to the data
 161         * but this might not always be the case.  A little information hiding.
 162         * Add CHUNK_SIZE to the handle if it is the first allocation to jump
 163         * over the zbud header in the first chunk.
 164         */
 165        handle = (unsigned long)zhdr;
 166        if (bud == FIRST)
 167                /* skip over zbud header */
 168                handle += ZHDR_SIZE_ALIGNED;
 169        else /* bud == LAST */
 170                handle += PAGE_SIZE - (zhdr->last_chunks  << CHUNK_SHIFT);
 171        return handle;
 172}
 173
 174/* Returns the zbud page where a given handle is stored */
 175static struct zbud_header *handle_to_zbud_header(unsigned long handle)
 176{
 177        return (struct zbud_header *)(handle & PAGE_MASK);
 178}
 179
 180/* Returns the number of free chunks in a zbud page */
 181static int num_free_chunks(struct zbud_header *zhdr)
 182{
 183        /*
 184         * Rather than branch for different situations, just use the fact that
 185         * free buddies have a length of zero to simplify everything. -1 at the
 186         * end for the zbud header.
 187         */
 188        return NCHUNKS - zhdr->first_chunks - zhdr->last_chunks - 1;
 189}
 190
 191/*****************
 192 * API Functions
 193*****************/
 194/**
 195 * zbud_create_pool() - create a new zbud pool
 196 * @gfp:        gfp flags when allocating the zbud pool structure
 197 * @ops:        user-defined operations for the zbud pool
 198 *
 199 * Return: pointer to the new zbud pool or NULL if the metadata allocation
 200 * failed.
 201 */
 202struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops)
 203{
 204        struct zbud_pool *pool;
 205        int i;
 206
 207        pool = kmalloc(sizeof(struct zbud_pool), gfp);
 208        if (!pool)
 209                return NULL;
 210        spin_lock_init(&pool->lock);
 211        for_each_unbuddied_list(i, 0)
 212                INIT_LIST_HEAD(&pool->unbuddied[i]);
 213        INIT_LIST_HEAD(&pool->buddied);
 214        INIT_LIST_HEAD(&pool->lru);
 215        pool->pages_nr = 0;
 216        pool->ops = ops;
 217        return pool;
 218}
 219
 220/**
 221 * zbud_destroy_pool() - destroys an existing zbud pool
 222 * @pool:       the zbud pool to be destroyed
 223 *
 224 * The pool should be emptied before this function is called.
 225 */
 226void zbud_destroy_pool(struct zbud_pool *pool)
 227{
 228        kfree(pool);
 229}
 230
 231/**
 232 * zbud_alloc() - allocates a region of a given size
 233 * @pool:       zbud pool from which to allocate
 234 * @size:       size in bytes of the desired allocation
 235 * @gfp:        gfp flags used if the pool needs to grow
 236 * @handle:     handle of the new allocation
 237 *
 238 * This function will attempt to find a free region in the pool large enough to
 239 * satisfy the allocation request.  A search of the unbuddied lists is
 240 * performed first. If no suitable free region is found, then a new page is
 241 * allocated and added to the pool to satisfy the request.
 242 *
 243 * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used
 244 * as zbud pool pages.
 245 *
 246 * Return: 0 if success and handle is set, otherwise -EINVAL is the size or
 247 * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
 248 * a new page.
 249 */
 250int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp,
 251                        unsigned long *handle)
 252{
 253        int chunks, i, freechunks;
 254        struct zbud_header *zhdr = NULL;
 255        enum buddy bud;
 256        struct page *page;
 257
 258        if (size <= 0 || gfp & __GFP_HIGHMEM)
 259                return -EINVAL;
 260        if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
 261                return -ENOSPC;
 262        chunks = size_to_chunks(size);
 263        spin_lock(&pool->lock);
 264
 265        /* First, try to find an unbuddied zbud page. */
 266        zhdr = NULL;
 267        for_each_unbuddied_list(i, chunks) {
 268                if (!list_empty(&pool->unbuddied[i])) {
 269                        zhdr = list_first_entry(&pool->unbuddied[i],
 270                                        struct zbud_header, buddy);
 271                        list_del(&zhdr->buddy);
 272                        if (zhdr->first_chunks == 0)
 273                                bud = FIRST;
 274                        else
 275                                bud = LAST;
 276                        goto found;
 277                }
 278        }
 279
 280        /* Couldn't find unbuddied zbud page, create new one */
 281        spin_unlock(&pool->lock);
 282        page = alloc_page(gfp);
 283        if (!page)
 284                return -ENOMEM;
 285        spin_lock(&pool->lock);
 286        pool->pages_nr++;
 287        zhdr = init_zbud_page(page);
 288        bud = FIRST;
 289
 290found:
 291        if (bud == FIRST)
 292                zhdr->first_chunks = chunks;
 293        else
 294                zhdr->last_chunks = chunks;
 295
 296        if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0) {
 297                /* Add to unbuddied list */
 298                freechunks = num_free_chunks(zhdr);
 299                list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
 300        } else {
 301                /* Add to buddied list */
 302                list_add(&zhdr->buddy, &pool->buddied);
 303        }
 304
 305        /* Add/move zbud page to beginning of LRU */
 306        if (!list_empty(&zhdr->lru))
 307                list_del(&zhdr->lru);
 308        list_add(&zhdr->lru, &pool->lru);
 309
 310        *handle = encode_handle(zhdr, bud);
 311        spin_unlock(&pool->lock);
 312
 313        return 0;
 314}
 315
 316/**
 317 * zbud_free() - frees the allocation associated with the given handle
 318 * @pool:       pool in which the allocation resided
 319 * @handle:     handle associated with the allocation returned by zbud_alloc()
 320 *
 321 * In the case that the zbud page in which the allocation resides is under
 322 * reclaim, as indicated by the PG_reclaim flag being set, this function
 323 * only sets the first|last_chunks to 0.  The page is actually freed
 324 * once both buddies are evicted (see zbud_reclaim_page() below).
 325 */
 326void zbud_free(struct zbud_pool *pool, unsigned long handle)
 327{
 328        struct zbud_header *zhdr;
 329        int freechunks;
 330
 331        spin_lock(&pool->lock);
 332        zhdr = handle_to_zbud_header(handle);
 333
 334        /* If first buddy, handle will be page aligned */
 335        if ((handle - ZHDR_SIZE_ALIGNED) & ~PAGE_MASK)
 336                zhdr->last_chunks = 0;
 337        else
 338                zhdr->first_chunks = 0;
 339
 340        if (zhdr->under_reclaim) {
 341                /* zbud page is under reclaim, reclaim will free */
 342                spin_unlock(&pool->lock);
 343                return;
 344        }
 345
 346        /* Remove from existing buddy list */
 347        list_del(&zhdr->buddy);
 348
 349        if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
 350                /* zbud page is empty, free */
 351                list_del(&zhdr->lru);
 352                free_zbud_page(zhdr);
 353                pool->pages_nr--;
 354        } else {
 355                /* Add to unbuddied list */
 356                freechunks = num_free_chunks(zhdr);
 357                list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
 358        }
 359
 360        spin_unlock(&pool->lock);
 361}
 362
 363#define list_tail_entry(ptr, type, member) \
 364        list_entry((ptr)->prev, type, member)
 365
 366/**
 367 * zbud_reclaim_page() - evicts allocations from a pool page and frees it
 368 * @pool:       pool from which a page will attempt to be evicted
 369 * @retires:    number of pages on the LRU list for which eviction will
 370 *              be attempted before failing
 371 *
 372 * zbud reclaim is different from normal system reclaim in that the reclaim is
 373 * done from the bottom, up.  This is because only the bottom layer, zbud, has
 374 * information on how the allocations are organized within each zbud page. This
 375 * has the potential to create interesting locking situations between zbud and
 376 * the user, however.
 377 *
 378 * To avoid these, this is how zbud_reclaim_page() should be called:
 379
 380 * The user detects a page should be reclaimed and calls zbud_reclaim_page().
 381 * zbud_reclaim_page() will remove a zbud page from the pool LRU list and call
 382 * the user-defined eviction handler with the pool and handle as arguments.
 383 *
 384 * If the handle can not be evicted, the eviction handler should return
 385 * non-zero. zbud_reclaim_page() will add the zbud page back to the
 386 * appropriate list and try the next zbud page on the LRU up to
 387 * a user defined number of retries.
 388 *
 389 * If the handle is successfully evicted, the eviction handler should
 390 * return 0 _and_ should have called zbud_free() on the handle. zbud_free()
 391 * contains logic to delay freeing the page if the page is under reclaim,
 392 * as indicated by the setting of the PG_reclaim flag on the underlying page.
 393 *
 394 * If all buddies in the zbud page are successfully evicted, then the
 395 * zbud page can be freed.
 396 *
 397 * Returns: 0 if page is successfully freed, otherwise -EINVAL if there are
 398 * no pages to evict or an eviction handler is not registered, -EAGAIN if
 399 * the retry limit was hit.
 400 */
 401int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
 402{
 403        int i, ret, freechunks;
 404        struct zbud_header *zhdr;
 405        unsigned long first_handle = 0, last_handle = 0;
 406
 407        spin_lock(&pool->lock);
 408        if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) ||
 409                        retries == 0) {
 410                spin_unlock(&pool->lock);
 411                return -EINVAL;
 412        }
 413        for (i = 0; i < retries; i++) {
 414                zhdr = list_tail_entry(&pool->lru, struct zbud_header, lru);
 415                list_del(&zhdr->lru);
 416                list_del(&zhdr->buddy);
 417                /* Protect zbud page against free */
 418                zhdr->under_reclaim = true;
 419                /*
 420                 * We need encode the handles before unlocking, since we can
 421                 * race with free that will set (first|last)_chunks to 0
 422                 */
 423                first_handle = 0;
 424                last_handle = 0;
 425                if (zhdr->first_chunks)
 426                        first_handle = encode_handle(zhdr, FIRST);
 427                if (zhdr->last_chunks)
 428                        last_handle = encode_handle(zhdr, LAST);
 429                spin_unlock(&pool->lock);
 430
 431                /* Issue the eviction callback(s) */
 432                if (first_handle) {
 433                        ret = pool->ops->evict(pool, first_handle);
 434                        if (ret)
 435                                goto next;
 436                }
 437                if (last_handle) {
 438                        ret = pool->ops->evict(pool, last_handle);
 439                        if (ret)
 440                                goto next;
 441                }
 442next:
 443                spin_lock(&pool->lock);
 444                zhdr->under_reclaim = false;
 445                if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
 446                        /*
 447                         * Both buddies are now free, free the zbud page and
 448                         * return success.
 449                         */
 450                        free_zbud_page(zhdr);
 451                        pool->pages_nr--;
 452                        spin_unlock(&pool->lock);
 453                        return 0;
 454                } else if (zhdr->first_chunks == 0 ||
 455                                zhdr->last_chunks == 0) {
 456                        /* add to unbuddied list */
 457                        freechunks = num_free_chunks(zhdr);
 458                        list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
 459                } else {
 460                        /* add to buddied list */
 461                        list_add(&zhdr->buddy, &pool->buddied);
 462                }
 463
 464                /* add to beginning of LRU */
 465                list_add(&zhdr->lru, &pool->lru);
 466        }
 467        spin_unlock(&pool->lock);
 468        return -EAGAIN;
 469}
 470
 471/**
 472 * zbud_map() - maps the allocation associated with the given handle
 473 * @pool:       pool in which the allocation resides
 474 * @handle:     handle associated with the allocation to be mapped
 475 *
 476 * While trivial for zbud, the mapping functions for others allocators
 477 * implementing this allocation API could have more complex information encoded
 478 * in the handle and could create temporary mappings to make the data
 479 * accessible to the user.
 480 *
 481 * Returns: a pointer to the mapped allocation
 482 */
 483void *zbud_map(struct zbud_pool *pool, unsigned long handle)
 484{
 485        return (void *)(handle);
 486}
 487
 488/**
 489 * zbud_unmap() - maps the allocation associated with the given handle
 490 * @pool:       pool in which the allocation resides
 491 * @handle:     handle associated with the allocation to be unmapped
 492 */
 493void zbud_unmap(struct zbud_pool *pool, unsigned long handle)
 494{
 495}
 496
 497/**
 498 * zbud_get_pool_size() - gets the zbud pool size in pages
 499 * @pool:       pool whose size is being queried
 500 *
 501 * Returns: size in pages of the given pool.  The pool lock need not be
 502 * taken to access pages_nr.
 503 */
 504u64 zbud_get_pool_size(struct zbud_pool *pool)
 505{
 506        return pool->pages_nr;
 507}
 508
 509static int __init init_zbud(void)
 510{
 511        /* Make sure the zbud header will fit in one chunk */
 512        BUILD_BUG_ON(sizeof(struct zbud_header) > ZHDR_SIZE_ALIGNED);
 513        pr_info("loaded\n");
 514        return 0;
 515}
 516
 517static void __exit exit_zbud(void)
 518{
 519        pr_info("unloaded\n");
 520}
 521
 522module_init(init_zbud);
 523module_exit(exit_zbud);
 524
 525MODULE_LICENSE("GPL");
 526MODULE_AUTHOR("Seth Jennings <sjenning@linux.vnet.ibm.com>");
 527MODULE_DESCRIPTION("Buddy Allocator for Compressed Pages");
 528
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.