linux-bk/mm/swap_state.c
<<
>>
Prefs
   1/*
   2 *  linux/mm/swap_state.c
   3 *
   4 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   5 *  Swap reorganised 29.12.95, Stephen Tweedie
   6 *
   7 *  Rewritten to use page cache, (C) 1998 Stephen Tweedie
   8 */
   9
  10#include <linux/mm.h>
  11#include <linux/kernel_stat.h>
  12#include <linux/swap.h>
  13#include <linux/init.h>
  14#include <linux/pagemap.h>
  15#include <linux/backing-dev.h>
  16#include <linux/buffer_head.h>  /* block_sync_page() */
  17
  18#include <asm/pgtable.h>
  19
  20/*
  21 * swapper_inode doesn't do anything much.  It is really only here to
  22 * avoid some special-casing in other parts of the kernel.
  23 */
  24static struct inode swapper_inode = {
  25        .i_mapping      = &swapper_space,
  26};
  27
  28static struct backing_dev_info swap_backing_dev_info = {
  29        .ra_pages       = 0,    /* No readahead */
  30        .memory_backed  = 1,    /* Does not contribute to dirty memory */
  31};
  32
  33extern struct address_space_operations swap_aops;
  34
  35struct address_space swapper_space = {
  36        .page_tree              = RADIX_TREE_INIT(GFP_ATOMIC),
  37        .page_lock              = RW_LOCK_UNLOCKED,
  38        .clean_pages            = LIST_HEAD_INIT(swapper_space.clean_pages),
  39        .dirty_pages            = LIST_HEAD_INIT(swapper_space.dirty_pages),
  40        .io_pages               = LIST_HEAD_INIT(swapper_space.io_pages),
  41        .locked_pages           = LIST_HEAD_INIT(swapper_space.locked_pages),
  42        .host                   = &swapper_inode,
  43        .a_ops                  = &swap_aops,
  44        .backing_dev_info       = &swap_backing_dev_info,
  45        .i_shared_lock          = SPIN_LOCK_UNLOCKED,
  46        .private_lock           = SPIN_LOCK_UNLOCKED,
  47        .private_list           = LIST_HEAD_INIT(swapper_space.private_list),
  48};
  49
  50#define INC_CACHE_INFO(x)       do { swap_cache_info.x++; } while (0)
  51
  52static struct {
  53        unsigned long add_total;
  54        unsigned long del_total;
  55        unsigned long find_success;
  56        unsigned long find_total;
  57        unsigned long noent_race;
  58        unsigned long exist_race;
  59} swap_cache_info;
  60
  61void show_swap_cache_info(void)
  62{
  63        printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n",
  64                swap_cache_info.add_total, swap_cache_info.del_total,
  65                swap_cache_info.find_success, swap_cache_info.find_total,
  66                swap_cache_info.noent_race, swap_cache_info.exist_race);
  67}
  68
  69int add_to_swap_cache(struct page *page, swp_entry_t entry)
  70{
  71        int error;
  72
  73        if (page->mapping)
  74                BUG();
  75        if (!swap_duplicate(entry)) {
  76                INC_CACHE_INFO(noent_race);
  77                return -ENOENT;
  78        }
  79        error = add_to_page_cache(page, &swapper_space, entry.val);
  80        /*
  81         * Anon pages are already on the LRU, we don't run lru_cache_add here.
  82         */
  83        if (error != 0) {
  84                swap_free(entry);
  85                if (error == -EEXIST)
  86                        INC_CACHE_INFO(exist_race);
  87                return error;
  88        }
  89        if (!PageLocked(page))
  90                BUG();
  91        if (!PageSwapCache(page))
  92                BUG();
  93        INC_CACHE_INFO(add_total);
  94        return 0;
  95}
  96
  97/*
  98 * This must be called only on pages that have
  99 * been verified to be in the swap cache.
 100 */
 101void __delete_from_swap_cache(struct page *page)
 102{
 103        BUG_ON(!PageLocked(page));
 104        BUG_ON(!PageSwapCache(page));
 105        BUG_ON(PageWriteback(page));
 106        ClearPageDirty(page);
 107        __remove_from_page_cache(page);
 108        INC_CACHE_INFO(del_total);
 109}
 110
 111/**
 112 * add_to_swap - allocate swap space for a page
 113 * @page: page we want to move to swap
 114 *
 115 * Allocate swap space for the page and add the page to the
 116 * swap cache.  Caller needs to hold the page lock. 
 117 */
 118int add_to_swap(struct page * page)
 119{
 120        swp_entry_t entry;
 121        int pf_flags;
 122
 123        if (!PageLocked(page))
 124                BUG();
 125
 126        for (;;) {
 127                entry = get_swap_page();
 128                if (!entry.val)
 129                        return 0;
 130
 131                /* Radix-tree node allocations are performing
 132                 * GFP_ATOMIC allocations under PF_MEMALLOC.  
 133                 * They can completely exhaust the page allocator.  
 134                 *
 135                 * So PF_MEMALLOC is dropped here.  This causes the slab 
 136                 * allocations to fail earlier, so radix-tree nodes will 
 137                 * then be allocated from the mempool reserves.
 138                 *
 139                 * We're still using __GFP_HIGH for radix-tree node
 140                 * allocations, so some of the emergency pools are available,
 141                 * just not all of them.
 142                 */
 143
 144                pf_flags = current->flags;
 145                current->flags &= ~PF_MEMALLOC;
 146                current->flags |= PF_NOWARN;
 147                ClearPageUptodate(page);                /* why? */
 148
 149                /*
 150                 * Add it to the swap cache and mark it dirty
 151                 * (adding to the page cache will clear the dirty
 152                 * and uptodate bits, so we need to do it again)
 153                 */
 154                switch (add_to_swap_cache(page, entry)) {
 155                case 0:                         /* Success */
 156                        current->flags = pf_flags;
 157                        SetPageUptodate(page);
 158                        set_page_dirty(page);
 159                        swap_free(entry);
 160                        return 1;
 161                case -ENOMEM:                   /* radix-tree allocation */
 162                        current->flags = pf_flags;
 163                        swap_free(entry);
 164                        return 0;
 165                default:                        /* ENOENT: raced */
 166                        break;
 167                }
 168                /* Raced with "speculative" read_swap_cache_async */
 169                current->flags = pf_flags;
 170                swap_free(entry);
 171        }
 172}
 173
 174/*
 175 * This must be called only on pages that have
 176 * been verified to be in the swap cache and locked.
 177 * It will never put the page into the free list,
 178 * the caller has a reference on the page.
 179 */
 180void delete_from_swap_cache(struct page *page)
 181{
 182        swp_entry_t entry;
 183
 184        BUG_ON(!PageLocked(page));
 185        BUG_ON(PageWriteback(page));
 186        BUG_ON(page_has_buffers(page));
 187  
 188        entry.val = page->index;
 189
 190        write_lock(&swapper_space.page_lock);
 191        __delete_from_swap_cache(page);
 192        write_unlock(&swapper_space.page_lock);
 193
 194        swap_free(entry);
 195        page_cache_release(page);
 196}
 197
 198int move_to_swap_cache(struct page *page, swp_entry_t entry)
 199{
 200        struct address_space *mapping = page->mapping;
 201        void **pslot;
 202        int err;
 203
 204        if (!mapping)
 205                BUG();
 206
 207        if (!swap_duplicate(entry)) {
 208                INC_CACHE_INFO(noent_race);
 209                return -ENOENT;
 210        }
 211
 212        write_lock(&swapper_space.page_lock);
 213        write_lock(&mapping->page_lock);
 214
 215        err = radix_tree_reserve(&swapper_space.page_tree, entry.val, &pslot);
 216        if (!err) {
 217                /* Remove it from the page cache */
 218                __remove_from_page_cache(page);
 219
 220                /* Add it to the swap cache */
 221                *pslot = page;
 222                /*
 223                 * This code used to clear PG_uptodate, PG_error, PG_arch1,
 224                 * PG_referenced and PG_checked.  What _should_ it clear?
 225                 */
 226                ClearPageUptodate(page);
 227                ClearPageReferenced(page);
 228
 229                SetPageLocked(page);
 230                ClearPageDirty(page);
 231                ___add_to_page_cache(page, &swapper_space, entry.val);
 232        }
 233
 234        write_unlock(&mapping->page_lock);
 235        write_unlock(&swapper_space.page_lock);
 236
 237        if (!err) {
 238                INC_CACHE_INFO(add_total);
 239                return 0;
 240        }
 241
 242        swap_free(entry);
 243
 244        if (err == -EEXIST)
 245                INC_CACHE_INFO(exist_race);
 246
 247        return err;
 248}
 249
 250int move_from_swap_cache(struct page *page, unsigned long index,
 251                struct address_space *mapping)
 252{
 253        void **pslot;
 254        int err;
 255
 256        BUG_ON(!PageLocked(page));
 257        BUG_ON(PageWriteback(page));
 258        BUG_ON(page_has_buffers(page));
 259
 260        write_lock(&swapper_space.page_lock);
 261        write_lock(&mapping->page_lock);
 262
 263        err = radix_tree_reserve(&mapping->page_tree, index, &pslot);
 264        if (!err) {
 265                swp_entry_t entry;
 266
 267                entry.val = page->index;
 268                __delete_from_swap_cache(page);
 269
 270                *pslot = page;
 271
 272                /*
 273                 * This code used to clear PG_uptodate, PG_error, PG_referenced,
 274                 * PG_arch_1 and PG_checked.  It's not really clear why.
 275                 */
 276                ClearPageUptodate(page);
 277                ClearPageReferenced(page);
 278
 279                /*
 280                 * ___add_to_page_cache puts the page on ->clean_pages,
 281                 * but it's dirty.  If it's on ->clean_pages, it will basically
 282                 * never get written out.
 283                 */
 284                SetPageDirty(page);
 285                ___add_to_page_cache(page, mapping, index);
 286                /* fix that up */
 287                list_move(&page->list, &mapping->dirty_pages);
 288                write_unlock(&mapping->page_lock);
 289                write_unlock(&swapper_space.page_lock);
 290
 291                /* Do this outside ->page_lock */
 292                swap_free(entry);
 293                return 0;
 294        }
 295
 296        write_unlock(&mapping->page_lock);
 297        write_unlock(&swapper_space.page_lock);
 298        return err;
 299}
 300
 301
 302/* 
 303 * If we are the only user, then try to free up the swap cache. 
 304 * 
 305 * Its ok to check for PageSwapCache without the page lock
 306 * here because we are going to recheck again inside 
 307 * exclusive_swap_page() _with_ the lock. 
 308 *                                      - Marcelo
 309 */
 310static inline void free_swap_cache(struct page *page)
 311{
 312        if (PageSwapCache(page) && !TestSetPageLocked(page)) {
 313                remove_exclusive_swap_page(page);
 314                unlock_page(page);
 315        }
 316}
 317
 318/* 
 319 * Perform a free_page(), also freeing any swap cache associated with
 320 * this page if it is the last user of the page. Can not do a lock_page,
 321 * as we are holding the page_table_lock spinlock.
 322 */
 323void free_page_and_swap_cache(struct page *page)
 324{
 325        free_swap_cache(page);
 326        page_cache_release(page);
 327}
 328
 329/*
 330 * Passed an array of pages, drop them all from swapcache and then release
 331 * them.  They are removed from the LRU and freed if this is their last use.
 332 */
 333void free_pages_and_swap_cache(struct page **pages, int nr)
 334{
 335        const int chunk = 16;
 336        struct page **pagep = pages;
 337
 338        while (nr) {
 339                int todo = min(chunk, nr);
 340                int i;
 341
 342                for (i = 0; i < todo; i++)
 343                        free_swap_cache(pagep[i]);
 344                release_pages(pagep, todo);
 345                pagep += todo;
 346                nr -= todo;
 347        }
 348}
 349
 350/*
 351 * Lookup a swap entry in the swap cache. A found page will be returned
 352 * unlocked and with its refcount incremented - we rely on the kernel
 353 * lock getting page table operations atomic even if we drop the page
 354 * lock before returning.
 355 */
 356struct page * lookup_swap_cache(swp_entry_t entry)
 357{
 358        struct page *found;
 359
 360        found = find_get_page(&swapper_space, entry.val);
 361        /*
 362         * Unsafe to assert PageSwapCache and mapping on page found:
 363         * if SMP nothing prevents swapoff from deleting this page from
 364         * the swap cache at this moment.  find_lock_page would prevent
 365         * that, but no need to change: we _have_ got the right page.
 366         */
 367        INC_CACHE_INFO(find_total);
 368        if (found)
 369                INC_CACHE_INFO(find_success);
 370        return found;
 371}
 372
 373/* 
 374 * Locate a page of swap in physical memory, reserving swap cache space
 375 * and reading the disk if it is not already cached.
 376 * A failure return means that either the page allocation failed or that
 377 * the swap entry is no longer in use.
 378 */
 379struct page * read_swap_cache_async(swp_entry_t entry)
 380{
 381        struct page *found_page, *new_page = NULL;
 382        int err;
 383
 384        do {
 385                /*
 386                 * First check the swap cache.  Since this is normally
 387                 * called after lookup_swap_cache() failed, re-calling
 388                 * that would confuse statistics: use find_get_page()
 389                 * directly.
 390                 */
 391                found_page = find_get_page(&swapper_space, entry.val);
 392                if (found_page)
 393                        break;
 394
 395                /*
 396                 * Get a new page to read into from swap.
 397                 */
 398                if (!new_page) {
 399                        new_page = alloc_page(GFP_HIGHUSER);
 400                        if (!new_page)
 401                                break;          /* Out of memory */
 402                }
 403
 404                /*
 405                 * Associate the page with swap entry in the swap cache.
 406                 * May fail (-ENOENT) if swap entry has been freed since
 407                 * our caller observed it.  May fail (-EEXIST) if there
 408                 * is already a page associated with this entry in the
 409                 * swap cache: added by a racing read_swap_cache_async,
 410                 * or by try_to_swap_out (or shmem_writepage) re-using
 411                 * the just freed swap entry for an existing page.
 412                 * May fail (-ENOMEM) if radix-tree node allocation failed.
 413                 */
 414                err = add_to_swap_cache(new_page, entry);
 415                if (!err) {
 416                        /*
 417                         * Initiate read into locked page and return.
 418                         */
 419                        lru_cache_add(new_page);
 420                        swap_readpage(NULL, new_page);
 421                        return new_page;
 422                }
 423        } while (err != -ENOENT && err != -ENOMEM);
 424
 425        if (new_page)
 426                page_cache_release(new_page);
 427        return found_page;
 428}
 429
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.