linux-old/mm/page_io.c
<<
>>
Prefs
   1/*
   2 *  linux/mm/page_io.c
   3 *
   4 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   5 *
   6 *  Swap reorganised 29.12.95, 
   7 *  Asynchronous swapping added 30.12.95. Stephen Tweedie
   8 *  Removed race in async swapping. 14.4.1996. Bruno Haible
   9 *  Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie
  10 *  Always use brw_page, life becomes simpler. 12 May 1998 Eric Biederman
  11 */
  12
  13#include <linux/mm.h>
  14#include <linux/kernel_stat.h>
  15#include <linux/swap.h>
  16#include <linux/locks.h>
  17#include <linux/swapctl.h>
  18
  19#include <asm/pgtable.h>
  20
  21static struct wait_queue * lock_queue = NULL;
  22
  23/*
  24 * Reads or writes a swap page.
  25 * wait=1: start I/O and wait for completion. wait=0: start asynchronous I/O.
  26 *
  27 * Important prevention of race condition: the caller *must* atomically 
  28 * create a unique swap cache entry for this swap page before calling
  29 * rw_swap_page, and must lock that page.  By ensuring that there is a
  30 * single page of memory reserved for the swap entry, the normal VM page
  31 * lock on that page also doubles as a lock on swap entries.  Having only
  32 * one lock to deal with per swap entry (rather than locking swap and memory
  33 * independently) also makes it easier to make certain swapping operations
  34 * atomic, which is particularly important when we are trying to ensure 
  35 * that shared pages stay shared while being swapped.
  36 */
  37
  38static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, int wait)
  39{
  40        unsigned long type, offset;
  41        struct swap_info_struct * p;
  42        int zones[PAGE_SIZE/512];
  43        int zones_used;
  44        kdev_t dev = 0;
  45        int block_size;
  46
  47#ifdef DEBUG_SWAP
  48        printk ("DebugVM: %s_swap_page entry %08lx, page %p (count %d), %s\n",
  49                (rw == READ) ? "read" : "write", 
  50                entry, (char *) page_address(page), atomic_read(&page->count),
  51                wait ? "wait" : "nowait");
  52#endif
  53
  54        type = SWP_TYPE(entry);
  55        if (type >= nr_swapfiles) {
  56                printk("Internal error: bad swap-device\n");
  57                return;
  58        }
  59
  60        /* Don't allow too many pending pages in flight.. */
  61        if (atomic_read(&nr_async_pages) > pager_daemon.swap_cluster)
  62                wait = 1;
  63
  64        p = &swap_info[type];
  65        offset = SWP_OFFSET(entry);
  66        if (offset >= p->max) {
  67                printk("rw_swap_page: weirdness\n");
  68                return;
  69        }
  70        if (p->swap_map && !p->swap_map[offset]) {
  71                printk(KERN_ERR "rw_swap_page: "
  72                        "Trying to %s unallocated swap (%08lx)\n", 
  73                        (rw == READ) ? "read" : "write", entry);
  74                return;
  75        }
  76        if (!(p->flags & SWP_USED)) {
  77                printk(KERN_ERR "rw_swap_page: "
  78                        "Trying to swap to unused swap-device\n");
  79                return;
  80        }
  81
  82        if (!PageLocked(page)) {
  83                printk(KERN_ERR "VM: swap page is unlocked\n");
  84                return;
  85        }
  86
  87        if (PageSwapCache(page)) {
  88                /* Make sure we are the only process doing I/O with this swap page. */
  89                if (test_and_set_bit(offset, p->swap_lockmap))
  90                {
  91                        struct wait_queue __wait;
  92                        
  93                        __wait.task = current;
  94                        add_wait_queue(&lock_queue, &__wait);
  95                        for (;;) {
  96                                current->state = TASK_UNINTERRUPTIBLE;
  97                                mb();
  98                                if (!test_and_set_bit(offset, p->swap_lockmap))
  99                                        break;
 100                                run_task_queue(&tq_disk);
 101                                schedule();
 102                        }
 103                        current->state = TASK_RUNNING;
 104                        remove_wait_queue(&lock_queue, &__wait);
 105                }
 106
 107                /* 
 108                 * Make sure that we have a swap cache association for this
 109                 * page.  We need this to find which swap page to unlock once
 110                 * the swap IO has completed to the physical page.  If the page
 111                 * is not already in the cache, just overload the offset entry
 112                 * as if it were: we are not allowed to manipulate the inode
 113                 * hashing for locked pages.
 114                 */
 115                if (page->offset != entry) {
 116                        printk ("swap entry mismatch");
 117                        return;
 118                }
 119        }
 120        if (rw == READ) {
 121                clear_bit(PG_uptodate, &page->flags);
 122                kstat.pswpin++;
 123        } else
 124                kstat.pswpout++;
 125
 126        atomic_inc(&page->count);
 127        if (p->swap_device) {
 128                zones[0] = offset;
 129                zones_used = 1;
 130                dev = p->swap_device;
 131                block_size = PAGE_SIZE;
 132        } else if (p->swap_file) {
 133                struct inode *swapf = p->swap_file->d_inode;
 134                int i;
 135                if (swapf->i_op->bmap == NULL
 136                        && swapf->i_op->smap != NULL){
 137                        /*
 138                                With MS-DOS, we use msdos_smap which returns
 139                                a sector number (not a cluster or block number).
 140                                It is a patch to enable the UMSDOS project.
 141                                Other people are working on better solution.
 142
 143                                It sounds like ll_rw_swap_file defined
 144                                its operation size (sector size) based on
 145                                PAGE_SIZE and the number of blocks to read.
 146                                So using bmap or smap should work even if
 147                                smap will require more blocks.
 148                        */
 149                        int j;
 150                        unsigned int block = offset << 3;
 151
 152                        for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){
 153                                if (!(zones[i] = swapf->i_op->smap(swapf,block++))) {
 154                                        printk("rw_swap_page: bad swap file\n");
 155                                        return;
 156                                }
 157                        }
 158                        block_size = 512;
 159                }else{
 160                        int j;
 161                        unsigned int block = offset
 162                                << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);
 163
 164                        block_size = swapf->i_sb->s_blocksize;
 165                        for (i=0, j=0; j< PAGE_SIZE ; i++, j += block_size)
 166                                if (!(zones[i] = bmap(swapf,block++))) {
 167                                        printk("rw_swap_page: bad swap file\n");
 168                                        return;
 169                                }
 170                        zones_used = i;
 171                        dev = swapf->i_dev;
 172                }
 173        } else {
 174                printk(KERN_ERR "rw_swap_page: no swap file or device\n");
 175                /* Do some cleaning up so if this ever happens we can hopefully
 176                 * trigger controlled shutdown.
 177                 */
 178                if (PageSwapCache(page)) {
 179                        if (!test_and_clear_bit(offset,p->swap_lockmap))
 180                                printk("swap_after_unlock_page: lock already cleared\n");
 181                        wake_up(&lock_queue);
 182                }
 183                atomic_dec(&page->count);
 184                return;
 185        }
 186        if (!wait) {
 187                set_bit(PG_decr_after, &page->flags);
 188                atomic_inc(&nr_async_pages);
 189        }
 190        if (PageSwapCache(page)) {
 191                /* only lock/unlock swap cache pages! */
 192                set_bit(PG_swap_unlock_after, &page->flags);
 193        }
 194        set_bit(PG_free_after, &page->flags);
 195
 196        /* block_size == PAGE_SIZE/zones_used */
 197        brw_page(rw, page, dev, zones, block_size, 0);
 198 
 199        /* Note! For consistency we do all of the logic,
 200         * decrementing the page count, and unlocking the page in the
 201         * swap lock map - in the IO completion handler.
 202         */
 203        if (!wait) 
 204                return;
 205        wait_on_page(page);
 206        /* This shouldn't happen, but check to be sure. */
 207        if (atomic_read(&page->count) == 0)
 208                printk(KERN_ERR "rw_swap_page: page unused while waiting!\n");
 209
 210#ifdef DEBUG_SWAP
 211        printk ("DebugVM: %s_swap_page finished on page %p (count %d)\n",
 212                (rw == READ) ? "read" : "write", 
 213                (char *) page_adddress(page), 
 214                atomic_read(&page->count));
 215#endif
 216}
 217
 218/* Note: We could remove this totally asynchronous function,
 219 * and improve swap performance, and remove the need for the swap lock map,
 220 * by not removing pages from the swap cache until after I/O has been
 221 * processed and letting remove_from_page_cache decrement the swap count
 222 * just before it removes the page from the page cache.
 223 */
 224/* This is run when asynchronous page I/O has completed. */
 225void swap_after_unlock_page (unsigned long entry)
 226{
 227        unsigned long type, offset;
 228        struct swap_info_struct * p;
 229
 230        type = SWP_TYPE(entry);
 231        if (type >= nr_swapfiles) {
 232                printk("swap_after_unlock_page: bad swap-device\n");
 233                return;
 234        }
 235        p = &swap_info[type];
 236        offset = SWP_OFFSET(entry);
 237        if (offset >= p->max) {
 238                printk("swap_after_unlock_page: weirdness\n");
 239                return;
 240        }
 241        if (!test_and_clear_bit(offset,p->swap_lockmap))
 242                printk("swap_after_unlock_page: lock already cleared\n");
 243        wake_up(&lock_queue);
 244}
 245
 246/* A simple wrapper so the base function doesn't need to enforce
 247 * that all swap pages go through the swap cache!
 248 */
 249void rw_swap_page(int rw, unsigned long entry, char *buf, int wait)
 250{
 251        struct page *page = mem_map + MAP_NR(buf);
 252
 253        if (page->inode && page->inode != &swapper_inode)
 254                panic ("Tried to swap a non-swapper page");
 255
 256        /*
 257         * Make sure that we have a swap cache association for this
 258         * page.  We need this to find which swap page to unlock once
 259         * the swap IO has completed to the physical page.  If the page
 260         * is not already in the cache, just overload the offset entry
 261         * as if it were: we are not allowed to manipulate the inode
 262         * hashing for locked pages.
 263         */
 264        if (!PageSwapCache(page)) {
 265                printk("VM: swap page is not in swap cache\n");
 266                return;
 267        }
 268        if (page->offset != entry) {
 269                printk ("swap entry mismatch");
 270                return;
 271        }
 272        rw_swap_page_base(rw, entry, page, wait);
 273}
 274
 275/*
 276 * Setting up a new swap file needs a simple wrapper just to read the 
 277 * swap signature.  SysV shared memory also needs a simple wrapper.
 278 */
 279void rw_swap_page_nocache(int rw, unsigned long entry, char *buffer)
 280{
 281        struct page *page;
 282        
 283        page = mem_map + MAP_NR((unsigned long) buffer);
 284        wait_on_page(page);
 285        set_bit(PG_locked, &page->flags);
 286        if (test_and_set_bit(PG_swap_cache, &page->flags)) {
 287                printk ("VM: read_swap_page: page already in swap cache!\n");
 288                return;
 289        }
 290        if (page->inode) {
 291                printk ("VM: read_swap_page: page already in page cache!\n");
 292                return;
 293        }
 294        page->inode = &swapper_inode;
 295        page->offset = entry;
 296        atomic_inc(&page->count);       /* Protect from shrink_mmap() */
 297        rw_swap_page(rw, entry, buffer, 1);
 298        atomic_dec(&page->count);
 299        page->inode = 0;
 300        clear_bit(PG_swap_cache, &page->flags);
 301}
 302
/*
 * shmfs needs a version that doesn't put the page in the page cache!
 * The swap lock map insists that pages be in the page cache, so this
 * path cannot use it.  Later, once the need for the lock map has been
 * removed, we can reduce the number of functions exported.
 */
 309void rw_swap_page_nolock(int rw, unsigned long entry, char *buffer, int wait)
 310{
 311        struct page *page = mem_map + MAP_NR((unsigned long) buffer);
 312        
 313        if (!PageLocked(page)) {
 314                printk("VM: rw_swap_page_nolock: page not locked!\n");
 315                return;
 316        }
 317        if (PageSwapCache(page)) {
 318                printk ("VM: rw_swap_page_nolock: page in swap cache!\n");
 319                return;
 320        }
 321        rw_swap_page_base(rw, entry, page, wait);
 322}
 323
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.