linux-old/mm/swapfile.c
<<
>>
Prefs
   1/*
   2 *  linux/mm/swapfile.c
   3 *
   4 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   5 *  Swap reorganised 29.12.95, Stephen Tweedie
   6 */
   7
   8#include <linux/mm.h>
   9#include <linux/smp.h>
  10#include <linux/smp_lock.h>
  11#include <linux/sched.h>
  12#include <linux/head.h>
  13#include <linux/kernel.h>
  14#include <linux/kernel_stat.h>
  15#include <linux/errno.h>
  16#include <linux/string.h>
  17#include <linux/stat.h>
  18#include <linux/swap.h>
  19#include <linux/fs.h>
  20#include <linux/swapctl.h>
  21#include <linux/malloc.h>
  22#include <linux/blkdev.h> /* for blk_size */
  23#include <linux/vmalloc.h>
  24#include <linux/dcache.h>
  25
  26#include <asm/dma.h>
  27#include <asm/system.h> /* for cli()/sti() */
  28#include <asm/uaccess.h> /* for copy_to/from_user */
  29#include <asm/bitops.h>
  30#include <asm/pgtable.h>
  31
  32unsigned int nr_swapfiles = 0;
  33
  34static struct {
  35        int head;       /* head of priority-ordered swapfile list */
  36        int next;       /* swapfile to be used next */
  37} swap_list = {-1, -1};
  38
  39struct swap_info_struct swap_info[MAX_SWAPFILES];
  40
  41
  42static inline int scan_swap_map(struct swap_info_struct *si)
  43{
  44        unsigned long offset;
  45        /* 
  46         * We try to cluster swap pages by allocating them
  47         * sequentially in swap.  Once we've allocated
  48         * SWAP_CLUSTER_MAX pages this way, however, we resort to
  49         * first-free allocation, starting a new cluster.  This
  50         * prevents us from scattering swap pages all over the entire
  51         * swap partition, so that we reduce overall disk seek times
  52         * between swap pages.  -- sct */
  53        if (si->cluster_nr) {
  54                while (si->cluster_next <= si->highest_bit) {
  55                        offset = si->cluster_next++;
  56                        if (si->swap_map[offset])
  57                                continue;
  58                        if (test_bit(offset, si->swap_lockmap))
  59                                continue;
  60                        si->cluster_nr--;
  61                        goto got_page;
  62                }
  63        }
  64        si->cluster_nr = SWAP_CLUSTER_MAX;
  65        for (offset = si->lowest_bit; offset <= si->highest_bit ; offset++) {
  66                if (si->swap_map[offset])
  67                        continue;
  68                if (test_bit(offset, si->swap_lockmap))
  69                        continue;
  70                si->lowest_bit = offset;
  71got_page:
  72                si->swap_map[offset] = 1;
  73                nr_swap_pages--;
  74                if (offset == si->highest_bit)
  75                        si->highest_bit--;
  76                si->cluster_next = offset;
  77                return offset;
  78        }
  79        return 0;
  80}
  81
  82unsigned long get_swap_page(void)
  83{
  84        struct swap_info_struct * p;
  85        unsigned long offset, entry;
  86        int type, wrapped = 0;
  87
  88        type = swap_list.next;
  89        if (type < 0)
  90                return 0;
  91        if (nr_swap_pages == 0)
  92                return 0;
  93
  94        while (1) {
  95                p = &swap_info[type];
  96                if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
  97                        offset = scan_swap_map(p);
  98                        if (offset) {
  99                                entry = SWP_ENTRY(type,offset);
 100                                type = swap_info[type].next;
 101                                if (type < 0 ||
 102                                        p->prio != swap_info[type].prio) 
 103                                {
 104                                                swap_list.next = swap_list.head;
 105                                }
 106                                else
 107                                {
 108                                        swap_list.next = type;
 109                                }
 110                                return entry;
 111                        }
 112                }
 113                type = p->next;
 114                if (!wrapped) {
 115                        if (type < 0 || p->prio != swap_info[type].prio) {
 116                                type = swap_list.head;
 117                                wrapped = 1;
 118                        }
 119                } else if (type < 0) {
 120                        return 0;       /* out of swap space */
 121                }
 122        }
 123}
 124
 125void swap_free(unsigned long entry)
 126{
 127        struct swap_info_struct * p;
 128        unsigned long offset, type;
 129
 130        if (!entry)
 131                return;
 132        type = SWP_TYPE(entry);
 133        if (type & SHM_SWP_TYPE)
 134                return;
 135        if (type >= nr_swapfiles) {
 136                printk("Trying to free nonexistent swap-page\n");
 137                return;
 138        }
 139        p = & swap_info[type];
 140        offset = SWP_OFFSET(entry);
 141        if (offset >= p->max) {
 142                printk("swap_free: weirdness\n");
 143                return;
 144        }
 145        if (!(p->flags & SWP_USED)) {
 146                printk("Trying to free swap from unused swap-device\n");
 147                return;
 148        }
 149        if (offset < p->lowest_bit)
 150                p->lowest_bit = offset;
 151        if (offset > p->highest_bit)
 152                p->highest_bit = offset;
 153        if (!p->swap_map[offset])
 154                printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
 155        else
 156                if (!--p->swap_map[offset])
 157                        nr_swap_pages++;
 158        if (p->prio > swap_info[swap_list.next].prio) {
 159            swap_list.next = swap_list.head;
 160        }
 161}
 162
 163/*
 164 * Trying to stop swapping from a file is fraught with races, so
 165 * we repeat quite a bit here when we have to pause. swapoff()
 166 * isn't exactly timing-critical, so who cares (but this is /really/
 167 * inefficient, ugh).
 168 *
 169 * We return 1 after having slept, which makes the process start over
 170 * from the beginning for this process..
 171 */
 172static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
 173        pte_t *dir, unsigned long entry, unsigned long page)
 174{
 175        pte_t pte = *dir;
 176
 177        if (pte_none(pte))
 178                return 0;
 179        if (pte_present(pte)) {
 180                struct page *pg;
 181                unsigned long page_nr = MAP_NR(pte_page(pte));
 182                unsigned long pg_swap_entry;
 183
 184                if (page_nr >= max_mapnr)
 185                        return 0;
 186                pg = mem_map + page_nr;
 187                if (!(pg_swap_entry = in_swap_cache(pg)))
 188                        return 0;
 189                if (SWP_TYPE(pg_swap_entry) != SWP_TYPE(entry))
 190                        return 0;
 191                delete_from_swap_cache(pg);
 192                set_pte(dir, pte_mkdirty(pte));
 193                if (pg_swap_entry != entry)
 194                        return 0;
 195                free_page(page);
 196                return 1;
 197        }
 198        if (pte_val(pte) != entry)
 199                return 0;
 200        set_pte(dir, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
 201        flush_tlb_page(vma, address);
 202        ++vma->vm_mm->rss;
 203        swap_free(pte_val(pte));
 204        return 1;
 205}
 206
 207static inline int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
 208        unsigned long address, unsigned long size, unsigned long offset,
 209        unsigned long entry, unsigned long page)
 210{
 211        pte_t * pte;
 212        unsigned long end;
 213
 214        if (pmd_none(*dir))
 215                return 0;
 216        if (pmd_bad(*dir)) {
 217                printk("unuse_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
 218                pmd_clear(dir);
 219                return 0;
 220        }
 221        pte = pte_offset(dir, address);
 222        offset += address & PMD_MASK;
 223        address &= ~PMD_MASK;
 224        end = address + size;
 225        if (end > PMD_SIZE)
 226                end = PMD_SIZE;
 227        do {
 228                if (unuse_pte(vma, offset+address-vma->vm_start, pte, entry, 
 229                                page))
 230                        return 1;
 231                address += PAGE_SIZE;
 232                pte++;
 233        } while (address < end);
 234        return 0;
 235}
 236
 237static inline int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
 238        unsigned long address, unsigned long size,
 239        unsigned long entry, unsigned long page)
 240{
 241        pmd_t * pmd;
 242        unsigned long offset, end;
 243
 244        if (pgd_none(*dir))
 245                return 0;
 246        if (pgd_bad(*dir)) {
 247                printk("unuse_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
 248                pgd_clear(dir);
 249                return 0;
 250        }
 251        pmd = pmd_offset(dir, address);
 252        offset = address & PGDIR_MASK;
 253        address &= ~PGDIR_MASK;
 254        end = address + size;
 255        if (end > PGDIR_SIZE)
 256                end = PGDIR_SIZE;
 257        do {
 258                if (unuse_pmd(vma, pmd, address, end - address, offset, entry,
 259                                 page))
 260                        return 1;
 261                address = (address + PMD_SIZE) & PMD_MASK;
 262                pmd++;
 263        } while (address < end);
 264        return 0;
 265}
 266
 267static int unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
 268                        unsigned long entry, unsigned long page)
 269{
 270        unsigned long start = vma->vm_start, end = vma->vm_end;
 271
 272        while (start < end) {
 273                if (unuse_pgd(vma, pgdir, start, end - start, entry, page))
 274                        return 1;
 275                start = (start + PGDIR_SIZE) & PGDIR_MASK;
 276                pgdir++;
 277        }
 278        return 0;
 279}
 280
 281static int unuse_process(struct mm_struct * mm, unsigned long entry, 
 282                        unsigned long page)
 283{
 284        struct vm_area_struct* vma;
 285
 286        /*
 287         * Go through process' page directory.
 288         */
 289        if (!mm || mm == &init_mm)
 290                return 0;
 291        for (vma = mm->mmap; vma; vma = vma->vm_next) {
 292                pgd_t * pgd = pgd_offset(mm, vma->vm_start);
 293                if (unuse_vma(vma, pgd, entry, page))
 294                        return 1;
 295        }
 296        return 0;
 297}
 298
 299static unsigned long find_swap_entry(int type)
 300{
 301        struct swap_info_struct * p = &swap_info[type];
 302        int i;
 303
 304        for (i = 1 ; i < p->max ; i++) {
 305                if (p->swap_map[i] > 0 && p->swap_map[i] != 0x80)
 306                        return SWP_ENTRY(type, i);
 307        }
 308        return 0;
 309}
 310
 311/*
 312 * We completely avoid races by reading each swap page in advance,
 313 * and then search for the process using it.  All the necessary
 314 * page table adjustments can then be made atomically.
 315 */
 316static int try_to_unuse(unsigned int type)
 317{
 318        unsigned long page = 0;
 319        struct task_struct *p;
 320        unsigned long entry;
 321
 322        /*
 323         * Find all swap entries in use ...
 324         */
 325        while ((entry = find_swap_entry(type)) != 0) {
 326                if (!page) {
 327                        page = __get_free_page(GFP_KERNEL);
 328                        if (!page)
 329                                return -ENOMEM;
 330                }
 331
 332                /*
 333                 * Read in the page, and then free the swap page.
 334                 */
 335                read_swap_page(entry, (char *) page);
 336
 337                read_lock(&tasklist_lock);
 338                for_each_task(p) {
 339                        if (unuse_process(p->mm, entry, page)) {
 340                                page = 0;
 341                                goto unlock;
 342                        }
 343                }
 344        unlock:
 345                read_unlock(&tasklist_lock);
 346                if (page) {
 347                        printk("try_to_unuse: didn't find entry %8lx\n",
 348                                entry);
 349                        swap_free(entry);
 350                }
 351        }
 352
 353        if (page)
 354                free_page(page);
 355        return 0;
 356}
 357
 358asmlinkage int sys_swapoff(const char * specialfile)
 359{
 360        struct swap_info_struct * p = NULL;
 361        struct dentry * dentry;
 362        struct file filp;
 363        int i, type, prev;
 364        int err = -EPERM;
 365
 366        lock_kernel();
 367        if (!suser())
 368                goto out;
 369
 370        dentry = namei(specialfile);
 371        err = PTR_ERR(dentry);
 372        if (IS_ERR(dentry))
 373                goto out;
 374
 375        prev = -1;
 376        for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
 377                p = swap_info + type;
 378                if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
 379                        if (p->swap_file) {
 380                                if (p->swap_file == dentry)
 381                                  break;
 382                        } else {
 383                                if (S_ISBLK(dentry->d_inode->i_mode)
 384                                    && (p->swap_device == dentry->d_inode->i_rdev))
 385                                  break;
 386                        }
 387                }
 388                prev = type;
 389        }
 390        err = -EINVAL;
 391        if (type < 0){
 392                dput(dentry);
 393                goto out;
 394        }
 395        if (prev < 0) {
 396                swap_list.head = p->next;
 397        } else {
 398                swap_info[prev].next = p->next;
 399        }
 400        if (type == swap_list.next) {
 401                /* just pick something that's safe... */
 402                swap_list.next = swap_list.head;
 403        }
 404        p->flags = SWP_USED;
 405        err = try_to_unuse(type);
 406        if (err) {
 407                dput(dentry);
 408                /* re-insert swap space back into swap_list */
 409                for (prev = -1, i = swap_list.head; i >= 0; prev = i, i = swap_info[i].next)
 410                        if (p->prio >= swap_info[i].prio)
 411                                break;
 412                p->next = i;
 413                if (prev < 0)
 414                        swap_list.head = swap_list.next = p - swap_info;
 415                else
 416                        swap_info[prev].next = p - swap_info;
 417                p->flags = SWP_WRITEOK;
 418                goto out;
 419        }
 420        if(p->swap_device){
 421                memset(&filp, 0, sizeof(filp));         
 422                filp.f_dentry = dentry;
 423                filp.f_mode = 3; /* read write */
 424                /* open it again to get fops */
 425                if( !blkdev_open(dentry->d_inode, &filp) &&
 426                   filp.f_op && filp.f_op->release){
 427                        filp.f_op->release(dentry->d_inode,&filp);
 428                        filp.f_op->release(dentry->d_inode,&filp);
 429                }
 430        }
 431        dput(dentry);
 432
 433        nr_swap_pages -= p->pages;
 434        dput(p->swap_file);
 435        p->swap_file = NULL;
 436        p->swap_device = 0;
 437        vfree(p->swap_map);
 438        p->swap_map = NULL;
 439        free_page((long) p->swap_lockmap);
 440        p->swap_lockmap = NULL;
 441        p->flags = 0;
 442        err = 0;
 443out:
 444        unlock_kernel();
 445        return err;
 446}
 447
 448int get_swaparea_info(char *buf)
 449{
 450        char * page = (char *) __get_free_page(GFP_KERNEL);
 451        struct swap_info_struct *ptr = swap_info;
 452        int i, j, len = 0, usedswap;
 453
 454        if (!page)
 455                return -ENOMEM;
 456
 457        len += sprintf(buf, "Filename\t\t\tType\t\tSize\tUsed\tPriority\n");
 458        for (i = 0 ; i < nr_swapfiles ; i++, ptr++) {
 459                if (ptr->flags & SWP_USED) {
 460                        char * path = d_path(ptr->swap_file, page, PAGE_SIZE);
 461
 462                        len += sprintf(buf + len, "%-31s ", path);
 463
 464                        if (!ptr->swap_device)
 465                                len += sprintf(buf + len, "file\t\t");
 466                        else
 467                                len += sprintf(buf + len, "partition\t");
 468
 469                        usedswap = 0;
 470                        for (j = 0; j < ptr->max; ++j)
 471                                switch (ptr->swap_map[j]) {
 472                                        case 128:
 473                                        case 0:
 474                                                continue;
 475                                        default:
 476                                                usedswap++;
 477                                }
 478                        len += sprintf(buf + len, "%d\t%d\t%d\n", ptr->pages << (PAGE_SHIFT - 10), 
 479                                usedswap << (PAGE_SHIFT - 10), ptr->prio);
 480                }
 481        }
 482        free_page((unsigned long) page);
 483        return len;
 484}
 485
 486/*
 487 * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
 488 *
 489 * The swapon system call
 490 */
 491asmlinkage int sys_swapon(const char * specialfile, int swap_flags)
 492{
 493        struct swap_info_struct * p;
 494        struct dentry * swap_dentry;
 495        unsigned int type;
 496        int i, j, prev;
 497        int error = -EPERM;
 498        struct file filp;
 499        static int least_priority = 0;
 500
 501        lock_kernel();
 502        if (!suser())
 503                goto out;
 504        memset(&filp, 0, sizeof(filp));
 505        p = swap_info;
 506        for (type = 0 ; type < nr_swapfiles ; type++,p++)
 507                if (!(p->flags & SWP_USED))
 508                        break;
 509        if (type >= MAX_SWAPFILES)
 510                goto out;
 511        if (type >= nr_swapfiles)
 512                nr_swapfiles = type+1;
 513        p->flags = SWP_USED;
 514        p->swap_file = NULL;
 515        p->swap_device = 0;
 516        p->swap_map = NULL;
 517        p->swap_lockmap = NULL;
 518        p->lowest_bit = 0;
 519        p->highest_bit = 0;
 520        p->cluster_nr = 0;
 521        p->max = 1;
 522        p->next = -1;
 523        if (swap_flags & SWAP_FLAG_PREFER) {
 524                p->prio =
 525                  (swap_flags & SWAP_FLAG_PRIO_MASK)>>SWAP_FLAG_PRIO_SHIFT;
 526        } else {
 527                p->prio = --least_priority;
 528        }
 529        swap_dentry = namei(specialfile);
 530        error = PTR_ERR(swap_dentry);
 531        if (IS_ERR(swap_dentry))
 532                goto bad_swap_2;
 533
 534        p->swap_file = swap_dentry;
 535        error = -EINVAL;
 536
 537        if (S_ISBLK(swap_dentry->d_inode->i_mode)) {
 538                p->swap_device = swap_dentry->d_inode->i_rdev;
 539                set_blocksize(p->swap_device, PAGE_SIZE);
 540                
 541                filp.f_dentry = swap_dentry;
 542                filp.f_mode = 3; /* read write */
 543                error = blkdev_open(swap_dentry->d_inode, &filp);
 544                if (error)
 545                        goto bad_swap_2;
 546                error = -ENODEV;
 547                if (!p->swap_device ||
 548                    (blk_size[MAJOR(p->swap_device)] &&
 549                     !blk_size[MAJOR(p->swap_device)][MINOR(p->swap_device)]))
 550                        goto bad_swap;
 551                error = -EBUSY;
 552                for (i = 0 ; i < nr_swapfiles ; i++) {
 553                        if (i == type)
 554                                continue;
 555                        if (p->swap_device == swap_info[i].swap_device)
 556                                goto bad_swap;
 557                }
 558        } else if (!S_ISREG(swap_dentry->d_inode->i_mode))
 559                goto bad_swap;
 560        p->swap_lockmap = (unsigned char *) get_free_page(GFP_USER);
 561        if (!p->swap_lockmap) {
 562                printk("Unable to start swapping: out of memory :-)\n");
 563                error = -ENOMEM;
 564                goto bad_swap;
 565        }
 566        read_swap_page(SWP_ENTRY(type,0), (char *) p->swap_lockmap);
 567        if (memcmp("SWAP-SPACE",p->swap_lockmap+PAGE_SIZE-10,10)) {
 568                printk("Unable to find swap-space signature\n");
 569                error = -EINVAL;
 570                goto bad_swap;
 571        }
 572        memset(p->swap_lockmap+PAGE_SIZE-10,0,10);
 573        j = 0;
 574        p->lowest_bit = 0;
 575        p->highest_bit = 0;
 576        for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
 577                if (test_bit(i,p->swap_lockmap)) {
 578                        if (!p->lowest_bit)
 579                                p->lowest_bit = i;
 580                        p->highest_bit = i;
 581                        p->max = i+1;
 582                        j++;
 583                }
 584        }
 585        if (!j) {
 586                printk("Empty swap-file\n");
 587                error = -EINVAL;
 588                goto bad_swap;
 589        }
 590        p->swap_map = (unsigned char *) vmalloc(p->max);
 591        if (!p->swap_map) {
 592                error = -ENOMEM;
 593                goto bad_swap;
 594        }
 595        for (i = 1 ; i < p->max ; i++) {
 596                if (test_bit(i,p->swap_lockmap))
 597                        p->swap_map[i] = 0;
 598                else
 599                        p->swap_map[i] = 0x80;
 600        }
 601        p->swap_map[0] = 0x80;
 602        memset(p->swap_lockmap,0,PAGE_SIZE);
 603        p->flags = SWP_WRITEOK;
 604        p->pages = j;
 605        nr_swap_pages += j;
 606        printk("Adding Swap: %dk swap-space (priority %d)\n",
 607               j<<(PAGE_SHIFT-10), p->prio);
 608
 609        /* insert swap space into swap_list: */
 610        prev = -1;
 611        for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
 612                if (p->prio >= swap_info[i].prio) {
 613                        break;
 614                }
 615                prev = i;
 616        }
 617        p->next = i;
 618        if (prev < 0) {
 619                swap_list.head = swap_list.next = p - swap_info;
 620        } else {
 621                swap_info[prev].next = p - swap_info;
 622        }
 623        error = 0;
 624        goto out;
 625bad_swap:
 626        if(filp.f_op && filp.f_op->release)
 627                filp.f_op->release(filp.f_dentry->d_inode,&filp);
 628bad_swap_2:
 629        free_page((long) p->swap_lockmap);
 630        vfree(p->swap_map);
 631        dput(p->swap_file);
 632        p->swap_device = 0;
 633        p->swap_file = NULL;
 634        p->swap_map = NULL;
 635        p->swap_lockmap = NULL;
 636        p->flags = 0;
 637out:
 638        unlock_kernel();
 639        return error;
 640}
 641
 642void si_swapinfo(struct sysinfo *val)
 643{
 644        unsigned int i, j;
 645
 646        val->freeswap = val->totalswap = 0;
 647        for (i = 0; i < nr_swapfiles; i++) {
 648                if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK)
 649                        continue;
 650                for (j = 0; j < swap_info[i].max; ++j)
 651                        switch (swap_info[i].swap_map[j]) {
 652                                case 128:
 653                                        continue;
 654                                case 0:
 655                                        ++val->freeswap;
 656                                default:
 657                                        ++val->totalswap;
 658                        }
 659        }
 660        val->freeswap <<= PAGE_SHIFT;
 661        val->totalswap <<= PAGE_SHIFT;
 662        return;
 663}
 664
 665
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.