linux-bk/kernel/power/pmdisk.c History
<<
>>
Prefs
/*
 * kernel/power/pmdisk.c - Suspend-to-disk implementation
 *
 * This STD implementation is initially derived from swsusp (suspend-to-swap).
 * The original copyright on that was:
 *
 * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
 * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
 *
 * The additional parts are:
 *
 * Copyright (C) 2003 Patrick Mochel
 * Copyright (C) 2003 Open Source Development Lab
 *
 * This file is released under the GPLv2.
 *
 * For more information, please see the text files in Documentation/power/
 *
 */
  20
  21#undef DEBUG
  22
  23#include <linux/mm.h>
  24#include <linux/bio.h>
  25#include <linux/suspend.h>
  26#include <linux/version.h>
  27#include <linux/reboot.h>
  28#include <linux/device.h>
  29#include <linux/swapops.h>
  30#include <linux/bootmem.h>
  31#include <linux/utsname.h>
  32
  33#include <asm/mmu_context.h>
  34
  35#include "power.h"
  36
  37
  38extern asmlinkage int pmdisk_arch_suspend(int resume);
  39
  40#define __ADDRESS(x)  ((unsigned long) phys_to_virt(x))
  41#define ADDRESS(x) __ADDRESS((x) << PAGE_SHIFT)
  42#define ADDRESS2(x) __ADDRESS(__pa(x))          /* Needed for x86-64 where some pages are in memory twice */
  43
  44/* References to section boundaries */
  45extern char __nosave_begin, __nosave_end;
  46
  47extern int is_head_of_free_region(struct page *);
  48
  49/* Variables to be preserved over suspend */
  50static int pagedir_order_check;
  51static int nr_copy_pages_check;
  52
  53/* For resume= kernel option */
  54static char resume_file[256] = CONFIG_PM_DISK_PARTITION;
  55
  56static dev_t resume_device;
  57/* Local variables that should not be affected by save */
  58unsigned int pmdisk_pages __nosavedata = 0;
  59
  60/* Suspend pagedir is allocated before final copy, therefore it
  61   must be freed after resume 
  62
  63   Warning: this is evil. There are actually two pagedirs at time of
  64   resume. One is "pagedir_save", which is empty frame allocated at
  65   time of suspend, that must be freed. Second is "pagedir_nosave", 
  66   allocated at time of resume, that travels through memory not to
  67   collide with anything.
  68 */
  69suspend_pagedir_t *pm_pagedir_nosave __nosavedata = NULL;
  70static suspend_pagedir_t *pagedir_save;
  71static int pagedir_order __nosavedata = 0;
  72
  73
  74struct pmdisk_info {
  75        struct new_utsname      uts;
  76        u32                     version_code;
  77        unsigned long           num_physpages;
  78        int                     cpus;
  79        unsigned long           image_pages;
  80        unsigned long           pagedir_pages;
  81        swp_entry_t             pagedir[768];
  82} __attribute__((aligned(PAGE_SIZE))) pmdisk_info;
  83
  84
  85
  86#define PMDISK_SIG      "pmdisk-swap1"
  87
  88struct pmdisk_header {
  89        char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)];
  90        swp_entry_t pmdisk_info;
  91        char    orig_sig[10];
  92        char    sig[10];
  93} __attribute__((packed, aligned(PAGE_SIZE))) pmdisk_header;
  94
  95/*
  96 * XXX: We try to keep some more pages free so that I/O operations succeed
  97 * without paging. Might this be more?
  98 */
  99#define PAGES_FOR_IO    512
 100
 101
 102/*
 103 * Saving part...
 104 */
 105
 106
 107/* We memorize in swapfile_used what swap devices are used for suspension */
 108#define SWAPFILE_UNUSED    0
 109#define SWAPFILE_SUSPEND   1    /* This is the suspending device */
 110#define SWAPFILE_IGNORED   2    /* Those are other swap devices ignored for suspension */
 111
 112static unsigned short swapfile_used[MAX_SWAPFILES];
 113static unsigned short root_swap;
 114
 115
 116static int mark_swapfiles(swp_entry_t prev)
 117{
 118        int error;
 119
 120        rw_swap_page_sync(READ, 
 121                          swp_entry(root_swap, 0),
 122                          virt_to_page((unsigned long)&pmdisk_header));
 123        if (!memcmp("SWAP-SPACE",pmdisk_header.sig,10) ||
 124            !memcmp("SWAPSPACE2",pmdisk_header.sig,10)) {
 125                memcpy(pmdisk_header.orig_sig,pmdisk_header.sig,10);
 126                memcpy(pmdisk_header.sig,PMDISK_SIG,10);
 127                pmdisk_header.pmdisk_info = prev;
 128                error = rw_swap_page_sync(WRITE, 
 129                                          swp_entry(root_swap, 0),
 130                                          virt_to_page((unsigned long)
 131                                                       &pmdisk_header));
 132        } else {
 133                pr_debug("pmdisk: Partition is not swap space.\n");
 134                error = -ENODEV;
 135        }
 136        return error;
 137}
 138
 139static int read_swapfiles(void) /* This is called before saving image */
 140{
 141        int i, len;
 142        
 143        len=strlen(resume_file);
 144        root_swap = 0xFFFF;
 145        
 146        swap_list_lock();
 147        for(i=0; i<MAX_SWAPFILES; i++) {
 148                if (swap_info[i].flags == 0) {
 149                        swapfile_used[i]=SWAPFILE_UNUSED;
 150                } else {
 151                        if(!len) {
 152                                pr_debug("pmdisk: Default resume partition not set.\n");
 153                                if(root_swap == 0xFFFF) {
 154                                        swapfile_used[i] = SWAPFILE_SUSPEND;
 155                                        root_swap = i;
 156                                } else
 157                                        swapfile_used[i] = SWAPFILE_IGNORED;                              
 158                        } else {
 159                                /* we ignore all swap devices that are not the resume_file */
 160                                if (1) {
 161// FIXME                                if(resume_device == swap_info[i].swap_device) {
 162                                        swapfile_used[i] = SWAPFILE_SUSPEND;
 163                                        root_swap = i;
 164                                } else
 165                                        swapfile_used[i] = SWAPFILE_IGNORED;
 166                        }
 167                }
 168        }
 169        swap_list_unlock();
 170        return (root_swap != 0xffff) ? 0 : -ENODEV;
 171}
 172
 173
 174/* This is called after saving image so modification
 175   will be lost after resume... and that's what we want. */
 176static void lock_swapdevices(void)
 177{
 178        int i;
 179
 180        swap_list_lock();
 181        for(i = 0; i< MAX_SWAPFILES; i++)
 182                if(swapfile_used[i] == SWAPFILE_IGNORED) {
 183                        swap_info[i].flags ^= 0xFF; /* we make the device unusable. A new call to
 184                                                       lock_swapdevices can unlock the devices. */
 185                }
 186        swap_list_unlock();
 187}
 188
 189
 190
 191/**
 192 *      write_swap_page - Write one page to a fresh swap location.
 193 *      @addr:  Address we're writing.
 194 *      @loc:   Place to store the entry we used.
 195 *
 196 *      Allocate a new swap entry and 'sync' it. Note we discard -EIO
 197 *      errors. That is an artifact left over from swsusp. It did not 
 198 *      check the return of rw_swap_page_sync() at all, since most pages
 199 *      written back to swap would return -EIO.
 200 *      This is a partial improvement, since we will at least return other
 201 *      errors, though we need to eventually fix the damn code.
 202 */
 203
 204static int write_swap_page(unsigned long addr, swp_entry_t * loc)
 205{
 206        swp_entry_t entry;
 207        int error = 0;
 208
 209        entry = get_swap_page();
 210        if (swp_offset(entry) && 
 211            swapfile_used[swp_type(entry)] == SWAPFILE_SUSPEND) {
 212                error = rw_swap_page_sync(WRITE, entry,
 213                                          virt_to_page(addr));
 214                if (error == -EIO)
 215                        error = 0;
 216                if (!error)
 217                        *loc = entry;
 218        } else
 219                error = -ENOSPC;
 220        return error;
 221}
 222
 223
 224/**
 225 *      free_data - Free the swap entries used by the saved image.
 226 *
 227 *      Walk the list of used swap entries and free each one. 
 228 */
 229
 230static void free_data(void)
 231{
 232        swp_entry_t entry;
 233        int i;
 234
 235        for (i = 0; i < pmdisk_pages; i++) {
 236                entry = (pm_pagedir_nosave + i)->swap_address;
 237                if (entry.val)
 238                        swap_free(entry);
 239                else
 240                        break;
 241                (pm_pagedir_nosave + i)->swap_address = (swp_entry_t){0};
 242        }
 243}
 244
 245
 246/**
 247 *      write_data - Write saved image to swap.
 248 *
 249 *      Walk the list of pages in the image and sync each one to swap.
 250 */
 251
 252static int write_data(void)
 253{
 254        int error = 0;
 255        int i;
 256
 257        printk( "Writing data to swap (%d pages): ", pmdisk_pages );
 258        for (i = 0; i < pmdisk_pages && !error; i++) {
 259                if (!(i%100))
 260                        printk( "." );
 261                error = write_swap_page((pm_pagedir_nosave+i)->address,
 262                                        &((pm_pagedir_nosave+i)->swap_address));
 263        }
 264        printk(" %d Pages done.\n",i);
 265        return error;
 266}
 267
 268
 269/**
 270 *      free_pagedir - Free pages used by the page directory.
 271 */
 272
 273static void free_pagedir_entries(void)
 274{
 275        int num = pmdisk_info.pagedir_pages;
 276        int i;
 277
 278        for (i = 0; i < num; i++)
 279                swap_free(pmdisk_info.pagedir[i]);
 280}
 281
 282
 283/**
 284 *      write_pagedir - Write the array of pages holding the page directory.
 285 *      @last:  Last swap entry we write (needed for header).
 286 */
 287
 288static int write_pagedir(void)
 289{
 290        unsigned long addr = (unsigned long)pm_pagedir_nosave;
 291        int error = 0;
 292        int n = SUSPEND_PD_PAGES(pmdisk_pages);
 293        int i;
 294
 295        pmdisk_info.pagedir_pages = n;
 296        printk( "Writing pagedir (%d pages)\n", n);
 297        for (i = 0; i < n && !error; i++, addr += PAGE_SIZE)
 298                error = write_swap_page(addr,&pmdisk_info.pagedir[i]);
 299        return error;
 300}
 301
 302
 303#ifdef DEBUG
 304static void dump_pmdisk_info(void)
 305{
 306        printk(" pmdisk: Version: %u\n",pmdisk_info.version_code);
 307        printk(" pmdisk: Num Pages: %ld\n",pmdisk_info.num_physpages);
 308        printk(" pmdisk: UTS Sys: %s\n",pmdisk_info.uts.sysname);
 309        printk(" pmdisk: UTS Node: %s\n",pmdisk_info.uts.nodename);
 310        printk(" pmdisk: UTS Release: %s\n",pmdisk_info.uts.release);
 311        printk(" pmdisk: UTS Version: %s\n",pmdisk_info.uts.version);
 312        printk(" pmdisk: UTS Machine: %s\n",pmdisk_info.uts.machine);
 313        printk(" pmdisk: UTS Domain: %s\n",pmdisk_info.uts.domainname);
 314        printk(" pmdisk: CPUs: %d\n",pmdisk_info.cpus);
 315        printk(" pmdisk: Image: %ld Pages\n",pmdisk_info.image_pages);
 316        printk(" pmdisk: Pagedir: %ld Pages\n",pmdisk_info.pagedir_pages);
 317}
 318#else
 319static void dump_pmdisk_info(void)
 320{
 321
 322}
 323#endif
 324
 325static void init_header(void)
 326{
 327        memset(&pmdisk_info,0,sizeof(pmdisk_info));
 328        pmdisk_info.version_code = LINUX_VERSION_CODE;
 329        pmdisk_info.num_physpages = num_physpages;
 330        memcpy(&pmdisk_info.uts,&system_utsname,sizeof(system_utsname));
 331
 332        pmdisk_info.cpus = num_online_cpus();
 333        pmdisk_info.image_pages = pmdisk_pages;
 334}
 335
 336/**
 337 *      write_header - Fill and write the suspend header.
 338 *      @entry: Location of the last swap entry used.
 339 *
 340 *      Allocate a page, fill header, write header. 
 341 *
 342 *      @entry is the location of the last pagedir entry written on 
 343 *      entrance. On exit, it contains the location of the header. 
 344 */
 345
 346static int write_header(swp_entry_t * entry)
 347{
 348        dump_pmdisk_info();
 349        return write_swap_page((unsigned long)&pmdisk_info,entry);
 350}
 351
 352
 353
 354/**
 355 *      write_suspend_image - Write entire image and metadata.
 356 *
 357 */
 358
 359static int write_suspend_image(void)
 360{
 361        int error;
 362        swp_entry_t prev = { 0 };
 363
 364        init_header();
 365
 366        if ((error = write_data()))
 367                goto FreeData;
 368
 369        if ((error = write_pagedir()))
 370                goto FreePagedir;
 371
 372        if ((error = write_header(&prev)))
 373                goto FreePagedir;
 374
 375        error = mark_swapfiles(prev);
 376 Done:
 377        return error;
 378 FreePagedir:
 379        free_pagedir_entries();
 380 FreeData:
 381        free_data();
 382        goto Done;
 383}
 384
 385
 386
 387/**
 388 *      saveable - Determine whether a page should be cloned or not.
 389 *      @pfn:   The page
 390 *
 391 *      We save a page if it's Reserved, and not in the range of pages
 392 *      statically defined as 'unsaveable', or if it isn't reserved, and
 393 *      isn't part of a free chunk of pages.
 394 *      If it is part of a free chunk, we update @pfn to point to the last 
 395 *      page of the chunk.
 396 */
 397
 398static int saveable(unsigned long * pfn)
 399{
 400        struct page * page = pfn_to_page(*pfn);
 401
 402        if (PageNosave(page))
 403                return 0;
 404
 405        if (!PageReserved(page)) {
 406                int chunk_size;
 407
 408                if ((chunk_size = is_head_of_free_region(page))) {
 409                        *pfn += chunk_size - 1;
 410                        return 0;
 411                }
 412        } else if (PageReserved(page)) {
 413                /* Just copy whole code segment. 
 414                 * Hopefully it is not that big.
 415                 */
 416                if ((ADDRESS(*pfn) >= (unsigned long) ADDRESS2(&__nosave_begin)) && 
 417                    (ADDRESS(*pfn) <  (unsigned long) ADDRESS2(&__nosave_end))) {
 418                        pr_debug("[nosave %lx]\n", ADDRESS(*pfn));
 419                        return 0;
 420                }
 421                /* Hmm, perhaps copying all reserved pages is not 
 422                 * too healthy as they may contain 
 423                 * critical bios data? 
 424                 */
 425        }
 426        return 1;
 427}
 428
 429
 430
 431/**
 432 *      count_pages - Determine size of page directory.
 433 *      
 434 *      Iterate over all the pages in the system and tally the number
 435 *      we need to clone.
 436 */
 437
 438static void count_pages(void)
 439{
 440        unsigned long pfn;
 441        int n = 0;
 442        
 443        for (pfn = 0; pfn < max_pfn; pfn++) {
 444                if (saveable(&pfn))
 445                        n++;
 446        }
 447        pmdisk_pages = n;
 448}
 449
 450
 451/**
 452 *      copy_pages - Atomically snapshot memory.
 453 *
 454 *      Iterate over all the pages in the system and copy each one 
 455 *      into its corresponding location in the pagedir.
 456 *      We rely on the fact that the number of pages that we're snap-
 457 *      shotting hasn't changed since we counted them. 
 458 */
 459
 460static void copy_pages(void)
 461{
 462        struct pbe * p = pagedir_save;
 463        unsigned long pfn;
 464        int n = 0;
 465
 466        for (pfn = 0; pfn < max_pfn; pfn++) {
 467                if (saveable(&pfn)) {
 468                        n++;
 469                        p->orig_address = ADDRESS(pfn);
 470                        copy_page((void *) p->address, 
 471                                  (void *) p->orig_address);
 472                        p++;
 473                }
 474        }
 475        BUG_ON(n != pmdisk_pages);
 476}
 477
 478
 479/**
 480 *      free_image_pages - Free each page allocated for snapshot.
 481 */
 482
 483static void free_image_pages(void)
 484{
 485        struct pbe * p;
 486        int i;
 487
 488        for (i = 0, p = pagedir_save; i < pmdisk_pages; i++, p++) {
 489                ClearPageNosave(virt_to_page(p->address));
 490                free_page(p->address);
 491        }
 492}
 493
 494
 495/**
 496 *      free_pagedir - Free the page directory.
 497 */
 498
 499static void free_pagedir(void)
 500{
 501        free_image_pages();
 502        free_pages((unsigned long)pagedir_save, pagedir_order);
 503}
 504
 505
 506static void calc_order(void)
 507{
 508        int diff;
 509        int order;
 510
 511        order = get_bitmask_order(SUSPEND_PD_PAGES(pmdisk_pages));
 512        pmdisk_pages += 1 << order;
 513        do {
 514                diff = get_bitmask_order(SUSPEND_PD_PAGES(pmdisk_pages)) - order;
 515                if (diff) {
 516                        order += diff;
 517                        pmdisk_pages += 1 << diff;
 518                }
 519        } while(diff);
 520        pagedir_order = order;
 521}
 522
 523
 524/**
 525 *      alloc_pagedir - Allocate the page directory.
 526 *
 527 *      First, determine exactly how many contiguous pages we need, 
 528 *      allocate them, then mark each 'unsavable'.
 529 */
 530
 531static int alloc_pagedir(void)
 532{
 533        calc_order();
 534        pagedir_save = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD, 
 535                                                             pagedir_order);
 536        if(!pagedir_save)
 537                return -ENOMEM;
 538        memset(pagedir_save,0,(1 << pagedir_order) * PAGE_SIZE);
 539        pm_pagedir_nosave = pagedir_save;
 540        return 0;
 541}
 542
 543
 544/**
 545 *      alloc_image_pages - Allocate pages for the snapshot.
 546 *
 547 */
 548
 549static int alloc_image_pages(void)
 550{
 551        struct pbe * p;
 552        int i;
 553
 554        for (i = 0, p = pagedir_save; i < pmdisk_pages; i++, p++) {
 555                p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
 556                if(!p->address)
 557                        goto Error;
 558                SetPageNosave(virt_to_page(p->address));
 559        }
 560        return 0;
 561 Error:
 562        do { 
 563                if (p->address)
 564                        free_page(p->address);
 565                p->address = 0;
 566        } while (p-- > pagedir_save);
 567        return -ENOMEM;
 568}
 569
 570
 571/**
 572 *      enough_free_mem - Make sure we enough free memory to snapshot.
 573 *
 574 *      Returns TRUE or FALSE after checking the number of available 
 575 *      free pages.
 576 */
 577
 578static int enough_free_mem(void)
 579{
 580        if(nr_free_pages() < (pmdisk_pages + PAGES_FOR_IO)) {
 581                pr_debug("pmdisk: Not enough free pages: Have %d\n",
 582                         nr_free_pages());
 583                return 0;
 584        }
 585        return 1;
 586}
 587
 588
 589/**
 590 *      enough_swap - Make sure we have enough swap to save the image.
 591 *
 592 *      Returns TRUE or FALSE after checking the total amount of swap 
 593 *      space avaiable.
 594 *
 595 *      FIXME: si_swapinfo(&i) returns all swap devices information.
 596 *      We should only consider resume_device. 
 597 */
 598
 599static int enough_swap(void)
 600{
 601        struct sysinfo i;
 602
 603        si_swapinfo(&i);
 604        if (i.freeswap < (pmdisk_pages + PAGES_FOR_IO))  {
 605                pr_debug("pmdisk: Not enough swap. Need %ld\n",i.freeswap);
 606                return 0;
 607        }
 608        return 1;
 609}
 610
 611
 612/**
 613 *      pmdisk_suspend - Atomically snapshot the system.
 614 *
 615 *      This must be called with interrupts disabled, to prevent the 
 616 *      system changing at all from underneath us. 
 617 *
 618 *      To do this, we count the number of pages in the system that we 
 619 *      need to save; make sure we have enough memory and swap to clone
 620 *      the pages and save them in swap, allocate the space to hold them,
 621 *      and then snapshot them all.
 622 */
 623
 624int pmdisk_suspend(void)
 625{
 626        int error = 0;
 627
 628        if ((error = read_swapfiles()))
 629                return error;
 630
 631        drain_local_pages();
 632
 633        pm_pagedir_nosave = NULL;
 634        pr_debug("pmdisk: Counting pages to copy.\n" );
 635        count_pages();
 636        
 637        pr_debug("pmdisk: (pages needed: %d + %d free: %d)\n",
 638                 pmdisk_pages,PAGES_FOR_IO,nr_free_pages());
 639
 640        if (!enough_free_mem())
 641                return -ENOMEM;
 642
 643        if (!enough_swap())
 644                return -ENOSPC;
 645
 646        if ((error = alloc_pagedir())) {
 647                pr_debug("pmdisk: Allocating pagedir failed.\n");
 648                return error;
 649        }
 650        if ((error = alloc_image_pages())) {
 651                pr_debug("pmdisk: Allocating image pages failed.\n");
 652                free_pagedir();
 653                return error;
 654        }
 655
 656        nr_copy_pages_check = pmdisk_pages;
 657        pagedir_order_check = pagedir_order;
 658
 659        /* During allocating of suspend pagedir, new cold pages may appear. 
 660         * Kill them 
 661         */
 662        drain_local_pages();
 663
 664        /* copy */
 665        copy_pages();
 666
 667        /*
 668         * End of critical section. From now on, we can write to memory,
 669         * but we should not touch disk. This specially means we must _not_
 670         * touch swap space! Except we must write out our image of course.
 671         */
 672
 673        pr_debug("pmdisk: %d pages copied\n", pmdisk_pages );
 674        return 0;
 675}
 676
 677
 678/**
 679 *      suspend_save_image - Prepare and write saved image to swap.
 680 *
 681 *      IRQs are re-enabled here so we can resume devices and safely write
 682 *      to the swap devices. We disable them again before we leave.
 683 *
 684 *      The second lock_swapdevices() will unlock ignored swap devices since
 685 *      writing is finished.
 686 *      It is important _NOT_ to umount filesystems at this point. We want
 687 *      them synced (in case something goes wrong) but we DO not want to mark
 688 *      filesystem clean: it is not. (And it does not matter, if we resume
 689 *      correctly, we'll mark system clean, anyway.)
 690 */
 691
 692static int suspend_save_image(void)
 693{
 694        int error;
 695        device_resume();
 696        lock_swapdevices();
 697        error = write_suspend_image();
 698        lock_swapdevices();
 699        return error;
 700}
 701
 702/*
 703 * Magic happens here
 704 */
 705
 706int pmdisk_resume(void)
 707{
 708        BUG_ON (nr_copy_pages_check != pmdisk_pages);
 709        BUG_ON (pagedir_order_check != pagedir_order);
 710        
 711        /* Even mappings of "global" things (vmalloc) need to be fixed */
 712        __flush_tlb_global();
 713        return 0;
 714}
 715
 716/* pmdisk_arch_suspend() is implemented in arch/?/power/pmdisk.S,
 717   and basically does:
 718
 719        if (!resume) {
 720                save_processor_state();
 721                SAVE_REGISTERS
 722                return pmdisk_suspend();
 723        }
 724        GO_TO_SWAPPER_PAGE_TABLES
 725        COPY_PAGES_BACK
 726        RESTORE_REGISTERS
 727        restore_processor_state();
 728        return pmdisk_resume();
 729
 730 */
 731
 732
 733/* More restore stuff */
 734
 735#define does_collide(addr) does_collide_order(pm_pagedir_nosave, addr, 0)
 736
 737/*
 738 * Returns true if given address/order collides with any orig_address 
 739 */
 740static int __init does_collide_order(suspend_pagedir_t *pagedir, 
 741                                     unsigned long addr, int order)
 742{
 743        int i;
 744        unsigned long addre = addr + (PAGE_SIZE<<order);
 745        
 746        for(i=0; i < pmdisk_pages; i++)
 747                if((pagedir+i)->orig_address >= addr &&
 748                        (pagedir+i)->orig_address < addre)
 749                        return 1;
 750
 751        return 0;
 752}
 753
 754/*
 755 * We check here that pagedir & pages it points to won't collide with pages
 756 * where we're going to restore from the loaded pages later
 757 */
 758static int __init check_pagedir(void)
 759{
 760        int i;
 761
 762        for(i=0; i < pmdisk_pages; i++) {
 763                unsigned long addr;
 764
 765                do {
 766                        addr = get_zeroed_page(GFP_ATOMIC);
 767                        if(!addr)
 768                                return -ENOMEM;
 769                } while (does_collide(addr));
 770
 771                (pm_pagedir_nosave+i)->address = addr;
 772        }
 773        return 0;
 774}
 775
 776static int __init relocate_pagedir(void)
 777{
 778        /*
 779         * We have to avoid recursion (not to overflow kernel stack),
 780         * and that's why code looks pretty cryptic 
 781         */
 782        suspend_pagedir_t *old_pagedir = pm_pagedir_nosave;
 783        void **eaten_memory = NULL;
 784        void **c = eaten_memory, *m, *f;
 785        int err;
 786
 787        pr_debug("pmdisk: Relocating pagedir\n");
 788
 789        if(!does_collide_order(old_pagedir, (unsigned long)old_pagedir, pagedir_order)) {
 790                pr_debug("pmdisk: Relocation not necessary\n");
 791                return 0;
 792        }
 793
 794        err = -ENOMEM;
 795        while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order)) != NULL) {
 796                if (!does_collide_order(old_pagedir, (unsigned long)m,
 797                                        pagedir_order)) {
 798                        pm_pagedir_nosave =
 799                                memcpy(m, old_pagedir,
 800                                       PAGE_SIZE << pagedir_order);
 801                        err = 0;
 802                        break;
 803                }
 804                eaten_memory = m;
 805                printk( "." ); 
 806                *eaten_memory = c;
 807                c = eaten_memory;
 808        }
 809
 810        c = eaten_memory;
 811        while(c) {
 812                printk(":");
 813                f = c;
 814                c = *c;
 815                free_pages((unsigned long)f, pagedir_order);
 816        }
 817        printk("|\n");
 818        return err;
 819}
 820
 821
 822static struct block_device * resume_bdev;
 823
 824
 825/**
 826 *      Using bio to read from swap.
 827 *      This code requires a bit more work than just using buffer heads
 828 *      but, it is the recommended way for 2.5/2.6.
 829 *      The following are to signal the beginning and end of I/O. Bios
 830 *      finish asynchronously, while we want them to happen synchronously.
 831 *      A simple atomic_t, and a wait loop take care of this problem.
 832 */
 833
 834static atomic_t io_done = ATOMIC_INIT(0);
 835
 836static void start_io(void)
 837{
 838        atomic_set(&io_done,1);
 839}
 840
 841static int end_io(struct bio * bio, unsigned int num, int err)
 842{
 843        atomic_set(&io_done,0);
 844        return 0;
 845}
 846
 847static void wait_io(void)
 848{
 849        while(atomic_read(&io_done))
 850                io_schedule();
 851}
 852
 853
 854/**
 855 *      submit - submit BIO request.
 856 *      @rw:    READ or WRITE.
 857 *      @off    physical offset of page.
 858 *      @page:  page we're reading or writing.
 859 *
 860 *      Straight from the textbook - allocate and initialize the bio.
 861 *      If we're writing, make sure the page is marked as dirty.
 862 *      Then submit it and wait.
 863 */
 864
 865static int submit(int rw, pgoff_t page_off, void * page)
 866{
 867        int error = 0;
 868        struct bio * bio;
 869
 870        bio = bio_alloc(GFP_ATOMIC,1);
 871        if (!bio)
 872                return -ENOMEM;
 873        bio->bi_sector = page_off * (PAGE_SIZE >> 9);
 874        bio_get(bio);
 875        bio->bi_bdev = resume_bdev;
 876        bio->bi_end_io = end_io;
 877
 878        if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) {
 879                printk("pmdisk: ERROR: adding page to bio at %ld\n",page_off);
 880                error = -EFAULT;
 881                goto Done;
 882        }
 883
 884        if (rw == WRITE)
 885                bio_set_pages_dirty(bio);
 886        start_io();
 887        submit_bio(rw | (1 << BIO_RW_SYNC), bio);
 888        wait_io();
 889 Done:
 890        bio_put(bio);
 891        return error;
 892}
 893
 894static int
 895read_page(pgoff_t page_off, void * page)
 896{
 897        return submit(READ,page_off,page);
 898}
 899
 900static int
 901write_page(pgoff_t page_off, void * page)
 902{
 903        return submit(WRITE,page_off,page);
 904}
 905
 906
 907extern dev_t __init name_to_dev_t(const char *line);
 908
 909
 910static int __init check_sig(void)
 911{
 912        int error;
 913
 914        memset(&pmdisk_header,0,sizeof(pmdisk_header));
 915        if ((error = read_page(0,&pmdisk_header)))
 916                return error;
 917        if (!memcmp(PMDISK_SIG,pmdisk_header.sig,10)) {
 918                memcpy(pmdisk_header.sig,pmdisk_header.orig_sig,10);
 919
 920                /*
 921                 * Reset swap signature now.
 922                 */
 923                error = write_page(0,&pmdisk_header);
 924        } else { 
 925                pr_debug(KERN_ERR "pmdisk: Invalid partition type.\n");
 926                return -EINVAL;
 927        }
 928        if (!error)
 929                pr_debug("pmdisk: Signature found, resuming\n");
 930        return error;
 931}
 932
 933
 934/*
 935 * Sanity check if this image makes sense with this kernel/swap context
 936 * I really don't think that it's foolproof but more than nothing..
 937 */
 938
 939static const char * __init sanity_check(void)
 940{
 941        dump_pmdisk_info();
 942        if(pmdisk_info.version_code != LINUX_VERSION_CODE)
 943                return "kernel version";
 944        if(pmdisk_info.num_physpages != num_physpages)
 945                return "memory size";
 946        if (strcmp(pmdisk_info.uts.sysname,system_utsname.sysname))
 947                return "system type";
 948        if (strcmp(pmdisk_info.uts.release,system_utsname.release))
 949                return "kernel release";
 950        if (strcmp(pmdisk_info.uts.version,system_utsname.version))
 951                return "version";
 952        if (strcmp(pmdisk_info.uts.machine,system_utsname.machine))
 953                return "machine";
 954        if(pmdisk_info.cpus != num_online_cpus())
 955                return "number of cpus";
 956        return NULL;
 957}
 958
 959
 960static int __init check_header(void)
 961{
 962        const char * reason = NULL;
 963        int error;
 964
 965        init_header();
 966
 967        if ((error = read_page(swp_offset(pmdisk_header.pmdisk_info), 
 968                               &pmdisk_info)))
 969                return error;
 970
 971        /* Is this same machine? */
 972        if ((reason = sanity_check())) {
 973                printk(KERN_ERR "pmdisk: Resume mismatch: %s\n",reason);
 974                return -EPERM;
 975        }
 976        pmdisk_pages = pmdisk_info.image_pages;
 977        return error;
 978}
 979
 980
 981static int __init read_pagedir(void)
 982{
 983        unsigned long addr;
 984        int i, n = pmdisk_info.pagedir_pages;
 985        int error = 0;
 986
 987        pagedir_order = get_bitmask_order(n);
 988
 989        addr =__get_free_pages(GFP_ATOMIC, pagedir_order);
 990        if (!addr)
 991                return -ENOMEM;
 992        pm_pagedir_nosave = (struct pbe *)addr;
 993
 994        pr_debug("pmdisk: Reading pagedir (%d Pages)\n",n);
 995
 996        for (i = 0; i < n && !error; i++, addr += PAGE_SIZE) {
 997                unsigned long offset = swp_offset(pmdisk_info.pagedir[i]);
 998                if (offset)
 999                        error = read_page(offset, (void *)addr);
1000                else
1001                        error = -EFAULT;
1002        }
1003        if (error)
1004                free_pages((unsigned long)pm_pagedir_nosave,pagedir_order);
1005        return error;
1006}
1007
1008
1009/**
1010 *      read_image_data - Read image pages from swap.
1011 *
1012 *      You do not need to check for overlaps, check_pagedir()
1013 *      already did that.
1014 */
1015
1016static int __init read_image_data(void)
1017{
1018        struct pbe * p;
1019        int error = 0;
1020        int i;
1021
1022        printk( "Reading image data (%d pages): ", pmdisk_pages );
1023        for(i = 0, p = pm_pagedir_nosave; i < pmdisk_pages && !error; i++, p++) {
1024                if (!(i%100))
1025                        printk( "." );
1026                error = read_page(swp_offset(p->swap_address),
1027                                  (void *)p->address);
1028        }
1029        printk(" %d done.\n",i);
1030        return error;
1031}
1032
1033
1034static int __init read_suspend_image(void)
1035{
1036        int error = 0;
1037
1038        if ((error = check_sig()))
1039                return error;
1040        if ((error = check_header()))
1041                return error;
1042        if ((error = read_pagedir()))
1043                return error;
1044        if ((error = relocate_pagedir()))
1045                goto FreePagedir;
1046        if ((error = check_pagedir()))
1047                goto FreePagedir;
1048        if ((error = read_image_data()))
1049                goto FreePagedir;
1050 Done:
1051        return error;
1052 FreePagedir:
1053        free_pages((unsigned long)pm_pagedir_nosave,pagedir_order);
1054        goto Done;
1055}
1056
/**
 *      pmdisk_save - Snapshot memory
 *
 *      When CONFIG_HIGHMEM or CONFIG_DISCONTIGMEM is defined, this only
 *      emits a debug message and returns -EPERM.
 *
 *      Otherwise it calls arch_prepare_suspend() and, if that succeeds,
 *      invokes pmdisk_arch_suspend(0) with local interrupts disabled and
 *      the processor state saved around the call.
 *
 *      Returns the arch_prepare_suspend() error if non-zero, else the
 *      result of pmdisk_arch_suspend(0).
 */

int pmdisk_save(void)
{
#if defined (CONFIG_HIGHMEM) || defined (CONFIG_DISCONTIGMEM)
        pr_debug("pmdisk: not supported with high- or discontig-mem.\n");
        return -EPERM;
#else
        int status = arch_prepare_suspend();

        if (status)
                return status;

        local_irq_disable();
        save_processor_state();
        status = pmdisk_arch_suspend(0);
        restore_processor_state();
        local_irq_enable();

        return status;
#endif
}
1078
1079
/**
 *      pmdisk_write - Write saved memory image to swap.
 *
 *      Thin wrapper: calls suspend_save_image() and returns its result
 *      unchanged.
 *
 *      NOTE(review): the remarks below are carried over from the
 *      original comment; they describe pmdisk_arch_suspend(), which
 *      this function does not call - confirm they still apply.
 *
 *      pmdisk_arch_suspend(0) returns after system is resumed.
 *
 *      pmdisk_arch_suspend() copies all "used" memory to "free" memory,
 *      then unsuspends all device drivers, and writes memory to disk
 *      using normal kernel mechanism.
 */

int pmdisk_write(void)
{
        int error = suspend_save_image();

        return error;
}
1094
1095
1096/**
1097 *      pmdisk_read - Read saved image from swap.
1098 */
1099
1100int __init pmdisk_read(void)
1101{
1102        int error;
1103
1104        if (!strlen(resume_file))
1105                return -ENOENT;
1106
1107        resume_device = name_to_dev_t(resume_file);
1108        pr_debug("pmdisk: Resume From Partition: %s\n", resume_file);
1109
1110        resume_bdev = open_by_devnum(resume_device, FMODE_READ);
1111        if (!IS_ERR(resume_bdev)) {
1112                set_blocksize(resume_bdev, PAGE_SIZE);
1113                error = read_suspend_image();
1114                blkdev_put(resume_bdev);
1115        } else
1116                error = PTR_ERR(resume_bdev);
1117
1118        if (!error)
1119                pr_debug("Reading resume file was successful\n");
1120        else
1121                pr_debug("pmdisk: Error %d resuming\n", error);
1122        return error;
1123}
1124
1125
1126/**
1127 *      pmdisk_restore - Replace running kernel with saved image.
1128 */
1129
1130int __init pmdisk_restore(void)
1131{
1132        int error;
1133        local_irq_disable();
1134        save_processor_state();
1135        error = pmdisk_arch_suspend(1);
1136        restore_processor_state();
1137        local_irq_enable();
1138        return error;
1139}
1140
1141
/**
 *      pmdisk_free - Free memory allocated to hold snapshot.
 *
 *      Emits a debug message, then calls free_pagedir().
 *
 *      Always returns 0.
 */

int pmdisk_free(void)
{
        pr_debug( "Freeing prev allocated pagedir\n" );

        free_pagedir();

        return 0;
}
1152
1153static int __init pmdisk_setup(char *str)
1154{
1155        if (strlen(str)) {
1156                if (!strcmp(str,"off"))
1157                        resume_file[0] = '\0';
1158                else
1159                        strncpy(resume_file, str, 255);
1160        } else
1161                resume_file[0] = '\0';
1162        return 1;
1163}
1164
1165__setup("pmdisk=", pmdisk_setup);
1166
1167
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.