linux-bk/kernel/power/swsusp.c
<<
>>
Prefs
   1/*
   2 * linux/kernel/power/swsusp.c
   3 *
    4 * This file implements an architecture-independent machine suspend
    5 * feature, using almost exclusively high-level kernel routines.
   6 *
   7 * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
   8 * Copyright (C) 1998,2001-2004 Pavel Machek <pavel@suse.cz>
   9 *
  10 * This file is released under the GPLv2.
  11 *
  12 * I'd like to thank the following people for their work:
  13 * 
  14 * Pavel Machek <pavel@ucw.cz>:
  15 * Modifications, defectiveness pointing, being with me at the very beginning,
  16 * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
  17 *
  18 * Steve Doddi <dirk@loth.demon.co.uk>: 
  19 * Support the possibility of hardware state restoring.
  20 *
  21 * Raph <grey.havens@earthling.net>:
  22 * Support for preserving states of network devices and virtual console
  23 * (including X and svgatextmode)
  24 *
  25 * Kurt Garloff <garloff@suse.de>:
  26 * Straightened the critical function in order to prevent compilers from
  27 * playing tricks with local variables.
  28 *
  29 * Andreas Mohr <a.mohr@mailto.de>
  30 *
  31 * Alex Badea <vampire@go.ro>:
  32 * Fixed runaway init
  33 *
  34 * More state savers are welcome. Especially for the scsi layer...
  35 *
  36 * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
  37 */
  38
  39#include <linux/module.h>
  40#include <linux/mm.h>
  41#include <linux/suspend.h>
  42#include <linux/smp_lock.h>
  43#include <linux/file.h>
  44#include <linux/utsname.h>
  45#include <linux/version.h>
  46#include <linux/delay.h>
  47#include <linux/reboot.h>
  48#include <linux/bitops.h>
  49#include <linux/vt_kern.h>
  50#include <linux/kbd_kern.h>
  51#include <linux/keyboard.h>
  52#include <linux/spinlock.h>
  53#include <linux/genhd.h>
  54#include <linux/kernel.h>
  55#include <linux/major.h>
  56#include <linux/swap.h>
  57#include <linux/pm.h>
  58#include <linux/device.h>
  59#include <linux/buffer_head.h>
  60#include <linux/swapops.h>
  61#include <linux/bootmem.h>
  62#include <linux/syscalls.h>
  63#include <linux/console.h>
  64#include <linux/highmem.h>
  65
  66#include <asm/uaccess.h>
  67#include <asm/mmu_context.h>
  68#include <asm/pgtable.h>
  69#include <asm/io.h>
  70
  71#include "power.h"
  72
  73unsigned char software_suspend_enabled = 0;
  74
  75#define NORESUME                1
  76#define RESUME_SPECIFIED        2
  77
  78/* References to section boundaries */
  79extern char __nosave_begin, __nosave_end;
  80
  81extern int is_head_of_free_region(struct page *);
  82
  83/* Locks */
  84spinlock_t suspend_pagedir_lock __nosavedata = SPIN_LOCK_UNLOCKED;
  85
  86/* Variables to be preserved over suspend */
  87static int pagedir_order_check;
  88static int nr_copy_pages_check;
  89
  90static int resume_status;
  91static char resume_file[256] = "";                      /* For resume= kernel option */
  92static dev_t resume_device;
  93/* Local variables that should not be affected by save */
  94unsigned int nr_copy_pages __nosavedata = 0;
  95
  96/* Suspend pagedir is allocated before final copy, therefore it
  97   must be freed after resume 
  98
  99   Warning: this is evil. There are actually two pagedirs at time of
 100   resume. One is "pagedir_save", which is empty frame allocated at
 101   time of suspend, that must be freed. Second is "pagedir_nosave", 
 102   allocated at time of resume, that travels through memory not to
 103   collide with anything.
 104
 105   Warning: this is even more evil than it seems. Pagedirs this file
 106   talks about are completely different from page directories used by
 107   MMU hardware.
 108 */
 109suspend_pagedir_t *pagedir_nosave __nosavedata = NULL;
 110static suspend_pagedir_t *pagedir_save;
 111static int pagedir_order __nosavedata = 0;
 112
 113struct link {
 114        char dummy[PAGE_SIZE - sizeof(swp_entry_t)];
 115        swp_entry_t next;
 116};
 117
 118union diskpage {
 119        union swap_header swh;
 120        struct link link;
 121        struct suspend_header sh;
 122};
 123
 124/*
 125 * XXX: We try to keep some more pages free so that I/O operations succeed
 126 * without paging. Might this be more?
 127 */
 128#define PAGES_FOR_IO    512
 129
 130static const char name_suspend[] = "Suspend Machine: ";
 131static const char name_resume[] = "Resume Machine: ";
 132
 133/*
 134 * Debug
 135 */
 136#define DEBUG_DEFAULT
 137#undef  DEBUG_PROCESS
 138#undef  DEBUG_SLOW
 139#define TEST_SWSUSP 0           /* Set to 1 to reboot instead of halt machine after suspension */
 140
 141#ifdef DEBUG_DEFAULT
 142# define PRINTK(f, a...)        printk(f, ## a)
 143#else
 144# define PRINTK(f, a...)        do { } while(0)
 145#endif
 146
 147#ifdef DEBUG_SLOW
 148#define MDELAY(a) mdelay(a)
 149#else
 150#define MDELAY(a) do { } while(0)
 151#endif
 152
 153/*
 154 * Saving part...
 155 */
 156
 157static __inline__ int fill_suspend_header(struct suspend_header *sh)
 158{
 159        memset((char *)sh, 0, sizeof(*sh));
 160
 161        sh->version_code = LINUX_VERSION_CODE;
 162        sh->num_physpages = num_physpages;
 163        strncpy(sh->machine, system_utsname.machine, 8);
 164        strncpy(sh->version, system_utsname.version, 20);
 165        /* FIXME: Is this bogus? --RR */
 166        sh->num_cpus = num_online_cpus();
 167        sh->page_size = PAGE_SIZE;
 168        sh->suspend_pagedir = pagedir_nosave;
 169        BUG_ON (pagedir_save != pagedir_nosave);
 170        sh->num_pbes = nr_copy_pages;
 171        /* TODO: needed? mounted fs' last mounted date comparison
 172         * [so they haven't been mounted since last suspend.
 173         * Maybe it isn't.] [we'd need to do this for _all_ fs-es]
 174         */
 175        return 0;
 176}
 177
 178/* We memorize in swapfile_used what swap devices are used for suspension */
 179#define SWAPFILE_UNUSED    0
 180#define SWAPFILE_SUSPEND   1    /* This is the suspending device */
 181#define SWAPFILE_IGNORED   2    /* Those are other swap devices ignored for suspension */
 182
 183static unsigned short swapfile_used[MAX_SWAPFILES];
 184static unsigned short root_swap;
 185#define MARK_SWAP_SUSPEND 0
 186#define MARK_SWAP_RESUME 2
 187
 188static void mark_swapfiles(swp_entry_t prev, int mode)
 189{
 190        swp_entry_t entry;
 191        union diskpage *cur;
 192        struct page *page;
 193
 194        if (root_swap == 0xFFFF)  /* ignored */
 195                return;
 196
 197        page = alloc_page(GFP_ATOMIC);
 198        if (!page)
 199                panic("Out of memory in mark_swapfiles");
 200        cur = page_address(page);
 201        /* XXX: this is dirty hack to get first page of swap file */
 202        entry = swp_entry(root_swap, 0);
 203        rw_swap_page_sync(READ, entry, page);
 204
 205        if (mode == MARK_SWAP_RESUME) {
 206                if (!memcmp("S1",cur->swh.magic.magic,2))
 207                        memcpy(cur->swh.magic.magic,"SWAP-SPACE",10);
 208                else if (!memcmp("S2",cur->swh.magic.magic,2))
 209                        memcpy(cur->swh.magic.magic,"SWAPSPACE2",10);
 210                else printk("%sUnable to find suspended-data signature (%.10s - misspelled?\n", 
 211                        name_resume, cur->swh.magic.magic);
 212        } else {
 213                if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)))
 214                        memcpy(cur->swh.magic.magic,"S1SUSP....",10);
 215                else if ((!memcmp("SWAPSPACE2",cur->swh.magic.magic,10)))
 216                        memcpy(cur->swh.magic.magic,"S2SUSP....",10);
 217                else panic("\nSwapspace is not swapspace (%.10s)\n", cur->swh.magic.magic);
 218                cur->link.next = prev; /* prev is the first/last swap page of the resume area */
 219                /* link.next lies *no more* in last 4/8 bytes of magic */
 220        }
 221        rw_swap_page_sync(WRITE, entry, page);
 222        __free_page(page);
 223}
 224
 225
 226/*
 227 * Check whether the swap device is the specified resume
 228 * device, irrespective of whether they are specified by
 229 * identical names.
 230 *
 231 * (Thus, device inode aliasing is allowed.  You can say /dev/hda4
 232 * instead of /dev/ide/host0/bus0/target0/lun0/part4 [if using devfs]
 233 * and they'll be considered the same device.  This is *necessary* for
 234 * devfs, since the resume code can only recognize the form /dev/hda4,
 235 * but the suspend code would see the long name.)
 236 */
 237static int is_resume_device(const struct swap_info_struct *swap_info)
 238{
 239        struct file *file = swap_info->swap_file;
 240        struct inode *inode = file->f_dentry->d_inode;
 241
 242        return S_ISBLK(inode->i_mode) &&
 243                resume_device == MKDEV(imajor(inode), iminor(inode));
 244}
 245
 246static void read_swapfiles(void) /* This is called before saving image */
 247{
 248        int i, len;
 249        
 250        len=strlen(resume_file);
 251        root_swap = 0xFFFF;
 252        
 253        swap_list_lock();
 254        for(i=0; i<MAX_SWAPFILES; i++) {
 255                if (swap_info[i].flags == 0) {
 256                        swapfile_used[i]=SWAPFILE_UNUSED;
 257                } else {
 258                        if(!len) {
 259                                printk(KERN_WARNING "resume= option should be used to set suspend device" );
 260                                if(root_swap == 0xFFFF) {
 261                                        swapfile_used[i] = SWAPFILE_SUSPEND;
 262                                        root_swap = i;
 263                                } else
 264                                        swapfile_used[i] = SWAPFILE_IGNORED;                              
 265                        } else {
 266                                /* we ignore all swap devices that are not the resume_file */
 267                                if (is_resume_device(&swap_info[i])) {
 268                                        swapfile_used[i] = SWAPFILE_SUSPEND;
 269                                        root_swap = i;
 270                                } else {
 271                                        swapfile_used[i] = SWAPFILE_IGNORED;
 272                                }
 273                        }
 274                }
 275        }
 276        swap_list_unlock();
 277}
 278
 279static void lock_swapdevices(void) /* This is called after saving image so modification
 280                                      will be lost after resume... and that's what we want. */
 281{
 282        int i;
 283
 284        swap_list_lock();
 285        for(i = 0; i< MAX_SWAPFILES; i++)
 286                if(swapfile_used[i] == SWAPFILE_IGNORED) {
 287                        swap_info[i].flags ^= 0xFF; /* we make the device unusable. A new call to
 288                                                       lock_swapdevices can unlock the devices. */
 289                }
 290        swap_list_unlock();
 291}
 292
 293/**
 294 *    write_suspend_image - Write entire image to disk.
 295 *
 296 *    After writing suspend signature to the disk, suspend may no
 297 *    longer fail: we have ready-to-run image in swap, and rollback
 298 *    would happen on next reboot -- corrupting data.
 299 *
 300 *    Note: The buffer we allocate to use to write the suspend header is
 301 *    not freed; its not needed since the system is going down anyway
 302 *    (plus it causes an oops and I'm lazy^H^H^H^Htoo busy).
 303 */
 304static int write_suspend_image(void)
 305{
 306        int i;
 307        swp_entry_t entry, prev = { 0 };
 308        int nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages);
 309        union diskpage *cur,  *buffer = (union diskpage *)get_zeroed_page(GFP_ATOMIC);
 310        unsigned long address;
 311        struct page *page;
 312
 313        if (!buffer)
 314                return -ENOMEM;
 315
 316        printk( "Writing data to swap (%d pages): ", nr_copy_pages );
 317        for (i=0; i<nr_copy_pages; i++) {
 318                if (!(i%100))
 319                        printk( "." );
 320                entry = get_swap_page();
 321                if (!entry.val)
 322                        panic("\nNot enough swapspace when writing data" );
 323                
 324                if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
 325                        panic("\nPage %d: not enough swapspace on suspend device", i );
 326            
 327                address = (pagedir_nosave+i)->address;
 328                page = virt_to_page(address);
 329                rw_swap_page_sync(WRITE, entry, page);
 330                (pagedir_nosave+i)->swap_address = entry;
 331        }
 332        printk( "|\n" );
 333        printk( "Writing pagedir (%d pages): ", nr_pgdir_pages);
 334        for (i=0; i<nr_pgdir_pages; i++) {
 335                cur = (union diskpage *)((char *) pagedir_nosave)+i;
 336                BUG_ON ((char *) cur != (((char *) pagedir_nosave) + i*PAGE_SIZE));
 337                printk( "." );
 338                entry = get_swap_page();
 339                if (!entry.val) {
 340                        printk(KERN_CRIT "Not enough swapspace when writing pgdir\n" );
 341                        panic("Don't know how to recover");
 342                        free_page((unsigned long) buffer);
 343                        return -ENOSPC;
 344                }
 345
 346                if(swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
 347                        panic("\nNot enough swapspace for pagedir on suspend device" );
 348
 349                BUG_ON (sizeof(swp_entry_t) != sizeof(long));
 350                BUG_ON (PAGE_SIZE % sizeof(struct pbe));
 351
 352                cur->link.next = prev;                          
 353                page = virt_to_page((unsigned long)cur);
 354                rw_swap_page_sync(WRITE, entry, page);
 355                prev = entry;
 356        }
 357        printk("H");
 358        BUG_ON (sizeof(struct suspend_header) > PAGE_SIZE-sizeof(swp_entry_t));
 359        BUG_ON (sizeof(union diskpage) != PAGE_SIZE);
 360        BUG_ON (sizeof(struct link) != PAGE_SIZE);
 361        entry = get_swap_page();
 362        if (!entry.val)
 363                panic( "\nNot enough swapspace when writing header" );
 364        if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
 365                panic("\nNot enough swapspace for header on suspend device" );
 366
 367        cur = (void *) buffer;
 368        if (fill_suspend_header(&cur->sh))
 369                BUG();          /* Not a BUG_ON(): we want fill_suspend_header to be called, always */
 370                
 371        cur->link.next = prev;
 372
 373        page = virt_to_page((unsigned long)cur);
 374        rw_swap_page_sync(WRITE, entry, page);
 375        prev = entry;
 376
 377        printk( "S" );
 378        mark_swapfiles(prev, MARK_SWAP_SUSPEND);
 379        printk( "|\n" );
 380
 381        MDELAY(1000);
 382        return 0;
 383}
 384
 385#ifdef CONFIG_HIGHMEM
 386struct highmem_page {
 387        char *data;
 388        struct page *page;
 389        struct highmem_page *next;
 390};
 391
 392struct highmem_page *highmem_copy = NULL;
 393
 394static int save_highmem_zone(struct zone *zone)
 395{
 396        unsigned long zone_pfn;
 397        for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
 398                struct page *page;
 399                struct highmem_page *save;
 400                void *kaddr;
 401                unsigned long pfn = zone_pfn + zone->zone_start_pfn;
 402                int chunk_size;
 403
 404                if (!(pfn%1000))
 405                        printk(".");
 406                if (!pfn_valid(pfn))
 407                        continue;
 408                page = pfn_to_page(pfn);
 409                /*
 410                 * This condition results from rvmalloc() sans vmalloc_32()
 411                 * and architectural memory reservations. This should be
 412                 * corrected eventually when the cases giving rise to this
 413                 * are better understood.
 414                 */
 415                if (PageReserved(page)) {
 416                        printk("highmem reserved page?!\n");
 417                        continue;
 418                }
 419                if ((chunk_size = is_head_of_free_region(page))) {
 420                        pfn += chunk_size - 1;
 421                        zone_pfn += chunk_size - 1;
 422                        continue;
 423                }
 424                save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC);
 425                if (!save)
 426                        return -ENOMEM;
 427                save->next = highmem_copy;
 428                save->page = page;
 429                save->data = (void *) get_zeroed_page(GFP_ATOMIC);
 430                if (!save->data) {
 431                        kfree(save);
 432                        return -ENOMEM;
 433                }
 434                kaddr = kmap_atomic(page, KM_USER0);
 435                memcpy(save->data, kaddr, PAGE_SIZE);
 436                kunmap_atomic(kaddr, KM_USER0);
 437                highmem_copy = save;
 438        }
 439        return 0;
 440}
 441
 442static int save_highmem(void)
 443{
 444        struct zone *zone;
 445        int res = 0;
 446        for_each_zone(zone) {
 447                if (is_highmem(zone))
 448                        res = save_highmem_zone(zone);
 449                if (res)
 450                        return res;
 451        }
 452        return 0;
 453}
 454
 455static int restore_highmem(void)
 456{
 457        while (highmem_copy) {
 458                struct highmem_page *save = highmem_copy;
 459                void *kaddr;
 460                highmem_copy = save->next;
 461
 462                kaddr = kmap_atomic(save->page, KM_USER0);
 463                memcpy(kaddr, save->data, PAGE_SIZE);
 464                kunmap_atomic(kaddr, KM_USER0);
 465                free_page((long) save->data);
 466                kfree(save);
 467        }
 468        return 0;
 469}
 470#endif
 471
 472static int pfn_is_nosave(unsigned long pfn)
 473{
 474        unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
 475        unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
 476        return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
 477}
 478
 479/* if *pagedir_p != NULL it also copies the counted pages */
 480static int count_and_copy_zone(struct zone *zone, struct pbe **pagedir_p)
 481{
 482        unsigned long zone_pfn, chunk_size, nr_copy_pages = 0;
 483        struct pbe *pbe = *pagedir_p;
 484        for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
 485                struct page *page;
 486                unsigned long pfn = zone_pfn + zone->zone_start_pfn;
 487
 488                if (!(pfn%1000))
 489                        printk(".");
 490                if (!pfn_valid(pfn))
 491                        continue;
 492                page = pfn_to_page(pfn);
 493                BUG_ON(PageReserved(page) && PageNosave(page));
 494                if (PageNosave(page))
 495                        continue;
 496                if (PageReserved(page) && pfn_is_nosave(pfn)) {
 497                        PRINTK("[nosave pfn 0x%lx]", pfn);
 498                        continue;
 499                }
 500                if ((chunk_size = is_head_of_free_region(page))) {
 501                        pfn += chunk_size - 1;
 502                        zone_pfn += chunk_size - 1;
 503                        continue;
 504                }
 505                nr_copy_pages++;
 506                if (!pbe)
 507                        continue;
 508                pbe->orig_address = (long) page_address(page);
 509                /* Copy page is dangerous: it likes to mess with
 510                   preempt count on specific cpus. Wrong preempt count is then copied,
 511                   oops. */
 512                copy_page((void *)pbe->address, (void *)pbe->orig_address);
 513                pbe++;
 514        }
 515        *pagedir_p = pbe;
 516        return nr_copy_pages;
 517}
 518
 519static int count_and_copy_data_pages(struct pbe *pagedir_p)
 520{
 521        int nr_copy_pages = 0;
 522        struct zone *zone;
 523        for_each_zone(zone) {
 524                if (!is_highmem(zone))
 525                        nr_copy_pages += count_and_copy_zone(zone, &pagedir_p);
 526        }
 527        return nr_copy_pages;
 528}
 529
 530static void free_suspend_pagedir_zone(struct zone *zone, unsigned long pagedir)
 531{
 532        unsigned long zone_pfn, pagedir_end, pagedir_pfn, pagedir_end_pfn;
 533        pagedir_end = pagedir + (PAGE_SIZE << pagedir_order);
 534        pagedir_pfn = __pa(pagedir) >> PAGE_SHIFT;
 535        pagedir_end_pfn = __pa(pagedir_end) >> PAGE_SHIFT;
 536        for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
 537                struct page *page;
 538                unsigned long pfn = zone_pfn + zone->zone_start_pfn;
 539                if (!pfn_valid(pfn))
 540                        continue;
 541                page = pfn_to_page(pfn);
 542                if (!TestClearPageNosave(page))
 543                        continue;
 544                else if (pfn >= pagedir_pfn && pfn < pagedir_end_pfn)
 545                        continue;
 546                __free_page(page);
 547        }
 548}
 549
 550static void free_suspend_pagedir(unsigned long this_pagedir)
 551{
 552        struct zone *zone;
 553        for_each_zone(zone) {
 554                if (!is_highmem(zone))
 555                        free_suspend_pagedir_zone(zone, this_pagedir);
 556        }
 557        free_pages(this_pagedir, pagedir_order);
 558}
 559
 560static suspend_pagedir_t *create_suspend_pagedir(int nr_copy_pages)
 561{
 562        int i;
 563        suspend_pagedir_t *pagedir;
 564        struct pbe *p;
 565        struct page *page;
 566
 567        pagedir_order = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages));
 568
 569        p = pagedir = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD, pagedir_order);
 570        if (!pagedir)
 571                return NULL;
 572
 573        page = virt_to_page(pagedir);
 574        for(i=0; i < 1<<pagedir_order; i++)
 575                SetPageNosave(page++);
 576                
 577        while(nr_copy_pages--) {
 578                p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
 579                if (!p->address) {
 580                        free_suspend_pagedir((unsigned long) pagedir);
 581                        return NULL;
 582                }
 583                SetPageNosave(virt_to_page(p->address));
 584                p->orig_address = 0;
 585                p++;
 586        }
 587        return pagedir;
 588}
 589
 590static int prepare_suspend_processes(void)
 591{
 592        sys_sync();     /* Syncing needs pdflushd, so do it before stopping processes */
 593        if (freeze_processes()) {
 594                printk( KERN_ERR "Suspend failed: Not all processes stopped!\n" );
 595                thaw_processes();
 596                return 1;
 597        }
 598        return 0;
 599}
 600
 601/*
 602 * Try to free as much memory as possible, but do not OOM-kill anyone
 603 *
 604 * Notice: all userland should be stopped at this point, or livelock is possible.
 605 */
 606static void free_some_memory(void)
 607{
 608        printk("Freeing memory: ");
 609        while (shrink_all_memory(10000))
 610                printk(".");
 611        printk("|\n");
 612}
 613
 614static int suspend_prepare_image(void)
 615{
 616        struct sysinfo i;
 617        unsigned int nr_needed_pages = 0;
 618
 619        pagedir_nosave = NULL;
 620        printk( "/critical section: ");
 621#ifdef CONFIG_HIGHMEM
 622        printk( "handling highmem" );
 623        if (save_highmem()) {
 624                printk(KERN_CRIT "%sNot enough free pages for highmem\n", name_suspend);
 625                return -ENOMEM;
 626        }
 627        printk(", ");
 628#endif
 629
 630        printk("counting pages to copy" );
 631        drain_local_pages();
 632        nr_copy_pages = count_and_copy_data_pages(NULL);
 633        nr_needed_pages = nr_copy_pages + PAGES_FOR_IO;
 634        
 635        printk(" (pages needed: %d+%d=%d free: %d)\n",nr_copy_pages,PAGES_FOR_IO,nr_needed_pages,nr_free_pages());
 636        if(nr_free_pages() < nr_needed_pages) {
 637                printk(KERN_CRIT "%sCouldn't get enough free pages, on %d pages short\n",
 638                       name_suspend, nr_needed_pages-nr_free_pages());
 639                root_swap = 0xFFFF;
 640                return -ENOMEM;
 641        }
 642        si_swapinfo(&i);        /* FIXME: si_swapinfo(&i) returns all swap devices information.
 643                                   We should only consider resume_device. */
 644        if (i.freeswap < nr_needed_pages)  {
 645                printk(KERN_CRIT "%sThere's not enough swap space available, on %ld pages short\n",
 646                       name_suspend, nr_needed_pages-i.freeswap);
 647                return -ENOSPC;
 648        }
 649
 650        PRINTK( "Alloc pagedir\n" ); 
 651        pagedir_save = pagedir_nosave = create_suspend_pagedir(nr_copy_pages);
 652        if (!pagedir_nosave) {
 653                /* Pagedir is big, one-chunk allocation. It is easily possible for this allocation to fail */
 654                printk(KERN_CRIT "%sCouldn't allocate continuous pagedir\n", name_suspend);
 655                return -ENOMEM;
 656        }
 657        nr_copy_pages_check = nr_copy_pages;
 658        pagedir_order_check = pagedir_order;
 659
 660        drain_local_pages();    /* During allocating of suspend pagedir, new cold pages may appear. Kill them */
 661        if (nr_copy_pages != count_and_copy_data_pages(pagedir_nosave)) /* copy */
 662                BUG();
 663
 664        /*
 665         * End of critical section. From now on, we can write to memory,
 666         * but we should not touch disk. This specially means we must _not_
 667         * touch swap space! Except we must write out our image of course.
 668         */
 669
 670        printk( "critical section/: done (%d pages copied)\n", nr_copy_pages );
 671        return 0;
 672}
 673
 674static void suspend_save_image(void)
 675{
 676        device_resume();
 677
 678        lock_swapdevices();
 679        write_suspend_image();
 680        lock_swapdevices();     /* This will unlock ignored swap devices since writing is finished */
 681
 682        /* It is important _NOT_ to umount filesystems at this point. We want
 683         * them synced (in case something goes wrong) but we DO not want to mark
 684         * filesystem clean: it is not. (And it does not matter, if we resume
 685         * correctly, we'll mark system clean, anyway.)
 686         */
 687}
 688
 689static void suspend_power_down(void)
 690{
 691        extern int C_A_D;
 692        C_A_D = 0;
 693        printk(KERN_EMERG "%s%s Trying to power down.\n", name_suspend, TEST_SWSUSP ? "Disable TEST_SWSUSP. NOT ": "");
 694#ifdef CONFIG_VT
 695        PRINTK(KERN_EMERG "shift_state: %04x\n", shift_state);
 696        mdelay(1000);
 697        if (TEST_SWSUSP ^ (!!(shift_state & (1 << KG_CTRL))))
 698                machine_restart(NULL);
 699        else
 700#endif
 701        {
 702                device_suspend(3);
 703                device_shutdown();
 704                machine_power_off();
 705        }
 706
 707        printk(KERN_EMERG "%sProbably not capable for powerdown. System halted.\n", name_suspend);
 708        machine_halt();
 709        while (1);
 710        /* NOTREACHED */
 711}
 712
 713/*
 714 * Magic happens here
 715 */
 716
 717asmlinkage void do_magic_resume_1(void)
 718{
 719        barrier();
 720        mb();
 721        spin_lock_irq(&suspend_pagedir_lock);   /* Done to disable interrupts */ 
 722
 723        device_power_down(3);
 724        PRINTK( "Waiting for DMAs to settle down...\n");
 725        mdelay(1000);   /* We do not want some readahead with DMA to corrupt our memory, right?
 726                           Do it with disabled interrupts for best effect. That way, if some
 727                           driver scheduled DMA, we have good chance for DMA to finish ;-). */
 728}
 729
 730asmlinkage void do_magic_resume_2(void)
 731{
 732        BUG_ON (nr_copy_pages_check != nr_copy_pages);
 733        BUG_ON (pagedir_order_check != pagedir_order);
 734
 735        __flush_tlb_global();           /* Even mappings of "global" things (vmalloc) need to be fixed */
 736
 737        PRINTK( "Freeing prev allocated pagedir\n" );
 738        free_suspend_pagedir((unsigned long) pagedir_save);
 739
 740#ifdef CONFIG_HIGHMEM
 741        printk( "Restoring highmem\n" );
 742        restore_highmem();
 743#endif
 744        printk("done, devices\n");
 745
 746        device_power_up();
 747        spin_unlock_irq(&suspend_pagedir_lock);
 748        device_resume();
 749
 750        /* Fixme: this is too late; we should do this ASAP to avoid "infinite reboots" problem */
 751        PRINTK( "Fixing swap signatures... " );
 752        mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME);
 753        PRINTK( "ok\n" );
 754
 755#ifdef SUSPEND_CONSOLE
 756        acquire_console_sem();
 757        update_screen(fg_console);
 758        release_console_sem();
 759#endif
 760}
 761
 762/* do_magic() is implemented in arch/?/kernel/suspend_asm.S, and basically does:
 763
 764        if (!resume) {
 765                do_magic_suspend_1();
 766                save_processor_state();
 767                SAVE_REGISTERS
 768                do_magic_suspend_2();
 769                return;
 770        }
 771        GO_TO_SWAPPER_PAGE_TABLES
 772        do_magic_resume_1();
 773        COPY_PAGES_BACK
 774        RESTORE_REGISTERS
 775        restore_processor_state();
 776        do_magic_resume_2();
 777
 778 */
 779
 780asmlinkage void do_magic_suspend_1(void)
 781{
 782        mb();
 783        barrier();
 784        BUG_ON(in_atomic());
 785        spin_lock_irq(&suspend_pagedir_lock);
 786}
 787
 788asmlinkage void do_magic_suspend_2(void)
 789{
 790        int is_problem;
 791        read_swapfiles();
 792        device_power_down(3);
 793        is_problem = suspend_prepare_image();
 794        device_power_up();
 795        spin_unlock_irq(&suspend_pagedir_lock);
 796        if (!is_problem) {
 797                kernel_fpu_end();       /* save_processor_state() does kernel_fpu_begin, and we need to revert it in order to pass in_atomic() checks */
 798                BUG_ON(in_atomic());
 799                suspend_save_image();
 800                suspend_power_down();   /* FIXME: if suspend_power_down is commented out, console is lost after few suspends ?! */
 801        }
 802
 803        printk(KERN_EMERG "%sSuspend failed, trying to recover...\n", name_suspend);
 804        MDELAY(1000); /* So user can wait and report us messages if armageddon comes :-) */
 805
 806        barrier();
 807        mb();
 808        spin_lock_irq(&suspend_pagedir_lock);   /* Done to disable interrupts */ 
 809
 810        free_pages((unsigned long) pagedir_nosave, pagedir_order);
 811        spin_unlock_irq(&suspend_pagedir_lock);
 812
 813        device_resume();
 814        PRINTK( "Fixing swap signatures... " );
 815        mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME);
 816        PRINTK( "ok\n" );
 817}
 818
 819/*
 820 * This is main interface to the outside world. It needs to be
 821 * called from process context.
 822 */
 823int software_suspend(void)
 824{
 825        int res;
 826        if (!software_suspend_enabled)
 827                return -EAGAIN;
 828
 829        software_suspend_enabled = 0;
 830        might_sleep();
 831
 832        if (arch_prepare_suspend()) {
 833                printk("%sArchitecture failed to prepare\n", name_suspend);
 834                return -EPERM;
 835        }               
 836        if (pm_prepare_console())
 837                printk( "%sCan't allocate a console... proceeding\n", name_suspend);
 838        if (!prepare_suspend_processes()) {
 839
 840                /* At this point, all user processes and "dangerous"
 841                   kernel threads are stopped. Free some memory, as we
 842                   need half of memory free. */
 843
 844                free_some_memory();
 845                disable_nonboot_cpus();
 846                /* Save state of all device drivers, and stop them. */
 847                printk("Suspending devices... ");
 848                if ((res = device_suspend(3))==0) {
 849                        /* If stopping device drivers worked, we proceed basically into
 850                         * suspend_save_image.
 851                         *
 852                         * do_magic(0) returns after system is resumed.
 853                         *
 854                         * do_magic() copies all "used" memory to "free" memory, then
 855                         * unsuspends all device drivers, and writes memory to disk
 856                         * using normal kernel mechanism.
 857                         */
 858                        do_magic(0);
 859                }
 860                thaw_processes();
 861                enable_nonboot_cpus();
 862        } else
 863                res = -EBUSY;
 864        software_suspend_enabled = 1;
 865        MDELAY(1000);
 866        pm_restore_console();
 867        return res;
 868}
 869
 870/* More restore stuff */
 871
 872#define does_collide(addr) does_collide_order(pagedir_nosave, addr, 0)
 873
 874/*
 875 * Returns true if given address/order collides with any orig_address 
 876 */
 877static int does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr,
 878                int order)
 879{
 880        int i;
 881        unsigned long addre = addr + (PAGE_SIZE<<order);
 882        
 883        for(i=0; i < nr_copy_pages; i++)
 884                if((pagedir+i)->orig_address >= addr &&
 885                        (pagedir+i)->orig_address < addre)
 886                        return 1;
 887
 888        return 0;
 889}
 890
 891/*
 892 * We check here that pagedir & pages it points to won't collide with pages
 893 * where we're going to restore from the loaded pages later
 894 */
 895static int check_pagedir(void)
 896{
 897        int i;
 898
 899        for(i=0; i < nr_copy_pages; i++) {
 900                unsigned long addr;
 901
 902                do {
 903                        addr = get_zeroed_page(GFP_ATOMIC);
 904                        if(!addr)
 905                                return -ENOMEM;
 906                } while (does_collide(addr));
 907
 908                (pagedir_nosave+i)->address = addr;
 909        }
 910        return 0;
 911}
 912
 913static int relocate_pagedir(void)
 914{
 915        /*
 916         * We have to avoid recursion (not to overflow kernel stack),
 917         * and that's why code looks pretty cryptic 
 918         */
 919        suspend_pagedir_t *old_pagedir = pagedir_nosave;
 920        void **eaten_memory = NULL;
 921        void **c = eaten_memory, *m, *f;
 922        int ret = 0;
 923
 924        printk("Relocating pagedir ");
 925
 926        if(!does_collide_order(old_pagedir, (unsigned long)old_pagedir, pagedir_order)) {
 927                printk("not necessary\n");
 928                return 0;
 929        }
 930
 931        while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order)) != NULL) {
 932                if (!does_collide_order(old_pagedir, (unsigned long)m, pagedir_order))
 933                        break;
 934                eaten_memory = m;
 935                printk( "." ); 
 936                *eaten_memory = c;
 937                c = eaten_memory;
 938        }
 939
 940        if (!m) {
 941                printk("out of memory\n");
 942                ret = -ENOMEM;
 943        } else {
 944                pagedir_nosave =
 945                        memcpy(m, old_pagedir, PAGE_SIZE << pagedir_order);
 946        }
 947
 948        c = eaten_memory;
 949        while (c) {
 950                printk(":");
 951                f = c;
 952                c = *c;
 953                free_pages((unsigned long)f, pagedir_order);
 954        }
 955        printk("|\n");
 956        return ret;
 957}
 958
 959/*
 960 * Sanity check if this image makes sense with this kernel/swap context
 961 * I really don't think that it's foolproof but more than nothing..
 962 */
 963
 964static int sanity_check_failed(char *reason)
 965{
 966        printk(KERN_ERR "%s%s\n", name_resume, reason);
 967        return -EPERM;
 968}
 969
 970static int sanity_check(struct suspend_header *sh)
 971{
 972        if (sh->version_code != LINUX_VERSION_CODE)
 973                return sanity_check_failed("Incorrect kernel version");
 974        if (sh->num_physpages != num_physpages)
 975                return sanity_check_failed("Incorrect memory size");
 976        if (strncmp(sh->machine, system_utsname.machine, 8))
 977                return sanity_check_failed("Incorrect machine type");
 978        if (strncmp(sh->version, system_utsname.version, 20))
 979                return sanity_check_failed("Incorrect version");
 980        if (sh->num_cpus != num_online_cpus())
 981                return sanity_check_failed("Incorrect number of cpus");
 982        if (sh->page_size != PAGE_SIZE)
 983                return sanity_check_failed("Incorrect PAGE_SIZE");
 984        return 0;
 985}
 986
 987static int bdev_read_page(struct block_device *bdev, long pos, void *buf)
 988{
 989        struct buffer_head *bh;
 990        BUG_ON (pos%PAGE_SIZE);
 991        bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE);
 992        if (!bh || (!bh->b_data)) {
 993                return -1;
 994        }
 995        memcpy(buf, bh->b_data, PAGE_SIZE);     /* FIXME: may need kmap() */
 996        BUG_ON(!buffer_uptodate(bh));
 997        brelse(bh);
 998        return 0;
 999} 
1000
1001static int bdev_write_page(struct block_device *bdev, long pos, void *buf)
1002{
1003#if 0
1004        struct buffer_head *bh;
1005        BUG_ON (pos%PAGE_SIZE);
1006        bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE);
1007        if (!bh || (!bh->b_data)) {
1008                return -1;
1009        }
1010        memcpy(bh->b_data, buf, PAGE_SIZE);     /* FIXME: may need kmap() */
1011        BUG_ON(!buffer_uptodate(bh));
1012        generic_make_request(WRITE, bh);
1013        if (!buffer_uptodate(bh))
1014                printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unsuccessful...\n", name_resume, resume_file);
1015        wait_on_buffer(bh);
1016        brelse(bh);
1017        return 0;
1018#endif
1019        printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unimplemented...\n", name_resume, resume_file);
1020        return 0;
1021}
1022
1023extern dev_t __init name_to_dev_t(const char *line);
1024
1025static int __init __read_suspend_image(struct block_device *bdev, union diskpage *cur, int noresume)
1026{
1027        swp_entry_t next;
1028        int i, nr_pgdir_pages;
1029
1030#define PREPARENEXT \
1031        {       next = cur->link.next; \
1032                next.val = swp_offset(next) * PAGE_SIZE; \
1033        }
1034
1035        if (bdev_read_page(bdev, 0, cur)) return -EIO;
1036
1037        if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)) ||
1038            (!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) {
1039                printk(KERN_ERR "%sThis is normal swap space\n", name_resume );
1040                return -EINVAL;
1041        }
1042
1043        PREPARENEXT; /* We have to read next position before we overwrite it */
1044
1045        if (!memcmp("S1",cur->swh.magic.magic,2))
1046                memcpy(cur->swh.magic.magic,"SWAP-SPACE",10);
1047        else if (!memcmp("S2",cur->swh.magic.magic,2))
1048                memcpy(cur->swh.magic.magic,"SWAPSPACE2",10);
1049        else {
1050                if (noresume)
1051                        return -EINVAL;
1052                panic("%sUnable to find suspended-data signature (%.10s - misspelled?\n", 
1053                        name_resume, cur->swh.magic.magic);
1054        }
1055        if (noresume) {
1056                /* We don't do a sanity check here: we want to restore the swap
1057                   whatever version of kernel made the suspend image;
1058                   We need to write swap, but swap is *not* enabled so
1059                   we must write the device directly */
1060                printk("%s: Fixing swap signatures %s...\n", name_resume, resume_file);
1061                bdev_write_page(bdev, 0, cur);
1062        }
1063
1064        printk( "%sSignature found, resuming\n", name_resume );
1065        MDELAY(1000);
1066
1067        if (bdev_read_page(bdev, next.val, cur)) return -EIO;
1068        if (sanity_check(&cur->sh))     /* Is this same machine? */     
1069                return -EPERM;
1070        PREPARENEXT;
1071
1072        pagedir_save = cur->sh.suspend_pagedir;
1073        nr_copy_pages = cur->sh.num_pbes;
1074        nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages);
1075        pagedir_order = get_bitmask_order(nr_pgdir_pages);
1076
1077        pagedir_nosave = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC, pagedir_order);
1078        if (!pagedir_nosave)
1079                return -ENOMEM;
1080
1081        PRINTK( "%sReading pagedir, ", name_resume );
1082
1083        /* We get pages in reverse order of saving! */
1084        for (i=nr_pgdir_pages-1; i>=0; i--) {
1085                BUG_ON (!next.val);
1086                cur = (union diskpage *)((char *) pagedir_nosave)+i;
1087                if (bdev_read_page(bdev, next.val, cur)) return -EIO;
1088                PREPARENEXT;
1089        }
1090        BUG_ON (next.val);
1091
1092        if (relocate_pagedir())
1093                return -ENOMEM;
1094        if (check_pagedir())
1095                return -ENOMEM;
1096
1097        printk( "Reading image data (%d pages): ", nr_copy_pages );
1098        for(i=0; i < nr_copy_pages; i++) {
1099                swp_entry_t swap_address = (pagedir_nosave+i)->swap_address;
1100                if (!(i%100))
1101                        printk( "." );
1102                /* You do not need to check for overlaps...
1103                   ... check_pagedir already did this work */
1104                if (bdev_read_page(bdev, swp_offset(swap_address) * PAGE_SIZE, (char *)((pagedir_nosave+i)->address)))
1105                        return -EIO;
1106        }
1107        printk( "|\n" );
1108        return 0;
1109}
1110
1111static int __init read_suspend_image(const char * specialfile, int noresume)
1112{
1113        union diskpage *cur;
1114        unsigned long scratch_page = 0;
1115        int error;
1116        char b[BDEVNAME_SIZE];
1117
1118        resume_device = name_to_dev_t(specialfile);
1119        scratch_page = get_zeroed_page(GFP_ATOMIC);
1120        cur = (void *) scratch_page;
1121        if (cur) {
1122                struct block_device *bdev;
1123                printk("Resuming from device %s\n",
1124                                __bdevname(resume_device, b));
1125                bdev = open_by_devnum(resume_device, FMODE_READ);
1126                if (IS_ERR(bdev)) {
1127                        error = PTR_ERR(bdev);
1128                } else {
1129                        set_blocksize(bdev, PAGE_SIZE);
1130                        error = __read_suspend_image(bdev, cur, noresume);
1131                        blkdev_put(bdev);
1132                }
1133        } else error = -ENOMEM;
1134
1135        if (scratch_page)
1136                free_page(scratch_page);
1137        switch (error) {
1138                case 0:
1139                        PRINTK("Reading resume file was successful\n");
1140                        break;
1141                case -EINVAL:
1142                        break;
1143                case -EIO:
1144                        printk( "%sI/O error\n", name_resume);
1145                        break;
1146                case -ENOENT:
1147                        printk( "%s%s: No such file or directory\n", name_resume, specialfile);
1148                        break;
1149                case -ENOMEM:
1150                        printk( "%sNot enough memory\n", name_resume);
1151                        break;
1152                default:
1153                        printk( "%sError %d resuming\n", name_resume, error );
1154        }
1155        MDELAY(1000);
1156        return error;
1157}
1158
1159/**
1160 *      software_resume - Resume from a saved image.
1161 *
1162 *      Called as a late_initcall (so all devices are discovered and 
1163 *      initialized), we call swsusp to see if we have a saved image or not.
1164 *      If so, we quiesce devices, then restore the saved image. We will 
1165 *      return above (in pm_suspend_disk() ) if everything goes well. 
1166 *      Otherwise, we fail gracefully and return to the normally 
1167 *      scheduled program.
1168 *
1169 */
1170static int __init software_resume(void)
1171{
1172        if (num_online_cpus() > 1) {
1173                printk(KERN_WARNING "Software Suspend has malfunctioning SMP support. Disabled :(\n");  
1174                return -EINVAL;
1175        }
1176        /* We enable the possibility of machine suspend */
1177        software_suspend_enabled = 1;
1178        if (!resume_status)
1179                return 0;
1180
1181        printk( "%s", name_resume );
1182        if (resume_status == NORESUME) {
1183                if(resume_file[0])
1184                        read_suspend_image(resume_file, 1);
1185                printk( "disabled\n" );
1186                return 0;
1187        }
1188        MDELAY(1000);
1189
1190        if (pm_prepare_console())
1191                printk("swsusp: Can't allocate a console... proceeding\n");
1192
1193        if (!resume_file[0] && resume_status == RESUME_SPECIFIED) {
1194                printk( "suspension device unspecified\n" );
1195                return -EINVAL;
1196        }
1197
1198        printk( "resuming from %s\n", resume_file);
1199        if (read_suspend_image(resume_file, 0))
1200                goto read_failure;
1201        /* FIXME: Should we stop processes here, just to be safer? */
1202        disable_nonboot_cpus();
1203        device_suspend(3);
1204        do_magic(1);
1205        panic("This never returns");
1206
1207read_failure:
1208        pm_restore_console();
1209        return 0;
1210}
1211
1212late_initcall(software_resume);
1213
1214static int __init resume_setup(char *str)
1215{
1216        if (resume_status == NORESUME)
1217                return 1;
1218
1219        strncpy( resume_file, str, 255 );
1220        resume_status = RESUME_SPECIFIED;
1221
1222        return 1;
1223}
1224
1225static int __init noresume_setup(char *str)
1226{
1227        resume_status = NORESUME;
1228        return 1;
1229}
1230
1231__setup("noresume", noresume_setup);
1232__setup("resume=", resume_setup);
1233
1234EXPORT_SYMBOL(software_suspend);
1235EXPORT_SYMBOL(software_suspend_enabled);
1236
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.