linux-bk/kernel/suspend.c
<<
>>
Prefs
   1/*
   2 * linux/kernel/suspend.c
   3 *
    4 * This file implements an architecture-independent machine suspend
    5 * feature using almost exclusively high-level routines
   6 *
   7 * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
   8 * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
   9 *
  10 * I'd like to thank the following people for their work:
  11 * 
  12 * Pavel Machek <pavel@ucw.cz>:
  13 * Modifications, defectiveness pointing, being with me at the very beginning,
  14 * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
  15 *
  16 * Steve Doddi <dirk@loth.demon.co.uk>: 
  17 * Support the possibility of hardware state restoring.
  18 *
  19 * Raph <grey.havens@earthling.net>:
  20 * Support for preserving states of network devices and virtual console
  21 * (including X and svgatextmode)
  22 *
  23 * Kurt Garloff <garloff@suse.de>:
  24 * Straightened the critical function in order to prevent compilers from
  25 * playing tricks with local variables.
  26 *
  27 * Andreas Mohr <a.mohr@mailto.de>
  28 *
  29 * Alex Badea <vampire@go.ro>:
  30 * Fixed runaway init
  31 *
  32 * More state savers are welcome. Especially for the scsi layer...
  33 *
  34 * For TODOs,FIXMEs also look in Documentation/swsusp.txt
  35 */
  36
  37#include <linux/module.h>
  38#include <linux/mm.h>
  39#include <linux/suspend.h>
  40#include <linux/smp_lock.h>
  41#include <linux/file.h>
  42#include <linux/utsname.h>
  43#include <linux/version.h>
  44#include <linux/delay.h>
  45#include <linux/reboot.h>
  46#include <linux/vt_kern.h>
  47#include <linux/bitops.h>
  48#include <linux/interrupt.h>
  49#include <linux/kbd_kern.h>
  50#include <linux/keyboard.h>
  51#include <linux/spinlock.h>
  52#include <linux/genhd.h>
  53#include <linux/kernel.h>
  54#include <linux/major.h>
  55#include <linux/blk.h>
  56#include <linux/swap.h>
  57#include <linux/pm.h>
  58#include <linux/device.h>
  59#include <linux/buffer_head.h>
  60
  61#include <asm/uaccess.h>
  62#include <asm/mmu_context.h>
  63#include <asm/pgtable.h>
  64#include <asm/io.h>
  65#include <linux/swapops.h>
  66
  67extern void signal_wake_up(struct task_struct *t);
  68extern int sys_sync(void);
  69
  70unsigned char software_suspend_enabled = 0;
  71
  72#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
   73/* With SUSPEND_CONSOLE defined, suspend looks *really* cool, but
   74   we probably do not take enough locks for switching consoles, etc.,
   75   so bad things might happen.
   76*/
  77#if !defined(CONFIG_VT) || !defined(CONFIG_VT_CONSOLE)
  78#undef SUSPEND_CONSOLE
  79#endif
  80
  81#define TIMEOUT (6 * HZ)                        /* Timeout for stopping processes */
  82#define ADDRESS(x) ((unsigned long) phys_to_virt(((x) << PAGE_SHIFT)))
  83
  84extern void wakeup_bdflush(void);
  85extern int C_A_D;
  86
  87/* References to section boundaries */
  88extern char _text, _etext, _edata, __bss_start, _end;
  89extern char __nosave_begin, __nosave_end;
  90
  91extern int console_loglevel;
  92extern int is_head_of_free_region(struct page *);
  93
  94/* Locks */
  95spinlock_t suspend_pagedir_lock __nosavedata = SPIN_LOCK_UNLOCKED;
  96
  97/* Variables to be preserved over suspend */
  98static int new_loglevel = 7;
  99static int orig_loglevel = 0;
 100static int orig_fgconsole, orig_kmsg;
 101static int pagedir_order_check;
 102static int nr_copy_pages_check;
 103
 104static int resume_status = 0;
 105static char resume_file[256] = "";                      /* For resume= kernel option */
 106static kdev_t resume_device;
 107/* Local variables that should not be affected by save */
 108unsigned int nr_copy_pages __nosavedata = 0;
 109
 110static int pm_suspend_state = 0;
 111
 112/* Suspend pagedir is allocated before final copy, therefore it
 113   must be freed after resume 
 114
 115   Warning: this is evil. There are actually two pagedirs at time of
 116   resume. One is "pagedir_save", which is empty frame allocated at
 117   time of suspend, that must be freed. Second is "pagedir_nosave", 
 118   allocated at time of resume, that travels through memory not to
 119   collide with anything.
 120 */
 121suspend_pagedir_t *pagedir_nosave __nosavedata = NULL;
 122static suspend_pagedir_t *pagedir_save;
 123static int pagedir_order __nosavedata = 0;
 124
  /*
   * View of a swap page as "payload followed by a swap entry": dummy covers
   * the first PAGE_SIZE - sizeof(swp_entry_t) bytes and next follows it.
   * write_suspend_image() stores the entry of the previously written page in
   * next, and mark_swapfiles() stores the entry of the header page there.
   */
  125struct link {
  126        char dummy[PAGE_SIZE - sizeof(swp_entry_t)];
  127        swp_entry_t next;
  128};
 129
  /*
   * The three ways this file interprets one page read from / written to swap:
   *   swh  - the swap header of the device (its magic is rewritten by
   *          mark_swapfiles()),
   *   link - payload plus trailing swap entry (see struct link),
   *   sh   - the suspend header filled in by fill_suspend_header().
   * write_suspend_image() BUG()s unless sizeof(union diskpage) == PAGE_SIZE.
   */
  130union diskpage {
  131        union swap_header swh;
  132        struct link link;
  133        struct suspend_header sh;
  134};
 135
 136/*
 137 * XXX: We try to keep some more pages free so that I/O operations succeed
 138 * without paging. Might this be more?
 139 */
 140#define PAGES_FOR_IO    512
 141
 142static const char name_suspend[] = "Suspend Machine: ";
 143static const char name_resume[] = "Resume Machine: ";
 144
 145/*
 146 * Debug
 147 */
 148#undef  DEBUG_DEFAULT
 149#undef  DEBUG_PROCESS
 150#undef  DEBUG_SLOW
 151#define TEST_SWSUSP 1           /* Set to 1 to reboot instead of halt machine after suspension */
 152
 153#ifdef DEBUG_DEFAULT
 154# define PRINTK(f, a...)       printk(f, ## a)
 155#else
 156# define PRINTK(f, a...)
 157#endif
 158
 159#ifdef DEBUG_SLOW
 160#define MDELAY(a) mdelay(a)
 161#else
 162#define MDELAY(a)
 163#endif
 164
 165/*
 166 * Refrigerator and related stuff
 167 */
 168
  /*
   * INTERESTING(p) - filter used inside the task-list walks below.
   *
   * Expands to three bare "if (...) continue;" statements, so it may only be
   * used directly inside a loop body. It skips the task p when p has
   * PF_IOTHREAD set, when p is the current task, or when p is a zombie.
   * Note that p is expanded several times and is not parenthesized.
   */
  169#define INTERESTING(p) \
  170                        /* We don't want to touch kernel_threads..*/ \
  171                        if (p->flags & PF_IOTHREAD) \
  172                                continue; \
  173                        if (p == current) \
  174                                continue; \
  175                        if (p->state == TASK_ZOMBIE) \
  176                                continue;
 177
 178/* Refrigerator is place where frozen processes are stored :-). */
 179void refrigerator(unsigned long flag)
 180{
 181        /* You need correct to work with real-time processes.
 182           OTOH, this way one process may see (via /proc/) some other
 183           process in stopped state (and thereby discovered we were
 184           suspended. We probably do not care. 
 185         */
 186        long save;
 187        save = current->state;
 188        current->state = TASK_STOPPED;
 189        PRINTK("%s entered refrigerator\n", current->comm);
 190        printk("=");
 191        current->flags &= ~PF_FREEZE;
 192        if (flag)
 193                flush_signals(current); /* We have signaled a kernel thread, which isn't normal behaviour
 194                                           and that may lead to 100%CPU sucking because those threads
 195                                           just don't manage signals. */
 196        current->flags |= PF_FROZEN;
 197        while (current->flags & PF_FROZEN)
 198                schedule();
 199        PRINTK("%s left refrigerator\n", current->comm);
 200        current->state = save;
 201}
 202
 203/* 0 = success, else # of processes that we failed to stop */
 204int freeze_processes(void)
 205{
 206        int todo, start_time;
 207        struct task_struct *g, *p;
 208        
 209        printk( "Stopping tasks: " );
 210        start_time = jiffies;
 211        do {
 212                todo = 0;
 213                read_lock(&tasklist_lock);
 214                do_each_thread(g, p) {
 215                        unsigned long flags;
 216                        INTERESTING(p);
 217                        if (p->flags & PF_FROZEN)
 218                                continue;
 219
 220                        /* FIXME: smp problem here: we may not access other process' flags
 221                           without locking */
 222                        p->flags |= PF_FREEZE;
 223                        spin_lock_irqsave(&p->sigmask_lock, flags);
 224                        signal_wake_up(p);
 225                        spin_unlock_irqrestore(&p->sigmask_lock, flags);
 226                        todo++;
 227                } while_each_thread(g, p);
 228                read_unlock(&tasklist_lock);
 229                yield();
 230                if (time_after(jiffies, start_time + TIMEOUT)) {
 231                        printk( "\n" );
 232                        printk(KERN_ERR " stopping tasks failed (%d tasks remaining)\n", todo );
 233                        return todo;
 234                }
 235        } while(todo);
 236        
 237        printk( "|\n" );
 238        return 0;
 239}
 240
 241void thaw_processes(void)
 242{
 243        struct task_struct *g, *p;
 244
 245        printk( "Restarting tasks..." );
 246        read_lock(&tasklist_lock);
 247        do_each_thread(g, p) {
 248                INTERESTING(p);
 249                
 250                if (p->flags & PF_FROZEN) p->flags &= ~PF_FROZEN;
 251                else
 252                        printk(KERN_INFO " Strange, %s not stopped\n", p->comm );
 253                wake_up_process(p);
 254        } while_each_thread(g, p);
 255
 256        read_unlock(&tasklist_lock);
 257        printk( " done\n" );
 258        MDELAY(500);
 259}
 260
 261/*
 262 * Saving part...
 263 */
 264
 265static __inline__ int fill_suspend_header(struct suspend_header *sh)
 266{
 267        memset((char *)sh, 0, sizeof(*sh));
 268
 269        sh->version_code = LINUX_VERSION_CODE;
 270        sh->num_physpages = num_physpages;
 271        strncpy(sh->machine, system_utsname.machine, 8);
 272        strncpy(sh->version, system_utsname.version, 20);
 273        /* FIXME: Is this bogus? --RR */
 274        sh->num_cpus = num_online_cpus();
 275        sh->page_size = PAGE_SIZE;
 276        sh->suspend_pagedir = pagedir_nosave;
 277        BUG_ON (pagedir_save != pagedir_nosave);
 278        sh->num_pbes = nr_copy_pages;
 279        /* TODO: needed? mounted fs' last mounted date comparison
 280         * [so they haven't been mounted since last suspend.
 281         * Maybe it isn't.] [we'd need to do this for _all_ fs-es]
 282         */
 283        return 0;
 284}
 285
 286/*
 287 * This is our sync function. With this solution we probably won't sleep
 288 * but that should not be a problem since tasks are stopped..
 289 */
 290
 291static inline void do_suspend_sync(void)
 292{
 293        blk_run_queues();
 294#warning This might be broken. We need to somehow wait for data to reach the disk
 295}
 296
  297/* We memorize in swapfile_used what swap devices are used for suspension */
  298#define SWAPFILE_UNUSED    0
  299#define SWAPFILE_SUSPEND   1    /* This is the suspending device */
  300#define SWAPFILE_IGNORED   2    /* Those are other swap devices ignored for suspension */
  301
  /* One SWAPFILE_* state per swap type; filled in by read_swapfiles() */
  302static unsigned short swapfile_used[MAX_SWAPFILES];
  /* Swap type holding the image; read_swapfiles() resets it to 0xFFFF ("none yet") */
  303static unsigned short root_swap;
  /*
   * Modes for mark_swapfiles(): MARK_SWAP_RESUME puts the normal swap
   * signature back, any other value (MARK_SWAP_SUSPEND) stamps the suspend
   * signature and records the entry of the image header.
   */
  304#define MARK_SWAP_SUSPEND 0
  305#define MARK_SWAP_RESUME 2
 306
 307static void mark_swapfiles(swp_entry_t prev, int mode)
 308{
 309        swp_entry_t entry;
 310        union diskpage *cur;
 311        struct page *page;
 312
 313        page = alloc_page(GFP_ATOMIC);
 314        if (!page)
 315                panic("Out of memory in mark_swapfiles");
 316        cur = page_address(page);
 317        /* XXX: this is dirty hack to get first page of swap file */
 318        entry = swp_entry(root_swap, 0);
 319        rw_swap_page_sync(READ, entry, page);
 320
 321        if (mode == MARK_SWAP_RESUME) {
 322                if (!memcmp("SUSP1R",cur->swh.magic.magic,6))
 323                        memcpy(cur->swh.magic.magic,"SWAP-SPACE",10);
 324                else if (!memcmp("SUSP2R",cur->swh.magic.magic,6))
 325                        memcpy(cur->swh.magic.magic,"SWAPSPACE2",10);
 326                else printk("%sUnable to find suspended-data signature (%.10s - misspelled?\n", 
 327                        name_resume, cur->swh.magic.magic);
 328        } else {
 329                if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)))
 330                        memcpy(cur->swh.magic.magic,"SUSP1R....",10);
 331                else if ((!memcmp("SWAPSPACE2",cur->swh.magic.magic,10)))
 332                        memcpy(cur->swh.magic.magic,"SUSP2R....",10);
 333                else panic("\nSwapspace is not swapspace (%.10s)\n", cur->swh.magic.magic);
 334                cur->link.next = prev; /* prev is the first/last swap page of the resume area */
 335                /* link.next lies *no more* in last 4 bytes of magic */
 336        }
 337        rw_swap_page_sync(WRITE, entry, page);
 338        __free_page(page);
 339}
 340
 341static void read_swapfiles(void) /* This is called before saving image */
 342{
 343        int i, len;
 344        
 345        len=strlen(resume_file);
 346        root_swap = 0xFFFF;
 347        
 348        swap_list_lock();
 349        for(i=0; i<MAX_SWAPFILES; i++) {
 350                if (swap_info[i].flags == 0) {
 351                        swapfile_used[i]=SWAPFILE_UNUSED;
 352                } else {
 353                        if(!len) {
 354                                printk(KERN_WARNING "resume= option should be used to set suspend device" );
 355                                if(root_swap == 0xFFFF) {
 356                                        swapfile_used[i] = SWAPFILE_SUSPEND;
 357                                        root_swap = i;
 358                                } else
 359                                        swapfile_used[i] = SWAPFILE_IGNORED;                              
 360                        } else {
 361                                /* we ignore all swap devices that are not the resume_file */
 362                                if (1) {
 363// FIXME                                if(resume_device == swap_info[i].swap_device) {
 364                                        swapfile_used[i] = SWAPFILE_SUSPEND;
 365                                        root_swap = i;
 366                                } else {
 367#if 0
 368                                        printk( "Resume: device %s (%x != %x) ignored\n", swap_info[i].swap_file->d_name.name, swap_info[i].swap_device, resume_device );                                 
 369#endif
 370                                        swapfile_used[i] = SWAPFILE_IGNORED;
 371                                }
 372                        }
 373                }
 374        }
 375        swap_list_unlock();
 376}
 377
 378static void lock_swapdevices(void) /* This is called after saving image so modification
 379                                      will be lost after resume... and that's what we want. */
 380{
 381        int i;
 382
 383        swap_list_lock();
 384        for(i = 0; i< MAX_SWAPFILES; i++)
 385                if(swapfile_used[i] == SWAPFILE_IGNORED) {
 386                        swap_info[i].flags ^= 0xFF; /* we make the device unusable. A new call to
 387                                                       lock_swapdevices can unlock the devices. */
 388                }
 389        swap_list_unlock();
 390}
 391
 392static int write_suspend_image(void)
 393{
 394        int i;
 395        swp_entry_t entry, prev = { 0 };
 396        int nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages);
 397        union diskpage *cur,  *buffer = (union diskpage *)get_free_page(GFP_ATOMIC);
 398        unsigned long address;
 399        struct page *page;
 400
 401        printk( "Writing data to swap (%d pages): ", nr_copy_pages );
 402        for (i=0; i<nr_copy_pages; i++) {
 403                if (!(i%100))
 404                        printk( "." );
 405                if (!(entry = get_swap_page()).val)
 406                        panic("\nNot enough swapspace when writing data" );
 407                
 408                if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
 409                        panic("\nPage %d: not enough swapspace on suspend device", i );
 410            
 411                address = (pagedir_nosave+i)->address;
 412                page = virt_to_page(address);
 413                rw_swap_page_sync(WRITE, entry, page);
 414                (pagedir_nosave+i)->swap_address = entry;
 415        }
 416        printk( "|\n" );
 417        printk( "Writing pagedir (%d pages): ", nr_pgdir_pages);
 418        for (i=0; i<nr_pgdir_pages; i++) {
 419                cur = (union diskpage *)((char *) pagedir_nosave)+i;
 420                BUG_ON ((char *) cur != (((char *) pagedir_nosave) + i*PAGE_SIZE));
 421                printk( "." );
 422                if (!(entry = get_swap_page()).val) {
 423                        printk(KERN_CRIT "Not enough swapspace when writing pgdir\n" );
 424                        panic("Don't know how to recover");
 425                        free_page((unsigned long) buffer);
 426                        return -ENOSPC;
 427                }
 428
 429                if(swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
 430                        panic("\nNot enough swapspace for pagedir on suspend device" );
 431
 432                BUG_ON (sizeof(swp_entry_t) != sizeof(long));
 433                BUG_ON (PAGE_SIZE % sizeof(struct pbe));
 434
 435                cur->link.next = prev;                          
 436                page = virt_to_page((unsigned long)cur);
 437                rw_swap_page_sync(WRITE, entry, page);
 438                prev = entry;
 439        }
 440        printk("H");
 441        BUG_ON (sizeof(struct suspend_header) > PAGE_SIZE-sizeof(swp_entry_t));
 442        BUG_ON (sizeof(union diskpage) != PAGE_SIZE);
 443        if (!(entry = get_swap_page()).val)
 444                panic( "\nNot enough swapspace when writing header" );
 445        if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
 446                panic("\nNot enough swapspace for header on suspend device" );
 447
 448        cur = (void *) buffer;
 449        if (fill_suspend_header(&cur->sh))
 450                panic("\nOut of memory while writing header");
 451                
 452        cur->link.next = prev;
 453
 454        page = virt_to_page((unsigned long)cur);
 455        rw_swap_page_sync(WRITE, entry, page);
 456        prev = entry;
 457
 458        printk( "S" );
 459        mark_swapfiles(prev, MARK_SWAP_SUSPEND);
 460        printk( "|\n" );
 461
 462        MDELAY(1000);
 463        free_page((unsigned long) buffer);
 464        return 0;
 465}
 466
 467/* if pagedir_p != NULL it also copies the counted pages */
 468static int count_and_copy_data_pages(struct pbe *pagedir_p)
 469{
 470        int chunk_size;
 471        int nr_copy_pages = 0;
 472        int pfn;
 473        struct page *page;
 474
 475#ifndef CONFIG_DISCONTIGMEM     
 476        if (max_mapnr != num_physpages)
 477                panic("mapnr is not expected");
 478#endif
 479        for (pfn = 0; pfn < num_physpages; pfn++) {
 480                page = pfn_to_page(pfn);
 481                if (PageHighMem(page))
 482                        panic("Swsusp not supported on highmem boxes. Send 1GB of RAM to <pavel@ucw.cz> and try again ;-).");
 483                if (!PageReserved(page)) {
 484                        if (PageNosave(page))
 485                                continue;
 486
 487                        if ((chunk_size=is_head_of_free_region(page))!=0) {
 488                                pfn += chunk_size - 1;
 489                                continue;
 490                        }
 491                } else if (PageReserved(page)) {
 492                        BUG_ON (PageNosave(page));
 493
 494                        /*
 495                         * Just copy whole code segment. Hopefully it is not that big.
 496                         */
 497                        if (ADDRESS(pfn) >= (unsigned long)
 498                                &__nosave_begin && ADDRESS(pfn) < 
 499                                (unsigned long)&__nosave_end) {
 500                                PRINTK("[nosave %x]", ADDRESS(pfn));
 501                                continue;
 502                        }
 503                        /* Hmm, perhaps copying all reserved pages is not too healthy as they may contain 
 504                           critical bios data? */
 505                } else  BUG();
 506
 507                nr_copy_pages++;
 508                if (pagedir_p) {
 509                        pagedir_p->orig_address = ADDRESS(pfn);
 510                        copy_page(pagedir_p->address, pagedir_p->orig_address);
 511                        pagedir_p++;
 512                }
 513        }
 514        return nr_copy_pages;
 515}
 516
 517static void free_suspend_pagedir(unsigned long this_pagedir)
 518{
 519        struct page *page;
 520        int pfn;
 521        unsigned long this_pagedir_end = this_pagedir +
 522                (PAGE_SIZE << pagedir_order);
 523
 524        for(pfn = 0; pfn < num_physpages; pfn++) {
 525                page = pfn_to_page(pfn);
 526                if (!TestClearPageNosave(page))
 527                        continue;
 528
 529                if (ADDRESS(pfn) >= this_pagedir && ADDRESS(pfn) < this_pagedir_end)
 530                        continue; /* old pagedir gets freed in one */
 531                
 532                free_page(ADDRESS(pfn));
 533        }
 534        free_pages(this_pagedir, pagedir_order);
 535}
 536
 537static suspend_pagedir_t *create_suspend_pagedir(int nr_copy_pages)
 538{
 539        int i;
 540        suspend_pagedir_t *pagedir;
 541        struct pbe *p;
 542        struct page *page;
 543
 544        pagedir_order = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages));
 545
 546        p = pagedir = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC, pagedir_order);
 547        if(!pagedir)
 548                return NULL;
 549
 550        page = virt_to_page(pagedir);
 551        for(i=0; i < 1<<pagedir_order; i++)
 552                SetPageNosave(page++);
 553                
 554        while(nr_copy_pages--) {
 555                p->address = get_free_page(GFP_ATOMIC);
 556                if(!p->address) {
 557                        free_suspend_pagedir((unsigned long) pagedir);
 558                        return NULL;
 559                }
 560                SetPageNosave(virt_to_page(p->address));
 561                p->orig_address = 0;
 562                p++;
 563        }
 564        return pagedir;
 565}
 566
 567static int prepare_suspend_console(void)
 568{
 569        orig_loglevel = console_loglevel;
 570        console_loglevel = new_loglevel;
 571
 572#ifdef CONFIG_VT
 573        orig_fgconsole = fg_console;
 574#ifdef SUSPEND_CONSOLE
 575        if(vc_allocate(SUSPEND_CONSOLE))
 576          /* we can't have a free VC for now. Too bad,
 577           * we don't want to mess the screen for now. */
 578                return 1;
 579
 580        set_console (SUSPEND_CONSOLE);
 581        if(vt_waitactive(SUSPEND_CONSOLE)) {
 582                PRINTK("Bummer. Can't switch VCs.");
 583                return 1;
 584        }
 585        orig_kmsg = kmsg_redirect;
 586        kmsg_redirect = SUSPEND_CONSOLE;
 587#endif
 588#endif
 589        return 0;
 590}
 591
 592static void restore_console(void)
 593{
 594        console_loglevel = orig_loglevel;
 595#ifdef SUSPEND_CONSOLE
 596        set_console (orig_fgconsole);
 597#endif
 598        return;
 599}
 600
 601static int prepare_suspend_processes(void)
 602{
 603        if (freeze_processes()) {
 604                printk( KERN_ERR "Suspend failed: Not all processes stopped!\n" );
 605                thaw_processes();
 606                return 1;
 607        }
 608        sys_sync();
 609        return 0;
 610}
 611
 612/*
 613 * Try to free as much memory as possible, but do not OOM-kill anyone
 614 *
 615 * Notice: all userland should be stopped at this point, or livelock is possible.
 616 */
 617static void free_some_memory(void)
 618{
 619        printk("Freeing memory: ");
 620        while (try_to_free_pages(&contig_page_data.node_zones[ZONE_HIGHMEM], GFP_KSWAPD, 0))
 621                printk(".");
 622        printk("|\n");
 623}
 624
 625/* Make disk drivers accept operations, again */
 626static void drivers_unsuspend(void)
 627{
 628        device_resume(RESUME_RESTORE_STATE);
 629        device_resume(RESUME_ENABLE);
 630}
 631
 632/* Called from process context */
 633static int drivers_suspend(void)
 634{
 635        device_suspend(4, SUSPEND_NOTIFY);
 636        device_suspend(4, SUSPEND_SAVE_STATE);
 637        device_suspend(4, SUSPEND_DISABLE);
 638        if(!pm_suspend_state) {
 639                if(pm_send_all(PM_SUSPEND,(void *)3)) {
 640                        printk(KERN_WARNING "Problem while sending suspend event\n");
 641                        return(1);
 642                }
 643                pm_suspend_state=1;
 644        } else
 645                printk(KERN_WARNING "PM suspend state already raised\n");
 646          
 647        return(0);
 648}
 649
 650#define RESUME_PHASE1 1 /* Called from interrupts disabled */
 651#define RESUME_PHASE2 2 /* Called with interrupts enabled */
 652#define RESUME_ALL_PHASES (RESUME_PHASE1 | RESUME_PHASE2)
 653static void drivers_resume(int flags)
 654{
 655        if (flags & RESUME_PHASE1) {
 656                device_resume(RESUME_RESTORE_STATE);
 657                device_resume(RESUME_ENABLE);
 658        }
 659        if (flags & RESUME_PHASE2) {
 660                if(pm_suspend_state) {
 661                        if(pm_send_all(PM_RESUME,(void *)0))
 662                                printk(KERN_WARNING "Problem while sending resume event\n");
 663                        pm_suspend_state=0;
 664                } else
 665                        printk(KERN_WARNING "PM suspend state wasn't raised\n");
 666
 667#ifdef SUSPEND_CONSOLE
 668                update_screen(fg_console);      /* Hmm, is this the problem? */
 669#endif
 670        }
 671}
 672
 673static int suspend_save_image(void)
 674{
 675        struct sysinfo i;
 676        unsigned int nr_needed_pages = 0;
 677
 678        pagedir_nosave = NULL;
 679        printk( "/critical section: Counting pages to copy" );
 680        nr_copy_pages = count_and_copy_data_pages(NULL);
 681        nr_needed_pages = nr_copy_pages + PAGES_FOR_IO;
 682        
 683        printk(" (pages needed: %d+%d=%d free: %d)\n",nr_copy_pages,PAGES_FOR_IO,nr_needed_pages,nr_free_pages());
 684        if(nr_free_pages() < nr_needed_pages) {
 685                printk(KERN_CRIT "%sCouldn't get enough free pages, on %d pages short\n",
 686                       name_suspend, nr_needed_pages-nr_free_pages());
 687                spin_unlock_irq(&suspend_pagedir_lock);
 688                return 1;
 689        }
 690        si_swapinfo(&i);        /* FIXME: si_swapinfo(&i) returns all swap devices information.
 691                                   We should only consider resume_device. */
 692        if (i.freeswap < nr_needed_pages)  {
 693                printk(KERN_CRIT "%sThere's not enough swap space available, on %ld pages short\n",
 694                       name_suspend, nr_needed_pages-i.freeswap);
 695                spin_unlock_irq(&suspend_pagedir_lock);
 696                return 1;
 697        }
 698
 699        PRINTK( "Alloc pagedir\n" ); 
 700        pagedir_save = pagedir_nosave = create_suspend_pagedir(nr_copy_pages);
 701        if(!pagedir_nosave) {
 702                /* Shouldn't happen */
 703                printk(KERN_CRIT "%sCouldn't allocate enough pages\n",name_suspend);
 704                panic("Really should not happen");
 705                spin_unlock_irq(&suspend_pagedir_lock);
 706                return 1;
 707        }
 708        nr_copy_pages_check = nr_copy_pages;
 709        pagedir_order_check = pagedir_order;
 710
 711        if (nr_copy_pages != count_and_copy_data_pages(pagedir_nosave)) /* copy */
 712                BUG();
 713
 714        /*
 715         * End of critical section. From now on, we can write to memory,
 716         * but we should not touch disk. This specially means we must _not_
 717         * touch swap space! Except we must write out our image of course.
 718         *
 719         * Following line enforces not writing to disk until we choose.
 720         */
 721        drivers_unsuspend();
 722        spin_unlock_irq(&suspend_pagedir_lock);
 723        printk( "critical section/: done (%d pages copied)\n", nr_copy_pages );
 724
 725        lock_swapdevices();
 726        write_suspend_image();
 727        lock_swapdevices();     /* This will unlock ignored swap devices since writing is finished */
 728
 729        /* It is important _NOT_ to umount filesystems at this point. We want
 730         * them synced (in case something goes wrong) but we DO not want to mark
 731         * filesystem clean: it is not. (And it does not matter, if we resume
 732         * correctly, we'll mark system clean, anyway.)
 733         */
 734        return 0;
 735}
 736
 737void suspend_power_down(void)
 738{
 739        C_A_D = 0;
 740        printk(KERN_EMERG "%s%s Trying to power down.\n", name_suspend, TEST_SWSUSP ? "Disable TEST_SWSUSP. NOT ": "");
 741#ifdef CONFIG_VT
 742        PRINTK(KERN_EMERG "shift_state: %04x\n", shift_state);
 743        mdelay(1000);
 744        if (TEST_SWSUSP ^ (!!(shift_state & (1 << KG_CTRL))))
 745                machine_restart(NULL);
 746        else
 747#endif
 748        {
 749                device_shutdown();
 750                machine_power_off();
 751        }
 752
 753        printk(KERN_EMERG "%sProbably not capable for powerdown. System halted.\n", name_suspend);
 754        machine_halt();
 755        while (1);
 756        /* NOTREACHED */
 757}
 758
 759/*
 760 * Magic happens here
 761 */
 762
 763void do_magic_resume_1(void)
 764{
 765        barrier();
 766        mb();
 767        spin_lock_irq(&suspend_pagedir_lock);   /* Done to disable interrupts */ 
 768
 769        PRINTK( "Waiting for DMAs to settle down...\n");
 770        mdelay(1000);   /* We do not want some readahead with DMA to corrupt our memory, right?
 771                           Do it with disabled interrupts for best effect. That way, if some
 772                           driver scheduled DMA, we have good chance for DMA to finish ;-). */
 773}
 774
 775void do_magic_resume_2(void)
 776{
 777        BUG_ON (nr_copy_pages_check != nr_copy_pages);
 778        BUG_ON (pagedir_order_check != pagedir_order);
 779
 780        PRINTK( "Freeing prev allocated pagedir\n" );
 781        free_suspend_pagedir((unsigned long) pagedir_save);
 782        __flush_tlb_global();           /* Even mappings of "global" things (vmalloc) need to be fixed */
 783        drivers_resume(RESUME_ALL_PHASES);
 784        spin_unlock_irq(&suspend_pagedir_lock);
 785
 786        PRINTK( "Fixing swap signatures... " );
 787        mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME);
 788        PRINTK( "ok\n" );
 789
 790#ifdef SUSPEND_CONSOLE
 791        update_screen(fg_console);      /* Hmm, is this the problem? */
 792#endif
 793}
 794
 795void do_magic_suspend_1(void)
 796{
 797        mb();
 798        barrier();
 799        spin_lock_irq(&suspend_pagedir_lock);
 800}
 801
 802void do_magic_suspend_2(void)
 803{
 804        read_swapfiles();
 805        if (!suspend_save_image())
 806                suspend_power_down();   /* FIXME: if suspend_power_down is commented out, console is lost after few suspends ?! */
 807
 808        printk(KERN_EMERG "%sSuspend failed, trying to recover...\n", name_suspend);
 809        MDELAY(1000); /* So user can wait and report us messages if armageddon comes :-) */
 810
 811        barrier();
 812        mb();
 813        drivers_resume(RESUME_PHASE2);
 814        spin_lock_irq(&suspend_pagedir_lock);   /* Done to disable interrupts */ 
 815        mdelay(1000);
 816
 817        free_pages((unsigned long) pagedir_nosave, pagedir_order);
 818        drivers_resume(RESUME_PHASE1);
 819        spin_unlock_irq(&suspend_pagedir_lock);
 820        mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME);
 821        PRINTK(KERN_WARNING "%sLeaving do_magic_suspend_2...\n", name_suspend); 
 822}
 823
 824void do_software_suspend(void)
 825{
 826        arch_prepare_suspend();
 827        if (prepare_suspend_console())
 828                printk( "%sCan't allocate a console... proceeding\n", name_suspend);
 829        if (!prepare_suspend_processes()) {
 830
 831                /* At this point, all user processes and "dangerous"
 832                   kernel threads are stopped. Free some memory, as we
 833                   need half of memory free. */
 834
 835                free_some_memory();
 836                
 837                /* No need to invalidate any vfsmnt list -- they will be valid after resume, anyway.
 838                 *
 839                 * We sync here -- so you have consistent filesystem state when things go wrong.
 840                 * -- so that noone writes to disk after we do atomic copy of data.
 841                 */
 842                PRINTK("Syncing disks before copy\n");
 843                do_suspend_sync();
 844
 845                /* Save state of all device drivers, and stop them. */             
 846                if(drivers_suspend()==0)
 847                        /* If stopping device drivers worked, we proceed basically into
 848                         * suspend_save_image.
 849                         *
 850                         * do_magic(0) returns after system is resumed.
 851                         *
 852                         * do_magic() copies all "used" memory to "free" memory, then
 853                         * unsuspends all device drivers, and writes memory to disk
 854                         * using normal kernel mechanism.
 855                         */
 856                        do_magic(0);
 857                PRINTK("Restarting processes...\n");
 858                thaw_processes();
 859        }
 860        software_suspend_enabled = 1;
 861        MDELAY(1000);
 862        restore_console ();
 863}
 864
 865/*
 866 * This is main interface to the outside world. It needs to be
 867 * called from process context.
 868 */
 869void software_suspend(void)
 870{
 871        if(!software_suspend_enabled)
 872                return;
 873
 874        software_suspend_enabled = 0;
 875        BUG_ON(in_interrupt());
 876        do_software_suspend();
 877}
 878
 879/* More restore stuff */
 880
 881/* FIXME: Why not memcpy(to, from, 1<<pagedir_order*PAGE_SIZE)? */
 882static void copy_pagedir(suspend_pagedir_t *to, suspend_pagedir_t *from)
 883{
 884        int i;
 885        char *topointer=(char *)to, *frompointer=(char *)from;
 886
 887        for(i=0; i < 1 << pagedir_order; i++) {
 888                copy_page(topointer, frompointer);
 889                topointer += PAGE_SIZE;
 890                frompointer += PAGE_SIZE;
 891        }
 892}
 893
 894#define does_collide(addr) does_collide_order(pagedir_nosave, addr, 0)
 895
 896/*
 897 * Returns true if given address/order collides with any orig_address 
 898 */
 899static int does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr,
 900                int order)
 901{
 902        int i;
 903        unsigned long addre = addr + (PAGE_SIZE<<order);
 904        
 905        for(i=0; i < nr_copy_pages; i++)
 906                if((pagedir+i)->orig_address >= addr &&
 907                        (pagedir+i)->orig_address < addre)
 908                        return 1;
 909
 910        return 0;
 911}
 912
 913/*
 914 * We check here that pagedir & pages it points to won't collide with pages
 915 * where we're going to restore from the loaded pages later
 916 */
 917static int check_pagedir(void)
 918{
 919        int i;
 920
 921        for(i=0; i < nr_copy_pages; i++) {
 922                unsigned long addr;
 923
 924                do {
 925                        addr = get_free_page(GFP_ATOMIC);
 926                        if(!addr)
 927                                return -ENOMEM;
 928                } while (does_collide(addr));
 929
 930                (pagedir_nosave+i)->address = addr;
 931        }
 932        return 0;
 933}
 934
 935static int relocate_pagedir(void)
 936{
 937        /*
 938         * We have to avoid recursion (not to overflow kernel stack),
 939         * and that's why code looks pretty cryptic 
 940         */
 941        suspend_pagedir_t *new_pagedir, *old_pagedir = pagedir_nosave;
 942        void **eaten_memory = NULL;
 943        void **c = eaten_memory, *m, *f;
 944
 945        printk("Relocating pagedir");
 946
 947        if(!does_collide_order(old_pagedir, (unsigned long)old_pagedir, pagedir_order)) {
 948                printk("not neccessary\n");
 949                return 0;
 950        }
 951
 952        while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order))) {
 953                memset(m, 0, PAGE_SIZE);
 954                if (!does_collide_order(old_pagedir, (unsigned long)m, pagedir_order))
 955                        break;
 956                eaten_memory = m;
 957                printk( "." ); 
 958                *eaten_memory = c;
 959                c = eaten_memory;
 960        }
 961
 962        if (!m)
 963                return -ENOMEM;
 964
 965        pagedir_nosave = new_pagedir = m;
 966        copy_pagedir(new_pagedir, old_pagedir);
 967
 968        c = eaten_memory;
 969        while(c) {
 970                printk(":");
 971                f = *c;
 972                c = *c;
 973                if (f)
 974                        free_pages((unsigned long)f, pagedir_order);
 975        }
 976        printk("|\n");
 977        return 0;
 978}
 979
 980/*
 981 * Sanity check if this image makes sense with this kernel/swap context
 982 * I really don't think that it's foolproof but more than nothing..
 983 */
 984
 985static int sanity_check_failed(char *reason)
 986{
 987        printk(KERN_ERR "%s%s\n",name_resume,reason);
 988        return -EPERM;
 989}
 990
 991static int sanity_check(struct suspend_header *sh)
 992{
 993        if(sh->version_code != LINUX_VERSION_CODE)
 994                return sanity_check_failed("Incorrect kernel version");
 995        if(sh->num_physpages != num_physpages)
 996                return sanity_check_failed("Incorrect memory size");
 997        if(strncmp(sh->machine, system_utsname.machine, 8))
 998                return sanity_check_failed("Incorrect machine type");
 999        if(strncmp(sh->version, system_utsname.version, 20))
1000                return sanity_check_failed("Incorrect version");
1001        if(sh->num_cpus != num_online_cpus())
1002                return sanity_check_failed("Incorrect number of cpus");
1003        if(sh->page_size != PAGE_SIZE)
1004                return sanity_check_failed("Incorrect PAGE_SIZE");
1005        return 0;
1006}
1007
1008static int bdev_read_page(struct block_device *bdev, long pos, void *buf)
1009{
1010        struct buffer_head *bh;
1011        BUG_ON (pos%PAGE_SIZE);
1012        bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE);
1013        if (!bh || (!bh->b_data)) {
1014                return -1;
1015        }
1016        memcpy(buf, bh->b_data, PAGE_SIZE);     /* FIXME: may need kmap() */
1017        BUG_ON(!buffer_uptodate(bh));
1018        brelse(bh);
1019        return 0;
1020} 
1021
1022static int bdev_write_page(struct block_device *bdev, long pos, void *buf)
1023{
1024#if 0
1025        struct buffer_head *bh;
1026        BUG_ON (pos%PAGE_SIZE);
1027        bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE);
1028        if (!bh || (!bh->b_data)) {
1029                return -1;
1030        }
1031        memcpy(bh->b_data, buf, PAGE_SIZE);     /* FIXME: may need kmap() */
1032        BUG_ON(!buffer_uptodate(bh));
1033        generic_make_request(WRITE, bh);
1034        if (!buffer_uptodate(bh))
1035                printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unsuccessful...\n", name_resume, resume_file);
1036        wait_on_buffer(bh);
1037        brelse(bh);
1038        return 0;
1039#endif
1040        printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unimplemented...\n", name_resume, resume_file);
1041}
1042
1043extern kdev_t __init name_to_kdev_t(const char *line);
1044
1045static int __read_suspend_image(struct block_device *bdev, union diskpage *cur, int noresume)
1046{
1047        swp_entry_t next;
1048        int i, nr_pgdir_pages;
1049
1050#define PREPARENEXT \
1051        {       next = cur->link.next; \
1052                next.val = swp_offset(next) * PAGE_SIZE; \
1053        }
1054
1055        if (bdev_read_page(bdev, 0, cur)) return -EIO;
1056
1057        if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)) ||
1058            (!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) {
1059                printk(KERN_ERR "%sThis is normal swap space\n", name_resume );
1060                return -EINVAL;
1061        }
1062
1063        PREPARENEXT; /* We have to read next position before we overwrite it */
1064
1065        if (!memcmp("SUSP1R",cur->swh.magic.magic,6))
1066                memcpy(cur->swh.magic.magic,"SWAP-SPACE",10);
1067        else if (!memcmp("SUSP2R",cur->swh.magic.magic,6))
1068                memcpy(cur->swh.magic.magic,"SWAPSPACE2",10);
1069        else {
1070                panic("%sUnable to find suspended-data signature (%.10s - misspelled?\n", 
1071                        name_resume, cur->swh.magic.magic);
1072                /* We want to panic even with noresume -- we certainly don't want to add
1073                   out signature into your ext2 filesystem ;-) */
1074        }
1075        if(noresume) {
1076                /* We don't do a sanity check here: we want to restore the swap
1077                   whatever version of kernel made the suspend image;
1078                   We need to write swap, but swap is *not* enabled so
1079                   we must write the device directly */
1080                printk("%s: Fixing swap signatures %s...\n", name_resume, resume_file);
1081                bdev_write_page(bdev, 0, cur);
1082        }
1083
1084        if (prepare_suspend_console())
1085                printk("%sCan't allocate a console... proceeding\n", name_resume);
1086        printk( "%sSignature found, resuming\n", name_resume );
1087        MDELAY(1000);
1088
1089        if (bdev_read_page(bdev, next.val, cur)) return -EIO;
1090        if (sanity_check(&cur->sh))     /* Is this same machine? */     
1091                return -EPERM;
1092        PREPARENEXT;
1093
1094        pagedir_save = cur->sh.suspend_pagedir;
1095        nr_copy_pages = cur->sh.num_pbes;
1096        nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages);
1097        pagedir_order = get_bitmask_order(nr_pgdir_pages);
1098
1099        pagedir_nosave = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC, pagedir_order);
1100        if (!pagedir_nosave)
1101                return -ENOMEM;
1102
1103        PRINTK( "%sReading pagedir, ", name_resume );
1104
1105        /* We get pages in reverse order of saving! */
1106        for (i=nr_pgdir_pages-1; i>=0; i--) {
1107                BUG_ON (!next.val);
1108                cur = (union diskpage *)((char *) pagedir_nosave)+i;
1109                if (bdev_read_page(bdev, next.val, cur)) return -EIO;
1110                PREPARENEXT;
1111        }
1112        BUG_ON (next.val);
1113
1114        if (relocate_pagedir())
1115                return -ENOMEM;
1116        if (check_pagedir())
1117                return -ENOMEM;
1118
1119        printk( "Reading image data (%d pages): ", nr_copy_pages );
1120        for(i=0; i < nr_copy_pages; i++) {
1121                swp_entry_t swap_address = (pagedir_nosave+i)->swap_address;
1122                if (!(i%100))
1123                        printk( "." );
1124                /* You do not need to check for overlaps...
1125                   ... check_pagedir already did this work */
1126                if (bdev_read_page(bdev, swp_offset(swap_address) * PAGE_SIZE, (char *)((pagedir_nosave+i)->address)))
1127                        return -EIO;
1128        }
1129        printk( "|\n" );
1130        return 0;
1131}
1132
1133static int read_suspend_image(const char * specialfile, int noresume)
1134{
1135        union diskpage *cur;
1136        unsigned long scratch_page = 0;
1137        int error;
1138
1139        resume_device = name_to_kdev_t(specialfile);
1140        scratch_page = get_free_page(GFP_ATOMIC);
1141        cur = (void *) scratch_page;
1142        if (cur) {
1143                struct block_device *bdev;
1144                printk("Resuming from device %s\n", __bdevname(resume_device));
1145                bdev = bdget(kdev_t_to_nr(resume_device));
1146                if (!bdev) {
1147                        printk("No such block device ?!\n");
1148                        BUG();
1149                }
1150                blkdev_get(bdev, FMODE_READ, O_RDONLY, BDEV_RAW);
1151                set_blocksize(bdev, PAGE_SIZE);
1152                error = __read_suspend_image(bdev, cur, noresume);
1153                blkdev_put(bdev, BDEV_RAW);
1154        } else error = -ENOMEM;
1155
1156        if (scratch_page)
1157                free_page(scratch_page);
1158        switch (error) {
1159                case 0:
1160                        PRINTK("Reading resume file was successful\n");
1161                        break;
1162                case -EINVAL:
1163                        break;
1164                case -EIO:
1165                        printk( "%sI/O error\n", name_resume);
1166                        break;
1167                case -ENOENT:
1168                        printk( "%s%s: No such file or directory\n", name_resume, specialfile);
1169                        break;
1170                case -ENOMEM:
1171                        printk( "%sNot enough memory\n", name_resume);
1172                        break;
1173                default:
1174                        printk( "%sError %d resuming\n", name_resume, error );
1175        }
1176        MDELAY(1000);
1177        return error;
1178}
1179
1180/*
1181 * Called from init kernel_thread.
1182 * We check if we have an image and if so we try to resume
1183 */
1184
1185void software_resume(void)
1186{
1187#ifdef CONFIG_SMP
1188        printk(KERN_WARNING "Software Suspend has a malfunctioning SMP support. Disabled :(\n");
1189#else
1190        /* We enable the possibility of machine suspend */
1191        software_suspend_enabled = 1;
1192#endif
1193        if(!resume_status)
1194                return;
1195
1196        printk( "%s", name_resume );
1197        if(resume_status == NORESUME) {
1198                if(resume_file[0])
1199                        read_suspend_image(resume_file, 1);
1200                printk( "disabled\n" );
1201                return;
1202        }
1203        MDELAY(1000);
1204
1205        orig_loglevel = console_loglevel;
1206        console_loglevel = new_loglevel;
1207
1208        if(!resume_file[0] && resume_status == RESUME_SPECIFIED) {
1209                printk( "suspension device unspecified\n" );
1210                return;
1211        }
1212
1213        printk( "resuming from %s\n", resume_file);
1214        if(read_suspend_image(resume_file, 0))
1215                goto read_failure;
1216        do_magic(1);
1217        panic("This never returns");
1218
1219read_failure:
1220        console_loglevel = orig_loglevel;
1221        return;
1222}
1223
1224static int __init resume_setup(char *str)
1225{
1226        if(resume_status)
1227                return 1;
1228
1229        strncpy( resume_file, str, 255 );
1230        resume_status = RESUME_SPECIFIED;
1231
1232        return 1;
1233}
1234
1235static int __init software_noresume(char *str)
1236{
1237        if(!resume_status)
1238                printk(KERN_WARNING "noresume option lacks a resume= option\n");
1239        resume_status = NORESUME;
1240        
1241        return 1;
1242}
1243
1244__setup("noresume", software_noresume);
1245__setup("resume=", resume_setup);
1246
1247EXPORT_SYMBOL(software_suspend);
1248EXPORT_SYMBOL(software_suspend_enabled);
1249EXPORT_SYMBOL(refrigerator);
1250
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.