linux/kernel/power/snapshot.c
<<
>>
Prefs
   1/*
   2 * linux/kernel/power/snapshot.c
   3 *
   4 * This file provides system snapshot/restore functionality for swsusp.
   5 *
   6 * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz>
   7 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
   8 *
   9 * This file is released under the GPLv2.
  10 *
  11 */
  12
  13#include <linux/version.h>
  14#include <linux/module.h>
  15#include <linux/mm.h>
  16#include <linux/suspend.h>
  17#include <linux/delay.h>
  18#include <linux/bitops.h>
  19#include <linux/spinlock.h>
  20#include <linux/kernel.h>
  21#include <linux/pm.h>
  22#include <linux/device.h>
  23#include <linux/init.h>
  24#include <linux/bootmem.h>
  25#include <linux/syscalls.h>
  26#include <linux/console.h>
  27#include <linux/highmem.h>
  28#include <linux/list.h>
  29
  30#include <asm/uaccess.h>
  31#include <asm/mmu_context.h>
  32#include <asm/pgtable.h>
  33#include <asm/tlbflush.h>
  34#include <asm/io.h>
  35
  36#include "power.h"
  37
  /* Forward declarations of page-marking helpers defined further down in this file. */
  38static int swsusp_page_is_free(struct page *);
  39static void swsusp_set_page_forbidden(struct page *);
  40static void swsusp_unset_page_forbidden(struct page *);
  41
  42/* List of PBEs needed for restoring the pages that were allocated before
  43 * the suspend and included in the suspend image, but have also been
  44 * allocated by the "resume" kernel, so their contents cannot be written
  45 * directly to their "original" page frames.
  46 */
  47struct pbe *restore_pblist;
  48
  49/* Pointer to an auxiliary buffer (1 page); copy_data_page() stages a page
     * in it before copying to a highmem destination.
     */
  50static void *buffer;
  51
  52/**
  53 *      @safe_needed - on resume, for storing the PBE list and the image,
  54 *      we can only use memory pages that do not conflict with the pages
  55 *      used before suspend.  The unsafe pages have PageNosaveFree set
  56 *      and we count them using unsafe_pages.
  57 *
  58 *      Each allocated image page is marked as PageNosave and PageNosaveFree
  59 *      so that swsusp_free() can release it.
  60 */
  61
  /*
   * Values for the safe_needed argument of get_image_page() and its users:
   * with PG_SAFE, pages whose bit is set in free_pages_map are rejected;
   * with PG_ANY the first page obtained is accepted.
   */
  62#define PG_ANY          0
  63#define PG_SAFE         1
  /*
   * Values for the clear_nosave_free argument of the freeing helpers:
   * PG_UNSAFE_CLEAR also clears the page's bit in free_pages_map.
   * NOTE(review): PG_UNSAFE_KEEP is not used in the visible part of the
   * file; presumably it leaves that bit set - confirm against callers.
   */
  64#define PG_UNSAFE_CLEAR 1
  65#define PG_UNSAFE_KEEP  0
  66
  /* Count of "unsafe" pages that get_image_page() allocated and set aside. */
  67static unsigned int allocated_unsafe_pages;
  68
  69static void *get_image_page(gfp_t gfp_mask, int safe_needed)
  70{
  71        void *res;
  72
  73        res = (void *)get_zeroed_page(gfp_mask);
  74        if (safe_needed)
  75                while (res && swsusp_page_is_free(virt_to_page(res))) {
  76                        /* The page is unsafe, mark it for swsusp_free() */
  77                        swsusp_set_page_forbidden(virt_to_page(res));
  78                        allocated_unsafe_pages++;
  79                        res = (void *)get_zeroed_page(gfp_mask);
  80                }
  81        if (res) {
  82                swsusp_set_page_forbidden(virt_to_page(res));
  83                swsusp_set_page_free(virt_to_page(res));
  84        }
  85        return res;
  86}
  87
  88unsigned long get_safe_page(gfp_t gfp_mask)
  89{
  90        return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
  91}
  92
  93static struct page *alloc_image_page(gfp_t gfp_mask)
  94{
  95        struct page *page;
  96
  97        page = alloc_page(gfp_mask);
  98        if (page) {
  99                swsusp_set_page_forbidden(page);
 100                swsusp_set_page_free(page);
 101        }
 102        return page;
 103}
 104
 /**
  *      free_image_page - free page represented by @addr, allocated with
  *      get_image_page (page flags set by it must be cleared)
  *
  *      @addr: kernel virtual address of the page; must satisfy
  *              virt_addr_valid(), otherwise BUG() is triggered
  *      @clear_nosave_free: if non-zero, the page's bit in free_pages_map
  *              is cleared as well as its "forbidden" bit
  */
 static inline void free_image_page(void *addr, int clear_nosave_free)
 {
         struct page *pg;

         BUG_ON(!virt_addr_valid(addr));
         pg = virt_to_page(addr);

         /* The "forbidden" mark is always dropped ... */
         swsusp_unset_page_forbidden(pg);
         /* ... the "free" mark only on request. */
         if (clear_nosave_free)
                 swsusp_unset_page_free(pg);

         __free_page(pg);
 }
 124
 125/* struct linked_page is used to build chains of pages.
     * Each element is laid out as a link to the next page followed by a
     * payload area of LINKED_PAGE_DATA_SIZE bytes (PAGE_SIZE minus the size
     * of one pointer).
     */
 126
 127#define LINKED_PAGE_DATA_SIZE   (PAGE_SIZE - sizeof(void *))
 128
 129struct linked_page {
 130        struct linked_page *next;   /* next page of the chain, NULL at the end */
 131        char data[LINKED_PAGE_DATA_SIZE];   /* payload handed out by chain_alloc() */
 132} __attribute__((packed));
 133
 134static inline void
 135free_list_of_pages(struct linked_page *list, int clear_page_nosave)
 136{
 137        while (list) {
 138                struct linked_page *lp = list->next;
 139
 140                free_image_page(list, clear_page_nosave);
 141                list = lp;
 142        }
 143}
 144
 145/**
 146  *     struct chain_allocator is used for allocating small objects out of
 147  *     a linked list of pages called 'the chain'.
 148  *
 149  *     The chain grows each time when there is no room for a new object in
 150  *     the current page.  The allocated objects cannot be freed individually.
 151  *     It is only possible to free them all at once, by freeing the entire
 152  *     chain.
 153  *
 154  *     NOTE: The chain allocator may be inefficient if the allocated objects
 155  *     are not much smaller than PAGE_SIZE.
 156  */
 157
 /*
  * State of one chain allocator.  chain_init() sets used_space to
  * LINKED_PAGE_DATA_SIZE, so the first chain_alloc() call finds no room and
  * allocates the first page.
  */
 158struct chain_allocator {
 159        struct linked_page *chain;      /* the chain; new pages are pushed at its head */
 160        unsigned int used_space;        /* total size of objects allocated out
 161                                         * of the current page
 162                                         */
 163        gfp_t gfp_mask;         /* mask for allocating pages */
 164        int safe_needed;        /* if set, only "safe" pages are allocated */
 165};
 166
 167static void
 168chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed)
 169{
 170        ca->chain = NULL;
 171        ca->used_space = LINKED_PAGE_DATA_SIZE;
 172        ca->gfp_mask = gfp_mask;
 173        ca->safe_needed = safe_needed;
 174}
 175
 176static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
 177{
 178        void *ret;
 179
 180        if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
 181                struct linked_page *lp;
 182
 183                lp = get_image_page(ca->gfp_mask, ca->safe_needed);
 184                if (!lp)
 185                        return NULL;
 186
 187                lp->next = ca->chain;
 188                ca->chain = lp;
 189                ca->used_space = 0;
 190        }
 191        ret = ca->chain->data + ca->used_space;
 192        ca->used_space += size;
 193        return ret;
 194}
 195
 196/**
 197 *      Data types related to memory bitmaps.
 198 *
 199 *      Memory bitmap is a structure consisting of many linked lists of
 200 *      objects.  The main list's elements are of type struct zone_bitmap
 201 *      and each of them corresponds to one zone.  For each zone bitmap
 202 *      object there is a list of objects of type struct bm_block that
 203 *      represent the blocks of the bitmap in which information is stored.
 204 *
 205 *      struct memory_bitmap contains a pointer to the main list of zone
 206 *      bitmap objects, a struct bm_position used for browsing the bitmap,
 207 *      and a pointer to the list of pages used for allocating all of the
 208 *      zone bitmap objects and bitmap block objects.
 209 *
 210 *      NOTE: It has to be possible to lay out the bitmap in memory
 211 *      using only allocations of order 0.  Additionally, the bitmap is
 212 *      designed to work with arbitrary number of zones (this is over the
 213 *      top for now, but let's avoid making unnecessary assumptions ;-).
 214 *
 215 *      struct zone_bitmap contains a pointer to a list of bitmap block
 216 *      objects and a pointer to the bitmap block object that has been
 217 *      most recently used for setting bits.  Additionally, it contains the
 218 *      pfns that correspond to the start and end of the represented zone.
 219 *
 220 *      struct bm_block contains a pointer to the memory page in which
 221 *      information is stored (in the form of a block of bitmap)
 222 *      It also contains the pfns that correspond to the start and end of
 223 *      the represented memory area.
 224 */
 225
 /* Value returned by memory_bm_next_pfn() when no further bit is set. */
 226#define BM_END_OF_MAP   (~0UL)
 227
 /* Number of bits in one page, i.e. in one block's data area. */
 228#define BM_BITS_PER_BLOCK       (PAGE_SIZE << 3)
 229
 230struct bm_block {
 231        struct list_head hook;  /* hook into a list of bitmap blocks */
 232        unsigned long start_pfn;        /* pfn represented by the first bit */
 233        unsigned long end_pfn;  /* pfn represented by the last bit plus 1 */
 234        unsigned long *data;    /* bitmap representing pages (one page from get_image_page()) */
 235};
 236
 237static inline unsigned long bm_block_bits(struct bm_block *bb)
 238{
 239        return bb->end_pfn - bb->start_pfn;
 240}
 241
 242/* struct bm_position is used for browsing memory bitmaps */
 243
 244struct bm_position {
 245        struct bm_block *block; /* block the next lookup or scan starts from */
 246        int bit;        /* bit in that block at which the next scan starts */
 247};
 248
 /*
  * A memory bitmap: a list of bm_block objects, each covering a range of
  * pfns, plus the pages backing those objects and a cursor.
  */
 249struct memory_bitmap {
 250        struct list_head blocks;        /* list of bitmap blocks */
 251        struct linked_page *p_list;     /* list of pages used to store
 252                                         * the bitmap block objects
 253                                         * (struct bm_block)
 254                                         */
 255        struct bm_position cur; /* most recently used bit position */
 256};
 257
 258/* Functions that operate on memory bitmaps */
 259
 260static void memory_bm_position_reset(struct memory_bitmap *bm)
 261{
 262        bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook);
 263        bm->cur.bit = 0;
 264}
 265
 266static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
 267
 268/**
 269 *      create_bm_block_list - create a list of block bitmap objects
 270 *      @nr_blocks - number of blocks to allocate
 271 *      @list - list to put the allocated blocks into
 272 *      @ca - chain allocator to be used for allocating memory
 273 */
 274static int create_bm_block_list(unsigned long pages,
 275                                struct list_head *list,
 276                                struct chain_allocator *ca)
 277{
 278        unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);
 279
 280        while (nr_blocks-- > 0) {
 281                struct bm_block *bb;
 282
 283                bb = chain_alloc(ca, sizeof(struct bm_block));
 284                if (!bb)
 285                        return -ENOMEM;
 286                list_add(&bb->hook, list);
 287        }
 288
 289        return 0;
 290}
 291
 /* A contiguous range of pfns; create_mem_extents() builds a list of these. */
 292struct mem_extent {
 293        struct list_head hook;  /* link in the list of extents */
 294        unsigned long start;    /* first pfn of the range */
 295        unsigned long end;      /* pfn following the last one of the range */
 296};
 297
 298/**
 299 *      free_mem_extents - free a list of memory extents
 300 *      @list - list of extents to empty
 301 */
 302static void free_mem_extents(struct list_head *list)
 303{
 304        struct mem_extent *ext, *aux;
 305
 306        list_for_each_entry_safe(ext, aux, list, hook) {
 307                list_del(&ext->hook);
 308                kfree(ext);
 309        }
 310}
 311
 312/**
 313 *      create_mem_extents - create a list of memory extents representing
 314 *                           contiguous ranges of PFNs
 315 *      @list - list to put the extents into
 316 *      @gfp_mask - mask to use for memory allocations
 317 */
 318static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
 319{
 320        struct zone *zone;
 321
 322        INIT_LIST_HEAD(list);
 323
 324        for_each_populated_zone(zone) {
 325                unsigned long zone_start, zone_end;
 326                struct mem_extent *ext, *cur, *aux;
 327
 328                zone_start = zone->zone_start_pfn;
 329                zone_end = zone->zone_start_pfn + zone->spanned_pages;
 330
 331                list_for_each_entry(ext, list, hook)
 332                        if (zone_start <= ext->end)
 333                                break;
 334
 335                if (&ext->hook == list || zone_end < ext->start) {
 336                        /* New extent is necessary */
 337                        struct mem_extent *new_ext;
 338
 339                        new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
 340                        if (!new_ext) {
 341                                free_mem_extents(list);
 342                                return -ENOMEM;
 343                        }
 344                        new_ext->start = zone_start;
 345                        new_ext->end = zone_end;
 346                        list_add_tail(&new_ext->hook, &ext->hook);
 347                        continue;
 348                }
 349
 350                /* Merge this zone's range of PFNs with the existing one */
 351                if (zone_start < ext->start)
 352                        ext->start = zone_start;
 353                if (zone_end > ext->end)
 354                        ext->end = zone_end;
 355
 356                /* More merging may be possible */
 357                cur = ext;
 358                list_for_each_entry_safe_continue(cur, aux, list, hook) {
 359                        if (zone_end < cur->start)
 360                                break;
 361                        if (zone_end < cur->end)
 362                                ext->end = cur->end;
 363                        list_del(&cur->hook);
 364                        kfree(cur);
 365                }
 366        }
 367
 368        return 0;
 369}
 370
 371/**
 372  *     memory_bm_create - allocate memory for a memory bitmap
 373  */
 374static int
 375memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
 376{
 377        struct chain_allocator ca;
 378        struct list_head mem_extents;
 379        struct mem_extent *ext;
 380        int error;
 381
 382        chain_init(&ca, gfp_mask, safe_needed);
 383        INIT_LIST_HEAD(&bm->blocks);
 384
 385        error = create_mem_extents(&mem_extents, gfp_mask);
 386        if (error)
 387                return error;
 388
 389        list_for_each_entry(ext, &mem_extents, hook) {
 390                struct bm_block *bb;
 391                unsigned long pfn = ext->start;
 392                unsigned long pages = ext->end - ext->start;
 393
 394                bb = list_entry(bm->blocks.prev, struct bm_block, hook);
 395
 396                error = create_bm_block_list(pages, bm->blocks.prev, &ca);
 397                if (error)
 398                        goto Error;
 399
 400                list_for_each_entry_continue(bb, &bm->blocks, hook) {
 401                        bb->data = get_image_page(gfp_mask, safe_needed);
 402                        if (!bb->data) {
 403                                error = -ENOMEM;
 404                                goto Error;
 405                        }
 406
 407                        bb->start_pfn = pfn;
 408                        if (pages >= BM_BITS_PER_BLOCK) {
 409                                pfn += BM_BITS_PER_BLOCK;
 410                                pages -= BM_BITS_PER_BLOCK;
 411                        } else {
 412                                /* This is executed only once in the loop */
 413                                pfn += pages;
 414                        }
 415                        bb->end_pfn = pfn;
 416                }
 417        }
 418
 419        bm->p_list = ca.chain;
 420        memory_bm_position_reset(bm);
 421 Exit:
 422        free_mem_extents(&mem_extents);
 423        return error;
 424
 425 Error:
 426        bm->p_list = ca.chain;
 427        memory_bm_free(bm, PG_UNSAFE_CLEAR);
 428        goto Exit;
 429}
 430
 431/**
 432  *     memory_bm_free - free memory occupied by the memory bitmap @bm
 433  */
 434static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
 435{
 436        struct bm_block *bb;
 437
 438        list_for_each_entry(bb, &bm->blocks, hook)
 439                if (bb->data)
 440                        free_image_page(bb->data, clear_nosave_free);
 441
 442        free_list_of_pages(bm->p_list, clear_nosave_free);
 443
 444        INIT_LIST_HEAD(&bm->blocks);
 445}
 446
 447/**
 448 *      memory_bm_find_bit - find the bit in the bitmap @bm that corresponds
 449 *      to given pfn.  The cur_zone_bm member of @bm and the cur_block member
 450 *      of @bm->cur_zone_bm are updated.
 451 */
 452static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
 453                                void **addr, unsigned int *bit_nr)
 454{
 455        struct bm_block *bb;
 456
 457        /*
 458         * Check if the pfn corresponds to the current bitmap block and find
 459         * the block where it fits if this is not the case.
 460         */
 461        bb = bm->cur.block;
 462        if (pfn < bb->start_pfn)
 463                list_for_each_entry_continue_reverse(bb, &bm->blocks, hook)
 464                        if (pfn >= bb->start_pfn)
 465                                break;
 466
 467        if (pfn >= bb->end_pfn)
 468                list_for_each_entry_continue(bb, &bm->blocks, hook)
 469                        if (pfn >= bb->start_pfn && pfn < bb->end_pfn)
 470                                break;
 471
 472        if (&bb->hook == &bm->blocks)
 473                return -EFAULT;
 474
 475        /* The block has been found */
 476        bm->cur.block = bb;
 477        pfn -= bb->start_pfn;
 478        bm->cur.bit = pfn + 1;
 479        *bit_nr = pfn;
 480        *addr = bb->data;
 481        return 0;
 482}
 483
 /*
  * memory_bm_set_bit - set the bit of @bm that corresponds to @pfn.
  * Triggers BUG() if @pfn is not covered by the bitmap.
  */
 static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
 {
         unsigned int nr;
         void *map;
         int rc = memory_bm_find_bit(bm, pfn, &map, &nr);

         BUG_ON(rc);
         set_bit(nr, map);
 }
 494
 /*
  * mem_bm_set_bit_check - set the bit of @bm that corresponds to @pfn,
  * if there is one.  Returns 0 on success or the error reported by
  * memory_bm_find_bit() when @pfn is not covered by the bitmap.
  */
 static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
 {
         unsigned int nr;
         void *map;
         int rc = memory_bm_find_bit(bm, pfn, &map, &nr);

         if (rc)
                 return rc;

         set_bit(nr, map);
         return 0;
 }
 506
 /*
  * memory_bm_clear_bit - clear the bit of @bm that corresponds to @pfn.
  * Triggers BUG() if @pfn is not covered by the bitmap.
  */
 static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
 {
         unsigned int nr;
         void *map;
         int rc = memory_bm_find_bit(bm, pfn, &map, &nr);

         BUG_ON(rc);
         clear_bit(nr, map);
 }
 517
 /*
  * memory_bm_test_bit - return the state of the bit of @bm that
  * corresponds to @pfn.  Triggers BUG() if @pfn is not covered by the
  * bitmap.
  */
 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
 {
         unsigned int nr;
         void *map;
         int rc = memory_bm_find_bit(bm, pfn, &map, &nr);

         BUG_ON(rc);
         return test_bit(nr, map);
 }
 528
 529static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
 530{
 531        void *addr;
 532        unsigned int bit;
 533
 534        return !memory_bm_find_bit(bm, pfn, &addr, &bit);
 535}
 536
 537/**
 538 *      memory_bm_next_pfn - find the pfn that corresponds to the next set bit
 539 *      in the bitmap @bm.  If the pfn cannot be found, BM_END_OF_MAP is
 540 *      returned.
 541 *
 542 *      It is required to run memory_bm_position_reset() before the first call to
 543 *      this function.
 544 */
 545
 546static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
 547{
 548        struct bm_block *bb;
 549        int bit;
 550
 551        bb = bm->cur.block;
 552        do {
 553                bit = bm->cur.bit;
 554                bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
 555                if (bit < bm_block_bits(bb))
 556                        goto Return_pfn;
 557
 558                bb = list_entry(bb->hook.next, struct bm_block, hook);
 559                bm->cur.block = bb;
 560                bm->cur.bit = 0;
 561        } while (&bb->hook != &bm->blocks);
 562
 563        memory_bm_position_reset(bm);
 564        return BM_END_OF_MAP;
 565
 566 Return_pfn:
 567        bm->cur.bit = bit + 1;
 568        return bb->start_pfn + bit;
 569}
 570
 571/**
 572 *      This structure represents a range of page frames the contents of which
 573 *      should not be saved during the suspend.
 574 */
 575
 576struct nosave_region {
 577        struct list_head list;  /* link in nosave_regions */
 578        unsigned long start_pfn;        /* first pfn of the region */
 579        unsigned long end_pfn;  /* pfn following the last one of the region */
 580};
 581
 /* All registered nosave regions, appended by __register_nosave_region(). */
 582static LIST_HEAD(nosave_regions);
 583
 584/**
 585 *      register_nosave_region - register a range of page frames the contents
 586 *      of which should not be saved during the suspend (to be used in the early
 587 *      initialization code)
 588 */
 589
 590void __init
 591__register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
 592                         int use_kmalloc)
 593{
 594        struct nosave_region *region;
 595
 596        if (start_pfn >= end_pfn)
 597                return;
 598
 599        if (!list_empty(&nosave_regions)) {
 600                /* Try to extend the previous region (they should be sorted) */
 601                region = list_entry(nosave_regions.prev,
 602                                        struct nosave_region, list);
 603                if (region->end_pfn == start_pfn) {
 604                        region->end_pfn = end_pfn;
 605                        goto Report;
 606                }
 607        }
 608        if (use_kmalloc) {
 609                /* during init, this shouldn't fail */
 610                region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
 611                BUG_ON(!region);
 612        } else
 613                /* This allocation cannot fail */
 614                region = alloc_bootmem_low(sizeof(struct nosave_region));
 615        region->start_pfn = start_pfn;
 616        region->end_pfn = end_pfn;
 617        list_add_tail(&region->list, &nosave_regions);
 618 Report:
 619        printk(KERN_INFO "PM: Registered nosave memory: %016lx - %016lx\n",
 620                start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
 621}
 622
 623/*
 624 * Set bits in this map correspond to the page frames the contents of which
 625 * should not be saved during the suspend.
     * NULL until create_basic_memory_bitmaps() has succeeded and again after
     * free_basic_memory_bitmaps().
 626 */
 627static struct memory_bitmap *forbidden_pages_map;
 628
 629/* Set bits in this map correspond to free page frames.  Set up and torn
     * down together with forbidden_pages_map.
     */
 630static struct memory_bitmap *free_pages_map;
 631
 632/*
 633 * Each page frame allocated for creating the image is marked by setting the
 634 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
 635 */
 636
 637void swsusp_set_page_free(struct page *page)
 638{
 639        if (free_pages_map)
 640                memory_bm_set_bit(free_pages_map, page_to_pfn(page));
 641}
 642
 643static int swsusp_page_is_free(struct page *page)
 644{
 645        return free_pages_map ?
 646                memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
 647}
 648
 649void swsusp_unset_page_free(struct page *page)
 650{
 651        if (free_pages_map)
 652                memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
 653}
 654
 655static void swsusp_set_page_forbidden(struct page *page)
 656{
 657        if (forbidden_pages_map)
 658                memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
 659}
 660
 661int swsusp_page_is_forbidden(struct page *page)
 662{
 663        return forbidden_pages_map ?
 664                memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
 665}
 666
 667static void swsusp_unset_page_forbidden(struct page *page)
 668{
 669        if (forbidden_pages_map)
 670                memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
 671}
 672
 673/**
 674 *      mark_nosave_pages - set bits corresponding to the page frames the
 675 *      contents of which should not be saved in a given bitmap.
 676 */
 677
 678static void mark_nosave_pages(struct memory_bitmap *bm)
 679{
 680        struct nosave_region *region;
 681
 682        if (list_empty(&nosave_regions))
 683                return;
 684
 685        list_for_each_entry(region, &nosave_regions, list) {
 686                unsigned long pfn;
 687
 688                pr_debug("PM: Marking nosave pages: %016lx - %016lx\n",
 689                                region->start_pfn << PAGE_SHIFT,
 690                                region->end_pfn << PAGE_SHIFT);
 691
 692                for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
 693                        if (pfn_valid(pfn)) {
 694                                /*
 695                                 * It is safe to ignore the result of
 696                                 * mem_bm_set_bit_check() here, since we won't
 697                                 * touch the PFNs for which the error is
 698                                 * returned anyway.
 699                                 */
 700                                mem_bm_set_bit_check(bm, pfn);
 701                        }
 702        }
 703}
 704
 705/**
 706 *      create_basic_memory_bitmaps - create bitmaps needed for marking page
 707 *      frames that should not be saved and free page frames.  The pointers
 708 *      forbidden_pages_map and free_pages_map are only modified if everything
 709 *      goes well, because we don't want the bits to be used before both bitmaps
 710 *      are set up.
 711 */
 712
 713int create_basic_memory_bitmaps(void)
 714{
 715        struct memory_bitmap *bm1, *bm2;
 716        int error = 0;
 717
 718        BUG_ON(forbidden_pages_map || free_pages_map);
 719
 720        bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
 721        if (!bm1)
 722                return -ENOMEM;
 723
 724        error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
 725        if (error)
 726                goto Free_first_object;
 727
 728        bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
 729        if (!bm2)
 730                goto Free_first_bitmap;
 731
 732        error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
 733        if (error)
 734                goto Free_second_object;
 735
 736        forbidden_pages_map = bm1;
 737        free_pages_map = bm2;
 738        mark_nosave_pages(forbidden_pages_map);
 739
 740        pr_debug("PM: Basic memory bitmaps created\n");
 741
 742        return 0;
 743
 744 Free_second_object:
 745        kfree(bm2);
 746 Free_first_bitmap:
 747        memory_bm_free(bm1, PG_UNSAFE_CLEAR);
 748 Free_first_object:
 749        kfree(bm1);
 750        return -ENOMEM;
 751}
 752
 753/**
 754 *      free_basic_memory_bitmaps - free memory bitmaps allocated by
 755 *      create_basic_memory_bitmaps().  The auxiliary pointers are necessary
 756 *      so that the bitmaps themselves are not referred to while they are being
 757 *      freed.
 758 */
 759
 760void free_basic_memory_bitmaps(void)
 761{
 762        struct memory_bitmap *bm1, *bm2;
 763
 764        BUG_ON(!(forbidden_pages_map && free_pages_map));
 765
 766        bm1 = forbidden_pages_map;
 767        bm2 = free_pages_map;
 768        forbidden_pages_map = NULL;
 769        free_pages_map = NULL;
 770        memory_bm_free(bm1, PG_UNSAFE_CLEAR);
 771        kfree(bm1);
 772        memory_bm_free(bm2, PG_UNSAFE_CLEAR);
 773        kfree(bm2);
 774
 775        pr_debug("PM: Basic memory bitmaps freed\n");
 776}
 777
 778/**
 779 *      snapshot_additional_pages - estimate the number of additional pages
 780 *      be needed for setting up the suspend image data structures for given
 781 *      zone (usually the returned value is greater than the exact number)
 782 */
 783
 784unsigned int snapshot_additional_pages(struct zone *zone)
 785{
 786        unsigned int res;
 787
 788        res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
 789        res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE);
 790        return 2 * res;
 791}
 792
 793#ifdef CONFIG_HIGHMEM
 794/**
 795 *      count_free_highmem_pages - compute the total number of free highmem
 796 *      pages, system-wide.
 797 */
 798
 799static unsigned int count_free_highmem_pages(void)
 800{
 801        struct zone *zone;
 802        unsigned int cnt = 0;
 803
 804        for_each_populated_zone(zone)
 805                if (is_highmem(zone))
 806                        cnt += zone_page_state(zone, NR_FREE_PAGES);
 807
 808        return cnt;
 809}
 810
 811/**
 812 *      saveable_highmem_page - Determine whether a highmem page should be
 813 *      included in the suspend image.
 814 *
 815 *      We should save the page if it isn't Nosave or NosaveFree, or Reserved,
 816 *      and it isn't a part of a free chunk of pages.
 817 */
 818static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
 819{
 820        struct page *page;
 821
 822        if (!pfn_valid(pfn))
 823                return NULL;
 824
 825        page = pfn_to_page(pfn);
 826        if (page_zone(page) != zone)
 827                return NULL;
 828
 829        BUG_ON(!PageHighMem(page));
 830
 831        if (swsusp_page_is_forbidden(page) ||  swsusp_page_is_free(page) ||
 832            PageReserved(page))
 833                return NULL;
 834
 835        return page;
 836}
 837
 838/**
 839 *      count_highmem_pages - compute the total number of saveable highmem
 840 *      pages.
 841 */
 842
 843unsigned int count_highmem_pages(void)
 844{
 845        struct zone *zone;
 846        unsigned int n = 0;
 847
 848        for_each_zone(zone) {
 849                unsigned long pfn, max_zone_pfn;
 850
 851                if (!is_highmem(zone))
 852                        continue;
 853
 854                mark_free_pages(zone);
 855                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
 856                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
 857                        if (saveable_highmem_page(zone, pfn))
 858                                n++;
 859        }
 860        return n;
 861}
 862#else
 /* Stub used when CONFIG_HIGHMEM is not set: never reports a saveable highmem page. */
 863static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
 864{
 865        return NULL;
 866}
 867#endif /* CONFIG_HIGHMEM */
 868
 869/**
 870 *      saveable_page - Determine whether a non-highmem page should be included
 871 *      in the suspend image.
 872 *
 873 *      We should save the page if it isn't Nosave, and is not in the range
 874 *      of pages statically defined as 'unsaveable', and it isn't a part of
 875 *      a free chunk of pages.
 876 */
 877static struct page *saveable_page(struct zone *zone, unsigned long pfn)
 878{
 879        struct page *page;
 880
 881        if (!pfn_valid(pfn))
 882                return NULL;
 883
 884        page = pfn_to_page(pfn);
 885        if (page_zone(page) != zone)
 886                return NULL;
 887
 888        BUG_ON(PageHighMem(page));
 889
 890        if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
 891                return NULL;
 892
 893        if (PageReserved(page)
 894            && (!kernel_page_present(page) || pfn_is_nosave(pfn)))
 895                return NULL;
 896
 897        return page;
 898}
 899
 900/**
 901 *      count_data_pages - compute the total number of saveable non-highmem
 902 *      pages.
 903 */
 904
 905unsigned int count_data_pages(void)
 906{
 907        struct zone *zone;
 908        unsigned long pfn, max_zone_pfn;
 909        unsigned int n = 0;
 910
 911        for_each_zone(zone) {
 912                if (is_highmem(zone))
 913                        continue;
 914
 915                mark_free_pages(zone);
 916                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
 917                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
 918                        if (saveable_page(zone, pfn))
 919                                n++;
 920        }
 921        return n;
 922}
 923
 924/* This is needed, because copy_page and memcpy are not usable for copying
 925 * task structs.
 926 */
 927static inline void do_copy_page(long *dst, long *src)
 928{
 929        int n;
 930
 931        for (n = PAGE_SIZE / sizeof(long); n; n--)
 932                *dst++ = *src++;
 933}
 934
 935
 /**
  *      safe_copy_page - copy the contents of @s_page to @dst, making sure
  *              that the source page is present in the kernel page tables
  *              while it is being read.
  *
  *      If kernel_page_present() reports the page as missing, it is mapped
  *      for the duration of the copy and unmapped again afterwards.  (The
  *      page is always present if CONFIG_DEBUG_PAGEALLOC is not set, in
  *      which case kernel_page_present() always returns 'true'.)
  */
 static void safe_copy_page(void *dst, struct page *s_page)
 {
         int present = kernel_page_present(s_page);

         if (!present)
                 kernel_map_pages(s_page, 1, 1);

         do_copy_page(dst, page_address(s_page));

         if (!present)
                 kernel_map_pages(s_page, 1, 0);
 }
 952
 953
 954#ifdef CONFIG_HIGHMEM
 /* Dispatch the "should this page frame be saved?" question to the highmem
  * or non-highmem helper, depending on the kind of @zone.
  */
 static inline struct page *
 page_is_saveable(struct zone *zone, unsigned long pfn)
 {
         if (is_highmem(zone))
                 return saveable_highmem_page(zone, pfn);

         return saveable_page(zone, pfn);
 }
 961
 962static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 963{
 964        struct page *s_page, *d_page;
 965        void *src, *dst;
 966
 967        s_page = pfn_to_page(src_pfn);
 968        d_page = pfn_to_page(dst_pfn);
 969        if (PageHighMem(s_page)) {
 970                src = kmap_atomic(s_page, KM_USER0);
 971                dst = kmap_atomic(d_page, KM_USER1);
 972                do_copy_page(dst, src);
 973                kunmap_atomic(src, KM_USER0);
 974                kunmap_atomic(dst, KM_USER1);
 975        } else {
 976                if (PageHighMem(d_page)) {
 977                        /* Page pointed to by src may contain some kernel
 978                         * data modified by kmap_atomic()
 979                         */
 980                        safe_copy_page(buffer, s_page);
 981                        dst = kmap_atomic(d_page, KM_USER0);
 982                        memcpy(dst, buffer, PAGE_SIZE);
 983                        kunmap_atomic(dst, KM_USER0);
 984                } else {
 985                        safe_copy_page(page_address(d_page), s_page);
 986                }
 987        }
 988}
 989#else
 /* In this configuration only saveable_page() is consulted */
 #define page_is_saveable(zone, pfn)     saveable_page(zone, pfn)

 /* Copy the page frame @src_pfn into the page frame @dst_pfn; both are
  * accessed through their page_address() mappings.
  */
 static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
         struct page *from = pfn_to_page(src_pfn);
         void *to_va = page_address(pfn_to_page(dst_pfn));

         safe_copy_page(to_va, from);
 }
 997#endif /* CONFIG_HIGHMEM */
 998
 999static void
1000copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
1001{
1002        struct zone *zone;
1003        unsigned long pfn;
1004
1005        for_each_zone(zone) {
1006                unsigned long max_zone_pfn;
1007
1008                mark_free_pages(zone);
1009                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1010                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1011                        if (page_is_saveable(zone, pfn))
1012                                memory_bm_set_bit(orig_bm, pfn);
1013        }
1014        memory_bm_position_reset(orig_bm);
1015        memory_bm_position_reset(copy_bm);
1016        for(;;) {
1017                pfn = memory_bm_next_pfn(orig_bm);
1018                if (unlikely(pfn == BM_END_OF_MAP))
1019                        break;
1020                copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
1021        }
1022}
1023
1024/* Total number of image pages */
1025static unsigned int nr_copy_pages;
1026/* Number of pages needed for saving the original pfns of the image pages */
1027static unsigned int nr_meta_pages;
1028
1029/**
1030 *      swsusp_free - free pages allocated for the suspend.
1031 *
1032 *      Suspend pages are alocated before the atomic copy is made, so we
1033 *      need to release them after the resume.
1034 */
1035
1036void swsusp_free(void)
1037{
1038        struct zone *zone;
1039        unsigned long pfn, max_zone_pfn;
1040
1041        for_each_zone(zone) {
1042                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1043                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1044                        if (pfn_valid(pfn)) {
1045                                struct page *page = pfn_to_page(pfn);
1046
1047                                if (swsusp_page_is_forbidden(page) &&
1048                                    swsusp_page_is_free(page)) {
1049                                        swsusp_unset_page_forbidden(page);
1050                                        swsusp_unset_page_free(page);
1051                                        __free_page(page);
1052                                }
1053                        }
1054        }
1055        nr_copy_pages = 0;
1056        nr_meta_pages = 0;
1057        restore_pblist = NULL;
1058        buffer = NULL;
1059}
1060
1061#ifdef CONFIG_HIGHMEM
/**
 *      count_pages_for_highmem - compute the number of non-highmem pages
 *      that will be necessary for creating copies of highmem pages.
 *      @nr_highmem: number of highmem pages that need a copy
 *
 *      Returns the part of @nr_highmem that exceeds the number of free
 *      highmem pages, or 0 if the free highmem pages cover all of it.
 */

static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
{
        unsigned int free_highmem = count_free_highmem_pages();

        return (nr_highmem > free_highmem) ? nr_highmem - free_highmem : 0;
}
1078#else
/* Variant for kernels without highmem: no extra normal pages are needed */
static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
{
        return 0;
}
1081#endif /* CONFIG_HIGHMEM */
1082
1083/**
1084 *      enough_free_mem - Make sure we have enough free memory for the
1085 *      snapshot image.
1086 */
1087
1088static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
1089{
1090        struct zone *zone;
1091        unsigned int free = 0, meta = 0;
1092
1093        for_each_zone(zone) {
1094                meta += snapshot_additional_pages(zone);
1095                if (!is_highmem(zone))
1096                        free += zone_page_state(zone, NR_FREE_PAGES);
1097        }
1098
1099        nr_pages += count_pages_for_highmem(nr_highmem);
1100        pr_debug("PM: Normal pages needed: %u + %u + %u, available pages: %u\n",
1101                nr_pages, PAGES_FOR_IO, meta, free);
1102
1103        return free > nr_pages + PAGES_FOR_IO + meta;
1104}
1105
1106#ifdef CONFIG_HIGHMEM
1107/**
1108 *      get_highmem_buffer - if there are some highmem pages in the suspend
1109 *      image, we may need the buffer to copy them and/or load their data.
1110 */
1111
1112static inline int get_highmem_buffer(int safe_needed)
1113{
1114        buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed);
1115        return buffer ? 0 : -ENOMEM;
1116}
1117
1118/**
1119 *      alloc_highmem_image_pages - allocate some highmem pages for the image.
1120 *      Try to allocate as many pages as needed, but if the number of free
1121 *      highmem pages is lesser than that, allocate them all.
1122 */
1123
1124static inline unsigned int
1125alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
1126{
1127        unsigned int to_alloc = count_free_highmem_pages();
1128
1129        if (to_alloc > nr_highmem)
1130                to_alloc = nr_highmem;
1131
1132        nr_highmem -= to_alloc;
1133        while (to_alloc-- > 0) {
1134                struct page *page;
1135
1136                page = alloc_image_page(__GFP_HIGHMEM);
1137                memory_bm_set_bit(bm, page_to_pfn(page));
1138        }
1139        return nr_highmem;
1140}
1141#else
/* Variant for kernels without highmem: no buffer is set up, always succeeds */
static inline int get_highmem_buffer(int safe_needed)
{
        return 0;
}
1143
/* Variant for kernels without highmem: nothing is allocated and no pages
 * are reported as left over.
 */
static inline unsigned int
alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int n)
{
        return 0;
}
1146#endif /* CONFIG_HIGHMEM */
1147
1148/**
1149 *      swsusp_alloc - allocate memory for the suspend image
1150 *
1151 *      We first try to allocate as many highmem pages as there are
1152 *      saveable highmem pages in the system.  If that fails, we allocate
1153 *      non-highmem pages for the copies of the remaining highmem ones.
1154 *
1155 *      In this approach it is likely that the copies of highmem pages will
1156 *      also be located in the high memory, because of the way in which
1157 *      copy_data_pages() works.
1158 */
1159
1160static int
1161swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
1162                unsigned int nr_pages, unsigned int nr_highmem)
1163{
1164        int error;
1165
1166        error = memory_bm_create(orig_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY);
1167        if (error)
1168                goto Free;
1169
1170        error = memory_bm_create(copy_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY);
1171        if (error)
1172                goto Free;
1173
1174        if (nr_highmem > 0) {
1175                error = get_highmem_buffer(PG_ANY);
1176                if (error)
1177                        goto Free;
1178
1179                nr_pages += alloc_highmem_image_pages(copy_bm, nr_highmem);
1180        }
1181        while (nr_pages-- > 0) {
1182                struct page *page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
1183
1184                if (!page)
1185                        goto Free;
1186
1187                memory_bm_set_bit(copy_bm, page_to_pfn(page));
1188        }
1189        return 0;
1190
1191 Free:
1192        swsusp_free();
1193        return -ENOMEM;
1194}
1195
1196/* Memory bitmap used for marking saveable pages (during suspend) or the
1197 * suspend image pages (during resume)
1198 */
1199static struct memory_bitmap orig_bm;
1200/* Memory bitmap used on suspend for marking allocated pages that will contain
1201 * the copies of saveable pages.  During resume it is initially used for
1202 * marking the suspend image pages, but then its set bits are duplicated in
1203 * @orig_bm and it is released.  Next, on systems with high memory, it may be
1204 * used for marking "safe" highmem pages, but it has to be reinitialized for
1205 * this purpose.
1206 */
1207static struct memory_bitmap copy_bm;
1208
1209asmlinkage int swsusp_save(void)
1210{
1211        unsigned int nr_pages, nr_highmem;
1212
1213        printk(KERN_INFO "PM: Creating hibernation image: \n");
1214
1215        drain_local_pages(NULL);
1216        nr_pages = count_data_pages();
1217        nr_highmem = count_highmem_pages();
1218        printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem);
1219
1220        if (!enough_free_mem(nr_pages, nr_highmem)) {
1221                printk(KERN_ERR "PM: Not enough free memory\n");
1222                return -ENOMEM;
1223        }
1224
1225        if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages, nr_highmem)) {
1226                printk(KERN_ERR "PM: Memory allocation failed\n");
1227                return -ENOMEM;
1228        }
1229
1230        /* During allocating of suspend pagedir, new cold pages may appear.
1231         * Kill them.
1232         */
1233        drain_local_pages(NULL);
1234        copy_data_pages(&copy_bm, &orig_bm);
1235
1236        /*
1237         * End of critical section. From now on, we can write to memory,
1238         * but we should not touch disk. This specially means we must _not_
1239         * touch swap space! Except we must write out our image of course.
1240         */
1241
1242        nr_pages += nr_highmem;
1243        nr_copy_pages = nr_pages;
1244        nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
1245
1246        printk(KERN_INFO "PM: Hibernation image created (%d pages copied)\n",
1247                nr_pages);
1248
1249        return 0;
1250}
1251
1252#ifndef CONFIG_ARCH_HIBERNATION_HEADER
1253static int init_header_complete(struct swsusp_info *info)
1254{
1255        memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
1256        info->version_code = LINUX_VERSION_CODE;
1257        return 0;
1258}
1259
1260static char *check_image_kernel(struct swsusp_info *info)
1261{
1262        if (info->version_code != LINUX_VERSION_CODE)
1263                return "kernel version";
1264        if (strcmp(info->uts.sysname,init_utsname()->sysname))
1265                return "system type";
1266        if (strcmp(info->uts.release,init_utsname()->release))
1267                return "kernel release";
1268        if (strcmp(info->uts.version,init_utsname()->version))
1269                return "version";
1270        if (strcmp(info->uts.machine,init_utsname()->machine))
1271                return "machine";
1272        return NULL;
1273}
1274#endif /* CONFIG_ARCH_HIBERNATION_HEADER */
1275
1276unsigned long snapshot_get_image_size(void)
1277{
1278        return nr_copy_pages + nr_meta_pages + 1;
1279}
1280
1281static int init_header(struct swsusp_info *info)
1282{
1283        memset(info, 0, sizeof(struct swsusp_info));
1284        info->num_physpages = num_physpages;
1285        info->image_pages = nr_copy_pages;
1286        info->pages = snapshot_get_image_size();
1287        info->size = info->pages;
1288        info->size <<= PAGE_SHIFT;
1289        return init_header_complete(info);
1290}
1291
1292/**
1293 *      pack_pfns - pfns corresponding to the set bits found in the bitmap @bm
1294 *      are stored in the array @buf[] (1 page at a time)
1295 */
1296
1297static inline void
1298pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
1299{
1300        int j;
1301
1302        for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1303                buf[j] = memory_bm_next_pfn(bm);
1304                if (unlikely(buf[j] == BM_END_OF_MAP))
1305                        break;
1306        }
1307}
1308
1309/**
1310 *      snapshot_read_next - used for reading the system memory snapshot.
1311 *
1312 *      On the first call to it @handle should point to a zeroed
1313 *      snapshot_handle structure.  The structure gets updated and a pointer
1314 *      to it should be passed to this function every next time.
1315 *
1316 *      The @count parameter should contain the number of bytes the caller
1317 *      wants to read from the snapshot.  It must not be zero.
1318 *
1319 *      On success the function returns a positive number.  Then, the caller
1320 *      is allowed to read up to the returned number of bytes from the memory
1321 *      location computed by the data_of() macro.  The number returned
1322 *      may be smaller than @count, but this only happens if the read would
1323 *      cross a page boundary otherwise.
1324 *
1325 *      The function returns 0 to indicate the end of data stream condition,
1326 *      and a negative number is returned on error.  In such cases the
1327 *      structure pointed to by @handle is not updated and should not be used
1328 *      any more.
1329 */
1330
1331int snapshot_read_next(struct snapshot_handle *handle, size_t count)
1332{
1333        if (handle->cur > nr_meta_pages + nr_copy_pages)
1334                return 0;
1335
1336        if (!buffer) {
1337                /* This makes the buffer be freed by swsusp_free() */
1338                buffer = get_image_page(GFP_ATOMIC, PG_ANY);
1339                if (!buffer)
1340                        return -ENOMEM;
1341        }
1342        if (!handle->offset) {
1343                int error;
1344
1345                error = init_header((struct swsusp_info *)buffer);
1346                if (error)
1347                        return error;
1348                handle->buffer = buffer;
1349                memory_bm_position_reset(&orig_bm);
1350                memory_bm_position_reset(&copy_bm);
1351        }
1352        if (handle->prev < handle->cur) {
1353                if (handle->cur <= nr_meta_pages) {
1354                        memset(buffer, 0, PAGE_SIZE);
1355                        pack_pfns(buffer, &orig_bm);
1356                } else {
1357                        struct page *page;
1358
1359                        page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
1360                        if (PageHighMem(page)) {
1361                                /* Highmem pages are copied to the buffer,
1362                                 * because we can't return with a kmapped
1363                                 * highmem page (we may not be called again).
1364                                 */
1365                                void *kaddr;
1366
1367                                kaddr = kmap_atomic(page, KM_USER0);
1368                                memcpy(buffer, kaddr, PAGE_SIZE);
1369                                kunmap_atomic(kaddr, KM_USER0);
1370                                handle->buffer = buffer;
1371                        } else {
1372                                handle->buffer = page_address(page);
1373                        }
1374                }
1375                handle->prev = handle->cur;
1376        }
1377        handle->buf_offset = handle->cur_offset;
1378        if (handle->cur_offset + count >= PAGE_SIZE) {
1379                count = PAGE_SIZE - handle->cur_offset;
1380                handle->cur_offset = 0;
1381                handle->cur++;
1382        } else {
1383                handle->cur_offset += count;
1384        }
1385        handle->offset += count;
1386        return count;
1387}
1388
1389/**
1390 *      mark_unsafe_pages - mark the pages that cannot be used for storing
1391 *      the image during resume, because they conflict with the pages that
1392 *      had been used before suspend
1393 */
1394
1395static int mark_unsafe_pages(struct memory_bitmap *bm)
1396{
1397        struct zone *zone;
1398        unsigned long pfn, max_zone_pfn;
1399
1400        /* Clear page flags */
1401        for_each_zone(zone) {
1402                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1403                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1404                        if (pfn_valid(pfn))
1405                                swsusp_unset_page_free(pfn_to_page(pfn));
1406        }
1407
1408        /* Mark pages that correspond to the "original" pfns as "unsafe" */
1409        memory_bm_position_reset(bm);
1410        do {
1411                pfn = memory_bm_next_pfn(bm);
1412                if (likely(pfn != BM_END_OF_MAP)) {
1413                        if (likely(pfn_valid(pfn)))
1414                                swsusp_set_page_free(pfn_to_page(pfn));
1415                        else
1416                                return -EFAULT;
1417                }
1418        } while (pfn != BM_END_OF_MAP);
1419
1420        allocated_unsafe_pages = 0;
1421
1422        return 0;
1423}
1424
1425static void
1426duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src)
1427{
1428        unsigned long pfn;
1429
1430        memory_bm_position_reset(src);
1431        pfn = memory_bm_next_pfn(src);
1432        while (pfn != BM_END_OF_MAP) {
1433                memory_bm_set_bit(dst, pfn);
1434                pfn = memory_bm_next_pfn(src);
1435        }
1436}
1437
1438static int check_header(struct swsusp_info *info)
1439{
1440        char *reason;
1441
1442        reason = check_image_kernel(info);
1443        if (!reason && info->num_physpages != num_physpages)
1444                reason = "memory size";
1445        if (reason) {
1446                printk(KERN_ERR "PM: Image mismatch: %s\n", reason);
1447                return -EPERM;
1448        }
1449        return 0;
1450}
1451
1452/**
1453 *      load header - check the image header and copy data from it
1454 */
1455
1456static int
1457load_header(struct swsusp_info *info)
1458{
1459        int error;
1460
1461        restore_pblist = NULL;
1462        error = check_header(info);
1463        if (!error) {
1464                nr_copy_pages = info->image_pages;
1465                nr_meta_pages = info->pages - info->image_pages - 1;
1466        }
1467        return error;
1468}
1469
1470/**
1471 *      unpack_orig_pfns - for each element of @buf[] (1 page at a time) set
1472 *      the corresponding bit in the memory bitmap @bm
1473 */
1474static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
1475{
1476        int j;
1477
1478        for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1479                if (unlikely(buf[j] == BM_END_OF_MAP))
1480                        break;
1481
1482                if (memory_bm_pfn_present(bm, buf[j]))
1483                        memory_bm_set_bit(bm, buf[j]);
1484                else
1485                        return -EFAULT;
1486        }
1487
1488        return 0;
1489}
1490
1491/* List of "safe" pages that may be used to store data loaded from the suspend
1492 * image
1493 */
1494static struct linked_page *safe_pages_list;
1495
1496#ifdef CONFIG_HIGHMEM
/* struct highmem_pbe describes one highmem page that has to be restored
 * atomically during the resume from disk, because the page frame it
 * occupied before the suspend is in use.  The entries form a singly linked
 * list.
 */
struct highmem_pbe {
        /* page frame holding the data now */
        struct page *copy_page;
        /* page frame that held the data before the suspend */
        struct page *orig_page;
        /* next entry in the list */
        struct highmem_pbe *next;
};
1506
1507/* List of highmem PBEs needed for restoring the highmem pages that were
1508 * allocated before the suspend and included in the suspend image, but have
1509 * also been allocated by the "resume" kernel, so their contents cannot be
1510 * written directly to their "original" page frames.
1511 */
1512static struct highmem_pbe *highmem_pblist;
1513
1514/**
1515 *      count_highmem_image_pages - compute the number of highmem pages in the
1516 *      suspend image.  The bits in the memory bitmap @bm that correspond to the
1517 *      image pages are assumed to be set.
1518 */
1519
1520static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
1521{
1522        unsigned long pfn;
1523        unsigned int cnt = 0;
1524
1525        memory_bm_position_reset(bm);
1526        pfn = memory_bm_next_pfn(bm);
1527        while (pfn != BM_END_OF_MAP) {
1528                if (PageHighMem(pfn_to_page(pfn)))
1529                        cnt++;
1530
1531                pfn = memory_bm_next_pfn(bm);
1532        }
1533        return cnt;
1534}
1535
1536/**
1537 *      prepare_highmem_image - try to allocate as many highmem pages as
1538 *      there are highmem image pages (@nr_highmem_p points to the variable
1539 *      containing the number of highmem image pages).  The pages that are
1540 *      "safe" (ie. will not be overwritten when the suspend image is
1541 *      restored) have the corresponding bits set in @bm (it must be
1542 *      unitialized).
1543 *
1544 *      NOTE: This function should not be called if there are no highmem
1545 *      image pages.
1546 */
1547
1548static unsigned int safe_highmem_pages;
1549
1550static struct memory_bitmap *safe_highmem_bm;
1551
1552static int
1553prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
1554{
1555        unsigned int to_alloc;
1556
1557        if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
1558                return -ENOMEM;
1559
1560        if (get_highmem_buffer(PG_SAFE))
1561                return -ENOMEM;
1562
1563        to_alloc = count_free_highmem_pages();
1564        if (to_alloc > *nr_highmem_p)
1565                to_alloc = *nr_highmem_p;
1566        else
1567                *nr_highmem_p = to_alloc;
1568
1569        safe_highmem_pages = 0;
1570        while (to_alloc-- > 0) {
1571                struct page *page;
1572
1573                page = alloc_page(__GFP_HIGHMEM);
1574                if (!swsusp_page_is_free(page)) {
1575                        /* The page is "safe", set its bit the bitmap */
1576                        memory_bm_set_bit(bm, page_to_pfn(page));
1577                        safe_highmem_pages++;
1578                }
1579                /* Mark the page as allocated */
1580                swsusp_set_page_forbidden(page);
1581                swsusp_set_page_free(page);
1582        }
1583        memory_bm_position_reset(bm);
1584        safe_highmem_bm = bm;
1585        return 0;
1586}
1587
1588/**
1589 *      get_highmem_page_buffer - for given highmem image page find the buffer
1590 *      that suspend_write_next() should set for its caller to write to.
1591 *
1592 *      If the page is to be saved to its "original" page frame or a copy of
1593 *      the page is to be made in the highmem, @buffer is returned.  Otherwise,
1594 *      the copy of the page is to be made in normal memory, so the address of
1595 *      the copy is returned.
1596 *
1597 *      If @buffer is returned, the caller of suspend_write_next() will write
1598 *      the page's contents to @buffer, so they will have to be copied to the
1599 *      right location on the next call to suspend_write_next() and it is done
1600 *      with the help of copy_last_highmem_page().  For this purpose, if
1601 *      @buffer is returned, @last_highmem page is set to the page to which
1602 *      the data will have to be copied from @buffer.
1603 */
1604
1605static struct page *last_highmem_page;
1606
1607static void *
1608get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1609{
1610        struct highmem_pbe *pbe;
1611        void *kaddr;
1612
1613        if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
1614                /* We have allocated the "original" page frame and we can
1615                 * use it directly to store the loaded page.
1616                 */
1617                last_highmem_page = page;
1618                return buffer;
1619        }
1620        /* The "original" page frame has not been allocated and we have to
1621         * use a "safe" page frame to store the loaded page.
1622         */
1623        pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
1624        if (!pbe) {
1625                swsusp_free();
1626                return ERR_PTR(-ENOMEM);
1627        }
1628        pbe->orig_page = page;
1629        if (safe_highmem_pages > 0) {
1630                struct page *tmp;
1631
1632                /* Copy of the page will be stored in high memory */
1633                kaddr = buffer;
1634                tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
1635                safe_highmem_pages--;
1636                last_highmem_page = tmp;
1637                pbe->copy_page = tmp;
1638        } else {
1639                /* Copy of the page will be stored in normal memory */
1640                kaddr = safe_pages_list;
1641                safe_pages_list = safe_pages_list->next;
1642                pbe->copy_page = virt_to_page(kaddr);
1643        }
1644        pbe->next = highmem_pblist;
1645        highmem_pblist = pbe;
1646        return kaddr;
1647}
1648
1649/**
1650 *      copy_last_highmem_page - copy the contents of a highmem image from
1651 *      @buffer, where the caller of snapshot_write_next() has place them,
1652 *      to the right location represented by @last_highmem_page .
1653 */
1654
1655static void copy_last_highmem_page(void)
1656{
1657        if (last_highmem_page) {
1658                void *dst;
1659
1660                dst = kmap_atomic(last_highmem_page, KM_USER0);
1661                memcpy(dst, buffer, PAGE_SIZE);
1662                kunmap_atomic(dst, KM_USER0);
1663                last_highmem_page = NULL;
1664        }
1665}
1666
1667static inline int last_highmem_page_copied(void)
1668{
1669        return !last_highmem_page;
1670}
1671
1672static inline void free_highmem_data(void)
1673{
1674        if (safe_highmem_bm)
1675                memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
1676
1677        if (buffer)
1678                free_image_page(buffer, PG_UNSAFE_CLEAR);
1679}
1680#else
/* Variant for kernels without highmem: nothing to set up, always returns 0 */
static inline int get_safe_write_buffer(void)
{
        return 0;
}
1682
/* Variant for kernels without highmem: the image has no highmem pages */
static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
{
        return 0;
}
1685
/* Variant for kernels without highmem: nothing is allocated, neither @bm
 * nor *@nr_highmem_p is touched, and success is reported.
 */
static inline int
prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
{
        return 0;
}
1691
/* Variant for kernels without highmem: being asked for a highmem page
 * buffer is an error, so ERR_PTR(-EINVAL) is returned.
 */
static inline void *
get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
{
        return ERR_PTR(-EINVAL);
}
1697
/* Variants for kernels without highmem: there is never a highmem page
 * pending, so copying and freeing are no-ops and the "copied" test always
 * succeeds.
 */
static inline void copy_last_highmem_page(void)
{
}

static inline int last_highmem_page_copied(void)
{
        return 1;
}

static inline void free_highmem_data(void)
{
}
1701#endif /* CONFIG_HIGHMEM */
1702
1703/**
1704 *      prepare_image - use the memory bitmap @bm to mark the pages that will
1705 *      be overwritten in the process of restoring the system memory state
1706 *      from the suspend image ("unsafe" pages) and allocate memory for the
1707 *      image.
1708 *
1709 *      The idea is to allocate a new memory bitmap first and then allocate
1710 *      as many pages as needed for the image data, but not to assign these
1711 *      pages to specific tasks initially.  Instead, we just mark them as
1712 *      allocated and create a lists of "safe" pages that will be used
1713 *      later.  On systems with high memory a list of "safe" highmem pages is
1714 *      also created.
1715 */
1716
1717#define PBES_PER_LINKED_PAGE    (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
1718
1719static int
1720prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
1721{
1722        unsigned int nr_pages, nr_highmem;
1723        struct linked_page *sp_list, *lp;
1724        int error;
1725
1726        /* If there is no highmem, the buffer will not be necessary */
1727        free_image_page(buffer, PG_UNSAFE_CLEAR);
1728        buffer = NULL;
1729
1730        nr_highmem = count_highmem_image_pages(bm);
1731        error = mark_unsafe_pages(bm);
1732        if (error)
1733                goto Free;
1734
1735        error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
1736        if (error)
1737                goto Free;
1738
1739        duplicate_memory_bitmap(new_bm, bm);
1740        memory_bm_free(bm, PG_UNSAFE_KEEP);
1741        if (nr_highmem > 0) {
1742                error = prepare_highmem_image(bm, &nr_highmem);
1743                if (error)
1744                        goto Free;
1745        }
1746        /* Reserve some safe pages for potential later use.
1747         *
1748         * NOTE: This way we make sure there will be enough safe pages for the
1749         * chain_alloc() in get_buffer().  It is a bit wasteful, but
1750         * nr_copy_pages cannot be greater than 50% of the memory anyway.
1751         */
1752        sp_list = NULL;
1753        /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */
1754        nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
1755        nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
1756        while (nr_pages > 0) {
1757                lp = get_image_page(GFP_ATOMIC, PG_SAFE);
1758                if (!lp) {
1759                        error = -ENOMEM;
1760                        goto Free;
1761                }
1762                lp->next = sp_list;
1763                sp_list = lp;
1764                nr_pages--;
1765        }
1766        /* Preallocate memory for the image */
1767        safe_pages_list = NULL;
1768        nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
1769        while (nr_pages > 0) {
1770                lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
1771                if (!lp) {
1772                        error = -ENOMEM;
1773                        goto Free;
1774                }
1775                if (!swsusp_page_is_free(virt_to_page(lp))) {
1776                        /* The page is "safe", add it to the list */
1777                        lp->next = safe_pages_list;
1778                        safe_pages_list = lp;
1779                }
1780                /* Mark the page as allocated */
1781                swsusp_set_page_forbidden(virt_to_page(lp));
1782                swsusp_set_page_free(virt_to_page(lp));
1783                nr_pages--;
1784        }
1785        /* Free the reserved safe pages so that chain_alloc() can use them */
1786        while (sp_list) {
1787                lp = sp_list->next;
1788                free_image_page(sp_list, PG_UNSAFE_CLEAR);
1789                sp_list = lp;
1790        }
1791        return 0;
1792
1793 Free:
1794        swsusp_free();
1795        return error;
1796}
1797
1798/**
1799 *      get_buffer - compute the address that snapshot_write_next() should
1800 *      set for its caller to write to.
1801 */
1802
1803static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
1804{
1805        struct pbe *pbe;
1806        struct page *page;
1807        unsigned long pfn = memory_bm_next_pfn(bm);
1808
1809        if (pfn == BM_END_OF_MAP)
1810                return ERR_PTR(-EFAULT);
1811
1812        page = pfn_to_page(pfn);
1813        if (PageHighMem(page))
1814                return get_highmem_page_buffer(page, ca);
1815
1816        if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
1817                /* We have allocated the "original" page frame and we can
1818                 * use it directly to store the loaded page.
1819                 */
1820                return page_address(page);
1821
1822        /* The "original" page frame has not been allocated and we have to
1823         * use a "safe" page frame to store the loaded page.
1824         */
1825        pbe = chain_alloc(ca, sizeof(struct pbe));
1826        if (!pbe) {
1827                swsusp_free();
1828                return ERR_PTR(-ENOMEM);
1829        }
1830        pbe->orig_address = page_address(page);
1831        pbe->address = safe_pages_list;
1832        safe_pages_list = safe_pages_list->next;
1833        pbe->next = restore_pblist;
1834        restore_pblist = pbe;
1835        return pbe->address;
1836}
1837
1838/**
1839 *      snapshot_write_next - used for writing the system memory snapshot.
1840 *
1841 *      On the first call to it @handle should point to a zeroed
1842 *      snapshot_handle structure.  The structure gets updated and a pointer
1843 *      to it should be passed to this function every next time.
1844 *
1845 *      The @count parameter should contain the number of bytes the caller
1846 *      wants to write to the image.  It must not be zero.
1847 *
1848 *      On success the function returns a positive number.  Then, the caller
1849 *      is allowed to write up to the returned number of bytes to the memory
1850 *      location computed by the data_of() macro.  The number returned
1851 *      may be smaller than @count, but this only happens if the write would
1852 *      cross a page boundary otherwise.
1853 *
1854 *      The function returns 0 to indicate the "end of file" condition,
1855 *      and a negative number is returned on error.  In such cases the
1856 *      structure pointed to by @handle is not updated and should not be used
1857 *      any more.
1858 */
1859
1860int snapshot_write_next(struct snapshot_handle *handle, size_t count)
1861{
1862        static struct chain_allocator ca;
1863        int error = 0;
1864
1865        /* Check if we have already loaded the entire image */
1866        if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
1867                return 0;
1868
1869        if (handle->offset == 0) {
1870                if (!buffer)
1871                        /* This makes the buffer be freed by swsusp_free() */
1872                        buffer = get_image_page(GFP_ATOMIC, PG_ANY);
1873
1874                if (!buffer)
1875                        return -ENOMEM;
1876
1877                handle->buffer = buffer;
1878        }
1879        handle->sync_read = 1;
1880        if (handle->prev < handle->cur) {
1881                if (handle->prev == 0) {
1882                        error = load_header(buffer);
1883                        if (error)
1884                                return error;
1885
1886                        error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
1887                        if (error)
1888                                return error;
1889
1890                } else if (handle->prev <= nr_meta_pages) {
1891                        error = unpack_orig_pfns(buffer, &copy_bm);
1892                        if (error)
1893                                return error;
1894
1895                        if (handle->prev == nr_meta_pages) {
1896                                error = prepare_image(&orig_bm, &copy_bm);
1897                                if (error)
1898                                        return error;
1899
1900                                chain_init(&ca, GFP_ATOMIC, PG_SAFE);
1901                                memory_bm_position_reset(&orig_bm);
1902                                restore_pblist = NULL;
1903                                handle->buffer = get_buffer(&orig_bm, &ca);
1904                                handle->sync_read = 0;
1905                                if (IS_ERR(handle->buffer))
1906                                        return PTR_ERR(handle->buffer);
1907                        }
1908                } else {
1909                        copy_last_highmem_page();
1910                        handle->buffer = get_buffer(&orig_bm, &ca);
1911                        if (IS_ERR(handle->buffer))
1912                                return PTR_ERR(handle->buffer);
1913                        if (handle->buffer != buffer)
1914                                handle->sync_read = 0;
1915                }
1916                handle->prev = handle->cur;
1917        }
1918        handle->buf_offset = handle->cur_offset;
1919        if (handle->cur_offset + count >= PAGE_SIZE) {
1920                count = PAGE_SIZE - handle->cur_offset;
1921                handle->cur_offset = 0;
1922                handle->cur++;
1923        } else {
1924                handle->cur_offset += count;
1925        }
1926        handle->offset += count;
1927        return count;
1928}
1929
1930/**
1931 *      snapshot_write_finalize - must be called after the last call to
1932 *      snapshot_write_next() in case the last page in the image happens
1933 *      to be a highmem page and its contents should be stored in the
1934 *      highmem.  Additionally, it releases the memory that will not be
1935 *      used any more.
1936 */
1937
1938void snapshot_write_finalize(struct snapshot_handle *handle)
1939{
1940        copy_last_highmem_page();
1941        /* Free only if we have loaded the image entirely */
1942        if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) {
1943                memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
1944                free_highmem_data();
1945        }
1946}
1947
1948int snapshot_image_loaded(struct snapshot_handle *handle)
1949{
1950        return !(!nr_copy_pages || !last_highmem_page_copied() ||
1951                        handle->cur <= nr_meta_pages + nr_copy_pages);
1952}
1953
1954#ifdef CONFIG_HIGHMEM
1955/* Assumes that @buf is ready and points to a "safe" page */
1956static inline void
1957swap_two_pages_data(struct page *p1, struct page *p2, void *buf)
1958{
1959        void *kaddr1, *kaddr2;
1960
1961        kaddr1 = kmap_atomic(p1, KM_USER0);
1962        kaddr2 = kmap_atomic(p2, KM_USER1);
1963        memcpy(buf, kaddr1, PAGE_SIZE);
1964        memcpy(kaddr1, kaddr2, PAGE_SIZE);
1965        memcpy(kaddr2, buf, PAGE_SIZE);
1966        kunmap_atomic(kaddr1, KM_USER0);
1967        kunmap_atomic(kaddr2, KM_USER1);
1968}
1969
1970/**
1971 *      restore_highmem - for each highmem page that was allocated before
1972 *      the suspend and included in the suspend image, and also has been
1973 *      allocated by the "resume" kernel swap its current (ie. "before
1974 *      resume") contents with the previous (ie. "before suspend") one.
1975 *
1976 *      If the resume eventually fails, we can call this function once
1977 *      again and restore the "before resume" highmem state.
1978 */
1979
1980int restore_highmem(void)
1981{
1982        struct highmem_pbe *pbe = highmem_pblist;
1983        void *buf;
1984
1985        if (!pbe)
1986                return 0;
1987
1988        buf = get_image_page(GFP_ATOMIC, PG_SAFE);
1989        if (!buf)
1990                return -ENOMEM;
1991
1992        while (pbe) {
1993                swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
1994                pbe = pbe->next;
1995        }
1996        free_image_page(buf, PG_UNSAFE_CLEAR);
1997        return 0;
1998}
1999#endif /* CONFIG_HIGHMEM */
2000
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.