linux/drivers/xen/balloon.c
/******************************************************************************
 * Xen balloon driver - enables returning/claiming memory to/from Xen.
 *
 * Copyright (c) 2003, B Dragovic
 * Copyright (c) 2003-2004, M Williamson, K Fraser
 * Copyright (c) 2005 Dan M. Smith, IBM Corporation
 * Copyright (c) 2010 Daniel Kiper
 *
 * Memory hotplug support was written by Daniel Kiper. Work on
 * it was sponsored by Google under Google Summer of Code 2010
 * program. Jeremy Fitzhardinge from Citrix was the mentor for
 * this project.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/bootmem.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/gfp.h>
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>

#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/e820.h>

#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <xen/interface/memory.h>
#include <xen/balloon.h>
#include <xen/features.h>
#include <xen/page.h>

/*
 * balloon_process() state:
 *
 * BP_DONE: done or nothing to do,
 * BP_EAGAIN: error, go to sleep,
 * BP_ECANCELED: error, balloon operation canceled.
 */

enum bp_state {
	BP_DONE,
	BP_EAGAIN,
	BP_ECANCELED
};


static DEFINE_MUTEX(balloon_mutex);

struct balloon_stats balloon_stats;
EXPORT_SYMBOL_GPL(balloon_stats);

/* We increase/decrease in batches which fit in a page */
static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];

#ifdef CONFIG_HIGHMEM
#define inc_totalhigh_pages() (totalhigh_pages++)
#define dec_totalhigh_pages() (totalhigh_pages--)
#else
#define inc_totalhigh_pages() do {} while (0)
#define dec_totalhigh_pages() do {} while (0)
#endif

/* List of ballooned pages, threaded through the mem_map array. */
static LIST_HEAD(ballooned_pages);

/* Main work function, always executed in process context. */
static void balloon_process(struct work_struct *work);
static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);

/* When ballooning out (allocating memory to return to Xen) we don't really
   want the kernel to try too hard since that can trigger the oom killer. */
#define GFP_BALLOON \
	(GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)

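/* Clear page contents before the frame is handed back to Xen, if configured. */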
static void scrub_page(struct page *page)
{
#ifdef CONFIG_XEN_SCRUB_PAGES
	clear_highpage(page);
#endif
}

/* balloon_append: add the given page to the balloon. */
static void __balloon_append(struct page *page)
{
	/* Lowmem is re-populated first, so highmem pages go at list tail. */
	if (PageHighMem(page)) {
		list_add_tail(&page->lru, &ballooned_pages);
		balloon_stats.balloon_high++;
	} else {
		list_add(&page->lru, &ballooned_pages);
		balloon_stats.balloon_low++;
	}
}

static void balloon_append(struct page *page)
{
	__balloon_append(page);
	if (PageHighMem(page))
		dec_totalhigh_pages();
	totalram_pages--;
}

/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
static struct page *balloon_retrieve(bool prefer_highmem)
{
	struct page *page;

	if (list_empty(&ballooned_pages))
		return NULL;

	if (prefer_highmem)
		page = list_entry(ballooned_pages.prev, struct page, lru);
	else
		page = list_entry(ballooned_pages.next, struct page, lru);
	list_del(&page->lru);

	if (PageHighMem(page)) {
		balloon_stats.balloon_high--;
		inc_totalhigh_pages();
	} else
		balloon_stats.balloon_low--;

	totalram_pages++;

	return page;
}

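/*
 * Iteration helpers: walk the ballooned page list (lowmem entries first)
 * without removing anything; used by increase_reservation() to build
 * frame_list.
 */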
static struct page *balloon_first_page(void)
{
	if (list_empty(&ballooned_pages))
		return NULL;
	return list_entry(ballooned_pages.next, struct page, lru);
}

static struct page *balloon_next_page(struct page *page)
{
	struct list_head *next = page->lru.next;
	if (next == &ballooned_pages)
		return NULL;
	return list_entry(next, struct page, lru);
}

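/*
 * Exponential backoff for retries: on success reset the delay and retry
 * counter; on error double schedule_delay (capped at max_schedule_delay)
 * and give up with BP_ECANCELED once max_retry_count is exceeded.
 */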
static enum bp_state update_schedule(enum bp_state state)
{
	if (state == BP_DONE) {
		balloon_stats.schedule_delay = 1;
		balloon_stats.retry_count = 1;
		return BP_DONE;
	}

	++balloon_stats.retry_count;

	if (balloon_stats.max_retry_count != RETRY_UNLIMITED &&
			balloon_stats.retry_count > balloon_stats.max_retry_count) {
		balloon_stats.schedule_delay = 1;
		balloon_stats.retry_count = 1;
		return BP_ECANCELED;
	}

	balloon_stats.schedule_delay <<= 1;

	if (balloon_stats.schedule_delay > balloon_stats.max_schedule_delay)
		balloon_stats.schedule_delay = balloon_stats.max_schedule_delay;

	return BP_EAGAIN;
}

#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
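/*
 * Credit is the number of pages still to be added to (positive) or removed
 * from (negative) the domain to reach target_pages; pages accounted to
 * hot-added regions are treated as already supplied.
 */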
static long current_credit(void)
{
	return balloon_stats.target_pages - balloon_stats.current_pages -
		balloon_stats.hotplug_pages;
}

static bool balloon_is_inflated(void)
{
	if (balloon_stats.balloon_low || balloon_stats.balloon_high ||
			balloon_stats.balloon_hotplug)
		return true;
	else
		return false;
}

/*
 * reserve_additional_memory() adds a memory region of size >= credit above
 * max_pfn. The new region is section aligned and its size is rounded up to
 * a multiple of the section size. This allows optimal use of the address
 * space and establishes proper alignment when this function is called for
 * the first time after boot (the last section not fully populated at boot
 * time contains unused memory pages with the PG_reserved bit not set;
 * online_pages_range() does not allow onlining a whole range if the first
 * onlined page does not have the PG_reserved bit set). The real size of the
 * added memory is established at the page onlining stage.
 */

static enum bp_state reserve_additional_memory(long credit)
{
	int nid, rc;
	u64 hotplug_start_paddr;
	unsigned long balloon_hotplug = credit;

	hotplug_start_paddr = PFN_PHYS(SECTION_ALIGN_UP(max_pfn));
	balloon_hotplug = round_up(balloon_hotplug, PAGES_PER_SECTION);
	nid = memory_add_physaddr_to_nid(hotplug_start_paddr);

	rc = add_memory(nid, hotplug_start_paddr, balloon_hotplug << PAGE_SHIFT);

	if (rc) {
		pr_info("xen_balloon: %s: add_memory() failed: %i\n", __func__, rc);
		return BP_EAGAIN;
	}

	balloon_hotplug -= credit;

	balloon_stats.hotplug_pages += credit;
	balloon_stats.balloon_hotplug = balloon_hotplug;

	return BP_DONE;
}

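/*
 * Memory hotplug callback: a hot-added page that comes online is placed
 * straight into the balloon rather than handed to the page allocator.
 */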
static void xen_online_page(struct page *page)
{
	__online_page_set_limits(page);

	mutex_lock(&balloon_mutex);

	__balloon_append(page);

	if (balloon_stats.hotplug_pages)
		--balloon_stats.hotplug_pages;
	else
		--balloon_stats.balloon_hotplug;

	mutex_unlock(&balloon_mutex);
}

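/* Kick the worker once newly added memory has been onlined. */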
static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v)
{
	if (val == MEM_ONLINE)
		schedule_delayed_work(&balloon_worker, 0);

	return NOTIFY_OK;
}

static struct notifier_block xen_memory_nb = {
	.notifier_call = xen_memory_notifier,
	.priority = 0
};
#else
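/*
 * Without memory hotplug the domain can never grow beyond its boot size,
 * so clamp the target to what can be satisfied from the ballooned lists.
 */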
static long current_credit(void)
{
	unsigned long target = balloon_stats.target_pages;

	target = min(target,
		     balloon_stats.current_pages +
		     balloon_stats.balloon_low +
		     balloon_stats.balloon_high);

	return target - balloon_stats.current_pages;
}

static bool balloon_is_inflated(void)
{
	if (balloon_stats.balloon_low || balloon_stats.balloon_high)
		return true;
	else
		return false;
}

static enum bp_state reserve_additional_memory(long credit)
{
	balloon_stats.target_pages = balloon_stats.current_pages;
	return BP_DONE;
}
#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */

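/*
 * Ask Xen to populate up to nr_pages frames for pages currently in the
 * balloon, wire them back into the p2m (and the kernel page tables for
 * lowmem PV pages) and release them to the page allocator.
 */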
static enum bp_state increase_reservation(unsigned long nr_pages)
{
	int rc;
	unsigned long  pfn, i;
	struct page   *page;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
	if (!balloon_stats.balloon_low && !balloon_stats.balloon_high) {
		nr_pages = min(nr_pages, balloon_stats.balloon_hotplug);
		balloon_stats.hotplug_pages += nr_pages;
		balloon_stats.balloon_hotplug -= nr_pages;
		return BP_DONE;
	}
#endif

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	page = balloon_first_page();
	for (i = 0; i < nr_pages; i++) {
		if (!page) {
			nr_pages = i;
			break;
		}
		frame_list[i] = page_to_pfn(page);
		page = balloon_next_page(page);
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
	if (rc <= 0)
		return BP_EAGAIN;

	for (i = 0; i < rc; i++) {
		page = balloon_retrieve(false);
		BUG_ON(page == NULL);

		pfn = page_to_pfn(page);
		BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
		       phys_to_machine_mapping_valid(pfn));

		set_phys_to_machine(pfn, frame_list[i]);

		/* Link back into the page tables if not highmem. */
		if (xen_pv_domain() && !PageHighMem(page)) {
			int ret;
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)__va(pfn << PAGE_SHIFT),
				mfn_pte(frame_list[i], PAGE_KERNEL),
				0);
			BUG_ON(ret);
		}

		/* Relinquish the page back to the allocator. */
		ClearPageReserved(page);
		init_page_count(page);
		__free_page(page);
	}

	balloon_stats.current_pages += rc;

	return BP_DONE;
}

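/*
 * Allocate nr_pages pages from the kernel, unmap them (PV lowmem), drop
 * their p2m entries and hand the underlying frames back to Xen, adding
 * the struct pages to the ballooned list.
 */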
static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
{
	enum bp_state state = BP_DONE;
	unsigned long  pfn, i;
	struct page   *page;
	int ret;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
	if (balloon_stats.hotplug_pages) {
		nr_pages = min(nr_pages, balloon_stats.hotplug_pages);
		balloon_stats.hotplug_pages -= nr_pages;
		balloon_stats.balloon_hotplug += nr_pages;
		return BP_DONE;
	}
#endif

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	for (i = 0; i < nr_pages; i++) {
		if ((page = alloc_page(gfp)) == NULL) {
			nr_pages = i;
			state = BP_EAGAIN;
			break;
		}

		pfn = page_to_pfn(page);
		frame_list[i] = pfn_to_mfn(pfn);

		scrub_page(page);

		if (xen_pv_domain() && !PageHighMem(page)) {
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)__va(pfn << PAGE_SHIFT),
				__pte_ma(0), 0);
			BUG_ON(ret);
		}

	}

	/* Ensure that ballooned highmem pages don't have kmaps. */
	kmap_flush_unused();
	flush_tlb_all();

	/* No more mappings: invalidate P2M and add to balloon. */
	for (i = 0; i < nr_pages; i++) {
		pfn = mfn_to_pfn(frame_list[i]);
		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
		balloon_append(pfn_to_page(pfn));
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents   = nr_pages;
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	BUG_ON(ret != nr_pages);

	balloon_stats.current_pages -= nr_pages;

	return state;
}

/*
 * We avoid multiple worker processes conflicting via the balloon mutex.
 * We may of course race updates of the target counts (which are protected
 * by the balloon lock), or with changes to the Xen hard limit, but we will
 * recover from these in time.
 */
static void balloon_process(struct work_struct *work)
{
	enum bp_state state = BP_DONE;
	long credit;

	mutex_lock(&balloon_mutex);

	do {
		credit = current_credit();

		if (credit > 0) {
			if (balloon_is_inflated())
				state = increase_reservation(credit);
			else
				state = reserve_additional_memory(credit);
		}

		if (credit < 0)
			state = decrease_reservation(-credit, GFP_BALLOON);

		state = update_schedule(state);

#ifndef CONFIG_PREEMPT
		if (need_resched())
			schedule();
#endif
	} while (credit && state == BP_DONE);

	/* Schedule more work if there is some still to be done. */
	if (state == BP_EAGAIN)
		schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ);

	mutex_unlock(&balloon_mutex);
}

/* Resets the Xen limit, sets new target, and kicks off processing. */
void balloon_set_new_target(unsigned long target)
{
	/* No need for lock. Not read-modify-write updates. */
	balloon_stats.target_pages = target;
	schedule_delayed_work(&balloon_worker, 0);
}
EXPORT_SYMBOL_GPL(balloon_set_new_target);

/**
 * alloc_xenballooned_pages - get pages that have been ballooned out
 * @nr_pages: Number of pages to get
 * @pages: pages returned
 * @highmem: allow highmem pages
 * @return 0 on success, error otherwise
 */
int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem)
{
	int pgno = 0;
	struct page *page;
	mutex_lock(&balloon_mutex);
	while (pgno < nr_pages) {
		page = balloon_retrieve(highmem);
		if (page && (highmem || !PageHighMem(page))) {
			pages[pgno++] = page;
		} else {
			enum bp_state st;
			if (page)
				balloon_append(page);
			st = decrease_reservation(nr_pages - pgno,
					highmem ? GFP_HIGHUSER : GFP_USER);
			if (st != BP_DONE)
				goto out_undo;
		}
	}
	mutex_unlock(&balloon_mutex);
	return 0;
 out_undo:
	while (pgno)
		balloon_append(pages[--pgno]);
	/* Free the memory back to the kernel soon */
	schedule_delayed_work(&balloon_worker, 0);
	mutex_unlock(&balloon_mutex);
	return -ENOMEM;
}
EXPORT_SYMBOL(alloc_xenballooned_pages);

/**
 * free_xenballooned_pages - return pages retrieved with alloc_xenballooned_pages
 * @nr_pages: Number of pages
 * @pages: pages to return
 */
void free_xenballooned_pages(int nr_pages, struct page **pages)
{
	int i;

	mutex_lock(&balloon_mutex);

	for (i = 0; i < nr_pages; i++) {
		if (pages[i])
			balloon_append(pages[i]);
	}

	/* The balloon may be too large now. Shrink it if needed. */
	if (current_credit())
		schedule_delayed_work(&balloon_worker, 0);

	mutex_unlock(&balloon_mutex);
}
EXPORT_SYMBOL(free_xenballooned_pages);
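
/*
 * Illustrative use of the exported API (a sketch, not taken from this file):
 * a driver that needs frames it can map foreign memory into might do
 * something like
 *
 *	struct page *pages[16];
 *
 *	if (alloc_xenballooned_pages(16, pages, false) == 0) {
 *		... map grant references into pages[] ...
 *		free_xenballooned_pages(16, pages);
 *	}
 */

/*
 * Seed the balloon with a boot-time "extra memory" region; these pfns were
 * never given to the page allocator, so system memory totals are left
 * untouched (see the comment in the loop below).
 */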
static void __init balloon_add_region(unsigned long start_pfn,
				      unsigned long pages)
{
	unsigned long pfn, extra_pfn_end;
	struct page *page;

	/*
	 * If the amount of usable memory has been limited (e.g., with
	 * the 'mem' command line parameter), don't add pages beyond
	 * this limit.
	 */
	extra_pfn_end = min(max_pfn, start_pfn + pages);

	for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) {
		page = pfn_to_page(pfn);
		/* totalram_pages and totalhigh_pages do not
		   include the boot-time balloon extension, so
		   don't subtract from it. */
		__balloon_append(page);
	}
}

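/*
 * Boot-time setup: record the initial domain size, set the retry policy
 * defaults, hook into memory hotplug (if configured) and pre-load the
 * balloon with any extra memory regions reserved by early Xen setup.
 */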
static int __init balloon_init(void)
{
	int i;

	if (!xen_domain())
		return -ENODEV;

	pr_info("xen/balloon: Initialising balloon driver.\n");

	balloon_stats.current_pages = xen_pv_domain()
		? min(xen_start_info->nr_pages - xen_released_pages, max_pfn)
		: max_pfn;
	balloon_stats.target_pages  = balloon_stats.current_pages;
	balloon_stats.balloon_low   = 0;
	balloon_stats.balloon_high  = 0;

	balloon_stats.schedule_delay = 1;
	balloon_stats.max_schedule_delay = 32;
	balloon_stats.retry_count = 1;
	balloon_stats.max_retry_count = RETRY_UNLIMITED;

#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
	balloon_stats.hotplug_pages = 0;
	balloon_stats.balloon_hotplug = 0;

	set_online_page_callback(&xen_online_page);
	register_memory_notifier(&xen_memory_nb);
#endif

	/*
	 * Initialize the balloon with pages from the extra memory
	 * regions (see arch/x86/xen/setup.c).
	 */
	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++)
		if (xen_extra_mem[i].size)
			balloon_add_region(PFN_UP(xen_extra_mem[i].start),
					   PFN_DOWN(xen_extra_mem[i].size));

	return 0;
}

subsys_initcall(balloon_init);

MODULE_LICENSE("GPL");