linux/drivers/xen/balloon.c
/******************************************************************************
 * Xen balloon driver - enables returning/claiming memory to/from Xen.
 *
 * Copyright (c) 2003, B Dragovic
 * Copyright (c) 2003-2004, M Williamson, K Fraser
 * Copyright (c) 2005 Dan M. Smith, IBM Corporation
 * Copyright (c) 2010 Daniel Kiper
 *
 * Memory hotplug support was written by Daniel Kiper. Work on
 * it was sponsored by Google under Google Summer of Code 2010
 * program. Jeremy Fitzhardinge from Citrix was the mentor for
 * this project.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/bootmem.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/gfp.h>
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>

#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>

#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <xen/interface/memory.h>
#include <xen/balloon.h>
#include <xen/features.h>
#include <xen/page.h>

/*
 * balloon_process() state:
 *
 * BP_DONE: done or nothing to do,
 * BP_EAGAIN: error, go to sleep,
 * BP_ECANCELED: error, balloon operation canceled.
 */

enum bp_state {
        BP_DONE,
        BP_EAGAIN,
        BP_ECANCELED
};


static DEFINE_MUTEX(balloon_mutex);

struct balloon_stats balloon_stats;
EXPORT_SYMBOL_GPL(balloon_stats);

/* We increase/decrease in batches which fit in a page */
static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)];
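/*
 * Added note: with 4 KiB pages this gives PAGE_SIZE / sizeof(unsigned long)
 * entries -- 512 on 64-bit builds, 1024 on 32-bit -- so each hypercall
 * below moves at most that many frames at a time.
 */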

#ifdef CONFIG_HIGHMEM
#define inc_totalhigh_pages() (totalhigh_pages++)
#define dec_totalhigh_pages() (totalhigh_pages--)
#else
#define inc_totalhigh_pages() do {} while (0)
#define dec_totalhigh_pages() do {} while (0)
#endif

/* List of ballooned pages, threaded through the mem_map array. */
static LIST_HEAD(ballooned_pages);

/* Main work function, always executed in process context. */
static void balloon_process(struct work_struct *work);
static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);

/* When ballooning out (allocating memory to return to Xen) we don't really
   want the kernel to try too hard since that can trigger the oom killer. */
#define GFP_BALLOON \
        (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
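/*
 * Added note: GFP_HIGHUSER permits highmem pages, __GFP_NOWARN suppresses
 * allocation-failure warnings, __GFP_NORETRY makes the allocator give up
 * early instead of retrying hard (which could wake the OOM killer), and
 * __GFP_NOMEMALLOC keeps the allocation away from the emergency reserves.
 */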

static void scrub_page(struct page *page)
{
#ifdef CONFIG_XEN_SCRUB_PAGES
        clear_highpage(page);
#endif
}
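/*
 * Added note: when CONFIG_XEN_SCRUB_PAGES is set, scrub_page() above zeroes
 * a page before its frame is handed back to Xen, so stale guest data cannot
 * leak to whichever domain receives the frame next.
 */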

/* balloon_append: add the given page to the balloon. */
static void __balloon_append(struct page *page)
{
        /* Lowmem is re-populated first, so highmem pages go at list tail. */
        if (PageHighMem(page)) {
                list_add_tail(&page->lru, &ballooned_pages);
                balloon_stats.balloon_high++;
        } else {
                list_add(&page->lru, &ballooned_pages);
                balloon_stats.balloon_low++;
        }
}

static void balloon_append(struct page *page)
{
        __balloon_append(page);
        if (PageHighMem(page))
                dec_totalhigh_pages();
        totalram_pages--;
}

/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
static struct page *balloon_retrieve(bool prefer_highmem)
{
        struct page *page;

        if (list_empty(&ballooned_pages))
                return NULL;

        if (prefer_highmem)
                page = list_entry(ballooned_pages.prev, struct page, lru);
        else
                page = list_entry(ballooned_pages.next, struct page, lru);
        list_del(&page->lru);

        if (PageHighMem(page)) {
                balloon_stats.balloon_high--;
                inc_totalhigh_pages();
        } else
                balloon_stats.balloon_low--;

        totalram_pages++;

        return page;
}

static struct page *balloon_first_page(void)
{
        if (list_empty(&ballooned_pages))
                return NULL;
        return list_entry(ballooned_pages.next, struct page, lru);
}

static struct page *balloon_next_page(struct page *page)
{
        struct list_head *next = page->lru.next;
        if (next == &ballooned_pages)
                return NULL;
        return list_entry(next, struct page, lru);
}

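/*
 * Added note: update_schedule() implements exponential backoff. With the
 * defaults set in balloon_init() (schedule_delay = 1, max_schedule_delay =
 * 32), repeated failures are retried after 2, 4, 8, 16, 32, 32, ... seconds
 * until the operation succeeds or max_retry_count is exceeded.
 */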
static enum bp_state update_schedule(enum bp_state state)
{
        if (state == BP_DONE) {
                balloon_stats.schedule_delay = 1;
                balloon_stats.retry_count = 1;
                return BP_DONE;
        }

        ++balloon_stats.retry_count;

        if (balloon_stats.max_retry_count != RETRY_UNLIMITED &&
                        balloon_stats.retry_count > balloon_stats.max_retry_count) {
                balloon_stats.schedule_delay = 1;
                balloon_stats.retry_count = 1;
                return BP_ECANCELED;
        }

        balloon_stats.schedule_delay <<= 1;

        if (balloon_stats.schedule_delay > balloon_stats.max_schedule_delay)
                balloon_stats.schedule_delay = balloon_stats.max_schedule_delay;

        return BP_EAGAIN;
}

#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
static long current_credit(void)
{
        return balloon_stats.target_pages - balloon_stats.current_pages -
                balloon_stats.hotplug_pages;
}

static bool balloon_is_inflated(void)
{
        if (balloon_stats.balloon_low || balloon_stats.balloon_high ||
                        balloon_stats.balloon_hotplug)
                return true;
        else
                return false;
}

/*
 * reserve_additional_memory() adds a memory region of size >= credit above
 * max_pfn. The new region is section aligned and its size is rounded up to
 * a multiple of the section size. These properties allow optimal use of
 * the address space and establish proper alignment when this function is
 * called for the first time after boot (the last section, not fully
 * populated at boot time, contains unused memory pages with the
 * PG_reserved bit unset; online_pages_range() refuses to online a whole
 * range if the first onlined page does not have the PG_reserved bit set).
 * The real size of the added memory is established at the page-onlining
 * stage.
 */

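/*
 * Added note: on x86_64 with 4 KiB pages a memory section is 128 MiB
 * (PAGES_PER_SECTION == 32768), so a credit of e.g. 1000 pages is rounded
 * up to one full 32768-page section below; the surplus pages are tracked
 * in balloon_stats.balloon_hotplug.
 */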
static enum bp_state reserve_additional_memory(long credit)
{
        int nid, rc;
        u64 hotplug_start_paddr;
        unsigned long balloon_hotplug = credit;

        hotplug_start_paddr = PFN_PHYS(SECTION_ALIGN_UP(max_pfn));
        balloon_hotplug = round_up(balloon_hotplug, PAGES_PER_SECTION);
        nid = memory_add_physaddr_to_nid(hotplug_start_paddr);

        rc = add_memory(nid, hotplug_start_paddr, balloon_hotplug << PAGE_SHIFT);

        if (rc) {
                pr_info("xen_balloon: %s: add_memory() failed: %i\n", __func__, rc);
                return BP_EAGAIN;
        }

        balloon_hotplug -= credit;

        balloon_stats.hotplug_pages += credit;
        balloon_stats.balloon_hotplug = balloon_hotplug;

        return BP_DONE;
}

static void xen_online_page(struct page *page)
{
        __online_page_set_limits(page);

        mutex_lock(&balloon_mutex);

        __balloon_append(page);

        if (balloon_stats.hotplug_pages)
                --balloon_stats.hotplug_pages;
        else
                --balloon_stats.balloon_hotplug;

        mutex_unlock(&balloon_mutex);
}

static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v)
{
        if (val == MEM_ONLINE)
                schedule_delayed_work(&balloon_worker, 0);

        return NOTIFY_OK;
}

static struct notifier_block xen_memory_nb = {
        .notifier_call = xen_memory_notifier,
        .priority = 0
};
#else
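/*
 * Added note: without memory hotplug the target is clamped so the credit
 * never exceeds what the balloon actually holds. For example, with
 * current_pages = 1000, balloon_low = 50 and balloon_high = 0, a requested
 * target of 2000 is treated as 1050, yielding a credit of 50.
 */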
static long current_credit(void)
{
        unsigned long target = balloon_stats.target_pages;

        target = min(target,
                     balloon_stats.current_pages +
                     balloon_stats.balloon_low +
                     balloon_stats.balloon_high);

        return target - balloon_stats.current_pages;
}

static bool balloon_is_inflated(void)
{
        if (balloon_stats.balloon_low || balloon_stats.balloon_high)
                return true;
        else
                return false;
}

static enum bp_state reserve_additional_memory(long credit)
{
        balloon_stats.target_pages = balloon_stats.current_pages;
        return BP_DONE;
}
#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */

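/*
 * Added background: in a PV guest a "pfn" is the guest's pseudo-physical
 * frame number and an "mfn" the machine frame backing it; the P2M table
 * maps one to the other. Inflating the balloon severs that mapping
 * (INVALID_P2M_ENTRY); deflating restores it via set_phys_to_machine().
 */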
static enum bp_state increase_reservation(unsigned long nr_pages)
{
        int rc;
        unsigned long  pfn, i;
        struct page   *page;
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
                .extent_order = 0,
                .domid        = DOMID_SELF
        };

#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
        if (!balloon_stats.balloon_low && !balloon_stats.balloon_high) {
                nr_pages = min(nr_pages, balloon_stats.balloon_hotplug);
                balloon_stats.hotplug_pages += nr_pages;
                balloon_stats.balloon_hotplug -= nr_pages;
                return BP_DONE;
        }
#endif

        if (nr_pages > ARRAY_SIZE(frame_list))
                nr_pages = ARRAY_SIZE(frame_list);

        page = balloon_first_page();
        for (i = 0; i < nr_pages; i++) {
                if (!page) {
                        nr_pages = i;
                        break;
                }
                frame_list[i] = page_to_pfn(page);
                page = balloon_next_page(page);
        }

        set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents = nr_pages;
        rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
        if (rc <= 0)
                return BP_EAGAIN;

        for (i = 0; i < rc; i++) {
                page = balloon_retrieve(false);
                BUG_ON(page == NULL);

                pfn = page_to_pfn(page);
                BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
                       phys_to_machine_mapping_valid(pfn));

                set_phys_to_machine(pfn, frame_list[i]);

#ifdef CONFIG_XEN_HAVE_PVMMU
                /* Link back into the page tables if not highmem. */
                if (xen_pv_domain() && !PageHighMem(page)) {
                        int ret;
                        ret = HYPERVISOR_update_va_mapping(
                                (unsigned long)__va(pfn << PAGE_SHIFT),
                                mfn_pte(frame_list[i], PAGE_KERNEL),
                                0);
                        BUG_ON(ret);
                }
#endif

                /* Relinquish the page back to the allocator. */
                ClearPageReserved(page);
                init_page_count(page);
                __free_page(page);
        }

        balloon_stats.current_pages += rc;

        return BP_DONE;
}

static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
{
        enum bp_state state = BP_DONE;
        unsigned long  pfn, i;
        struct page   *page;
        int ret;
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
                .extent_order = 0,
                .domid        = DOMID_SELF
        };

#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
        if (balloon_stats.hotplug_pages) {
                nr_pages = min(nr_pages, balloon_stats.hotplug_pages);
                balloon_stats.hotplug_pages -= nr_pages;
                balloon_stats.balloon_hotplug += nr_pages;
                return BP_DONE;
        }
#endif

        if (nr_pages > ARRAY_SIZE(frame_list))
                nr_pages = ARRAY_SIZE(frame_list);

        for (i = 0; i < nr_pages; i++) {
                if ((page = alloc_page(gfp)) == NULL) {
                        nr_pages = i;
                        state = BP_EAGAIN;
                        break;
                }

                pfn = page_to_pfn(page);
                frame_list[i] = pfn_to_mfn(pfn);

                scrub_page(page);

#ifdef CONFIG_XEN_HAVE_PVMMU
                if (xen_pv_domain() && !PageHighMem(page)) {
                        ret = HYPERVISOR_update_va_mapping(
                                (unsigned long)__va(pfn << PAGE_SHIFT),
                                __pte_ma(0), 0);
                        BUG_ON(ret);
                }
#endif
        }

        /* Ensure that ballooned highmem pages don't have kmaps. */
        kmap_flush_unused();
        flush_tlb_all();

        /* No more mappings: invalidate P2M and add to balloon. */
        for (i = 0; i < nr_pages; i++) {
                pfn = mfn_to_pfn(frame_list[i]);
                __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
                balloon_append(pfn_to_page(pfn));
        }

        set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents   = nr_pages;
        ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
        BUG_ON(ret != nr_pages);

        balloon_stats.current_pages -= nr_pages;

        return state;
}

/*
 * We avoid multiple worker processes conflicting via the balloon mutex.
 * We may of course race updates of the target counts (which are protected
 * by the balloon lock), or with changes to the Xen hard limit, but we will
 * recover from these in time.
 */
static void balloon_process(struct work_struct *work)
{
        enum bp_state state = BP_DONE;
        long credit;

        mutex_lock(&balloon_mutex);

        do {
                credit = current_credit();

                if (credit > 0) {
                        if (balloon_is_inflated())
                                state = increase_reservation(credit);
                        else
                                state = reserve_additional_memory(credit);
                }

                if (credit < 0)
                        state = decrease_reservation(-credit, GFP_BALLOON);

                state = update_schedule(state);

#ifndef CONFIG_PREEMPT
                if (need_resched())
                        schedule();
#endif
        } while (credit && state == BP_DONE);

        /* Schedule more work if there is some still to be done. */
        if (state == BP_EAGAIN)
                schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ);

        mutex_unlock(&balloon_mutex);
}

/* Resets the Xen limit, sets new target, and kicks off processing. */
void balloon_set_new_target(unsigned long target)
{
        /* No need for lock. Not read-modify-write updates. */
        balloon_stats.target_pages = target;
        schedule_delayed_work(&balloon_worker, 0);
}
EXPORT_SYMBOL_GPL(balloon_set_new_target);
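/*
 * Illustrative example (added; not part of this file): the usual caller is
 * the xenstore "memory/target" watch in drivers/xen/xen-balloon.c, which
 * reads the new target in KiB and converts it to pages, roughly:
 *
 *        err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
 *        balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
 */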

/**
 * alloc_xenballooned_pages - get pages that have been ballooned out
 * @nr_pages: Number of pages to get
 * @pages: pages returned
 * @highmem: allow highmem pages
 * @return 0 on success, error otherwise
 */
int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem)
{
        int pgno = 0;
        struct page *page;
        mutex_lock(&balloon_mutex);
        while (pgno < nr_pages) {
                page = balloon_retrieve(highmem);
                if (page && (highmem || !PageHighMem(page))) {
                        pages[pgno++] = page;
                } else {
                        enum bp_state st;
                        if (page)
                                balloon_append(page);
                        st = decrease_reservation(nr_pages - pgno,
                                        highmem ? GFP_HIGHUSER : GFP_USER);
                        if (st != BP_DONE)
                                goto out_undo;
                }
        }
        mutex_unlock(&balloon_mutex);
        return 0;
 out_undo:
        while (pgno)
                balloon_append(pages[--pgno]);
        /* Free the memory back to the kernel soon */
        schedule_delayed_work(&balloon_worker, 0);
        mutex_unlock(&balloon_mutex);
        return -ENOMEM;
}
EXPORT_SYMBOL(alloc_xenballooned_pages);

/**
 * free_xenballooned_pages - return pages retrieved with alloc_xenballooned_pages
 * @nr_pages: Number of pages
 * @pages: pages to return
 */
void free_xenballooned_pages(int nr_pages, struct page **pages)
{
        int i;

        mutex_lock(&balloon_mutex);

        for (i = 0; i < nr_pages; i++) {
                if (pages[i])
                        balloon_append(pages[i]);
        }

        /* The balloon may be too large now. Shrink it if needed. */
        if (current_credit())
                schedule_delayed_work(&balloon_worker, 0);

        mutex_unlock(&balloon_mutex);
}
EXPORT_SYMBOL(free_xenballooned_pages);
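/*
 * Usage sketch (added for illustration; the call sequence is hypothetical
 * but the functions are the ones exported above): drivers such as gntdev
 * use this pair to obtain page structs whose backing frames have been
 * returned to Xen, e.g. as targets for mapping another domain's grants:
 *
 *        struct page *pages[16];
 *        int rc = alloc_xenballooned_pages(16, pages, false);
 *        if (rc)
 *                return rc;
 *        ... map grant references onto the pages here ...
 *        free_xenballooned_pages(16, pages);
 */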

static void __init balloon_add_region(unsigned long start_pfn,
                                      unsigned long pages)
{
        unsigned long pfn, extra_pfn_end;
        struct page *page;

        /*
         * If the amount of usable memory has been limited (e.g., with
         * the 'mem' command line parameter), don't add pages beyond
         * this limit.
         */
        extra_pfn_end = min(max_pfn, start_pfn + pages);

        for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) {
                page = pfn_to_page(pfn);
                /* totalram_pages and totalhigh_pages do not
                   include the boot-time balloon extension, so
                   don't subtract from them. */
                __balloon_append(page);
        }
}

static int __init balloon_init(void)
{
        int i;

        if (!xen_domain())
                return -ENODEV;

        pr_info("xen/balloon: Initialising balloon driver.\n");

        balloon_stats.current_pages = xen_pv_domain()
                ? min(xen_start_info->nr_pages - xen_released_pages, max_pfn)
                : max_pfn;
        balloon_stats.target_pages  = balloon_stats.current_pages;
        balloon_stats.balloon_low   = 0;
        balloon_stats.balloon_high  = 0;

        balloon_stats.schedule_delay = 1;
        balloon_stats.max_schedule_delay = 32;
        balloon_stats.retry_count = 1;
        balloon_stats.max_retry_count = RETRY_UNLIMITED;

#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
        balloon_stats.hotplug_pages = 0;
        balloon_stats.balloon_hotplug = 0;

        set_online_page_callback(&xen_online_page);
        register_memory_notifier(&xen_memory_nb);
#endif

        /*
         * Initialize the balloon with pages from the extra memory
         * regions (see arch/x86/xen/setup.c).
         */
        for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++)
                if (xen_extra_mem[i].size)
                        balloon_add_region(PFN_UP(xen_extra_mem[i].start),
                                           PFN_DOWN(xen_extra_mem[i].size));

        return 0;
}

subsys_initcall(balloon_init);

MODULE_LICENSE("GPL");