linux/drivers/xen/balloon.c
<<
>>
Prefs
   1/******************************************************************************
   2 * balloon.c
   3 *
   4 * Xen balloon driver - enables returning/claiming memory to/from Xen.
   5 *
   6 * Copyright (c) 2003, B Dragovic
   7 * Copyright (c) 2003-2004, M Williamson, K Fraser
   8 * Copyright (c) 2005 Dan M. Smith, IBM Corporation
   9 *
  10 * This program is free software; you can redistribute it and/or
  11 * modify it under the terms of the GNU General Public License version 2
  12 * as published by the Free Software Foundation; or, when distributed
  13 * separately from the Linux kernel or incorporated into other
  14 * software packages, subject to the following license:
  15 *
  16 * Permission is hereby granted, free of charge, to any person obtaining a copy
  17 * of this source file (the "Software"), to deal in the Software without
  18 * restriction, including without limitation the rights to use, copy, modify,
  19 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  20 * and to permit persons to whom the Software is furnished to do so, subject to
  21 * the following conditions:
  22 *
  23 * The above copyright notice and this permission notice shall be included in
  24 * all copies or substantial portions of the Software.
  25 *
  26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  32 * IN THE SOFTWARE.
  33 */
  34
  35#include <linux/kernel.h>
  36#include <linux/module.h>
  37#include <linux/sched.h>
  38#include <linux/errno.h>
  39#include <linux/mm.h>
  40#include <linux/bootmem.h>
  41#include <linux/pagemap.h>
  42#include <linux/highmem.h>
  43#include <linux/mutex.h>
  44#include <linux/list.h>
  45#include <linux/sysdev.h>
  46
  47#include <asm/page.h>
  48#include <asm/pgalloc.h>
  49#include <asm/pgtable.h>
  50#include <asm/uaccess.h>
  51#include <asm/tlb.h>
  52
  53#include <asm/xen/hypervisor.h>
  54#include <asm/xen/hypercall.h>
  55#include <xen/interface/xen.h>
  56#include <xen/interface/memory.h>
  57#include <xen/xenbus.h>
  58#include <xen/features.h>
  59#include <xen/page.h>
  60
  61#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
  62
  63#define BALLOON_CLASS_NAME "xen_memory"
  64
  65struct balloon_stats {
  66        /* We aim for 'current allocation' == 'target allocation'. */
  67        unsigned long current_pages;
  68        unsigned long target_pages;
  69        /* We may hit the hard limit in Xen. If we do then we remember it. */
  70        unsigned long hard_limit;
  71        /*
  72         * Drivers may alter the memory reservation independently, but they
  73         * must inform the balloon driver so we avoid hitting the hard limit.
  74         */
  75        unsigned long driver_pages;
  76        /* Number of pages in high- and low-memory balloons. */
  77        unsigned long balloon_low;
  78        unsigned long balloon_high;
  79};
  80
  81static DEFINE_MUTEX(balloon_mutex);
  82
  83static struct sys_device balloon_sysdev;
  84
  85static int register_balloon(struct sys_device *sysdev);
  86
  87/*
  88 * Protects atomic reservation decrease/increase against concurrent increases.
  89 * Also protects non-atomic updates of current_pages and driver_pages, and
  90 * balloon lists.
  91 */
  92static DEFINE_SPINLOCK(balloon_lock);
  93
  94static struct balloon_stats balloon_stats;
  95
  96/* We increase/decrease in batches which fit in a page */
  97static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
  98
  99#ifdef CONFIG_HIGHMEM
 100#define inc_totalhigh_pages() (totalhigh_pages++)
 101#define dec_totalhigh_pages() (totalhigh_pages--)
 102#else
 103#define inc_totalhigh_pages() do {} while(0)
 104#define dec_totalhigh_pages() do {} while(0)
 105#endif
 106
 107/* List of ballooned pages, threaded through the mem_map array. */
 108static LIST_HEAD(ballooned_pages);
 109
 110/* Main work function, always executed in process context. */
 111static void balloon_process(struct work_struct *work);
 112static DECLARE_WORK(balloon_worker, balloon_process);
 113static struct timer_list balloon_timer;
 114
 115/* When ballooning out (allocating memory to return to Xen) we don't really
 116   want the kernel to try too hard since that can trigger the oom killer. */
 117#define GFP_BALLOON \
 118        (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
 119
 120static void scrub_page(struct page *page)
 121{
 122#ifdef CONFIG_XEN_SCRUB_PAGES
 123        clear_highpage(page);
 124#endif
 125}
 126
 127/* balloon_append: add the given page to the balloon. */
 128static void balloon_append(struct page *page)
 129{
 130        /* Lowmem is re-populated first, so highmem pages go at list tail. */
 131        if (PageHighMem(page)) {
 132                list_add_tail(&page->lru, &ballooned_pages);
 133                balloon_stats.balloon_high++;
 134                dec_totalhigh_pages();
 135        } else {
 136                list_add(&page->lru, &ballooned_pages);
 137                balloon_stats.balloon_low++;
 138        }
 139}
 140
 141/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
 142static struct page *balloon_retrieve(void)
 143{
 144        struct page *page;
 145
 146        if (list_empty(&ballooned_pages))
 147                return NULL;
 148
 149        page = list_entry(ballooned_pages.next, struct page, lru);
 150        list_del(&page->lru);
 151
 152        if (PageHighMem(page)) {
 153                balloon_stats.balloon_high--;
 154                inc_totalhigh_pages();
 155        }
 156        else
 157                balloon_stats.balloon_low--;
 158
 159        return page;
 160}
 161
 162static struct page *balloon_first_page(void)
 163{
 164        if (list_empty(&ballooned_pages))
 165                return NULL;
 166        return list_entry(ballooned_pages.next, struct page, lru);
 167}
 168
 169static struct page *balloon_next_page(struct page *page)
 170{
 171        struct list_head *next = page->lru.next;
 172        if (next == &ballooned_pages)
 173                return NULL;
 174        return list_entry(next, struct page, lru);
 175}
 176
 177static void balloon_alarm(unsigned long unused)
 178{
 179        schedule_work(&balloon_worker);
 180}
 181
 182static unsigned long current_target(void)
 183{
 184        unsigned long target = min(balloon_stats.target_pages, balloon_stats.hard_limit);
 185
 186        target = min(target,
 187                     balloon_stats.current_pages +
 188                     balloon_stats.balloon_low +
 189                     balloon_stats.balloon_high);
 190
 191        return target;
 192}
 193
 194static int increase_reservation(unsigned long nr_pages)
 195{
 196        unsigned long  pfn, i, flags;
 197        struct page   *page;
 198        long           rc;
 199        struct xen_memory_reservation reservation = {
 200                .address_bits = 0,
 201                .extent_order = 0,
 202                .domid        = DOMID_SELF
 203        };
 204
 205        if (nr_pages > ARRAY_SIZE(frame_list))
 206                nr_pages = ARRAY_SIZE(frame_list);
 207
 208        spin_lock_irqsave(&balloon_lock, flags);
 209
 210        page = balloon_first_page();
 211        for (i = 0; i < nr_pages; i++) {
 212                BUG_ON(page == NULL);
 213                frame_list[i] = page_to_pfn(page);
 214                page = balloon_next_page(page);
 215        }
 216
 217        set_xen_guest_handle(reservation.extent_start, frame_list);
 218        reservation.nr_extents = nr_pages;
 219        rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
 220        if (rc < nr_pages) {
 221                if (rc > 0) {
 222                        int ret;
 223
 224                        /* We hit the Xen hard limit: reprobe. */
 225                        reservation.nr_extents = rc;
 226                        ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
 227                                                   &reservation);
 228                        BUG_ON(ret != rc);
 229                }
 230                if (rc >= 0)
 231                        balloon_stats.hard_limit = (balloon_stats.current_pages + rc -
 232                                                    balloon_stats.driver_pages);
 233                goto out;
 234        }
 235
 236        for (i = 0; i < nr_pages; i++) {
 237                page = balloon_retrieve();
 238                BUG_ON(page == NULL);
 239
 240                pfn = page_to_pfn(page);
 241                BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
 242                       phys_to_machine_mapping_valid(pfn));
 243
 244                set_phys_to_machine(pfn, frame_list[i]);
 245
 246                /* Link back into the page tables if not highmem. */
 247                if (pfn < max_low_pfn) {
 248                        int ret;
 249                        ret = HYPERVISOR_update_va_mapping(
 250                                (unsigned long)__va(pfn << PAGE_SHIFT),
 251                                mfn_pte(frame_list[i], PAGE_KERNEL),
 252                                0);
 253                        BUG_ON(ret);
 254                }
 255
 256                /* Relinquish the page back to the allocator. */
 257                ClearPageReserved(page);
 258                init_page_count(page);
 259                __free_page(page);
 260        }
 261
 262        balloon_stats.current_pages += nr_pages;
 263        totalram_pages = balloon_stats.current_pages;
 264
 265 out:
 266        spin_unlock_irqrestore(&balloon_lock, flags);
 267
 268        return 0;
 269}
 270
 271static int decrease_reservation(unsigned long nr_pages)
 272{
 273        unsigned long  pfn, i, flags;
 274        struct page   *page;
 275        int            need_sleep = 0;
 276        int ret;
 277        struct xen_memory_reservation reservation = {
 278                .address_bits = 0,
 279                .extent_order = 0,
 280                .domid        = DOMID_SELF
 281        };
 282
 283        if (nr_pages > ARRAY_SIZE(frame_list))
 284                nr_pages = ARRAY_SIZE(frame_list);
 285
 286        for (i = 0; i < nr_pages; i++) {
 287                if ((page = alloc_page(GFP_BALLOON)) == NULL) {
 288                        nr_pages = i;
 289                        need_sleep = 1;
 290                        break;
 291                }
 292
 293                pfn = page_to_pfn(page);
 294                frame_list[i] = pfn_to_mfn(pfn);
 295
 296                scrub_page(page);
 297
 298                if (!PageHighMem(page)) {
 299                        ret = HYPERVISOR_update_va_mapping(
 300                                (unsigned long)__va(pfn << PAGE_SHIFT),
 301                                __pte_ma(0), 0);
 302                        BUG_ON(ret);
 303                }
 304
 305        }
 306
 307        /* Ensure that ballooned highmem pages don't have kmaps. */
 308        kmap_flush_unused();
 309        flush_tlb_all();
 310
 311        spin_lock_irqsave(&balloon_lock, flags);
 312
 313        /* No more mappings: invalidate P2M and add to balloon. */
 314        for (i = 0; i < nr_pages; i++) {
 315                pfn = mfn_to_pfn(frame_list[i]);
 316                set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
 317                balloon_append(pfn_to_page(pfn));
 318        }
 319
 320        set_xen_guest_handle(reservation.extent_start, frame_list);
 321        reservation.nr_extents   = nr_pages;
 322        ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
 323        BUG_ON(ret != nr_pages);
 324
 325        balloon_stats.current_pages -= nr_pages;
 326        totalram_pages = balloon_stats.current_pages;
 327
 328        spin_unlock_irqrestore(&balloon_lock, flags);
 329
 330        return need_sleep;
 331}
 332
 333/*
 334 * We avoid multiple worker processes conflicting via the balloon mutex.
 335 * We may of course race updates of the target counts (which are protected
 336 * by the balloon lock), or with changes to the Xen hard limit, but we will
 337 * recover from these in time.
 338 */
 339static void balloon_process(struct work_struct *work)
 340{
 341        int need_sleep = 0;
 342        long credit;
 343
 344        mutex_lock(&balloon_mutex);
 345
 346        do {
 347                credit = current_target() - balloon_stats.current_pages;
 348                if (credit > 0)
 349                        need_sleep = (increase_reservation(credit) != 0);
 350                if (credit < 0)
 351                        need_sleep = (decrease_reservation(-credit) != 0);
 352
 353#ifndef CONFIG_PREEMPT
 354                if (need_resched())
 355                        schedule();
 356#endif
 357        } while ((credit != 0) && !need_sleep);
 358
 359        /* Schedule more work if there is some still to be done. */
 360        if (current_target() != balloon_stats.current_pages)
 361                mod_timer(&balloon_timer, jiffies + HZ);
 362
 363        mutex_unlock(&balloon_mutex);
 364}
 365
 366/* Resets the Xen limit, sets new target, and kicks off processing. */
 367static void balloon_set_new_target(unsigned long target)
 368{
 369        /* No need for lock. Not read-modify-write updates. */
 370        balloon_stats.hard_limit   = ~0UL;
 371        balloon_stats.target_pages = target;
 372        schedule_work(&balloon_worker);
 373}
 374
 375static struct xenbus_watch target_watch =
 376{
 377        .node = "memory/target"
 378};
 379
 380/* React to a change in the target key */
 381static void watch_target(struct xenbus_watch *watch,
 382                         const char **vec, unsigned int len)
 383{
 384        unsigned long long new_target;
 385        int err;
 386
 387        err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
 388        if (err != 1) {
 389                /* This is ok (for domain0 at least) - so just return */
 390                return;
 391        }
 392
 393        /* The given memory/target value is in KiB, so it needs converting to
 394         * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
 395         */
 396        balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
 397}
 398
 399static int balloon_init_watcher(struct notifier_block *notifier,
 400                                unsigned long event,
 401                                void *data)
 402{
 403        int err;
 404
 405        err = register_xenbus_watch(&target_watch);
 406        if (err)
 407                printk(KERN_ERR "Failed to set balloon watcher\n");
 408
 409        return NOTIFY_DONE;
 410}
 411
 412static struct notifier_block xenstore_notifier;
 413
 414static int __init balloon_init(void)
 415{
 416        unsigned long pfn;
 417        struct page *page;
 418
 419        if (!xen_pv_domain())
 420                return -ENODEV;
 421
 422        pr_info("xen_balloon: Initialising balloon driver.\n");
 423
 424        balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
 425        totalram_pages   = balloon_stats.current_pages;
 426        balloon_stats.target_pages  = balloon_stats.current_pages;
 427        balloon_stats.balloon_low   = 0;
 428        balloon_stats.balloon_high  = 0;
 429        balloon_stats.driver_pages  = 0UL;
 430        balloon_stats.hard_limit    = ~0UL;
 431
 432        init_timer(&balloon_timer);
 433        balloon_timer.data = 0;
 434        balloon_timer.function = balloon_alarm;
 435
 436        register_balloon(&balloon_sysdev);
 437
 438        /* Initialise the balloon with excess memory space. */
 439        for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
 440                page = pfn_to_page(pfn);
 441                if (!PageReserved(page))
 442                        balloon_append(page);
 443        }
 444
 445        target_watch.callback = watch_target;
 446        xenstore_notifier.notifier_call = balloon_init_watcher;
 447
 448        register_xenstore_notifier(&xenstore_notifier);
 449
 450        return 0;
 451}
 452
 453subsys_initcall(balloon_init);
 454
 455static void balloon_exit(void)
 456{
 457    /* XXX - release balloon here */
 458    return;
 459}
 460
 461module_exit(balloon_exit);
 462
 463#define BALLOON_SHOW(name, format, args...)                             \
 464        static ssize_t show_##name(struct sys_device *dev,              \
 465                                   struct sysdev_attribute *attr,       \
 466                                   char *buf)                           \
 467        {                                                               \
 468                return sprintf(buf, format, ##args);                    \
 469        }                                                               \
 470        static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
 471
 472BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
 473BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low));
 474BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high));
 475BALLOON_SHOW(hard_limit_kb,
 476             (balloon_stats.hard_limit!=~0UL) ? "%lu\n" : "???\n",
 477             (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0);
 478BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages));
 479
 480static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr,
 481                              char *buf)
 482{
 483        return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages));
 484}
 485
 486static ssize_t store_target_kb(struct sys_device *dev,
 487                               struct sysdev_attribute *attr,
 488                               const char *buf,
 489                               size_t count)
 490{
 491        char *endchar;
 492        unsigned long long target_bytes;
 493
 494        if (!capable(CAP_SYS_ADMIN))
 495                return -EPERM;
 496
 497        target_bytes = simple_strtoull(buf, &endchar, 0) * 1024;
 498
 499        balloon_set_new_target(target_bytes >> PAGE_SHIFT);
 500
 501        return count;
 502}
 503
 504static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
 505                   show_target_kb, store_target_kb);
 506
 507
 508static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr,
 509                              char *buf)
 510{
 511        return sprintf(buf, "%llu\n",
 512                       (unsigned long long)balloon_stats.target_pages
 513                       << PAGE_SHIFT);
 514}
 515
 516static ssize_t store_target(struct sys_device *dev,
 517                            struct sysdev_attribute *attr,
 518                            const char *buf,
 519                            size_t count)
 520{
 521        char *endchar;
 522        unsigned long long target_bytes;
 523
 524        if (!capable(CAP_SYS_ADMIN))
 525                return -EPERM;
 526
 527        target_bytes = memparse(buf, &endchar);
 528
 529        balloon_set_new_target(target_bytes >> PAGE_SHIFT);
 530
 531        return count;
 532}
 533
 534static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR,
 535                   show_target, store_target);
 536
 537
 538static struct sysdev_attribute *balloon_attrs[] = {
 539        &attr_target_kb,
 540        &attr_target,
 541};
 542
 543static struct attribute *balloon_info_attrs[] = {
 544        &attr_current_kb.attr,
 545        &attr_low_kb.attr,
 546        &attr_high_kb.attr,
 547        &attr_hard_limit_kb.attr,
 548        &attr_driver_kb.attr,
 549        NULL
 550};
 551
 552static struct attribute_group balloon_info_group = {
 553        .name = "info",
 554        .attrs = balloon_info_attrs,
 555};
 556
 557static struct sysdev_class balloon_sysdev_class = {
 558        .name = BALLOON_CLASS_NAME,
 559};
 560
 561static int register_balloon(struct sys_device *sysdev)
 562{
 563        int i, error;
 564
 565        error = sysdev_class_register(&balloon_sysdev_class);
 566        if (error)
 567                return error;
 568
 569        sysdev->id = 0;
 570        sysdev->cls = &balloon_sysdev_class;
 571
 572        error = sysdev_register(sysdev);
 573        if (error) {
 574                sysdev_class_unregister(&balloon_sysdev_class);
 575                return error;
 576        }
 577
 578        for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
 579                error = sysdev_create_file(sysdev, balloon_attrs[i]);
 580                if (error)
 581                        goto fail;
 582        }
 583
 584        error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
 585        if (error)
 586                goto fail;
 587
 588        return 0;
 589
 590 fail:
 591        while (--i >= 0)
 592                sysdev_remove_file(sysdev, balloon_attrs[i]);
 593        sysdev_unregister(sysdev);
 594        sysdev_class_unregister(&balloon_sysdev_class);
 595        return error;
 596}
 597
 598MODULE_LICENSE("GPL");
 599
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.