linux/drivers/xen/balloon.c
<<
>>
Prefs
   1/******************************************************************************
   2 * balloon.c
   3 *
   4 * Xen balloon driver - enables returning/claiming memory to/from Xen.
   5 *
   6 * Copyright (c) 2003, B Dragovic
   7 * Copyright (c) 2003-2004, M Williamson, K Fraser
   8 * Copyright (c) 2005 Dan M. Smith, IBM Corporation
   9 *
  10 * This program is free software; you can redistribute it and/or
  11 * modify it under the terms of the GNU General Public License version 2
  12 * as published by the Free Software Foundation; or, when distributed
  13 * separately from the Linux kernel or incorporated into other
  14 * software packages, subject to the following license:
  15 *
  16 * Permission is hereby granted, free of charge, to any person obtaining a copy
  17 * of this source file (the "Software"), to deal in the Software without
  18 * restriction, including without limitation the rights to use, copy, modify,
  19 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  20 * and to permit persons to whom the Software is furnished to do so, subject to
  21 * the following conditions:
  22 *
  23 * The above copyright notice and this permission notice shall be included in
  24 * all copies or substantial portions of the Software.
  25 *
  26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  32 * IN THE SOFTWARE.
  33 */
  34
  35#include <linux/kernel.h>
  36#include <linux/module.h>
  37#include <linux/sched.h>
  38#include <linux/errno.h>
  39#include <linux/mm.h>
  40#include <linux/bootmem.h>
  41#include <linux/pagemap.h>
  42#include <linux/highmem.h>
  43#include <linux/mutex.h>
  44#include <linux/list.h>
  45#include <linux/sysdev.h>
  46
  47#include <asm/page.h>
  48#include <asm/pgalloc.h>
  49#include <asm/pgtable.h>
  50#include <asm/uaccess.h>
  51#include <asm/tlb.h>
  52
  53#include <asm/xen/hypervisor.h>
  54#include <asm/xen/hypercall.h>
  55#include <xen/interface/xen.h>
  56#include <xen/interface/memory.h>
  57#include <xen/xenbus.h>
  58#include <xen/features.h>
  59#include <xen/page.h>
  60
  61#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
  62
  63#define BALLOON_CLASS_NAME "xen_memory"
  64
  65struct balloon_stats {
  66        /* We aim for 'current allocation' == 'target allocation'. */
  67        unsigned long current_pages;
  68        unsigned long target_pages;
  69        /* We may hit the hard limit in Xen. If we do then we remember it. */
  70        unsigned long hard_limit;
  71        /*
  72         * Drivers may alter the memory reservation independently, but they
  73         * must inform the balloon driver so we avoid hitting the hard limit.
  74         */
  75        unsigned long driver_pages;
  76        /* Number of pages in high- and low-memory balloons. */
  77        unsigned long balloon_low;
  78        unsigned long balloon_high;
  79};
  80
  81static DEFINE_MUTEX(balloon_mutex);
  82
  83static struct sys_device balloon_sysdev;
  84
  85static int register_balloon(struct sys_device *sysdev);
  86
  87/*
  88 * Protects atomic reservation decrease/increase against concurrent increases.
  89 * Also protects non-atomic updates of current_pages and driver_pages, and
  90 * balloon lists.
  91 */
  92static DEFINE_SPINLOCK(balloon_lock);
  93
  94static struct balloon_stats balloon_stats;
  95
  96/* We increase/decrease in batches which fit in a page */
  97static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
  98
  99/* VM /proc information for memory */
 100extern unsigned long totalram_pages;
 101
 102#ifdef CONFIG_HIGHMEM
 103extern unsigned long totalhigh_pages;
 104#define inc_totalhigh_pages() (totalhigh_pages++)
 105#define dec_totalhigh_pages() (totalhigh_pages--)
 106#else
 107#define inc_totalhigh_pages() do {} while(0)
 108#define dec_totalhigh_pages() do {} while(0)
 109#endif
 110
 111/* List of ballooned pages, threaded through the mem_map array. */
 112static LIST_HEAD(ballooned_pages);
 113
 114/* Main work function, always executed in process context. */
 115static void balloon_process(struct work_struct *work);
 116static DECLARE_WORK(balloon_worker, balloon_process);
 117static struct timer_list balloon_timer;
 118
 119/* When ballooning out (allocating memory to return to Xen) we don't really
 120   want the kernel to try too hard since that can trigger the oom killer. */
 121#define GFP_BALLOON \
 122        (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
 123
 124static void scrub_page(struct page *page)
 125{
 126#ifdef CONFIG_XEN_SCRUB_PAGES
 127        clear_highpage(page);
 128#endif
 129}
 130
 131/* balloon_append: add the given page to the balloon. */
 132static void balloon_append(struct page *page)
 133{
 134        /* Lowmem is re-populated first, so highmem pages go at list tail. */
 135        if (PageHighMem(page)) {
 136                list_add_tail(&page->lru, &ballooned_pages);
 137                balloon_stats.balloon_high++;
 138                dec_totalhigh_pages();
 139        } else {
 140                list_add(&page->lru, &ballooned_pages);
 141                balloon_stats.balloon_low++;
 142        }
 143}
 144
 145/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
 146static struct page *balloon_retrieve(void)
 147{
 148        struct page *page;
 149
 150        if (list_empty(&ballooned_pages))
 151                return NULL;
 152
 153        page = list_entry(ballooned_pages.next, struct page, lru);
 154        list_del(&page->lru);
 155
 156        if (PageHighMem(page)) {
 157                balloon_stats.balloon_high--;
 158                inc_totalhigh_pages();
 159        }
 160        else
 161                balloon_stats.balloon_low--;
 162
 163        return page;
 164}
 165
 166static struct page *balloon_first_page(void)
 167{
 168        if (list_empty(&ballooned_pages))
 169                return NULL;
 170        return list_entry(ballooned_pages.next, struct page, lru);
 171}
 172
 173static struct page *balloon_next_page(struct page *page)
 174{
 175        struct list_head *next = page->lru.next;
 176        if (next == &ballooned_pages)
 177                return NULL;
 178        return list_entry(next, struct page, lru);
 179}
 180
 181static void balloon_alarm(unsigned long unused)
 182{
 183        schedule_work(&balloon_worker);
 184}
 185
 186static unsigned long current_target(void)
 187{
 188        unsigned long target = min(balloon_stats.target_pages, balloon_stats.hard_limit);
 189
 190        target = min(target,
 191                     balloon_stats.current_pages +
 192                     balloon_stats.balloon_low +
 193                     balloon_stats.balloon_high);
 194
 195        return target;
 196}
 197
 198static int increase_reservation(unsigned long nr_pages)
 199{
 200        unsigned long  pfn, i, flags;
 201        struct page   *page;
 202        long           rc;
 203        struct xen_memory_reservation reservation = {
 204                .address_bits = 0,
 205                .extent_order = 0,
 206                .domid        = DOMID_SELF
 207        };
 208
 209        if (nr_pages > ARRAY_SIZE(frame_list))
 210                nr_pages = ARRAY_SIZE(frame_list);
 211
 212        spin_lock_irqsave(&balloon_lock, flags);
 213
 214        page = balloon_first_page();
 215        for (i = 0; i < nr_pages; i++) {
 216                BUG_ON(page == NULL);
 217                frame_list[i] = page_to_pfn(page);;
 218                page = balloon_next_page(page);
 219        }
 220
 221        set_xen_guest_handle(reservation.extent_start, frame_list);
 222        reservation.nr_extents = nr_pages;
 223        rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
 224        if (rc < nr_pages) {
 225                if (rc > 0) {
 226                        int ret;
 227
 228                        /* We hit the Xen hard limit: reprobe. */
 229                        reservation.nr_extents = rc;
 230                        ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
 231                                                   &reservation);
 232                        BUG_ON(ret != rc);
 233                }
 234                if (rc >= 0)
 235                        balloon_stats.hard_limit = (balloon_stats.current_pages + rc -
 236                                                    balloon_stats.driver_pages);
 237                goto out;
 238        }
 239
 240        for (i = 0; i < nr_pages; i++) {
 241                page = balloon_retrieve();
 242                BUG_ON(page == NULL);
 243
 244                pfn = page_to_pfn(page);
 245                BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
 246                       phys_to_machine_mapping_valid(pfn));
 247
 248                set_phys_to_machine(pfn, frame_list[i]);
 249
 250                /* Link back into the page tables if not highmem. */
 251                if (pfn < max_low_pfn) {
 252                        int ret;
 253                        ret = HYPERVISOR_update_va_mapping(
 254                                (unsigned long)__va(pfn << PAGE_SHIFT),
 255                                mfn_pte(frame_list[i], PAGE_KERNEL),
 256                                0);
 257                        BUG_ON(ret);
 258                }
 259
 260                /* Relinquish the page back to the allocator. */
 261                ClearPageReserved(page);
 262                init_page_count(page);
 263                __free_page(page);
 264        }
 265
 266        balloon_stats.current_pages += nr_pages;
 267        totalram_pages = balloon_stats.current_pages;
 268
 269 out:
 270        spin_unlock_irqrestore(&balloon_lock, flags);
 271
 272        return 0;
 273}
 274
 275static int decrease_reservation(unsigned long nr_pages)
 276{
 277        unsigned long  pfn, i, flags;
 278        struct page   *page;
 279        int            need_sleep = 0;
 280        int ret;
 281        struct xen_memory_reservation reservation = {
 282                .address_bits = 0,
 283                .extent_order = 0,
 284                .domid        = DOMID_SELF
 285        };
 286
 287        if (nr_pages > ARRAY_SIZE(frame_list))
 288                nr_pages = ARRAY_SIZE(frame_list);
 289
 290        for (i = 0; i < nr_pages; i++) {
 291                if ((page = alloc_page(GFP_BALLOON)) == NULL) {
 292                        nr_pages = i;
 293                        need_sleep = 1;
 294                        break;
 295                }
 296
 297                pfn = page_to_pfn(page);
 298                frame_list[i] = pfn_to_mfn(pfn);
 299
 300                scrub_page(page);
 301
 302                if (!PageHighMem(page)) {
 303                        ret = HYPERVISOR_update_va_mapping(
 304                                (unsigned long)__va(pfn << PAGE_SHIFT),
 305                                __pte_ma(0), 0);
 306                        BUG_ON(ret);
 307                }
 308
 309        }
 310
 311        /* Ensure that ballooned highmem pages don't have kmaps. */
 312        kmap_flush_unused();
 313        flush_tlb_all();
 314
 315        spin_lock_irqsave(&balloon_lock, flags);
 316
 317        /* No more mappings: invalidate P2M and add to balloon. */
 318        for (i = 0; i < nr_pages; i++) {
 319                pfn = mfn_to_pfn(frame_list[i]);
 320                set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
 321                balloon_append(pfn_to_page(pfn));
 322        }
 323
 324        set_xen_guest_handle(reservation.extent_start, frame_list);
 325        reservation.nr_extents   = nr_pages;
 326        ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
 327        BUG_ON(ret != nr_pages);
 328
 329        balloon_stats.current_pages -= nr_pages;
 330        totalram_pages = balloon_stats.current_pages;
 331
 332        spin_unlock_irqrestore(&balloon_lock, flags);
 333
 334        return need_sleep;
 335}
 336
 337/*
 338 * We avoid multiple worker processes conflicting via the balloon mutex.
 339 * We may of course race updates of the target counts (which are protected
 340 * by the balloon lock), or with changes to the Xen hard limit, but we will
 341 * recover from these in time.
 342 */
 343static void balloon_process(struct work_struct *work)
 344{
 345        int need_sleep = 0;
 346        long credit;
 347
 348        mutex_lock(&balloon_mutex);
 349
 350        do {
 351                credit = current_target() - balloon_stats.current_pages;
 352                if (credit > 0)
 353                        need_sleep = (increase_reservation(credit) != 0);
 354                if (credit < 0)
 355                        need_sleep = (decrease_reservation(-credit) != 0);
 356
 357#ifndef CONFIG_PREEMPT
 358                if (need_resched())
 359                        schedule();
 360#endif
 361        } while ((credit != 0) && !need_sleep);
 362
 363        /* Schedule more work if there is some still to be done. */
 364        if (current_target() != balloon_stats.current_pages)
 365                mod_timer(&balloon_timer, jiffies + HZ);
 366
 367        mutex_unlock(&balloon_mutex);
 368}
 369
 370/* Resets the Xen limit, sets new target, and kicks off processing. */
 371static void balloon_set_new_target(unsigned long target)
 372{
 373        /* No need for lock. Not read-modify-write updates. */
 374        balloon_stats.hard_limit   = ~0UL;
 375        balloon_stats.target_pages = target;
 376        schedule_work(&balloon_worker);
 377}
 378
 379static struct xenbus_watch target_watch =
 380{
 381        .node = "memory/target"
 382};
 383
 384/* React to a change in the target key */
 385static void watch_target(struct xenbus_watch *watch,
 386                         const char **vec, unsigned int len)
 387{
 388        unsigned long long new_target;
 389        int err;
 390
 391        err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
 392        if (err != 1) {
 393                /* This is ok (for domain0 at least) - so just return */
 394                return;
 395        }
 396
 397        /* The given memory/target value is in KiB, so it needs converting to
 398         * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
 399         */
 400        balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
 401}
 402
 403static int balloon_init_watcher(struct notifier_block *notifier,
 404                                unsigned long event,
 405                                void *data)
 406{
 407        int err;
 408
 409        err = register_xenbus_watch(&target_watch);
 410        if (err)
 411                printk(KERN_ERR "Failed to set balloon watcher\n");
 412
 413        return NOTIFY_DONE;
 414}
 415
 416static struct notifier_block xenstore_notifier;
 417
 418static int __init balloon_init(void)
 419{
 420        unsigned long pfn;
 421        struct page *page;
 422
 423        if (!xen_pv_domain())
 424                return -ENODEV;
 425
 426        pr_info("xen_balloon: Initialising balloon driver.\n");
 427
 428        balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
 429        totalram_pages   = balloon_stats.current_pages;
 430        balloon_stats.target_pages  = balloon_stats.current_pages;
 431        balloon_stats.balloon_low   = 0;
 432        balloon_stats.balloon_high  = 0;
 433        balloon_stats.driver_pages  = 0UL;
 434        balloon_stats.hard_limit    = ~0UL;
 435
 436        init_timer(&balloon_timer);
 437        balloon_timer.data = 0;
 438        balloon_timer.function = balloon_alarm;
 439
 440        register_balloon(&balloon_sysdev);
 441
 442        /* Initialise the balloon with excess memory space. */
 443        for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
 444                page = pfn_to_page(pfn);
 445                if (!PageReserved(page))
 446                        balloon_append(page);
 447        }
 448
 449        target_watch.callback = watch_target;
 450        xenstore_notifier.notifier_call = balloon_init_watcher;
 451
 452        register_xenstore_notifier(&xenstore_notifier);
 453
 454        return 0;
 455}
 456
 457subsys_initcall(balloon_init);
 458
 459static void balloon_exit(void)
 460{
 461    /* XXX - release balloon here */
 462    return;
 463}
 464
 465module_exit(balloon_exit);
 466
 467#define BALLOON_SHOW(name, format, args...)                             \
 468        static ssize_t show_##name(struct sys_device *dev,              \
 469                                   struct sysdev_attribute *attr,       \
 470                                   char *buf)                           \
 471        {                                                               \
 472                return sprintf(buf, format, ##args);                    \
 473        }                                                               \
 474        static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
 475
 476BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
 477BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low));
 478BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high));
 479BALLOON_SHOW(hard_limit_kb,
 480             (balloon_stats.hard_limit!=~0UL) ? "%lu\n" : "???\n",
 481             (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0);
 482BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages));
 483
 484static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr,
 485                              char *buf)
 486{
 487        return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages));
 488}
 489
 490static ssize_t store_target_kb(struct sys_device *dev,
 491                               struct sysdev_attribute *attr,
 492                               const char *buf,
 493                               size_t count)
 494{
 495        char *endchar;
 496        unsigned long long target_bytes;
 497
 498        if (!capable(CAP_SYS_ADMIN))
 499                return -EPERM;
 500
 501        target_bytes = simple_strtoull(buf, &endchar, 0) * 1024;
 502
 503        balloon_set_new_target(target_bytes >> PAGE_SHIFT);
 504
 505        return count;
 506}
 507
 508static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
 509                   show_target_kb, store_target_kb);
 510
 511
 512static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr,
 513                              char *buf)
 514{
 515        return sprintf(buf, "%llu\n",
 516                       (unsigned long long)balloon_stats.target_pages
 517                       << PAGE_SHIFT);
 518}
 519
 520static ssize_t store_target(struct sys_device *dev,
 521                            struct sysdev_attribute *attr,
 522                            const char *buf,
 523                            size_t count)
 524{
 525        char *endchar;
 526        unsigned long long target_bytes;
 527
 528        if (!capable(CAP_SYS_ADMIN))
 529                return -EPERM;
 530
 531        target_bytes = memparse(buf, &endchar);
 532
 533        balloon_set_new_target(target_bytes >> PAGE_SHIFT);
 534
 535        return count;
 536}
 537
 538static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR,
 539                   show_target, store_target);
 540
 541
 542static struct sysdev_attribute *balloon_attrs[] = {
 543        &attr_target_kb,
 544        &attr_target,
 545};
 546
 547static struct attribute *balloon_info_attrs[] = {
 548        &attr_current_kb.attr,
 549        &attr_low_kb.attr,
 550        &attr_high_kb.attr,
 551        &attr_hard_limit_kb.attr,
 552        &attr_driver_kb.attr,
 553        NULL
 554};
 555
 556static struct attribute_group balloon_info_group = {
 557        .name = "info",
 558        .attrs = balloon_info_attrs,
 559};
 560
 561static struct sysdev_class balloon_sysdev_class = {
 562        .name = BALLOON_CLASS_NAME,
 563};
 564
 565static int register_balloon(struct sys_device *sysdev)
 566{
 567        int i, error;
 568
 569        error = sysdev_class_register(&balloon_sysdev_class);
 570        if (error)
 571                return error;
 572
 573        sysdev->id = 0;
 574        sysdev->cls = &balloon_sysdev_class;
 575
 576        error = sysdev_register(sysdev);
 577        if (error) {
 578                sysdev_class_unregister(&balloon_sysdev_class);
 579                return error;
 580        }
 581
 582        for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
 583                error = sysdev_create_file(sysdev, balloon_attrs[i]);
 584                if (error)
 585                        goto fail;
 586        }
 587
 588        error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
 589        if (error)
 590                goto fail;
 591
 592        return 0;
 593
 594 fail:
 595        while (--i >= 0)
 596                sysdev_remove_file(sysdev, balloon_attrs[i]);
 597        sysdev_unregister(sysdev);
 598        sysdev_class_unregister(&balloon_sysdev_class);
 599        return error;
 600}
 601
 602MODULE_LICENSE("GPL");
 603
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.