1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39#include <linux/kernel.h>
40#include <linux/sched.h>
41#include <linux/errno.h>
42#include <linux/module.h>
43#include <linux/mm.h>
44#include <linux/bootmem.h>
45#include <linux/pagemap.h>
46#include <linux/highmem.h>
47#include <linux/mutex.h>
48#include <linux/list.h>
49#include <linux/gfp.h>
50#include <linux/notifier.h>
51#include <linux/memory.h>
52#include <linux/memory_hotplug.h>
53
54#include <asm/page.h>
55#include <asm/pgalloc.h>
56#include <asm/pgtable.h>
57#include <asm/tlb.h>
58
59#include <asm/xen/hypervisor.h>
60#include <asm/xen/hypercall.h>
61
62#include <xen/xen.h>
63#include <xen/interface/xen.h>
64#include <xen/interface/memory.h>
65#include <xen/balloon.h>
66#include <xen/features.h>
67#include <xen/page.h>
68
69
70
71
72
73
74
75
76
/*
 * Outcome of one balloon adjustment step, as produced by the reservation
 * helpers and post-processed by update_schedule().
 */
enum bp_state {
	BP_DONE,	/* step succeeded, or there was nothing to do */
	BP_EAGAIN,	/* step failed; balloon_process() re-arms the worker */
	BP_ECANCELED,	/* retry budget exhausted; the operation is dropped */
};
82
83
84static DEFINE_MUTEX(balloon_mutex);
85
86struct balloon_stats balloon_stats;
87EXPORT_SYMBOL_GPL(balloon_stats);
88
89
90static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)];
91
92#ifdef CONFIG_HIGHMEM
93#define inc_totalhigh_pages() (totalhigh_pages++)
94#define dec_totalhigh_pages() (totalhigh_pages--)
95#else
96#define inc_totalhigh_pages() do {} while (0)
97#define dec_totalhigh_pages() do {} while (0)
98#endif
99
100
101static LIST_HEAD(ballooned_pages);
102
103
104static void balloon_process(struct work_struct *work);
105static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
106
107
108
109#define GFP_BALLOON \
110 (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
111
/*
 * Scrub a page that is about to leave the guest.  With
 * CONFIG_XEN_SCRUB_PAGES the page is passed to clear_highpage(); without
 * it this function does nothing.
 */
static void scrub_page(struct page *page)
{
#if defined(CONFIG_XEN_SCRUB_PAGES)
	clear_highpage(page);
#endif
}
118
119
120static void __balloon_append(struct page *page)
121{
122
123 if (PageHighMem(page)) {
124 list_add_tail(&page->lru, &ballooned_pages);
125 balloon_stats.balloon_high++;
126 } else {
127 list_add(&page->lru, &ballooned_pages);
128 balloon_stats.balloon_low++;
129 }
130}
131
132static void balloon_append(struct page *page)
133{
134 __balloon_append(page);
135 if (PageHighMem(page))
136 dec_totalhigh_pages();
137 totalram_pages--;
138}
139
140
141static struct page *balloon_retrieve(bool prefer_highmem)
142{
143 struct page *page;
144
145 if (list_empty(&ballooned_pages))
146 return NULL;
147
148 if (prefer_highmem)
149 page = list_entry(ballooned_pages.prev, struct page, lru);
150 else
151 page = list_entry(ballooned_pages.next, struct page, lru);
152 list_del(&page->lru);
153
154 if (PageHighMem(page)) {
155 balloon_stats.balloon_high--;
156 inc_totalhigh_pages();
157 } else
158 balloon_stats.balloon_low--;
159
160 totalram_pages++;
161
162 return page;
163}
164
165static struct page *balloon_first_page(void)
166{
167 if (list_empty(&ballooned_pages))
168 return NULL;
169 return list_entry(ballooned_pages.next, struct page, lru);
170}
171
172static struct page *balloon_next_page(struct page *page)
173{
174 struct list_head *next = page->lru.next;
175 if (next == &ballooned_pages)
176 return NULL;
177 return list_entry(next, struct page, lru);
178}
179
180static enum bp_state update_schedule(enum bp_state state)
181{
182 if (state == BP_DONE) {
183 balloon_stats.schedule_delay = 1;
184 balloon_stats.retry_count = 1;
185 return BP_DONE;
186 }
187
188 ++balloon_stats.retry_count;
189
190 if (balloon_stats.max_retry_count != RETRY_UNLIMITED &&
191 balloon_stats.retry_count > balloon_stats.max_retry_count) {
192 balloon_stats.schedule_delay = 1;
193 balloon_stats.retry_count = 1;
194 return BP_ECANCELED;
195 }
196
197 balloon_stats.schedule_delay <<= 1;
198
199 if (balloon_stats.schedule_delay > balloon_stats.max_schedule_delay)
200 balloon_stats.schedule_delay = balloon_stats.max_schedule_delay;
201
202 return BP_EAGAIN;
203}
204
205#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
206static long current_credit(void)
207{
208 return balloon_stats.target_pages - balloon_stats.current_pages -
209 balloon_stats.hotplug_pages;
210}
211
212static bool balloon_is_inflated(void)
213{
214 if (balloon_stats.balloon_low || balloon_stats.balloon_high ||
215 balloon_stats.balloon_hotplug)
216 return true;
217 else
218 return false;
219}
220
221
222
223
224
225
226
227
228
229
230
231
232static enum bp_state reserve_additional_memory(long credit)
233{
234 int nid, rc;
235 u64 hotplug_start_paddr;
236 unsigned long balloon_hotplug = credit;
237
238 hotplug_start_paddr = PFN_PHYS(SECTION_ALIGN_UP(max_pfn));
239 balloon_hotplug = round_up(balloon_hotplug, PAGES_PER_SECTION);
240 nid = memory_add_physaddr_to_nid(hotplug_start_paddr);
241
242 rc = add_memory(nid, hotplug_start_paddr, balloon_hotplug << PAGE_SHIFT);
243
244 if (rc) {
245 pr_info("xen_balloon: %s: add_memory() failed: %i\n", __func__, rc);
246 return BP_EAGAIN;
247 }
248
249 balloon_hotplug -= credit;
250
251 balloon_stats.hotplug_pages += credit;
252 balloon_stats.balloon_hotplug = balloon_hotplug;
253
254 return BP_DONE;
255}
256
257static void xen_online_page(struct page *page)
258{
259 __online_page_set_limits(page);
260
261 mutex_lock(&balloon_mutex);
262
263 __balloon_append(page);
264
265 if (balloon_stats.hotplug_pages)
266 --balloon_stats.hotplug_pages;
267 else
268 --balloon_stats.balloon_hotplug;
269
270 mutex_unlock(&balloon_mutex);
271}
272
273static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v)
274{
275 if (val == MEM_ONLINE)
276 schedule_delayed_work(&balloon_worker, 0);
277
278 return NOTIFY_OK;
279}
280
281static struct notifier_block xen_memory_nb = {
282 .notifier_call = xen_memory_notifier,
283 .priority = 0
284};
285#else
286static long current_credit(void)
287{
288 unsigned long target = balloon_stats.target_pages;
289
290 target = min(target,
291 balloon_stats.current_pages +
292 balloon_stats.balloon_low +
293 balloon_stats.balloon_high);
294
295 return target - balloon_stats.current_pages;
296}
297
298static bool balloon_is_inflated(void)
299{
300 if (balloon_stats.balloon_low || balloon_stats.balloon_high)
301 return true;
302 else
303 return false;
304}
305
306static enum bp_state reserve_additional_memory(long credit)
307{
308 balloon_stats.target_pages = balloon_stats.current_pages;
309 return BP_DONE;
310}
311#endif
312
313static enum bp_state increase_reservation(unsigned long nr_pages)
314{
315 int rc;
316 unsigned long pfn, i;
317 struct page *page;
318 struct xen_memory_reservation reservation = {
319 .address_bits = 0,
320 .extent_order = 0,
321 .domid = DOMID_SELF
322 };
323
324#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
325 if (!balloon_stats.balloon_low && !balloon_stats.balloon_high) {
326 nr_pages = min(nr_pages, balloon_stats.balloon_hotplug);
327 balloon_stats.hotplug_pages += nr_pages;
328 balloon_stats.balloon_hotplug -= nr_pages;
329 return BP_DONE;
330 }
331#endif
332
333 if (nr_pages > ARRAY_SIZE(frame_list))
334 nr_pages = ARRAY_SIZE(frame_list);
335
336 page = balloon_first_page();
337 for (i = 0; i < nr_pages; i++) {
338 if (!page) {
339 nr_pages = i;
340 break;
341 }
342 frame_list[i] = page_to_pfn(page);
343 page = balloon_next_page(page);
344 }
345
346 set_xen_guest_handle(reservation.extent_start, frame_list);
347 reservation.nr_extents = nr_pages;
348 rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
349 if (rc <= 0)
350 return BP_EAGAIN;
351
352 for (i = 0; i < rc; i++) {
353 page = balloon_retrieve(false);
354 BUG_ON(page == NULL);
355
356 pfn = page_to_pfn(page);
357 BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
358 phys_to_machine_mapping_valid(pfn));
359
360 set_phys_to_machine(pfn, frame_list[i]);
361
362
363 if (xen_pv_domain() && !PageHighMem(page)) {
364 int ret;
365 ret = HYPERVISOR_update_va_mapping(
366 (unsigned long)__va(pfn << PAGE_SHIFT),
367 mfn_pte(frame_list[i], PAGE_KERNEL),
368 0);
369 BUG_ON(ret);
370 }
371
372
373 ClearPageReserved(page);
374 init_page_count(page);
375 __free_page(page);
376 }
377
378 balloon_stats.current_pages += rc;
379
380 return BP_DONE;
381}
382
383static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
384{
385 enum bp_state state = BP_DONE;
386 unsigned long pfn, i;
387 struct page *page;
388 int ret;
389 struct xen_memory_reservation reservation = {
390 .address_bits = 0,
391 .extent_order = 0,
392 .domid = DOMID_SELF
393 };
394
395#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
396 if (balloon_stats.hotplug_pages) {
397 nr_pages = min(nr_pages, balloon_stats.hotplug_pages);
398 balloon_stats.hotplug_pages -= nr_pages;
399 balloon_stats.balloon_hotplug += nr_pages;
400 return BP_DONE;
401 }
402#endif
403
404 if (nr_pages > ARRAY_SIZE(frame_list))
405 nr_pages = ARRAY_SIZE(frame_list);
406
407 for (i = 0; i < nr_pages; i++) {
408 if ((page = alloc_page(gfp)) == NULL) {
409 nr_pages = i;
410 state = BP_EAGAIN;
411 break;
412 }
413
414 pfn = page_to_pfn(page);
415 frame_list[i] = pfn_to_mfn(pfn);
416
417 scrub_page(page);
418
419 if (xen_pv_domain() && !PageHighMem(page)) {
420 ret = HYPERVISOR_update_va_mapping(
421 (unsigned long)__va(pfn << PAGE_SHIFT),
422 __pte_ma(0), 0);
423 BUG_ON(ret);
424 }
425
426 }
427
428
429 kmap_flush_unused();
430 flush_tlb_all();
431
432
433 for (i = 0; i < nr_pages; i++) {
434 pfn = mfn_to_pfn(frame_list[i]);
435 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
436 balloon_append(pfn_to_page(pfn));
437 }
438
439 set_xen_guest_handle(reservation.extent_start, frame_list);
440 reservation.nr_extents = nr_pages;
441 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
442 BUG_ON(ret != nr_pages);
443
444 balloon_stats.current_pages -= nr_pages;
445
446 return state;
447}
448
449
450
451
452
453
454
455static void balloon_process(struct work_struct *work)
456{
457 enum bp_state state = BP_DONE;
458 long credit;
459
460 mutex_lock(&balloon_mutex);
461
462 do {
463 credit = current_credit();
464
465 if (credit > 0) {
466 if (balloon_is_inflated())
467 state = increase_reservation(credit);
468 else
469 state = reserve_additional_memory(credit);
470 }
471
472 if (credit < 0)
473 state = decrease_reservation(-credit, GFP_BALLOON);
474
475 state = update_schedule(state);
476
477#ifndef CONFIG_PREEMPT
478 if (need_resched())
479 schedule();
480#endif
481 } while (credit && state == BP_DONE);
482
483
484 if (state == BP_EAGAIN)
485 schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ);
486
487 mutex_unlock(&balloon_mutex);
488}
489
490
491void balloon_set_new_target(unsigned long target)
492{
493
494 balloon_stats.target_pages = target;
495 schedule_delayed_work(&balloon_worker, 0);
496}
497EXPORT_SYMBOL_GPL(balloon_set_new_target);
498
499
500
501
502
503
504
505
506int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem)
507{
508 int pgno = 0;
509 struct page *page;
510 mutex_lock(&balloon_mutex);
511 while (pgno < nr_pages) {
512 page = balloon_retrieve(highmem);
513 if (page && (highmem || !PageHighMem(page))) {
514 pages[pgno++] = page;
515 } else {
516 enum bp_state st;
517 if (page)
518 balloon_append(page);
519 st = decrease_reservation(nr_pages - pgno,
520 highmem ? GFP_HIGHUSER : GFP_USER);
521 if (st != BP_DONE)
522 goto out_undo;
523 }
524 }
525 mutex_unlock(&balloon_mutex);
526 return 0;
527 out_undo:
528 while (pgno)
529 balloon_append(pages[--pgno]);
530
531 schedule_delayed_work(&balloon_worker, 0);
532 mutex_unlock(&balloon_mutex);
533 return -ENOMEM;
534}
535EXPORT_SYMBOL(alloc_xenballooned_pages);
536
537
538
539
540
541
542void free_xenballooned_pages(int nr_pages, struct page **pages)
543{
544 int i;
545
546 mutex_lock(&balloon_mutex);
547
548 for (i = 0; i < nr_pages; i++) {
549 if (pages[i])
550 balloon_append(pages[i]);
551 }
552
553
554 if (current_credit())
555 schedule_delayed_work(&balloon_worker, 0);
556
557 mutex_unlock(&balloon_mutex);
558}
559EXPORT_SYMBOL(free_xenballooned_pages);
560
561static void __init balloon_add_region(unsigned long start_pfn,
562 unsigned long pages)
563{
564 unsigned long pfn, extra_pfn_end;
565 struct page *page;
566
567
568
569
570
571
572 extra_pfn_end = min(max_pfn, start_pfn + pages);
573
574 for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) {
575 page = pfn_to_page(pfn);
576
577
578
579 __balloon_append(page);
580 }
581}
582
583static int __init balloon_init(void)
584{
585 int i;
586
587 if (!xen_domain())
588 return -ENODEV;
589
590 pr_info("xen/balloon: Initialising balloon driver.\n");
591
592 balloon_stats.current_pages = xen_pv_domain()
593 ? min(xen_start_info->nr_pages - xen_released_pages, max_pfn)
594 : max_pfn;
595 balloon_stats.target_pages = balloon_stats.current_pages;
596 balloon_stats.balloon_low = 0;
597 balloon_stats.balloon_high = 0;
598
599 balloon_stats.schedule_delay = 1;
600 balloon_stats.max_schedule_delay = 32;
601 balloon_stats.retry_count = 1;
602 balloon_stats.max_retry_count = RETRY_UNLIMITED;
603
604#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
605 balloon_stats.hotplug_pages = 0;
606 balloon_stats.balloon_hotplug = 0;
607
608 set_online_page_callback(&xen_online_page);
609 register_memory_notifier(&xen_memory_nb);
610#endif
611
612
613
614
615
616 for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++)
617 if (xen_extra_mem[i].size)
618 balloon_add_region(PFN_UP(xen_extra_mem[i].start),
619 PFN_DOWN(xen_extra_mem[i].size));
620
621 return 0;
622}
623
624subsys_initcall(balloon_init);
625
626MODULE_LICENSE("GPL");
627