1
2
3
4#include <linux/mm.h>
5#include <linux/slab.h>
6#include <linux/mmzone.h>
7#include <linux/bootmem.h>
8#include <linux/highmem.h>
9#include <linux/export.h>
10#include <linux/spinlock.h>
11#include <linux/vmalloc.h>
12#include "internal.h"
13#include <asm/dma.h>
14#include <asm/pgalloc.h>
15#include <asm/pgtable.h>
16
17
18
19
20
21
22#ifdef CONFIG_SPARSEMEM_EXTREME
23struct mem_section *mem_section[NR_SECTION_ROOTS]
24 ____cacheline_internodealigned_in_smp;
25#else
26struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
27 ____cacheline_internodealigned_in_smp;
28#endif
29EXPORT_SYMBOL(mem_section);
30
31#ifdef NODE_NOT_IN_PAGE_FLAGS
32
33
34
35
36
37#if MAX_NUMNODES <= 256
38static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
39#else
40static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
41#endif
42
43int page_to_nid(const struct page *page)
44{
45 return section_to_node_table[page_to_section(page)];
46}
47EXPORT_SYMBOL(page_to_nid);
48
49static void set_section_nid(unsigned long section_nr, int nid)
50{
51 section_to_node_table[section_nr] = nid;
52}
53#else
/*
 * No-op variant used when NODE_NOT_IN_PAGE_FLAGS is not defined: there
 * is no section-to-node table to update in that configuration.
 */
static inline void set_section_nid(unsigned long section_nr, int nid)
{
	(void)section_nr;
	(void)nid;
}
57#endif
58
59#ifdef CONFIG_SPARSEMEM_EXTREME
60static struct mem_section noinline __init_refok *sparse_index_alloc(int nid)
61{
62 struct mem_section *section = NULL;
63 unsigned long array_size = SECTIONS_PER_ROOT *
64 sizeof(struct mem_section);
65
66 if (slab_is_available()) {
67 if (node_state(nid, N_HIGH_MEMORY))
68 section = kzalloc_node(array_size, GFP_KERNEL, nid);
69 else
70 section = kzalloc(array_size, GFP_KERNEL);
71 } else {
72 section = alloc_bootmem_node(NODE_DATA(nid), array_size);
73 }
74
75 return section;
76}
77
78static int __meminit sparse_index_init(unsigned long section_nr, int nid)
79{
80 unsigned long root = SECTION_NR_TO_ROOT(section_nr);
81 struct mem_section *section;
82 int ret = 0;
83
84 if (mem_section[root])
85 return -EEXIST;
86
87 section = sparse_index_alloc(nid);
88 if (!section)
89 return -ENOMEM;
90
91 mem_section[root] = section;
92
93 return ret;
94}
95#else
/*
 * Without CONFIG_SPARSEMEM_EXTREME the section table is a static array,
 * so there is never a root to allocate: always report success.
 */
static inline int sparse_index_init(unsigned long section_nr, int nid)
{
	(void)section_nr;
	(void)nid;

	return 0;
}
100#endif
101
102
103
104
105
106
107int __section_nr(struct mem_section* ms)
108{
109 unsigned long root_nr;
110 struct mem_section* root;
111
112 for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
113 root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
114 if (!root)
115 continue;
116
117 if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
118 break;
119 }
120
121 VM_BUG_ON(root_nr == NR_SECTION_ROOTS);
122
123 return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
124}
125
126
127
128
129
130
131
132static inline unsigned long sparse_encode_early_nid(int nid)
133{
134 return (nid << SECTION_NID_SHIFT);
135}
136
137static inline int sparse_early_nid(struct mem_section *section)
138{
139 return (section->section_mem_map >> SECTION_NID_SHIFT);
140}
141
142
143void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
144 unsigned long *end_pfn)
145{
146 unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
147
148
149
150
151
152 if (*start_pfn > max_sparsemem_pfn) {
153 mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
154 "Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
155 *start_pfn, *end_pfn, max_sparsemem_pfn);
156 WARN_ON_ONCE(1);
157 *start_pfn = max_sparsemem_pfn;
158 *end_pfn = max_sparsemem_pfn;
159 } else if (*end_pfn > max_sparsemem_pfn) {
160 mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
161 "End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
162 *start_pfn, *end_pfn, max_sparsemem_pfn);
163 WARN_ON_ONCE(1);
164 *end_pfn = max_sparsemem_pfn;
165 }
166}
167
168
169void __init memory_present(int nid, unsigned long start, unsigned long end)
170{
171 unsigned long pfn;
172
173 start &= PAGE_SECTION_MASK;
174 mminit_validate_memmodel_limits(&start, &end);
175 for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
176 unsigned long section = pfn_to_section_nr(pfn);
177 struct mem_section *ms;
178
179 sparse_index_init(section, nid);
180 set_section_nid(section, nid);
181
182 ms = __nr_to_section(section);
183 if (!ms->section_mem_map)
184 ms->section_mem_map = sparse_encode_early_nid(nid) |
185 SECTION_MARKED_PRESENT;
186 }
187}
188
189
190
191
192
193unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
194 unsigned long end_pfn)
195{
196 unsigned long pfn;
197 unsigned long nr_pages = 0;
198
199 mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
200 for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
201 if (nid != early_pfn_to_nid(pfn))
202 continue;
203
204 if (pfn_present(pfn))
205 nr_pages += PAGES_PER_SECTION;
206 }
207
208 return nr_pages * sizeof(struct page);
209}
210
211
212
213
214
215
216static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
217{
218 return (unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
219}
220
221
222
223
224struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
225{
226
227 coded_mem_map &= SECTION_MAP_MASK;
228 return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
229}
230
231static int __meminit sparse_init_one_section(struct mem_section *ms,
232 unsigned long pnum, struct page *mem_map,
233 unsigned long *pageblock_bitmap)
234{
235 if (!present_section(ms))
236 return -EINVAL;
237
238 ms->section_mem_map &= ~SECTION_MAP_MASK;
239 ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) |
240 SECTION_HAS_MEM_MAP;
241 ms->pageblock_flags = pageblock_bitmap;
242
243 return 1;
244}
245
246unsigned long usemap_size(void)
247{
248 unsigned long size_bytes;
249 size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8;
250 size_bytes = roundup(size_bytes, sizeof(unsigned long));
251 return size_bytes;
252}
253
254#ifdef CONFIG_MEMORY_HOTPLUG
255static unsigned long *__kmalloc_section_usemap(void)
256{
257 return kmalloc(usemap_size(), GFP_KERNEL);
258}
259#endif
260
261#ifdef CONFIG_MEMORY_HOTREMOVE
262static unsigned long * __init
263sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
264 unsigned long size)
265{
266 unsigned long goal, limit;
267 unsigned long *p;
268 int nid;
269
270
271
272
273
274
275
276
277
278
279 goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
280 limit = goal + (1UL << PA_SECTION_SHIFT);
281 nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
282again:
283 p = ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size,
284 SMP_CACHE_BYTES, goal, limit);
285 if (!p && limit) {
286 limit = 0;
287 goto again;
288 }
289 return p;
290}
291
292static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
293{
294 unsigned long usemap_snr, pgdat_snr;
295 static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
296 static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
297 struct pglist_data *pgdat = NODE_DATA(nid);
298 int usemap_nid;
299
300 usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
301 pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
302 if (usemap_snr == pgdat_snr)
303 return;
304
305 if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
306
307 return;
308
309 old_usemap_snr = usemap_snr;
310 old_pgdat_snr = pgdat_snr;
311
312 usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
313 if (usemap_nid != nid) {
314 printk(KERN_INFO
315 "node %d must be removed before remove section %ld\n",
316 nid, usemap_snr);
317 return;
318 }
319
320
321
322
323
324
325 printk(KERN_INFO "Section %ld and %ld (node %d)", usemap_snr,
326 pgdat_snr, nid);
327 printk(KERN_CONT
328 " have a circular dependency on usemap and pgdat allocations\n");
329}
330#else
331static unsigned long * __init
332sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
333 unsigned long size)
334{
335 return alloc_bootmem_node_nopanic(pgdat, size);
336}
337
338static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
339{
340}
341#endif
342
343static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map,
344 unsigned long pnum_begin,
345 unsigned long pnum_end,
346 unsigned long usemap_count, int nodeid)
347{
348 void *usemap;
349 unsigned long pnum;
350 int size = usemap_size();
351
352 usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
353 size * usemap_count);
354 if (!usemap) {
355 printk(KERN_WARNING "%s: allocation failed\n", __func__);
356 return;
357 }
358
359 for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
360 if (!present_section_nr(pnum))
361 continue;
362 usemap_map[pnum] = usemap;
363 usemap += size;
364 check_usemap_section_nr(nodeid, usemap_map[pnum]);
365 }
366}
367
368#ifndef CONFIG_SPARSEMEM_VMEMMAP
369struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
370{
371 struct page *map;
372 unsigned long size;
373
374 map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
375 if (map)
376 return map;
377
378 size = PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
379 map = __alloc_bootmem_node_high(NODE_DATA(nid), size,
380 PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
381 return map;
382}
383void __init sparse_mem_maps_populate_node(struct page **map_map,
384 unsigned long pnum_begin,
385 unsigned long pnum_end,
386 unsigned long map_count, int nodeid)
387{
388 void *map;
389 unsigned long pnum;
390 unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
391
392 map = alloc_remap(nodeid, size * map_count);
393 if (map) {
394 for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
395 if (!present_section_nr(pnum))
396 continue;
397 map_map[pnum] = map;
398 map += size;
399 }
400 return;
401 }
402
403 size = PAGE_ALIGN(size);
404 map = __alloc_bootmem_node_high(NODE_DATA(nodeid), size * map_count,
405 PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
406 if (map) {
407 for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
408 if (!present_section_nr(pnum))
409 continue;
410 map_map[pnum] = map;
411 map += size;
412 }
413 return;
414 }
415
416
417 for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
418 struct mem_section *ms;
419
420 if (!present_section_nr(pnum))
421 continue;
422 map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
423 if (map_map[pnum])
424 continue;
425 ms = __nr_to_section(pnum);
426 printk(KERN_ERR "%s: sparsemem memory map backing failed "
427 "some memory will not be available.\n", __func__);
428 ms->section_mem_map = 0;
429 }
430}
431#endif
432
433#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
434static void __init sparse_early_mem_maps_alloc_node(struct page **map_map,
435 unsigned long pnum_begin,
436 unsigned long pnum_end,
437 unsigned long map_count, int nodeid)
438{
439 sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end,
440 map_count, nodeid);
441}
442#else
443static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
444{
445 struct page *map;
446 struct mem_section *ms = __nr_to_section(pnum);
447 int nid = sparse_early_nid(ms);
448
449 map = sparse_mem_map_populate(pnum, nid);
450 if (map)
451 return map;
452
453 printk(KERN_ERR "%s: sparsemem memory map backing failed "
454 "some memory will not be available.\n", __func__);
455 ms->section_mem_map = 0;
456 return NULL;
457}
458#endif
459
460void __attribute__((weak)) __meminit vmemmap_populate_print_last(void)
461{
462}
463
464
465
466
467
468void __init sparse_init(void)
469{
470 unsigned long pnum;
471 struct page *map;
472 unsigned long *usemap;
473 unsigned long **usemap_map;
474 int size;
475 int nodeid_begin = 0;
476 unsigned long pnum_begin = 0;
477 unsigned long usemap_count;
478#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
479 unsigned long map_count;
480 int size2;
481 struct page **map_map;
482#endif
483
484
485 set_pageblock_order();
486
487
488
489
490
491
492
493
494
495
496
497
498 size = sizeof(unsigned long *) * NR_MEM_SECTIONS;
499 usemap_map = alloc_bootmem(size);
500 if (!usemap_map)
501 panic("can not allocate usemap_map\n");
502
503 for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
504 struct mem_section *ms;
505
506 if (!present_section_nr(pnum))
507 continue;
508 ms = __nr_to_section(pnum);
509 nodeid_begin = sparse_early_nid(ms);
510 pnum_begin = pnum;
511 break;
512 }
513 usemap_count = 1;
514 for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
515 struct mem_section *ms;
516 int nodeid;
517
518 if (!present_section_nr(pnum))
519 continue;
520 ms = __nr_to_section(pnum);
521 nodeid = sparse_early_nid(ms);
522 if (nodeid == nodeid_begin) {
523 usemap_count++;
524 continue;
525 }
526
527 sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, pnum,
528 usemap_count, nodeid_begin);
529
530 nodeid_begin = nodeid;
531 pnum_begin = pnum;
532 usemap_count = 1;
533 }
534
535 sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS,
536 usemap_count, nodeid_begin);
537
538#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
539 size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
540 map_map = alloc_bootmem(size2);
541 if (!map_map)
542 panic("can not allocate map_map\n");
543
544 for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
545 struct mem_section *ms;
546
547 if (!present_section_nr(pnum))
548 continue;
549 ms = __nr_to_section(pnum);
550 nodeid_begin = sparse_early_nid(ms);
551 pnum_begin = pnum;
552 break;
553 }
554 map_count = 1;
555 for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
556 struct mem_section *ms;
557 int nodeid;
558
559 if (!present_section_nr(pnum))
560 continue;
561 ms = __nr_to_section(pnum);
562 nodeid = sparse_early_nid(ms);
563 if (nodeid == nodeid_begin) {
564 map_count++;
565 continue;
566 }
567
568 sparse_early_mem_maps_alloc_node(map_map, pnum_begin, pnum,
569 map_count, nodeid_begin);
570
571 nodeid_begin = nodeid;
572 pnum_begin = pnum;
573 map_count = 1;
574 }
575
576 sparse_early_mem_maps_alloc_node(map_map, pnum_begin, NR_MEM_SECTIONS,
577 map_count, nodeid_begin);
578#endif
579
580 for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
581 if (!present_section_nr(pnum))
582 continue;
583
584 usemap = usemap_map[pnum];
585 if (!usemap)
586 continue;
587
588#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
589 map = map_map[pnum];
590#else
591 map = sparse_early_mem_map_alloc(pnum);
592#endif
593 if (!map)
594 continue;
595
596 sparse_init_one_section(__nr_to_section(pnum), pnum, map,
597 usemap);
598 }
599
600 vmemmap_populate_print_last();
601
602#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
603 free_bootmem(__pa(map_map), size2);
604#endif
605 free_bootmem(__pa(usemap_map), size);
606}
607
608#ifdef CONFIG_MEMORY_HOTPLUG
609#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * CONFIG_SPARSEMEM_VMEMMAP hotplug path: obtain the mem_map for section
 * @pnum on node @nid from sparse_mem_map_populate().  @nr_pages is not
 * used here.
 */
static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
						  unsigned long nr_pages)
{
	struct page *memmap = sparse_mem_map_populate(pnum, nid);

	return memmap;
}
/*
 * CONFIG_SPARSEMEM_VMEMMAP: intentionally empty - this configuration
 * does not release a section's mem_map here.
 */
static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
{
}
/*
 * CONFIG_SPARSEMEM_VMEMMAP: intentionally empty - bootmem-backed
 * mem_maps are not released in this configuration.
 */
static void free_map_bootmem(struct page *memmap, unsigned long nr_pages)
{
	/* nothing to release */
}
623#else
624static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
625{
626 struct page *page, *ret;
627 unsigned long memmap_size = sizeof(struct page) * nr_pages;
628
629 page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
630 if (page)
631 goto got_map_page;
632
633 ret = vmalloc(memmap_size);
634 if (ret)
635 goto got_map_ptr;
636
637 return NULL;
638got_map_page:
639 ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
640got_map_ptr:
641 memset(ret, 0, memmap_size);
642
643 return ret;
644}
645
/*
 * Non-VMEMMAP hotplug path: the mem_map is an ordinary allocation whose
 * size depends only on @nr_pages; @pnum and @nid are not used.
 */
static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
						  unsigned long nr_pages)
{
	struct page *memmap = __kmalloc_section_memmap(nr_pages);

	return memmap;
}
651
652static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
653{
654 if (is_vmalloc_addr(memmap))
655 vfree(memmap);
656 else
657 free_pages((unsigned long)memmap,
658 get_order(sizeof(struct page) * nr_pages));
659}
660
661static void free_map_bootmem(struct page *memmap, unsigned long nr_pages)
662{
663 unsigned long maps_section_nr, removing_section_nr, i;
664 unsigned long magic;
665 struct page *page = virt_to_page(memmap);
666
667 for (i = 0; i < nr_pages; i++, page++) {
668 magic = (unsigned long) page->lru.next;
669
670 BUG_ON(magic == NODE_INFO);
671
672 maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
673 removing_section_nr = page->private;
674
675
676
677
678
679
680
681
682
683 if (maps_section_nr != removing_section_nr)
684 put_page_bootmem(page);
685 }
686}
687#endif
688
689static void free_section_usemap(struct page *memmap, unsigned long *usemap)
690{
691 struct page *usemap_page;
692 unsigned long nr_pages;
693
694 if (!usemap)
695 return;
696
697 usemap_page = virt_to_page(usemap);
698
699
700
701 if (PageSlab(usemap_page)) {
702 kfree(usemap);
703 if (memmap)
704 __kfree_section_memmap(memmap, PAGES_PER_SECTION);
705 return;
706 }
707
708
709
710
711
712
713 if (memmap) {
714 nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
715 >> PAGE_SHIFT;
716
717 free_map_bootmem(memmap, nr_pages);
718 }
719}
720
721
722
723
724
725
726int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
727 int nr_pages)
728{
729 unsigned long section_nr = pfn_to_section_nr(start_pfn);
730 struct pglist_data *pgdat = zone->zone_pgdat;
731 struct mem_section *ms;
732 struct page *memmap;
733 unsigned long *usemap;
734 unsigned long flags;
735 int ret;
736
737
738
739
740
741 ret = sparse_index_init(section_nr, pgdat->node_id);
742 if (ret < 0 && ret != -EEXIST)
743 return ret;
744 memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, nr_pages);
745 if (!memmap)
746 return -ENOMEM;
747 usemap = __kmalloc_section_usemap();
748 if (!usemap) {
749 __kfree_section_memmap(memmap, nr_pages);
750 return -ENOMEM;
751 }
752
753 pgdat_resize_lock(pgdat, &flags);
754
755 ms = __pfn_to_section(start_pfn);
756 if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
757 ret = -EEXIST;
758 goto out;
759 }
760
761 ms->section_mem_map |= SECTION_MARKED_PRESENT;
762
763 ret = sparse_init_one_section(ms, section_nr, memmap, usemap);
764
765out:
766 pgdat_resize_unlock(pgdat, &flags);
767 if (ret <= 0) {
768 kfree(usemap);
769 __kfree_section_memmap(memmap, nr_pages);
770 }
771 return ret;
772}
773
774void sparse_remove_one_section(struct zone *zone, struct mem_section *ms)
775{
776 struct page *memmap = NULL;
777 unsigned long *usemap = NULL;
778
779 if (ms->section_mem_map) {
780 usemap = ms->pageblock_flags;
781 memmap = sparse_decode_mem_map(ms->section_mem_map,
782 __section_nr(ms));
783 ms->section_mem_map = 0;
784 ms->pageblock_flags = NULL;
785 }
786
787 free_section_usemap(memmap, usemap);
788}
789#endif
790