1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17#include <linux/kernel.h>
18#include <linux/mm.h>
19#include <linux/swap.h>
20#include <linux/bootmem.h>
21#include <linux/acpi.h>
22#include <linux/efi.h>
23#include <linux/nodemask.h>
24#include <asm/pgalloc.h>
25#include <asm/tlb.h>
26#include <asm/meminit.h>
27#include <asm/numa.h>
28#include <asm/sections.h>
29
30
31
32
33
34struct early_node_data {
35 struct ia64_node_data *node_data;
36 pg_data_t *pgdat;
37 unsigned long pernode_addr;
38 unsigned long pernode_size;
39 struct bootmem_data bootmem_data;
40 unsigned long num_physpages;
41 unsigned long num_dma_physpages;
42 unsigned long min_pfn;
43 unsigned long max_pfn;
44};
45
46static struct early_node_data mem_data[MAX_NUMNODES] __initdata;
47
48
49
50
51
52
53
54
55
56
57
58static void __init reassign_cpu_only_nodes(void)
59{
60 struct node_memblk_s *p;
61 int i, j, k, nnode, nid, cpu, cpunid, pxm;
62 u8 cslit, slit;
63 static DECLARE_BITMAP(nodes_with_mem, MAX_NUMNODES) __initdata;
64 static u8 numa_slit_fix[MAX_NUMNODES * MAX_NUMNODES] __initdata;
65 static int node_flip[MAX_NUMNODES] __initdata;
66 static int old_nid_map[NR_CPUS] __initdata;
67
68 for (nnode = 0, p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++)
69 if (!test_bit(p->nid, (void *) nodes_with_mem)) {
70 set_bit(p->nid, (void *) nodes_with_mem);
71 nnode++;
72 }
73
74
75
76
77 if (nnode == num_online_nodes())
78 return;
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95 nid = 0;
96 for_each_online_node(i) {
97 if (test_bit(i, (void *) nodes_with_mem)) {
98
99
100
101
102 node_flip[nid] = i;
103
104 if (i == nid) {
105 nid++;
106 continue;
107 }
108
109 for (p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++)
110 if (p->nid == i)
111 p->nid = nid;
112
113 cpunid = nid;
114 nid++;
115 } else
116 cpunid = MAX_NUMNODES;
117
118 for (cpu = 0; cpu < NR_CPUS; cpu++)
119 if (node_cpuid[cpu].nid == i) {
120
121
122
123
124 if (cpunid < MAX_NUMNODES) {
125 pxm = nid_to_pxm_map[i];
126 pxm_to_nid_map[pxm] =
127 node_cpuid[cpu].nid = cpunid;
128 continue;
129 }
130
131
132
133
134
135
136 slit = 0xff;
137 k = 2*num_online_nodes();
138 for_each_online_node(j) {
139 if (i == j)
140 continue;
141 else if (test_bit(j, (void *) nodes_with_mem)) {
142 cslit = numa_slit[i * num_online_nodes() + j];
143 if (cslit < slit) {
144 k = num_online_nodes() + j;
145 slit = cslit;
146 }
147 }
148 }
149
150
151 old_nid_map[cpu] = node_cpuid[cpu].nid;
152 node_cpuid[cpu].nid = k;
153 }
154 }
155
156
157
158
159 for (cpu = 0; cpu < NR_CPUS; cpu++)
160 if (node_cpuid[cpu].nid == (2*num_online_nodes())) {
161 pxm = nid_to_pxm_map[old_nid_map[cpu]];
162 pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = nnode - 1;
163 } else {
164 for (i = 0; i < nnode; i++) {
165 if (node_flip[i] != (node_cpuid[cpu].nid - num_online_nodes()))
166 continue;
167
168 pxm = nid_to_pxm_map[old_nid_map[cpu]];
169 pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = i;
170 break;
171 }
172 }
173
174
175
176
177
178 for (i = 0; i < nnode; i++)
179 for (j = 0; j < nnode; j++)
180 numa_slit_fix[i * nnode + j] =
181 numa_slit[node_flip[i] * num_online_nodes() + node_flip[j]];
182
183 memcpy(numa_slit, numa_slit_fix, sizeof (numa_slit));
184
185 nodes_clear(node_online_map);
186 for (i = 0; i < nnode; i++)
187 node_set_online(i);
188
189 return;
190}
191
192
193
194
195
/*
 * Round addr up to the next 1MB boundary and then add node * PERCPU_PAGE_SIZE,
 * so each node's per-node area starts at a different offset from a 1MB
 * boundary.  NOTE(review): presumably this staggering avoids cache aliasing
 * between nodes -- confirm.
 */
#define NODEDATA_ALIGN(addr, node)						\
	((((addr) + 1024*1024-1) & ~(1024*1024-1)) + (node)*PERCPU_PAGE_SIZE)
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212static int __init build_node_maps(unsigned long start, unsigned long len,
213 int node)
214{
215 unsigned long cstart, epfn, end = start + len;
216 struct bootmem_data *bdp = &mem_data[node].bootmem_data;
217
218 epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
219 cstart = GRANULEROUNDDOWN(start);
220
221 if (!bdp->node_low_pfn) {
222 bdp->node_boot_start = cstart;
223 bdp->node_low_pfn = epfn;
224 } else {
225 bdp->node_boot_start = min(cstart, bdp->node_boot_start);
226 bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
227 }
228
229 min_low_pfn = min(min_low_pfn, bdp->node_boot_start>>PAGE_SHIFT);
230 max_low_pfn = max(max_low_pfn, bdp->node_low_pfn);
231
232 return 0;
233}
234
235
236
237
238
239
240
241
242
243
244static int early_nr_phys_cpus_node(int node)
245{
246 int cpu, n = 0;
247
248 for (cpu = 0; cpu < NR_CPUS; cpu++)
249 if (node == node_cpuid[cpu].nid)
250 if ((cpu == 0) || node_cpuid[cpu].phys_id)
251 n++;
252
253 return n;
254}
255
256
257
258
259
260
261
262
263
264
265static int early_nr_cpus_node(int node)
266{
267 int cpu, n = 0;
268
269 for (cpu = 0; cpu < NR_CPUS; cpu++)
270 if (node == node_cpuid[cpu].nid)
271 n++;
272
273 return n;
274}
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304static int __init find_pernode_space(unsigned long start, unsigned long len,
305 int node)
306{
307 unsigned long epfn, cpu, cpus, phys_cpus;
308 unsigned long pernodesize = 0, pernode, pages, mapsize;
309 void *cpu_data;
310 struct bootmem_data *bdp = &mem_data[node].bootmem_data;
311
312 epfn = (start + len) >> PAGE_SHIFT;
313
314 pages = bdp->node_low_pfn - (bdp->node_boot_start >> PAGE_SHIFT);
315 mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
316
317
318
319
320
321 if (start < bdp->node_boot_start || epfn > bdp->node_low_pfn)
322 return 0;
323
324
325 if (mem_data[node].pernode_addr)
326 return 0;
327
328
329
330
331
332 cpus = early_nr_cpus_node(node);
333 phys_cpus = early_nr_phys_cpus_node(node);
334 pernodesize += PERCPU_PAGE_SIZE * cpus;
335 pernodesize += node * L1_CACHE_BYTES;
336 pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
337 pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
338 pernodesize = PAGE_ALIGN(pernodesize);
339 pernode = NODEDATA_ALIGN(start, node);
340
341
342 if (start + len > (pernode + pernodesize + mapsize)) {
343 mem_data[node].pernode_addr = pernode;
344 mem_data[node].pernode_size = pernodesize;
345 memset(__va(pernode), 0, pernodesize);
346
347 cpu_data = (void *)pernode;
348 pernode += PERCPU_PAGE_SIZE * cpus;
349 pernode += node * L1_CACHE_BYTES;
350
351 mem_data[node].pgdat = __va(pernode);
352 pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
353
354 mem_data[node].node_data = __va(pernode);
355 pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
356
357 mem_data[node].pgdat->bdata = bdp;
358 pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
359
360
361
362
363
364
365 for (cpu = 0; cpu < NR_CPUS; cpu++) {
366 if (node == node_cpuid[cpu].nid) {
367 memcpy(__va(cpu_data), __phys_per_cpu_start,
368 __per_cpu_end - __per_cpu_start);
369 __per_cpu_offset[cpu] = (char*)__va(cpu_data) -
370 __per_cpu_start;
371 cpu_data += PERCPU_PAGE_SIZE;
372 }
373 }
374 }
375
376 return 0;
377}
378
379
380
381
382
383
384
385
386
387
388
389
390static int __init free_node_bootmem(unsigned long start, unsigned long len,
391 int node)
392{
393 free_bootmem_node(mem_data[node].pgdat, start, len);
394
395 return 0;
396}
397
398
399
400
401
402
403
404
405static void __init reserve_pernode_space(void)
406{
407 unsigned long base, size, pages;
408 struct bootmem_data *bdp;
409 int node;
410
411 for_each_online_node(node) {
412 pg_data_t *pdp = mem_data[node].pgdat;
413
414 bdp = pdp->bdata;
415
416
417 pages = bdp->node_low_pfn - (bdp->node_boot_start>>PAGE_SHIFT);
418 size = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
419 base = __pa(bdp->node_bootmem_map);
420 reserve_bootmem_node(pdp, base, size);
421
422
423 size = mem_data[node].pernode_size;
424 base = __pa(mem_data[node].pernode_addr);
425 reserve_bootmem_node(pdp, base, size);
426 }
427}
428
429
430
431
432
433
434
435
436
437static void __init initialize_pernode_data(void)
438{
439 int cpu, node;
440 pg_data_t *pgdat_list[MAX_NUMNODES];
441
442 for_each_online_node(node)
443 pgdat_list[node] = mem_data[node].pgdat;
444
445
446 for_each_online_node(node) {
447 memcpy(mem_data[node].node_data->pg_data_ptrs, pgdat_list,
448 sizeof(pgdat_list));
449 }
450
451
452 for (cpu = 0; cpu < NR_CPUS; cpu++) {
453 node = node_cpuid[cpu].nid;
454 per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data;
455 }
456}
457
458
459
460
461
462
463
464void __init find_memory(void)
465{
466 int node;
467
468 reserve_memory();
469
470 if (num_online_nodes() == 0) {
471 printk(KERN_ERR "node info missing!\n");
472 node_set_online(0);
473 }
474
475 min_low_pfn = -1;
476 max_low_pfn = 0;
477
478 if (num_online_nodes() > 1)
479 reassign_cpu_only_nodes();
480
481
482 efi_memmap_walk(filter_rsvd_memory, build_node_maps);
483 efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
484
485
486
487
488
489 for (node = MAX_NUMNODES - 1; node >= 0; node--) {
490 unsigned long pernode, pernodesize, map;
491 struct bootmem_data *bdp;
492
493 if (!node_online(node))
494 continue;
495
496 bdp = &mem_data[node].bootmem_data;
497 pernode = mem_data[node].pernode_addr;
498 pernodesize = mem_data[node].pernode_size;
499 map = pernode + pernodesize;
500
501
502 if (!pernode)
503 panic("pernode space for node %d "
504 "could not be allocated!", node);
505
506 init_bootmem_node(mem_data[node].pgdat,
507 map>>PAGE_SHIFT,
508 bdp->node_boot_start>>PAGE_SHIFT,
509 bdp->node_low_pfn);
510 }
511
512 efi_memmap_walk(filter_rsvd_memory, free_node_bootmem);
513
514 reserve_pernode_space();
515 initialize_pernode_data();
516
517 max_pfn = max_low_pfn;
518
519 find_initrd();
520}
521
522
523
524
525
526
527
528void *per_cpu_init(void)
529{
530 int cpu;
531
532 if (smp_processor_id() == 0) {
533 for (cpu = 0; cpu < NR_CPUS; cpu++) {
534 per_cpu(local_per_cpu_offset, cpu) =
535 __per_cpu_offset[cpu];
536 }
537 }
538
539 return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
540}
541
542
543
544
545
546
547
548void show_mem(void)
549{
550 int i, total_reserved = 0;
551 int total_shared = 0, total_cached = 0;
552 unsigned long total_present = 0;
553 pg_data_t *pgdat;
554
555 printk("Mem-info:\n");
556 show_free_areas();
557 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
558 for_each_pgdat(pgdat) {
559 unsigned long present = pgdat->node_present_pages;
560 int shared = 0, cached = 0, reserved = 0;
561 printk("Node ID: %d\n", pgdat->node_id);
562 for(i = 0; i < pgdat->node_spanned_pages; i++) {
563 if (!ia64_pfn_valid(pgdat->node_start_pfn+i))
564 continue;
565 if (PageReserved(pgdat->node_mem_map+i))
566 reserved++;
567 else if (PageSwapCache(pgdat->node_mem_map+i))
568 cached++;
569 else if (page_count(pgdat->node_mem_map+i))
570 shared += page_count(pgdat->node_mem_map+i)-1;
571 }
572 total_present += present;
573 total_reserved += reserved;
574 total_cached += cached;
575 total_shared += shared;
576 printk("\t%ld pages of RAM\n", present);
577 printk("\t%d reserved pages\n", reserved);
578 printk("\t%d pages shared\n", shared);
579 printk("\t%d pages swap cached\n", cached);
580 }
581 printk("%ld pages of RAM\n", total_present);
582 printk("%d reserved pages\n", total_reserved);
583 printk("%d pages shared\n", total_shared);
584 printk("%d pages swap cached\n", total_cached);
585 printk("Total of %ld pages in page table cache\n", pgtable_cache_size);
586 printk("%d free buffer pages\n", nr_free_buffer_pages());
587}
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602void call_pernode_memory(unsigned long start, unsigned long len, void *arg)
603{
604 unsigned long rs, re, end = start + len;
605 void (*func)(unsigned long, unsigned long, int);
606 int i;
607
608 start = PAGE_ALIGN(start);
609 end &= PAGE_MASK;
610 if (start >= end)
611 return;
612
613 func = arg;
614
615 if (!num_node_memblks) {
616
617 if (start < end)
618 (*func)(start, end - start, 0);
619 return;
620 }
621
622 for (i = 0; i < num_node_memblks; i++) {
623 rs = max(start, node_memblk[i].start_paddr);
624 re = min(end, node_memblk[i].start_paddr +
625 node_memblk[i].size);
626
627 if (rs < re)
628 (*func)(rs, re - rs, node_memblk[i].nid);
629
630 if (re == end)
631 break;
632 }
633}
634
635
636
637
638
639
640
641
642
643
644
645
646static __init int count_node_pages(unsigned long start, unsigned long len, int node)
647{
648 unsigned long end = start + len;
649
650 mem_data[node].num_physpages += len >> PAGE_SHIFT;
651 if (start <= __pa(MAX_DMA_ADDRESS))
652 mem_data[node].num_dma_physpages +=
653 (min(end, __pa(MAX_DMA_ADDRESS)) - start) >>PAGE_SHIFT;
654 start = GRANULEROUNDDOWN(start);
655 start = ORDERROUNDDOWN(start);
656 end = GRANULEROUNDUP(end);
657 mem_data[node].max_pfn = max(mem_data[node].max_pfn,
658 end >> PAGE_SHIFT);
659 mem_data[node].min_pfn = min(mem_data[node].min_pfn,
660 start >> PAGE_SHIFT);
661
662 return 0;
663}
664
665
666
667
668
669
670
671void __init paging_init(void)
672{
673 unsigned long max_dma;
674 unsigned long zones_size[MAX_NR_ZONES];
675 unsigned long zholes_size[MAX_NR_ZONES];
676 unsigned long pfn_offset = 0;
677 int node;
678
679 max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
680
681
682 for_each_online_node(node)
683 mem_data[node].min_pfn = ~0UL;
684
685 efi_memmap_walk(filter_rsvd_memory, count_node_pages);
686
687 for_each_online_node(node) {
688 memset(zones_size, 0, sizeof(zones_size));
689 memset(zholes_size, 0, sizeof(zholes_size));
690
691 num_physpages += mem_data[node].num_physpages;
692
693 if (mem_data[node].min_pfn >= max_dma) {
694
695 zones_size[ZONE_NORMAL] = mem_data[node].max_pfn -
696 mem_data[node].min_pfn;
697 zholes_size[ZONE_NORMAL] = mem_data[node].max_pfn -
698 mem_data[node].min_pfn -
699 mem_data[node].num_physpages;
700 } else if (mem_data[node].max_pfn < max_dma) {
701
702 zones_size[ZONE_DMA] = mem_data[node].max_pfn -
703 mem_data[node].min_pfn;
704 zholes_size[ZONE_DMA] = mem_data[node].max_pfn -
705 mem_data[node].min_pfn -
706 mem_data[node].num_dma_physpages;
707 } else {
708
709 zones_size[ZONE_DMA] = max_dma -
710 mem_data[node].min_pfn;
711 zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] -
712 mem_data[node].num_dma_physpages;
713 zones_size[ZONE_NORMAL] = mem_data[node].max_pfn -
714 max_dma;
715 zholes_size[ZONE_NORMAL] = zones_size[ZONE_NORMAL] -
716 (mem_data[node].num_physpages -
717 mem_data[node].num_dma_physpages);
718 }
719
720 if (node == 0) {
721 vmalloc_end -=
722 PAGE_ALIGN(max_low_pfn * sizeof(struct page));
723 vmem_map = (struct page *) vmalloc_end;
724
725 efi_memmap_walk(create_mem_map_page_table, NULL);
726 printk("Virtual mem_map starts at 0x%p\n", vmem_map);
727 }
728
729 pfn_offset = mem_data[node].min_pfn;
730
731 NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;
732 free_area_init_node(node, NODE_DATA(node), zones_size,
733 pfn_offset, zholes_size);
734 }
735
736 zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
737}
738