1
2
3
4
5
6
7
8
9
10
11
12#include <linux/kernel.h>
13#include <linux/acpi.h>
14#include <linux/mmzone.h>
15#include <linux/bitmap.h>
16#include <linux/module.h>
17#include <linux/topology.h>
18#include <linux/bootmem.h>
19#include <linux/mm.h>
20#include <asm/proto.h>
21#include <asm/numa.h>
22#include <asm/e820.h>
23#include <asm/genapic.h>
24
25int acpi_numa __initdata;
26
27static struct acpi_table_slit *acpi_slit;
28
29static nodemask_t nodes_parsed __initdata;
30static struct bootnode nodes[MAX_NUMNODES] __initdata;
31static struct bootnode nodes_add[MAX_NUMNODES];
32static int found_add_area __initdata;
33int hotadd_percent __initdata = 0;
34
35static int num_node_memblks __initdata;
36static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
37static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
38
39
40
41#define NODE_MIN_SIZE (4*1024*1024)
42
43static __init int setup_node(int pxm)
44{
45 return acpi_map_pxm_to_node(pxm);
46}
47
48static __init int conflicting_memblks(unsigned long start, unsigned long end)
49{
50 int i;
51 for (i = 0; i < num_node_memblks; i++) {
52 struct bootnode *nd = &node_memblk_range[i];
53 if (nd->start == nd->end)
54 continue;
55 if (nd->end > start && nd->start < end)
56 return memblk_nodeid[i];
57 if (nd->end == end && nd->start == start)
58 return memblk_nodeid[i];
59 }
60 return -1;
61}
62
63static __init void cutoff_node(int i, unsigned long start, unsigned long end)
64{
65 struct bootnode *nd = &nodes[i];
66
67 if (found_add_area)
68 return;
69
70 if (nd->start < start) {
71 nd->start = start;
72 if (nd->end < nd->start)
73 nd->start = nd->end;
74 }
75 if (nd->end > end) {
76 nd->end = end;
77 if (nd->start > nd->end)
78 nd->start = nd->end;
79 }
80}
81
82static __init void bad_srat(void)
83{
84 int i;
85 printk(KERN_ERR "SRAT: SRAT not used.\n");
86 acpi_numa = -1;
87 found_add_area = 0;
88 for (i = 0; i < MAX_LOCAL_APIC; i++)
89 apicid_to_node[i] = NUMA_NO_NODE;
90 for (i = 0; i < MAX_NUMNODES; i++)
91 nodes_add[i].start = nodes[i].end = 0;
92 remove_all_active_ranges();
93}
94
95static __init inline int srat_disabled(void)
96{
97 return numa_off || acpi_numa < 0;
98}
99
100
101void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
102{
103 acpi_slit = slit;
104}
105
106
107void __init
108acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
109{
110 int pxm, node;
111 int apic_id;
112
113 if (srat_disabled())
114 return;
115 if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
116 bad_srat();
117 return;
118 }
119 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
120 return;
121 pxm = pa->proximity_domain_lo;
122 node = setup_node(pxm);
123 if (node < 0) {
124 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
125 bad_srat();
126 return;
127 }
128
129 if (is_uv_system())
130 apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
131 else
132 apic_id = pa->apic_id;
133 apicid_to_node[apic_id] = node;
134 acpi_numa = 1;
135 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
136 pxm, apic_id, node);
137}
138
/*
 * Stub: always reports failure (-1).  reserve_hotadd() returns this value,
 * so its caller ends up treating every hot-pluggable range as rejected.
 */
static int update_end_of_memory(unsigned long end)
{
	return -1;
}

/* Stub: every hot-add area is considered affordable (always returns 1). */
static int hotadd_enough_memory(struct bootnode *nd)
{
	return 1;
}

/*
 * Whether hot-pluggable ranges are processed at all: 1 when built with
 * CONFIG_MEMORY_HOTPLUG_SPARSE, 0 otherwise.
 */
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
static inline int save_add_info(void)
{
	return 1;
}
#else
static inline int save_add_info(void)
{
	return 0;
}
#endif
146
147
148
149
150
151static int __init
152reserve_hotadd(int node, unsigned long start, unsigned long end)
153{
154 unsigned long s_pfn = start >> PAGE_SHIFT;
155 unsigned long e_pfn = end >> PAGE_SHIFT;
156 int ret = 0, changed = 0;
157 struct bootnode *nd = &nodes_add[node];
158
159
160
161
162
163
164
165 if ((signed long)(end - start) < NODE_MIN_SIZE) {
166 printk(KERN_ERR "SRAT: Hotplug area too small\n");
167 return -1;
168 }
169
170
171 if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
172 printk(KERN_ERR
173 "SRAT: Hotplug area %lu -> %lu has existing memory\n",
174 s_pfn, e_pfn);
175 return -1;
176 }
177
178 if (!hotadd_enough_memory(&nodes_add[node])) {
179 printk(KERN_ERR "SRAT: Hotplug area too large\n");
180 return -1;
181 }
182
183
184
185 if (nd->start == nd->end) {
186 nd->start = start;
187 nd->end = end;
188 changed = 1;
189 } else {
190 if (nd->start == end) {
191 nd->start = start;
192 changed = 1;
193 }
194 if (nd->end == start) {
195 nd->end = end;
196 changed = 1;
197 }
198 if (!changed)
199 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
200 }
201
202 ret = update_end_of_memory(nd->end);
203
204 if (changed)
205 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
206 return ret;
207}
208
209
210void __init
211acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
212{
213 struct bootnode *nd, oldnode;
214 unsigned long start, end;
215 int node, pxm;
216 int i;
217
218 if (srat_disabled())
219 return;
220 if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
221 bad_srat();
222 return;
223 }
224 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
225 return;
226
227 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
228 return;
229 start = ma->base_address;
230 end = start + ma->length;
231 pxm = ma->proximity_domain;
232 node = setup_node(pxm);
233 if (node < 0) {
234 printk(KERN_ERR "SRAT: Too many proximity domains.\n");
235 bad_srat();
236 return;
237 }
238 i = conflicting_memblks(start, end);
239 if (i == node) {
240 printk(KERN_WARNING
241 "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
242 pxm, start, end, nodes[i].start, nodes[i].end);
243 } else if (i >= 0) {
244 printk(KERN_ERR
245 "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
246 pxm, start, end, node_to_pxm(i),
247 nodes[i].start, nodes[i].end);
248 bad_srat();
249 return;
250 }
251 nd = &nodes[node];
252 oldnode = *nd;
253 if (!node_test_and_set(node, nodes_parsed)) {
254 nd->start = start;
255 nd->end = end;
256 } else {
257 if (start < nd->start)
258 nd->start = start;
259 if (nd->end < end)
260 nd->end = end;
261 }
262
263 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
264 start, end);
265 e820_register_active_regions(node, start >> PAGE_SHIFT,
266 end >> PAGE_SHIFT);
267 push_node_boundaries(node, nd->start >> PAGE_SHIFT,
268 nd->end >> PAGE_SHIFT);
269
270 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
271 (reserve_hotadd(node, start, end) < 0)) {
272
273 printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
274 *nd = oldnode;
275 if ((nd->start | nd->end) == 0)
276 node_clear(node, nodes_parsed);
277 }
278
279 node_memblk_range[num_node_memblks].start = start;
280 node_memblk_range[num_node_memblks].end = end;
281 memblk_nodeid[num_node_memblks] = node;
282 num_node_memblks++;
283}
284
285
286
287static int __init nodes_cover_memory(const struct bootnode *nodes)
288{
289 int i;
290 unsigned long pxmram, e820ram;
291
292 pxmram = 0;
293 for_each_node_mask(i, nodes_parsed) {
294 unsigned long s = nodes[i].start >> PAGE_SHIFT;
295 unsigned long e = nodes[i].end >> PAGE_SHIFT;
296 pxmram += e - s;
297 pxmram -= absent_pages_in_range(s, e);
298 if ((long)pxmram < 0)
299 pxmram = 0;
300 }
301
302 e820ram = end_pfn - absent_pages_in_range(0, end_pfn);
303
304 if ((long)(e820ram - pxmram) >= 1*1024*1024) {
305 printk(KERN_ERR
306 "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
307 (pxmram << PAGE_SHIFT) >> 20,
308 (e820ram << PAGE_SHIFT) >> 20);
309 return 0;
310 }
311 return 1;
312}
313
314static void __init unparse_node(int node)
315{
316 int i;
317 node_clear(node, nodes_parsed);
318 for (i = 0; i < MAX_LOCAL_APIC; i++) {
319 if (apicid_to_node[i] == node)
320 apicid_to_node[i] = NUMA_NO_NODE;
321 }
322}
323
324void __init acpi_numa_arch_fixup(void) {}
325
326
327int __init acpi_scan_nodes(unsigned long start, unsigned long end)
328{
329 int i;
330
331 if (acpi_numa <= 0)
332 return -1;
333
334
335 for (i = 0; i < MAX_NUMNODES; i++) {
336 cutoff_node(i, start, end);
337
338
339
340
341 if (nodes[i].end &&
342 (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
343 unparse_node(i);
344 node_set_offline(i);
345 }
346 }
347
348 if (!nodes_cover_memory(nodes)) {
349 bad_srat();
350 return -1;
351 }
352
353 memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
354 memblk_nodeid);
355 if (memnode_shift < 0) {
356 printk(KERN_ERR
357 "SRAT: No NUMA node hash function found. Contact maintainer\n");
358 bad_srat();
359 return -1;
360 }
361
362 node_possible_map = nodes_parsed;
363
364
365 for_each_node_mask(i, node_possible_map)
366 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
367
368
369 for_each_node_mask(i, node_possible_map)
370 if (!node_online(i))
371 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
372
373 for (i = 0; i < NR_CPUS; i++) {
374 int node = early_cpu_to_node(i);
375
376 if (node == NUMA_NO_NODE)
377 continue;
378 if (!node_isset(node, node_possible_map))
379 numa_set_node(i, NUMA_NO_NODE);
380 }
381 numa_init_array();
382 return 0;
383}
384
385#ifdef CONFIG_NUMA_EMU
386static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
387 [0 ... MAX_NUMNODES-1] = PXM_INVAL
388};
389static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
390 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
391};
392static int __init find_node_by_addr(unsigned long addr)
393{
394 int ret = NUMA_NO_NODE;
395 int i;
396
397 for_each_node_mask(i, nodes_parsed) {
398
399
400
401
402
403 if (addr >= nodes[i].start && addr < nodes[i].end) {
404 ret = i;
405 break;
406 }
407 }
408 return ret;
409}
410
411
412
413
414
415
416
417
418
419void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
420{
421 int i, j;
422
423 printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
424 "topology.\n");
425 for (i = 0; i < num_nodes; i++) {
426 int nid, pxm;
427
428 nid = find_node_by_addr(fake_nodes[i].start);
429 if (nid == NUMA_NO_NODE)
430 continue;
431 pxm = node_to_pxm(nid);
432 if (pxm == PXM_INVAL)
433 continue;
434 fake_node_to_pxm_map[i] = pxm;
435
436
437
438
439 for (j = 0; j < MAX_LOCAL_APIC; j++)
440 if (apicid_to_node[j] == nid)
441 fake_apicid_to_node[j] = i;
442 }
443 for (i = 0; i < num_nodes; i++)
444 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
445 memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
446
447 nodes_clear(nodes_parsed);
448 for (i = 0; i < num_nodes; i++)
449 if (fake_nodes[i].start != fake_nodes[i].end)
450 node_set(i, nodes_parsed);
451 WARN_ON(!nodes_cover_memory(fake_nodes));
452}
453
/*
 * Locality test used when no SLIT is available (NUMA emulation build):
 * two nodes count as local to each other when they map to the same
 * proximity domain.  Returns 1 if so, 0 otherwise.
 */
static int null_slit_node_compare(int a, int b)
{
	int pxm_a = node_to_pxm(a);
	int pxm_b = node_to_pxm(b);

	return pxm_a == pxm_b;
}
458#else
/*
 * Locality test used when no SLIT is available: a node is local only to
 * itself.  Returns 1 when a and b are the same node, 0 otherwise.
 */
static int null_slit_node_compare(int a, int b)
{
	return (a == b) ? 1 : 0;
}
463#endif
464
465void __init srat_reserve_add_area(int nodeid)
466{
467 if (found_add_area && nodes_add[nodeid].end) {
468 u64 total_mb;
469
470 printk(KERN_INFO "SRAT: Reserving hot-add memory space "
471 "for node %d at %Lx-%Lx\n",
472 nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
473 total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
474 >> PAGE_SHIFT;
475 total_mb *= sizeof(struct page);
476 total_mb >>= 20;
477 printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
478 "pre-allocated memory.\n", (unsigned long long)total_mb);
479 reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
480 nodes_add[nodeid].end - nodes_add[nodeid].start,
481 BOOTMEM_DEFAULT);
482 }
483}
484
485int __node_distance(int a, int b)
486{
487 int index;
488
489 if (!acpi_slit)
490 return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
491 REMOTE_DISTANCE;
492 index = acpi_slit->locality_count * node_to_pxm(a);
493 return acpi_slit->entry[index + node_to_pxm(b)];
494}
495
496EXPORT_SYMBOL(__node_distance);
497
498int memory_add_physaddr_to_nid(u64 start)
499{
500 int i, ret = 0;
501
502 for_each_node(i)
503 if (nodes_add[i].start <= start && nodes_add[i].end > start)
504 ret = i;
505
506 return ret;
507}
508EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
509
510