linux-bk/arch/x86_64/mm/numa.c
<<
>>
Prefs
   1/* 
   2 * Generic VM initialization for x86-64 NUMA setups.
   3 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
   4 */ 
   5#include <linux/kernel.h>
   6#include <linux/mm.h>
   7#include <linux/string.h>
   8#include <linux/init.h>
   9#include <linux/bootmem.h>
  10#include <linux/mmzone.h>
  11#include <linux/ctype.h>
  12#include <linux/module.h>
  13#include <asm/e820.h>
  14#include <asm/proto.h>
  15#include <asm/dma.h>
  16#include <asm/numa.h>
  17
  18#ifndef Dprintk
  19#define Dprintk(x...)
  20#endif
  21
  22struct pglist_data *node_data[MAXNODE];  /* per-node pgdat pointers; set in setup_node_bootmem(), aliased for offline nodes in numa_init_array() */
  23bootmem_data_t plat_node_bdata[MAX_NUMNODES];  /* per-node bootmem state; attached to each pgdat as ->bdata in setup_node_bootmem() */
  24
  25int memnode_shift;  /* forced to 63 for the single fake node; presumably otherwise the shift found by compute_hash_shift() - confirm at the caller */
  26u8  memnodemap[NODEMAPSIZE];  /* node id per (address >> shift) slot; 0xff marks an unassigned slot while compute_hash_shift() runs */
  27
  28unsigned char cpu_to_node[NR_CPUS];  /* node id for each CPU index */
  29cpumask_t     node_to_cpumask[MAXNODE];  /* for each node, the mask of CPUs assigned to it */
  30
  31static int numa_off __initdata;  /* set to 1 by numa_setup() when the option starts with "off" */
  32
  33unsigned long nodes_present;  /* NOTE(review): not referenced in the visible part of this file */
  34
  35int __init compute_hash_shift(struct node *nodes)
  36{
  37        int i; 
  38        int shift = 24;
  39        u64 addr;
  40        
  41        /* When in doubt use brute force. */
  42        while (shift < 48) { 
  43                memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE); 
  44                for (i = 0; i < numnodes; i++) { 
  45                        if (nodes[i].start == nodes[i].end) 
  46                                continue;
  47                        for (addr = nodes[i].start; 
  48                             addr < nodes[i].end; 
  49                             addr += (1UL << shift)) {
  50                                if (memnodemap[addr >> shift] != 0xff && 
  51                                    memnodemap[addr >> shift] != i) { 
  52                                        printk(KERN_INFO 
  53                                            "node %d shift %d addr %Lx conflict %d\n", 
  54                                               i, shift, addr, memnodemap[addr>>shift]);
  55                                        goto next; 
  56                                } 
  57                                memnodemap[addr >> shift] = i; 
  58                        } 
  59                } 
  60                return shift; 
  61        next:
  62                shift++; 
  63        } 
  64        memset(memnodemap,0,sizeof(*memnodemap) * NODEMAPSIZE); 
  65        return -1; 
  66}
  67
  68/* Initialize bootmem allocator for a node */
  69void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
  70{ 
  71        unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start; 
  72        unsigned long nodedata_phys;
  73        const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
  74
  75        start = round_up(start, ZONE_ALIGN); 
  76
  77        printk("Bootmem setup node %d %016lx-%016lx\n", nodeid, start, end);
  78
  79        start_pfn = start >> PAGE_SHIFT;
  80        end_pfn = end >> PAGE_SHIFT;
  81
  82        nodedata_phys = find_e820_area(start, end, pgdat_size); 
  83        if (nodedata_phys == -1L) 
  84                panic("Cannot find memory pgdat in node %d\n", nodeid);
  85
  86        Dprintk("nodedata_phys %lx\n", nodedata_phys); 
  87
  88        node_data[nodeid] = phys_to_virt(nodedata_phys);
  89        memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
  90        NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
  91        NODE_DATA(nodeid)->node_start_pfn = start_pfn;
  92        NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
  93
  94        /* Find a place for the bootmem map */
  95        bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); 
  96        bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
  97        bootmap_start = find_e820_area(bootmap_start, end, bootmap_pages<<PAGE_SHIFT);
  98        if (bootmap_start == -1L) 
  99                panic("Not enough continuous space for bootmap on node %d", nodeid); 
 100        Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages); 
 101        
 102        bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
 103                                         bootmap_start >> PAGE_SHIFT, 
 104                                         start_pfn, end_pfn); 
 105
 106        e820_bootmem_free(NODE_DATA(nodeid), start, end);
 107
 108        reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size); 
 109        reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
 110        if (nodeid + 1 > numnodes)
 111                numnodes = nodeid + 1;
 112        node_set_online(nodeid);
 113} 
 114
 115/* Initialize final allocator for a zone */
 116void __init setup_node_zones(int nodeid)
 117{ 
 118        unsigned long start_pfn, end_pfn; 
 119        unsigned long zones[MAX_NR_ZONES];
 120        unsigned long dma_end_pfn;
 121
 122        memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES); 
 123
 124        start_pfn = node_start_pfn(nodeid);
 125        end_pfn = node_end_pfn(nodeid);
 126
 127        Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn);
 128        
 129        /* All nodes > 0 have a zero length zone DMA */ 
 130        dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT; 
 131        if (start_pfn < dma_end_pfn) { 
 132                zones[ZONE_DMA] = dma_end_pfn - start_pfn;
 133                zones[ZONE_NORMAL] = end_pfn - dma_end_pfn; 
 134        } else { 
 135                zones[ZONE_NORMAL] = end_pfn - start_pfn; 
 136        } 
 137    
 138        free_area_init_node(nodeid, NODE_DATA(nodeid), NULL, zones, 
 139                            start_pfn, NULL); 
 140} 
 141
 142void __init numa_init_array(void)
 143{
 144        int rr, i;
 145        /* There are unfortunately some poorly designed mainboards around
 146           that only connect memory to a single CPU. This breaks the 1:1 cpu->node
 147           mapping. To avoid this fill in the mapping for all possible
 148           CPUs, as the number of CPUs is not known yet. 
 149           We round robin the existing nodes. */
 150        rr = 0;
 151        for (i = 0; i < MAXNODE; i++) {
 152                if (node_online(i))
 153                        continue;
 154                rr = find_next_bit(node_online_map, MAX_NUMNODES, rr);
 155                if (rr == MAX_NUMNODES)
 156                        rr = find_first_bit(node_online_map, MAX_NUMNODES);
 157                node_data[i] = node_data[rr];
 158                cpu_to_node[i] = rr;
 159                rr++; 
 160        }
 161
 162        set_bit(0, &node_to_cpumask[cpu_to_node(0)]);
 163}
 164
 165void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 166{ 
 167        int i;
 168
 169#ifdef CONFIG_K8_NUMA
 170        if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT))
 171                return;
 172#endif
 173        printk(KERN_INFO "%s\n",
 174               numa_off ? "NUMA turned off" : "No NUMA configuration found");
 175
 176        printk(KERN_INFO "Faking a node at %016lx-%016lx\n", 
 177               start_pfn << PAGE_SHIFT,
 178               end_pfn << PAGE_SHIFT); 
 179                /* setup dummy node covering all memory */ 
 180        memnode_shift = 63; 
 181        memnodemap[0] = 0;
 182        numnodes = 1;
 183        for (i = 0; i < NR_CPUS; i++)
 184                cpu_to_node[i] = 0;
 185        node_to_cpumask[0] = cpumask_of_cpu(0);
 186        setup_node_bootmem(0, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
 187}
 188
 189__init void numa_add_cpu(int cpu)
 190{
 191        /* BP is initialized elsewhere */
 192        if (cpu) 
 193                set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
 194} 
 195
 196unsigned long __init numa_free_all_bootmem(void) 
 197{ 
 198        int i;
 199        unsigned long pages = 0;
 200        for_all_nodes(i) {
 201                pages += free_all_bootmem_node(NODE_DATA(i));
 202        }
 203        return pages;
 204} 
 205
 206void __init paging_init(void)
 207{ 
 208        int i;
 209        for_all_nodes(i) { 
 210                setup_node_zones(i); 
 211        }
 212} 
 213
 214/* [numa=off] */
 215__init int numa_setup(char *opt) 
 216{ 
 217        if (!strncmp(opt,"off",3))
 218                numa_off = 1;
 219        return 1;
 220} 
 221
 222EXPORT_SYMBOL(cpu_to_node);
 223EXPORT_SYMBOL(node_to_cpumask);
 224EXPORT_SYMBOL(memnode_shift);
 225EXPORT_SYMBOL(memnodemap);
 226EXPORT_SYMBOL(node_data);
 227
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.