linux/mm/sparse.c
<<
>>
Prefs
   1/*
   2 * sparse memory mappings.
   3 */
   4#include <linux/mm.h>
   5#include <linux/mmzone.h>
   6#include <linux/bootmem.h>
   7#include <linux/highmem.h>
   8#include <linux/module.h>
   9#include <linux/spinlock.h>
  10#include <linux/vmalloc.h>
  11#include <asm/dma.h>
  12
  13/*
  14 * Permanent SPARSEMEM data:
  15 *
  16 * 1) mem_section       - memory sections, mem_map's for valid memory
  17 */
  18#ifdef CONFIG_SPARSEMEM_EXTREME
  19struct mem_section *mem_section[NR_SECTION_ROOTS]
  20        ____cacheline_internodealigned_in_smp;
  21#else
  22struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
  23        ____cacheline_internodealigned_in_smp;
  24#endif
  25EXPORT_SYMBOL(mem_section);
  26
  27#ifdef NODE_NOT_IN_PAGE_FLAGS
  28/*
  29 * If we did not store the node number in the page then we have to
  30 * do a lookup in the section_to_node_table in order to find which
  31 * node the page belongs to.
  32 */
  33#if MAX_NUMNODES <= 256
  34static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
  35#else
  36static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
  37#endif
  38
  39int page_to_nid(struct page *page)
  40{
  41        return section_to_node_table[page_to_section(page)];
  42}
  43EXPORT_SYMBOL(page_to_nid);
  44#endif
  45
  46#ifdef CONFIG_SPARSEMEM_EXTREME
  47static struct mem_section noinline __init_refok *sparse_index_alloc(int nid)
  48{
  49        struct mem_section *section = NULL;
  50        unsigned long array_size = SECTIONS_PER_ROOT *
  51                                   sizeof(struct mem_section);
  52
  53        if (slab_is_available())
  54                section = kmalloc_node(array_size, GFP_KERNEL, nid);
  55        else
  56                section = alloc_bootmem_node(NODE_DATA(nid), array_size);
  57
  58        if (section)
  59                memset(section, 0, array_size);
  60
  61        return section;
  62}
  63
  64static int __meminit sparse_index_init(unsigned long section_nr, int nid)
  65{
  66        static DEFINE_SPINLOCK(index_init_lock);
  67        unsigned long root = SECTION_NR_TO_ROOT(section_nr);
  68        struct mem_section *section;
  69        int ret = 0;
  70
  71#ifdef NODE_NOT_IN_PAGE_FLAGS
  72        section_to_node_table[section_nr] = nid;
  73#endif
  74
  75        if (mem_section[root])
  76                return -EEXIST;
  77
  78        section = sparse_index_alloc(nid);
  79        /*
  80         * This lock keeps two different sections from
  81         * reallocating for the same index
  82         */
  83        spin_lock(&index_init_lock);
  84
  85        if (mem_section[root]) {
  86                ret = -EEXIST;
  87                goto out;
  88        }
  89
  90        mem_section[root] = section;
  91out:
  92        spin_unlock(&index_init_lock);
  93        return ret;
  94}
  95#else /* !SPARSEMEM_EXTREME */
  /*
   * Flat-array variant: the section table is static, so there is no root
   * to allocate.  With NODE_NOT_IN_PAGE_FLAGS the section's node must still
   * be recorded, otherwise page_to_nid() would read a table nobody filled.
   * Always returns 0.
   */
  static inline int sparse_index_init(unsigned long section_nr, int nid)
  {
  #ifdef NODE_NOT_IN_PAGE_FLAGS
          section_to_node_table[section_nr] = nid;
  #endif
          return 0;
  }
 100#endif
 101
 102/*
 103 * Although written for the SPARSEMEM_EXTREME case, this happens
 104 * to also work for the flat array case becase
 105 * NR_SECTION_ROOTS==NR_MEM_SECTIONS.
 106 */
 107int __section_nr(struct mem_section* ms)
 108{
 109        unsigned long root_nr;
 110        struct mem_section* root;
 111
 112        for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
 113                root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
 114                if (!root)
 115                        continue;
 116
 117                if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
 118                     break;
 119        }
 120
 121        return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
 122}
 123
 124/*
 125 * During early boot, before section_mem_map is used for an actual
 126 * mem_map, we use section_mem_map to store the section's NUMA
 127 * node.  This keeps us from having to use another data structure.  The
 128 * node information is cleared just before we store the real mem_map.
 129 */
 130static inline unsigned long sparse_encode_early_nid(int nid)
 131{
 132        return (nid << SECTION_NID_SHIFT);
 133}
 134
 135static inline int sparse_early_nid(struct mem_section *section)
 136{
 137        return (section->section_mem_map >> SECTION_NID_SHIFT);
 138}
 139
 140/* Record a memory area against a node. */
 141void __init memory_present(int nid, unsigned long start, unsigned long end)
 142{
 143        unsigned long pfn;
 144
 145        start &= PAGE_SECTION_MASK;
 146        for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
 147                unsigned long section = pfn_to_section_nr(pfn);
 148                struct mem_section *ms;
 149
 150                sparse_index_init(section, nid);
 151
 152                ms = __nr_to_section(section);
 153                if (!ms->section_mem_map)
 154                        ms->section_mem_map = sparse_encode_early_nid(nid) |
 155                                                        SECTION_MARKED_PRESENT;
 156        }
 157}
 158
 159/*
 160 * Only used by the i386 NUMA architecures, but relatively
 161 * generic code.
 162 */
 163unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
 164                                                     unsigned long end_pfn)
 165{
 166        unsigned long pfn;
 167        unsigned long nr_pages = 0;
 168
 169        for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
 170                if (nid != early_pfn_to_nid(pfn))
 171                        continue;
 172
 173                if (pfn_valid(pfn))
 174                        nr_pages += PAGES_PER_SECTION;
 175        }
 176
 177        return nr_pages * sizeof(struct page);
 178}
 179
 180/*
 181 * Subtle, we encode the real pfn into the mem_map such that
 182 * the identity pfn - section_mem_map will return the actual
 183 * physical page frame number.
 184 */
 185static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
 186{
 187        return (unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
 188}
 189
 190/*
 191 * We need this if we ever free the mem_maps.  While not implemented yet,
 192 * this function is included for parity with its sibling.
 193 */
 194static __attribute((unused))
 195struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
 196{
 197        return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
 198}
 199
 200static int __meminit sparse_init_one_section(struct mem_section *ms,
 201                unsigned long pnum, struct page *mem_map)
 202{
 203        if (!valid_section(ms))
 204                return -EINVAL;
 205
 206        ms->section_mem_map &= ~SECTION_MAP_MASK;
 207        ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum);
 208
 209        return 1;
 210}
 211
 212static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
 213{
 214        struct page *map;
 215        struct mem_section *ms = __nr_to_section(pnum);
 216        int nid = sparse_early_nid(ms);
 217
 218        map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
 219        if (map)
 220                return map;
 221
 222        map = alloc_bootmem_node(NODE_DATA(nid),
 223                        sizeof(struct page) * PAGES_PER_SECTION);
 224        if (map)
 225                return map;
 226
 227        printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__);
 228        ms->section_mem_map = 0;
 229        return NULL;
 230}
 231
 232/*
 233 * Allocate the accumulated non-linear sections, allocate a mem_map
 234 * for each and record the physical to section mapping.
 235 */
 236void __init sparse_init(void)
 237{
 238        unsigned long pnum;
 239        struct page *map;
 240
 241        for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
 242                if (!valid_section_nr(pnum))
 243                        continue;
 244
 245                map = sparse_early_mem_map_alloc(pnum);
 246                if (!map)
 247                        continue;
 248                sparse_init_one_section(__nr_to_section(pnum), pnum, map);
 249        }
 250}
 251
 252#ifdef CONFIG_MEMORY_HOTPLUG
 253static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
 254{
 255        struct page *page, *ret;
 256        unsigned long memmap_size = sizeof(struct page) * nr_pages;
 257
 258        page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
 259        if (page)
 260                goto got_map_page;
 261
 262        ret = vmalloc(memmap_size);
 263        if (ret)
 264                goto got_map_ptr;
 265
 266        return NULL;
 267got_map_page:
 268        ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
 269got_map_ptr:
 270        memset(ret, 0, memmap_size);
 271
 272        return ret;
 273}
 274
 275static int vaddr_in_vmalloc_area(void *addr)
 276{
 277        if (addr >= (void *)VMALLOC_START &&
 278            addr < (void *)VMALLOC_END)
 279                return 1;
 280        return 0;
 281}
 282
 283static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
 284{
 285        if (vaddr_in_vmalloc_area(memmap))
 286                vfree(memmap);
 287        else
 288                free_pages((unsigned long)memmap,
 289                           get_order(sizeof(struct page) * nr_pages));
 290}
 291
 292/*
 293 * returns the number of sections whose mem_maps were properly
 294 * set.  If this is <=0, then that means that the passed-in
 295 * map was not consumed and must be freed.
 296 */
 297int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 298                           int nr_pages)
 299{
 300        unsigned long section_nr = pfn_to_section_nr(start_pfn);
 301        struct pglist_data *pgdat = zone->zone_pgdat;
 302        struct mem_section *ms;
 303        struct page *memmap;
 304        unsigned long flags;
 305        int ret;
 306
 307        /*
 308         * no locking for this, because it does its own
 309         * plus, it does a kmalloc
 310         */
 311        sparse_index_init(section_nr, pgdat->node_id);
 312        memmap = __kmalloc_section_memmap(nr_pages);
 313
 314        pgdat_resize_lock(pgdat, &flags);
 315
 316        ms = __pfn_to_section(start_pfn);
 317        if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
 318                ret = -EEXIST;
 319                goto out;
 320        }
 321        ms->section_mem_map |= SECTION_MARKED_PRESENT;
 322
 323        ret = sparse_init_one_section(ms, section_nr, memmap);
 324
 325out:
 326        pgdat_resize_unlock(pgdat, &flags);
 327        if (ret <= 0)
 328                __kfree_section_memmap(memmap, nr_pages);
 329        return ret;
 330}
 331#endif
 332
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.