linux/mm/sparse.c
<<
>>
Prefs
   1/*
   2 * sparse memory mappings.
   3 */
   4#include <linux/mm.h>
   5#include <linux/mmzone.h>
   6#include <linux/bootmem.h>
   7#include <linux/highmem.h>
   8#include <linux/module.h>
   9#include <linux/spinlock.h>
  10#include <linux/vmalloc.h>
  11#include <asm/dma.h>
  12#include <asm/pgalloc.h>
  13#include <asm/pgtable.h>
  14
  15/*
  16 * Permanent SPARSEMEM data:
  17 *
  18 * 1) mem_section       - memory sections, mem_map's for valid memory
  19 */
  20#ifdef CONFIG_SPARSEMEM_EXTREME
  21struct mem_section *mem_section[NR_SECTION_ROOTS]
  22        ____cacheline_internodealigned_in_smp;
  23#else
  24struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
  25        ____cacheline_internodealigned_in_smp;
  26#endif
  27EXPORT_SYMBOL(mem_section);
  28
  29#ifdef NODE_NOT_IN_PAGE_FLAGS
  30/*
  31 * If we did not store the node number in the page then we have to
  32 * do a lookup in the section_to_node_table in order to find which
  33 * node the page belongs to.
  34 */
  35#if MAX_NUMNODES <= 256
  36static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
  37#else
  38static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
  39#endif
  40
  41int page_to_nid(struct page *page)
  42{
  43        return section_to_node_table[page_to_section(page)];
  44}
  45EXPORT_SYMBOL(page_to_nid);
  46
  47static void set_section_nid(unsigned long section_nr, int nid)
  48{
  49        section_to_node_table[section_nr] = nid;
  50}
  51#else /* !NODE_NOT_IN_PAGE_FLAGS */
  52static inline void set_section_nid(unsigned long section_nr, int nid)
  53{
  54}
  55#endif
  56
  57#ifdef CONFIG_SPARSEMEM_EXTREME
  58static struct mem_section noinline __init_refok *sparse_index_alloc(int nid)
  59{
  60        struct mem_section *section = NULL;
  61        unsigned long array_size = SECTIONS_PER_ROOT *
  62                                   sizeof(struct mem_section);
  63
  64        if (slab_is_available())
  65                section = kmalloc_node(array_size, GFP_KERNEL, nid);
  66        else
  67                section = alloc_bootmem_node(NODE_DATA(nid), array_size);
  68
  69        if (section)
  70                memset(section, 0, array_size);
  71
  72        return section;
  73}
  74
  75static int __meminit sparse_index_init(unsigned long section_nr, int nid)
  76{
  77        static DEFINE_SPINLOCK(index_init_lock);
  78        unsigned long root = SECTION_NR_TO_ROOT(section_nr);
  79        struct mem_section *section;
  80        int ret = 0;
  81
  82        if (mem_section[root])
  83                return -EEXIST;
  84
  85        section = sparse_index_alloc(nid);
  86        if (!section)
  87                return -ENOMEM;
  88        /*
  89         * This lock keeps two different sections from
  90         * reallocating for the same index
  91         */
  92        spin_lock(&index_init_lock);
  93
  94        if (mem_section[root]) {
  95                ret = -EEXIST;
  96                goto out;
  97        }
  98
  99        mem_section[root] = section;
 100out:
 101        spin_unlock(&index_init_lock);
 102        return ret;
 103}
 104#else /* !SPARSEMEM_EXTREME */
 105static inline int sparse_index_init(unsigned long section_nr, int nid)
 106{
 107        return 0;
 108}
 109#endif
 110
 111/*
 112 * Although written for the SPARSEMEM_EXTREME case, this happens
 113 * to also work for the flat array case because
 114 * NR_SECTION_ROOTS==NR_MEM_SECTIONS.
 115 */
 116int __section_nr(struct mem_section* ms)
 117{
 118        unsigned long root_nr;
 119        struct mem_section* root;
 120
 121        for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
 122                root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
 123                if (!root)
 124                        continue;
 125
 126                if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
 127                     break;
 128        }
 129
 130        return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
 131}
 132
 133/*
 134 * During early boot, before section_mem_map is used for an actual
 135 * mem_map, we use section_mem_map to store the section's NUMA
 136 * node.  This keeps us from having to use another data structure.  The
 137 * node information is cleared just before we store the real mem_map.
 138 */
 139static inline unsigned long sparse_encode_early_nid(int nid)
 140{
 141        return (nid << SECTION_NID_SHIFT);
 142}
 143
 144static inline int sparse_early_nid(struct mem_section *section)
 145{
 146        return (section->section_mem_map >> SECTION_NID_SHIFT);
 147}
 148
 149/* Record a memory area against a node. */
 150void __init memory_present(int nid, unsigned long start, unsigned long end)
 151{
 152        unsigned long max_arch_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
 153        unsigned long pfn;
 154
 155        /*
 156         * Sanity checks - do not allow an architecture to pass
 157         * in larger pfns than the maximum scope of sparsemem:
 158         */
 159        if (start >= max_arch_pfn)
 160                return;
 161        if (end >= max_arch_pfn)
 162                end = max_arch_pfn;
 163
 164        start &= PAGE_SECTION_MASK;
 165        for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
 166                unsigned long section = pfn_to_section_nr(pfn);
 167                struct mem_section *ms;
 168
 169                sparse_index_init(section, nid);
 170                set_section_nid(section, nid);
 171
 172                ms = __nr_to_section(section);
 173                if (!ms->section_mem_map)
 174                        ms->section_mem_map = sparse_encode_early_nid(nid) |
 175                                                        SECTION_MARKED_PRESENT;
 176        }
 177}
 178
 179/*
 180 * Only used by the i386 NUMA architecures, but relatively
 181 * generic code.
 182 */
 183unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
 184                                                     unsigned long end_pfn)
 185{
 186        unsigned long pfn;
 187        unsigned long nr_pages = 0;
 188
 189        for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
 190                if (nid != early_pfn_to_nid(pfn))
 191                        continue;
 192
 193                if (pfn_present(pfn))
 194                        nr_pages += PAGES_PER_SECTION;
 195        }
 196
 197        return nr_pages * sizeof(struct page);
 198}
 199
 200/*
 201 * Subtle, we encode the real pfn into the mem_map such that
 202 * the identity pfn - section_mem_map will return the actual
 203 * physical page frame number.
 204 */
 205static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
 206{
 207        return (unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
 208}
 209
 210/*
 211 * We need this if we ever free the mem_maps.  While not implemented yet,
 212 * this function is included for parity with its sibling.
 213 */
 214static __attribute((unused))
 215struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
 216{
 217        return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
 218}
 219
 220static int __meminit sparse_init_one_section(struct mem_section *ms,
 221                unsigned long pnum, struct page *mem_map,
 222                unsigned long *pageblock_bitmap)
 223{
 224        if (!present_section(ms))
 225                return -EINVAL;
 226
 227        ms->section_mem_map &= ~SECTION_MAP_MASK;
 228        ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) |
 229                                                        SECTION_HAS_MEM_MAP;
 230        ms->pageblock_flags = pageblock_bitmap;
 231
 232        return 1;
 233}
 234
 235static unsigned long usemap_size(void)
 236{
 237        unsigned long size_bytes;
 238        size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8;
 239        size_bytes = roundup(size_bytes, sizeof(unsigned long));
 240        return size_bytes;
 241}
 242
 243#ifdef CONFIG_MEMORY_HOTPLUG
 244static unsigned long *__kmalloc_section_usemap(void)
 245{
 246        return kmalloc(usemap_size(), GFP_KERNEL);
 247}
 248#endif /* CONFIG_MEMORY_HOTPLUG */
 249
 250static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum)
 251{
 252        unsigned long *usemap;
 253        struct mem_section *ms = __nr_to_section(pnum);
 254        int nid = sparse_early_nid(ms);
 255
 256        usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
 257        if (usemap)
 258                return usemap;
 259
 260        /* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */
 261        nid = 0;
 262
 263        printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__);
 264        return NULL;
 265}
 266
 267#ifndef CONFIG_SPARSEMEM_VMEMMAP
 268struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
 269{
 270        struct page *map;
 271
 272        map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
 273        if (map)
 274                return map;
 275
 276        map = alloc_bootmem_node(NODE_DATA(nid),
 277                        sizeof(struct page) * PAGES_PER_SECTION);
 278        return map;
 279}
 280#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
 281
 282struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
 283{
 284        struct page *map;
 285        struct mem_section *ms = __nr_to_section(pnum);
 286        int nid = sparse_early_nid(ms);
 287
 288        map = sparse_mem_map_populate(pnum, nid);
 289        if (map)
 290                return map;
 291
 292        printk(KERN_ERR "%s: sparsemem memory map backing failed "
 293                        "some memory will not be available.\n", __FUNCTION__);
 294        ms->section_mem_map = 0;
 295        return NULL;
 296}
 297
 298/*
 299 * Allocate the accumulated non-linear sections, allocate a mem_map
 300 * for each and record the physical to section mapping.
 301 */
 302void __init sparse_init(void)
 303{
 304        unsigned long pnum;
 305        struct page *map;
 306        unsigned long *usemap;
 307
 308        for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
 309                if (!present_section_nr(pnum))
 310                        continue;
 311
 312                map = sparse_early_mem_map_alloc(pnum);
 313                if (!map)
 314                        continue;
 315
 316                usemap = sparse_early_usemap_alloc(pnum);
 317                if (!usemap)
 318                        continue;
 319
 320                sparse_init_one_section(__nr_to_section(pnum), pnum, map,
 321                                                                usemap);
 322        }
 323}
 324
 325#ifdef CONFIG_MEMORY_HOTPLUG
 326#ifdef CONFIG_SPARSEMEM_VMEMMAP
 327static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
 328                                                 unsigned long nr_pages)
 329{
 330        /* This will make the necessary allocations eventually. */
 331        return sparse_mem_map_populate(pnum, nid);
 332}
 333static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
 334{
 335        return; /* XXX: Not implemented yet */
 336}
 337#else
 338static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
 339{
 340        struct page *page, *ret;
 341        unsigned long memmap_size = sizeof(struct page) * nr_pages;
 342
 343        page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
 344        if (page)
 345                goto got_map_page;
 346
 347        ret = vmalloc(memmap_size);
 348        if (ret)
 349                goto got_map_ptr;
 350
 351        return NULL;
 352got_map_page:
 353        ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
 354got_map_ptr:
 355        memset(ret, 0, memmap_size);
 356
 357        return ret;
 358}
 359
 360static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
 361                                                  unsigned long nr_pages)
 362{
 363        return __kmalloc_section_memmap(nr_pages);
 364}
 365
 366static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
 367{
 368        if (is_vmalloc_addr(memmap))
 369                vfree(memmap);
 370        else
 371                free_pages((unsigned long)memmap,
 372                           get_order(sizeof(struct page) * nr_pages));
 373}
 374#endif /* CONFIG_SPARSEMEM_VMEMMAP */
 375
 376/*
 377 * returns the number of sections whose mem_maps were properly
 378 * set.  If this is <=0, then that means that the passed-in
 379 * map was not consumed and must be freed.
 380 */
 381int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 382                           int nr_pages)
 383{
 384        unsigned long section_nr = pfn_to_section_nr(start_pfn);
 385        struct pglist_data *pgdat = zone->zone_pgdat;
 386        struct mem_section *ms;
 387        struct page *memmap;
 388        unsigned long *usemap;
 389        unsigned long flags;
 390        int ret;
 391
 392        /*
 393         * no locking for this, because it does its own
 394         * plus, it does a kmalloc
 395         */
 396        ret = sparse_index_init(section_nr, pgdat->node_id);
 397        if (ret < 0 && ret != -EEXIST)
 398                return ret;
 399        memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, nr_pages);
 400        if (!memmap)
 401                return -ENOMEM;
 402        usemap = __kmalloc_section_usemap();
 403        if (!usemap) {
 404                __kfree_section_memmap(memmap, nr_pages);
 405                return -ENOMEM;
 406        }
 407
 408        pgdat_resize_lock(pgdat, &flags);
 409
 410        ms = __pfn_to_section(start_pfn);
 411        if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
 412                ret = -EEXIST;
 413                goto out;
 414        }
 415
 416        ms->section_mem_map |= SECTION_MARKED_PRESENT;
 417
 418        ret = sparse_init_one_section(ms, section_nr, memmap, usemap);
 419
 420out:
 421        pgdat_resize_unlock(pgdat, &flags);
 422        if (ret <= 0) {
 423                kfree(usemap);
 424                __kfree_section_memmap(memmap, nr_pages);
 425        }
 426        return ret;
 427}
 428#endif
 429
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.