linux/mm/memory_hotplug.c
<<
>>
Prefs
   1/*
   2 *  linux/mm/memory_hotplug.c
   3 *
   4 *  Copyright (C)
   5 */
   6
   7#include <linux/stddef.h>
   8#include <linux/mm.h>
   9#include <linux/swap.h>
  10#include <linux/interrupt.h>
  11#include <linux/pagemap.h>
  12#include <linux/bootmem.h>
  13#include <linux/compiler.h>
  14#include <linux/module.h>
  15#include <linux/pagevec.h>
  16#include <linux/writeback.h>
  17#include <linux/slab.h>
  18#include <linux/sysctl.h>
  19#include <linux/cpu.h>
  20#include <linux/memory.h>
  21#include <linux/memory_hotplug.h>
  22#include <linux/highmem.h>
  23#include <linux/vmalloc.h>
  24#include <linux/ioport.h>
  25#include <linux/cpuset.h>
  26
  27#include <asm/tlbflush.h>
  28
  29/* add this memory to iomem resource */
  30static struct resource *register_memory_resource(u64 start, u64 size)
  31{
  32        struct resource *res;
  33        res = kzalloc(sizeof(struct resource), GFP_KERNEL);
  34        BUG_ON(!res);
  35
  36        res->name = "System RAM";
  37        res->start = start;
  38        res->end = start + size - 1;
  39        res->flags = IORESOURCE_MEM;
  40        if (request_resource(&iomem_resource, res) < 0) {
  41                printk("System RAM resource %llx - %llx cannot be added\n",
  42                (unsigned long long)res->start, (unsigned long long)res->end);
  43                kfree(res);
  44                res = NULL;
  45        }
  46        return res;
  47}
  48
  49static void release_memory_resource(struct resource *res)
  50{
  51        if (!res)
  52                return;
  53        release_resource(res);
  54        kfree(res);
  55        return;
  56}
  57
  58
  59#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
  60static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
  61{
  62        struct pglist_data *pgdat = zone->zone_pgdat;
  63        int nr_pages = PAGES_PER_SECTION;
  64        int nid = pgdat->node_id;
  65        int zone_type;
  66
  67        zone_type = zone - pgdat->node_zones;
  68        if (!zone->wait_table) {
  69                int ret = 0;
  70                ret = init_currently_empty_zone(zone, phys_start_pfn,
  71                                                nr_pages, MEMMAP_HOTPLUG);
  72                if (ret < 0)
  73                        return ret;
  74        }
  75        memmap_init_zone(nr_pages, nid, zone_type,
  76                         phys_start_pfn, MEMMAP_HOTPLUG);
  77        return 0;
  78}
  79
  80static int __add_section(struct zone *zone, unsigned long phys_start_pfn)
  81{
  82        int nr_pages = PAGES_PER_SECTION;
  83        int ret;
  84
  85        if (pfn_valid(phys_start_pfn))
  86                return -EEXIST;
  87
  88        ret = sparse_add_one_section(zone, phys_start_pfn, nr_pages);
  89
  90        if (ret < 0)
  91                return ret;
  92
  93        ret = __add_zone(zone, phys_start_pfn);
  94
  95        if (ret < 0)
  96                return ret;
  97
  98        return register_new_memory(__pfn_to_section(phys_start_pfn));
  99}
 100
 101/*
 102 * Reasonably generic function for adding memory.  It is
 103 * expected that archs that support memory hotplug will
 104 * call this function after deciding the zone to which to
 105 * add the new pages.
 106 */
 107int __add_pages(struct zone *zone, unsigned long phys_start_pfn,
 108                 unsigned long nr_pages)
 109{
 110        unsigned long i;
 111        int err = 0;
 112        int start_sec, end_sec;
 113        /* during initialize mem_map, align hot-added range to section */
 114        start_sec = pfn_to_section_nr(phys_start_pfn);
 115        end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
 116
 117        for (i = start_sec; i <= end_sec; i++) {
 118                err = __add_section(zone, i << PFN_SECTION_SHIFT);
 119
 120                /*
 121                 * EEXIST is finally dealed with by ioresource collision
 122                 * check. see add_memory() => register_memory_resource()
 123                 * Warning will be printed if there is collision.
 124                 */
 125                if (err && (err != -EEXIST))
 126                        break;
 127                err = 0;
 128        }
 129
 130        return err;
 131}
 132EXPORT_SYMBOL_GPL(__add_pages);
 133
 134static void grow_zone_span(struct zone *zone,
 135                unsigned long start_pfn, unsigned long end_pfn)
 136{
 137        unsigned long old_zone_end_pfn;
 138
 139        zone_span_writelock(zone);
 140
 141        old_zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
 142        if (start_pfn < zone->zone_start_pfn)
 143                zone->zone_start_pfn = start_pfn;
 144
 145        zone->spanned_pages = max(old_zone_end_pfn, end_pfn) -
 146                                zone->zone_start_pfn;
 147
 148        zone_span_writeunlock(zone);
 149}
 150
 151static void grow_pgdat_span(struct pglist_data *pgdat,
 152                unsigned long start_pfn, unsigned long end_pfn)
 153{
 154        unsigned long old_pgdat_end_pfn =
 155                pgdat->node_start_pfn + pgdat->node_spanned_pages;
 156
 157        if (start_pfn < pgdat->node_start_pfn)
 158                pgdat->node_start_pfn = start_pfn;
 159
 160        pgdat->node_spanned_pages = max(old_pgdat_end_pfn, end_pfn) -
 161                                        pgdat->node_start_pfn;
 162}
 163
 164int online_pages(unsigned long pfn, unsigned long nr_pages)
 165{
 166        unsigned long i;
 167        unsigned long flags;
 168        unsigned long onlined_pages = 0;
 169        struct resource res;
 170        u64 section_end;
 171        unsigned long start_pfn;
 172        struct zone *zone;
 173        int need_zonelists_rebuild = 0;
 174
 175        /*
 176         * This doesn't need a lock to do pfn_to_page().
 177         * The section can't be removed here because of the
 178         * memory_block->state_sem.
 179         */
 180        zone = page_zone(pfn_to_page(pfn));
 181        pgdat_resize_lock(zone->zone_pgdat, &flags);
 182        grow_zone_span(zone, pfn, pfn + nr_pages);
 183        grow_pgdat_span(zone->zone_pgdat, pfn, pfn + nr_pages);
 184        pgdat_resize_unlock(zone->zone_pgdat, &flags);
 185
 186        /*
 187         * If this zone is not populated, then it is not in zonelist.
 188         * This means the page allocator ignores this zone.
 189         * So, zonelist must be updated after online.
 190         */
 191        if (!populated_zone(zone))
 192                need_zonelists_rebuild = 1;
 193
 194        res.start = (u64)pfn << PAGE_SHIFT;
 195        res.end = res.start + ((u64)nr_pages << PAGE_SHIFT) - 1;
 196        res.flags = IORESOURCE_MEM; /* we just need system ram */
 197        section_end = res.end;
 198
 199        while ((res.start < res.end) && (find_next_system_ram(&res) >= 0)) {
 200                start_pfn = (unsigned long)(res.start >> PAGE_SHIFT);
 201                nr_pages = (unsigned long)
 202                           ((res.end + 1 - res.start) >> PAGE_SHIFT);
 203
 204                if (PageReserved(pfn_to_page(start_pfn))) {
 205                        /* this region's page is not onlined now */
 206                        for (i = 0; i < nr_pages; i++) {
 207                                struct page *page = pfn_to_page(start_pfn + i);
 208                                online_page(page);
 209                                onlined_pages++;
 210                        }
 211                }
 212
 213                res.start = res.end + 1;
 214                res.end = section_end;
 215        }
 216        zone->present_pages += onlined_pages;
 217        zone->zone_pgdat->node_present_pages += onlined_pages;
 218
 219        setup_per_zone_pages_min();
 220
 221        if (need_zonelists_rebuild)
 222                build_all_zonelists();
 223        vm_total_pages = nr_free_pagecache_pages();
 224        writeback_set_ratelimit();
 225        return 0;
 226}
 227#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
 228
 229static pg_data_t *hotadd_new_pgdat(int nid, u64 start)
 230{
 231        struct pglist_data *pgdat;
 232        unsigned long zones_size[MAX_NR_ZONES] = {0};
 233        unsigned long zholes_size[MAX_NR_ZONES] = {0};
 234        unsigned long start_pfn = start >> PAGE_SHIFT;
 235
 236        pgdat = arch_alloc_nodedata(nid);
 237        if (!pgdat)
 238                return NULL;
 239
 240        arch_refresh_nodedata(nid, pgdat);
 241
 242        /* we can use NODE_DATA(nid) from here */
 243
 244        /* init node's zones as empty zones, we don't have any present pages.*/
 245        free_area_init_node(nid, pgdat, zones_size, start_pfn, zholes_size);
 246
 247        return pgdat;
 248}
 249
 250static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
 251{
 252        arch_refresh_nodedata(nid, NULL);
 253        arch_free_nodedata(pgdat);
 254        return;
 255}
 256
 257
 258int add_memory(int nid, u64 start, u64 size)
 259{
 260        pg_data_t *pgdat = NULL;
 261        int new_pgdat = 0;
 262        struct resource *res;
 263        int ret;
 264
 265        res = register_memory_resource(start, size);
 266        if (!res)
 267                return -EEXIST;
 268
 269        if (!node_online(nid)) {
 270                pgdat = hotadd_new_pgdat(nid, start);
 271                if (!pgdat)
 272                        return -ENOMEM;
 273                new_pgdat = 1;
 274                ret = kswapd_run(nid);
 275                if (ret)
 276                        goto error;
 277        }
 278
 279        /* call arch's memory hotadd */
 280        ret = arch_add_memory(nid, start, size);
 281
 282        if (ret < 0)
 283                goto error;
 284
 285        /* we online node here. we can't roll back from here. */
 286        node_set_online(nid);
 287
 288        cpuset_track_online_nodes();
 289
 290        if (new_pgdat) {
 291                ret = register_one_node(nid);
 292                /*
 293                 * If sysfs file of new node can't create, cpu on the node
 294                 * can't be hot-added. There is no rollback way now.
 295                 * So, check by BUG_ON() to catch it reluctantly..
 296                 */
 297                BUG_ON(ret);
 298        }
 299
 300        return ret;
 301error:
 302        /* rollback pgdat allocation and others */
 303        if (new_pgdat)
 304                rollback_node_hotadd(nid, pgdat);
 305        if (res)
 306                release_memory_resource(res);
 307
 308        return ret;
 309}
 310EXPORT_SYMBOL_GPL(add_memory);
 311
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.