linux/arch/powerpc/kernel/machine_kexec_64.c
<<
>>
Prefs
   1/*
   2 * PPC64 code to handle Linux booting another kernel.
   3 *
   4 * Copyright (C) 2004-2005, IBM Corp.
   5 *
   6 * Created by: Milton D Miller II
   7 *
   8 * This source code is licensed under the GNU General Public License,
   9 * Version 2.  See the file COPYING for more details.
  10 */
  11
  12
  13#include <linux/kexec.h>
  14#include <linux/smp.h>
  15#include <linux/thread_info.h>
  16#include <linux/errno.h>
  17
  18#include <asm/page.h>
  19#include <asm/current.h>
  20#include <asm/machdep.h>
  21#include <asm/cacheflush.h>
  22#include <asm/paca.h>
  23#include <asm/mmu.h>
  24#include <asm/sections.h>       /* _end */
  25#include <asm/prom.h>
  26#include <asm/smp.h>
  27
  28int default_machine_kexec_prepare(struct kimage *image)
  29{
  30        int i;
  31        unsigned long begin, end;       /* limits of segment */
  32        unsigned long low, high;        /* limits of blocked memory range */
  33        struct device_node *node;
  34        const unsigned long *basep;
  35        const unsigned int *sizep;
  36
  37        if (!ppc_md.hpte_clear_all)
  38                return -ENOENT;
  39
  40        /*
  41         * Since we use the kernel fault handlers and paging code to
  42         * handle the virtual mode, we must make sure no destination
  43         * overlaps kernel static data or bss.
  44         */
  45        for (i = 0; i < image->nr_segments; i++)
  46                if (image->segment[i].mem < __pa(_end))
  47                        return -ETXTBSY;
  48
  49        /*
  50         * For non-LPAR, we absolutely can not overwrite the mmu hash
  51         * table, since we are still using the bolted entries in it to
  52         * do the copy.  Check that here.
  53         *
  54         * It is safe if the end is below the start of the blocked
  55         * region (end <= low), or if the beginning is after the
  56         * end of the blocked region (begin >= high).  Use the
  57         * boolean identity !(a || b)  === (!a && !b).
  58         */
  59        if (htab_address) {
  60                low = __pa(htab_address);
  61                high = low + htab_size_bytes;
  62
  63                for (i = 0; i < image->nr_segments; i++) {
  64                        begin = image->segment[i].mem;
  65                        end = begin + image->segment[i].memsz;
  66
  67                        if ((begin < high) && (end > low))
  68                                return -ETXTBSY;
  69                }
  70        }
  71
  72        /* We also should not overwrite the tce tables */
  73        for (node = of_find_node_by_type(NULL, "pci"); node != NULL;
  74                        node = of_find_node_by_type(node, "pci")) {
  75                basep = of_get_property(node, "linux,tce-base", NULL);
  76                sizep = of_get_property(node, "linux,tce-size", NULL);
  77                if (basep == NULL || sizep == NULL)
  78                        continue;
  79
  80                low = *basep;
  81                high = low + (*sizep);
  82
  83                for (i = 0; i < image->nr_segments; i++) {
  84                        begin = image->segment[i].mem;
  85                        end = begin + image->segment[i].memsz;
  86
  87                        if ((begin < high) && (end > low))
  88                                return -ETXTBSY;
  89                }
  90        }
  91
  92        return 0;
  93}
  94
  95#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE)
  96
  97static void copy_segments(unsigned long ind)
  98{
  99        unsigned long entry;
 100        unsigned long *ptr;
 101        void *dest;
 102        void *addr;
 103
 104        /*
 105         * We rely on kexec_load to create a lists that properly
 106         * initializes these pointers before they are used.
 107         * We will still crash if the list is wrong, but at least
 108         * the compiler will be quiet.
 109         */
 110        ptr = NULL;
 111        dest = NULL;
 112
 113        for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
 114                addr = __va(entry & PAGE_MASK);
 115
 116                switch (entry & IND_FLAGS) {
 117                case IND_DESTINATION:
 118                        dest = addr;
 119                        break;
 120                case IND_INDIRECTION:
 121                        ptr = addr;
 122                        break;
 123                case IND_SOURCE:
 124                        copy_page(dest, addr);
 125                        dest += PAGE_SIZE;
 126                }
 127        }
 128}
 129
 130void kexec_copy_flush(struct kimage *image)
 131{
 132        long i, nr_segments = image->nr_segments;
 133        struct  kexec_segment ranges[KEXEC_SEGMENT_MAX];
 134
 135        /* save the ranges on the stack to efficiently flush the icache */
 136        memcpy(ranges, image->segment, sizeof(ranges));
 137
 138        /*
 139         * After this call we may not use anything allocated in dynamic
 140         * memory, including *image.
 141         *
 142         * Only globals and the stack are allowed.
 143         */
 144        copy_segments(image->head);
 145
 146        /*
 147         * we need to clear the icache for all dest pages sometime,
 148         * including ones that were in place on the original copy
 149         */
 150        for (i = 0; i < nr_segments; i++)
 151                flush_icache_range((unsigned long)__va(ranges[i].mem),
 152                        (unsigned long)__va(ranges[i].mem + ranges[i].memsz));
 153}
 154
 155#ifdef CONFIG_SMP
 156
 157/* FIXME: we should schedule this function to be called on all cpus based
 158 * on calling the interrupts, but we would like to call it off irq level
 159 * so that the interrupt controller is clean.
 160 */
 161static void kexec_smp_down(void *arg)
 162{
 163        if (ppc_md.kexec_cpu_down)
 164                ppc_md.kexec_cpu_down(0, 1);
 165
 166        local_irq_disable();
 167        kexec_smp_wait();
 168        /* NOTREACHED */
 169}
 170
 171static void kexec_prepare_cpus(void)
 172{
 173        int my_cpu, i, notified=-1;
 174
 175        smp_call_function(kexec_smp_down, NULL, /* wait */0);
 176        my_cpu = get_cpu();
 177
 178        /* check the others cpus are now down (via paca hw cpu id == -1) */
 179        for (i=0; i < NR_CPUS; i++) {
 180                if (i == my_cpu)
 181                        continue;
 182
 183                while (paca[i].hw_cpu_id != -1) {
 184                        barrier();
 185                        if (!cpu_possible(i)) {
 186                                printk("kexec: cpu %d hw_cpu_id %d is not"
 187                                                " possible, ignoring\n",
 188                                                i, paca[i].hw_cpu_id);
 189                                break;
 190                        }
 191                        if (!cpu_online(i)) {
 192                                /* Fixme: this can be spinning in
 193                                 * pSeries_secondary_wait with a paca
 194                                 * waiting for it to go online.
 195                                 */
 196                                printk("kexec: cpu %d hw_cpu_id %d is not"
 197                                                " online, ignoring\n",
 198                                                i, paca[i].hw_cpu_id);
 199                                break;
 200                        }
 201                        if (i != notified) {
 202                                printk( "kexec: waiting for cpu %d (physical"
 203                                                " %d) to go down\n",
 204                                                i, paca[i].hw_cpu_id);
 205                                notified = i;
 206                        }
 207                }
 208        }
 209
 210        /* after we tell the others to go down */
 211        if (ppc_md.kexec_cpu_down)
 212                ppc_md.kexec_cpu_down(0, 0);
 213
 214        put_cpu();
 215
 216        local_irq_disable();
 217}
 218
 219#else /* ! SMP */
 220
 221static void kexec_prepare_cpus(void)
 222{
 223        /*
 224         * move the secondarys to us so that we can copy
 225         * the new kernel 0-0x100 safely
 226         *
 227         * do this if kexec in setup.c ?
 228         *
 229         * We need to release the cpus if we are ever going from an
 230         * UP to an SMP kernel.
 231         */
 232        smp_release_cpus();
 233        if (ppc_md.kexec_cpu_down)
 234                ppc_md.kexec_cpu_down(0, 0);
 235        local_irq_disable();
 236}
 237
 238#endif /* SMP */
 239
 240/*
 241 * kexec thread structure and stack.
 242 *
 243 * We need to make sure that this is 16384-byte aligned due to the
 244 * way process stacks are handled.  It also must be statically allocated
 245 * or allocated as part of the kimage, because everything else may be
 246 * overwritten when we copy the kexec image.  We piggyback on the
 247 * "init_task" linker section here to statically allocate a stack.
 248 *
 249 * We could use a smaller stack if we don't care about anything using
 250 * current, but that audit has not been performed.
 251 */
 252static union thread_union kexec_stack
 253        __attribute__((__section__(".data.init_task"))) = { };
 254
 255/* Our assembly helper, in kexec_stub.S */
 256extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
 257                                        void *image, void *control,
 258                                        void (*clear_all)(void)) ATTRIB_NORET;
 259
 260/* too late to fail here */
 261void default_machine_kexec(struct kimage *image)
 262{
 263        /* prepare control code if any */
 264
 265        /*
 266        * If the kexec boot is the normal one, need to shutdown other cpus
 267        * into our wait loop and quiesce interrupts.
 268        * Otherwise, in the case of crashed mode (crashing_cpu >= 0),
 269        * stopping other CPUs and collecting their pt_regs is done before
 270        * using debugger IPI.
 271        */
 272
 273        if (crashing_cpu == -1)
 274                kexec_prepare_cpus();
 275
 276        /* switch to a staticly allocated stack.  Based on irq stack code.
 277         * XXX: the task struct will likely be invalid once we do the copy!
 278         */
 279        kexec_stack.thread_info.task = current_thread_info()->task;
 280        kexec_stack.thread_info.flags = 0;
 281
 282        /* Some things are best done in assembly.  Finding globals with
 283         * a toc is easier in C, so pass in what we can.
 284         */
 285        kexec_sequence(&kexec_stack, image->start, image,
 286                        page_address(image->control_code_page),
 287                        ppc_md.hpte_clear_all);
 288        /* NOTREACHED */
 289}
 290
 291/* Values we need to export to the second kernel via the device tree. */
 292static unsigned long htab_base;
 293
 294static struct property htab_base_prop = {
 295        .name = "linux,htab-base",
 296        .length = sizeof(unsigned long),
 297        .value = &htab_base,
 298};
 299
 300static struct property htab_size_prop = {
 301        .name = "linux,htab-size",
 302        .length = sizeof(unsigned long),
 303        .value = &htab_size_bytes,
 304};
 305
 306static int __init export_htab_values(void)
 307{
 308        struct device_node *node;
 309        struct property *prop;
 310
 311        /* On machines with no htab htab_address is NULL */
 312        if (!htab_address)
 313                return -ENODEV;
 314
 315        node = of_find_node_by_path("/chosen");
 316        if (!node)
 317                return -ENODEV;
 318
 319        /* remove any stale propertys so ours can be found */
 320        prop = of_find_property(node, htab_base_prop.name, NULL);
 321        if (prop)
 322                prom_remove_property(node, prop);
 323        prop = of_find_property(node, htab_size_prop.name, NULL);
 324        if (prop)
 325                prom_remove_property(node, prop);
 326
 327        htab_base = __pa(htab_address);
 328        prom_add_property(node, &htab_base_prop);
 329        prom_add_property(node, &htab_size_prop);
 330
 331        of_node_put(node);
 332        return 0;
 333}
 334late_initcall(export_htab_values);
 335
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.