/* linux/arch/powerpc/kernel/crash.c */
   1/*
   2 * Architecture specific (PPC64) functions for kexec based crash dumps.
   3 *
   4 * Copyright (C) 2005, IBM Corp.
   5 *
   6 * Created by: Haren Myneni
   7 *
   8 * This source code is licensed under the GNU General Public License,
   9 * Version 2.  See the file COPYING for more details.
  10 *
  11 */
  12
  13#undef DEBUG
  14
  15#include <linux/kernel.h>
  16#include <linux/smp.h>
  17#include <linux/reboot.h>
  18#include <linux/kexec.h>
  19#include <linux/bootmem.h>
  20#include <linux/crash_dump.h>
  21#include <linux/delay.h>
  22#include <linux/elf.h>
  23#include <linux/elfcore.h>
  24#include <linux/init.h>
  25#include <linux/irq.h>
  26#include <linux/types.h>
  27#include <linux/lmb.h>
  28
  29#include <asm/processor.h>
  30#include <asm/machdep.h>
  31#include <asm/kexec.h>
  32#include <asm/kdump.h>
  33#include <asm/prom.h>
  34#include <asm/firmware.h>
  35#include <asm/smp.h>
  36#include <asm/system.h>
  37#include <asm/setjmp.h>
  38
  39#ifdef DEBUG
  40#include <asm/udbg.h>
  41#define DBG(fmt...) udbg_printf(fmt)
  42#else
  43#define DBG(fmt...)
  44#endif
  45
  46/* This keeps a track of which one is crashing cpu. */
  47int crashing_cpu = -1;
  48static cpumask_t cpus_in_crash = CPU_MASK_NONE;
  49cpumask_t cpus_in_sr = CPU_MASK_NONE;
  50
  51#define CRASH_HANDLER_MAX 2
  52/* NULL terminated list of shutdown handles */
  53static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1];
  54static DEFINE_SPINLOCK(crash_handlers_lock);
  55
  56#ifdef CONFIG_SMP
  57static atomic_t enter_on_soft_reset = ATOMIC_INIT(0);
  58
  59void crash_ipi_callback(struct pt_regs *regs)
  60{
  61        int cpu = smp_processor_id();
  62
  63        if (!cpu_online(cpu))
  64                return;
  65
  66        hard_irq_disable();
  67        if (!cpu_isset(cpu, cpus_in_crash))
  68                crash_save_cpu(regs, cpu);
  69        cpu_set(cpu, cpus_in_crash);
  70
  71        /*
  72         * Entered via soft-reset - could be the kdump
  73         * process is invoked using soft-reset or user activated
  74         * it if some CPU did not respond to an IPI.
  75         * For soft-reset, the secondary CPU can enter this func
  76         * twice. 1 - using IPI, and 2. soft-reset.
  77         * Tell the kexec CPU that entered via soft-reset and ready
  78         * to go down.
  79         */
  80        if (cpu_isset(cpu, cpus_in_sr)) {
  81                cpu_clear(cpu, cpus_in_sr);
  82                atomic_inc(&enter_on_soft_reset);
  83        }
  84
  85        /*
  86         * Starting the kdump boot.
  87         * This barrier is needed to make sure that all CPUs are stopped.
  88         * If not, soft-reset will be invoked to bring other CPUs.
  89         */
  90        while (!cpu_isset(crashing_cpu, cpus_in_crash))
  91                cpu_relax();
  92
  93        if (ppc_md.kexec_cpu_down)
  94                ppc_md.kexec_cpu_down(1, 1);
  95
  96#ifdef CONFIG_PPC64
  97        kexec_smp_wait();
  98#else
  99        for (;;);       /* FIXME */
 100#endif
 101
 102        /* NOTREACHED */
 103}
 104
 105/*
 106 * Wait until all CPUs are entered via soft-reset.
 107 */
 108static void crash_soft_reset_check(int cpu)
 109{
 110        unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
 111
 112        cpu_clear(cpu, cpus_in_sr);
 113        while (atomic_read(&enter_on_soft_reset) != ncpus)
 114                cpu_relax();
 115}
 116
 117
 118static void crash_kexec_prepare_cpus(int cpu)
 119{
 120        unsigned int msecs;
 121
 122        unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
 123
 124        crash_send_ipi(crash_ipi_callback);
 125        smp_wmb();
 126
 127        /*
 128         * FIXME: Until we will have the way to stop other CPUSs reliabally,
 129         * the crash CPU will send an IPI and wait for other CPUs to
 130         * respond.
 131         * Delay of at least 10 seconds.
 132         */
 133        printk(KERN_EMERG "Sending IPI to other cpus...\n");
 134        msecs = 10000;
 135        while ((cpus_weight(cpus_in_crash) < ncpus) && (--msecs > 0)) {
 136                cpu_relax();
 137                mdelay(1);
 138        }
 139
 140        /* Would it be better to replace the trap vector here? */
 141
 142        /*
 143         * FIXME: In case if we do not get all CPUs, one possibility: ask the
 144         * user to do soft reset such that we get all.
 145         * Soft-reset will be used until better mechanism is implemented.
 146         */
 147        if (cpus_weight(cpus_in_crash) < ncpus) {
 148                printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n",
 149                        ncpus - cpus_weight(cpus_in_crash));
 150                printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n");
 151                cpus_in_sr = CPU_MASK_NONE;
 152                atomic_set(&enter_on_soft_reset, 0);
 153                while (cpus_weight(cpus_in_crash) < ncpus)
 154                        cpu_relax();
 155        }
 156        /*
 157         * Make sure all CPUs are entered via soft-reset if the kdump is
 158         * invoked using soft-reset.
 159         */
 160        if (cpu_isset(cpu, cpus_in_sr))
 161                crash_soft_reset_check(cpu);
 162        /* Leave the IPI callback set */
 163}
 164
 165/*
 166 * This function will be called by secondary cpus or by kexec cpu
 167 * if soft-reset is activated to stop some CPUs.
 168 */
 169void crash_kexec_secondary(struct pt_regs *regs)
 170{
 171        int cpu = smp_processor_id();
 172        unsigned long flags;
 173        int msecs = 5;
 174
 175        local_irq_save(flags);
 176        /* Wait 5ms if the kexec CPU is not entered yet. */
 177        while (crashing_cpu < 0) {
 178                if (--msecs < 0) {
 179                        /*
 180                         * Either kdump image is not loaded or
 181                         * kdump process is not started - Probably xmon
 182                         * exited using 'x'(exit and recover) or
 183                         * kexec_should_crash() failed for all running tasks.
 184                         */
 185                        cpu_clear(cpu, cpus_in_sr);
 186                        local_irq_restore(flags);
 187                        return;
 188                }
 189                mdelay(1);
 190                cpu_relax();
 191        }
 192        if (cpu == crashing_cpu) {
 193                /*
 194                 * Panic CPU will enter this func only via soft-reset.
 195                 * Wait until all secondary CPUs entered and
 196                 * then start kexec boot.
 197                 */
 198                crash_soft_reset_check(cpu);
 199                cpu_set(crashing_cpu, cpus_in_crash);
 200                if (ppc_md.kexec_cpu_down)
 201                        ppc_md.kexec_cpu_down(1, 0);
 202                machine_kexec(kexec_crash_image);
 203                /* NOTREACHED */
 204        }
 205        crash_ipi_callback(regs);
 206}
 207
 208#else
 209static void crash_kexec_prepare_cpus(int cpu)
 210{
 211        /*
 212         * move the secondarys to us so that we can copy
 213         * the new kernel 0-0x100 safely
 214         *
 215         * do this if kexec in setup.c ?
 216         */
 217#ifdef CONFIG_PPC64
 218        smp_release_cpus();
 219#else
 220        /* FIXME */
 221#endif
 222}
 223
 224void crash_kexec_secondary(struct pt_regs *regs)
 225{
 226        cpus_in_sr = CPU_MASK_NONE;
 227}
 228#endif
 229#ifdef CONFIG_SPU_BASE
 230
 231#include <asm/spu.h>
 232#include <asm/spu_priv1.h>
 233
 234struct crash_spu_info {
 235        struct spu *spu;
 236        u32 saved_spu_runcntl_RW;
 237        u32 saved_spu_status_R;
 238        u32 saved_spu_npc_RW;
 239        u64 saved_mfc_sr1_RW;
 240        u64 saved_mfc_dar;
 241        u64 saved_mfc_dsisr;
 242};
 243
 244#define CRASH_NUM_SPUS  16      /* Enough for current hardware */
 245static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS];
 246
 247static void crash_kexec_stop_spus(void)
 248{
 249        struct spu *spu;
 250        int i;
 251        u64 tmp;
 252
 253        for (i = 0; i < CRASH_NUM_SPUS; i++) {
 254                if (!crash_spu_info[i].spu)
 255                        continue;
 256
 257                spu = crash_spu_info[i].spu;
 258
 259                crash_spu_info[i].saved_spu_runcntl_RW =
 260                        in_be32(&spu->problem->spu_runcntl_RW);
 261                crash_spu_info[i].saved_spu_status_R =
 262                        in_be32(&spu->problem->spu_status_R);
 263                crash_spu_info[i].saved_spu_npc_RW =
 264                        in_be32(&spu->problem->spu_npc_RW);
 265
 266                crash_spu_info[i].saved_mfc_dar    = spu_mfc_dar_get(spu);
 267                crash_spu_info[i].saved_mfc_dsisr  = spu_mfc_dsisr_get(spu);
 268                tmp = spu_mfc_sr1_get(spu);
 269                crash_spu_info[i].saved_mfc_sr1_RW = tmp;
 270
 271                tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
 272                spu_mfc_sr1_set(spu, tmp);
 273
 274                __delay(200);
 275        }
 276}
 277
 278void crash_register_spus(struct list_head *list)
 279{
 280        struct spu *spu;
 281
 282        list_for_each_entry(spu, list, full_list) {
 283                if (WARN_ON(spu->number >= CRASH_NUM_SPUS))
 284                        continue;
 285
 286                crash_spu_info[spu->number].spu = spu;
 287        }
 288}
 289
 290#else
 291static inline void crash_kexec_stop_spus(void)
 292{
 293}
 294#endif /* CONFIG_SPU_BASE */
 295
 296/*
 297 * Register a function to be called on shutdown.  Only use this if you
 298 * can't reset your device in the second kernel.
 299 */
 300int crash_shutdown_register(crash_shutdown_t handler)
 301{
 302        unsigned int i, rc;
 303
 304        spin_lock(&crash_handlers_lock);
 305        for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
 306                if (!crash_shutdown_handles[i]) {
 307                        /* Insert handle at first empty entry */
 308                        crash_shutdown_handles[i] = handler;
 309                        rc = 0;
 310                        break;
 311                }
 312
 313        if (i == CRASH_HANDLER_MAX) {
 314                printk(KERN_ERR "Crash shutdown handles full, "
 315                       "not registered.\n");
 316                rc = 1;
 317        }
 318
 319        spin_unlock(&crash_handlers_lock);
 320        return rc;
 321}
 322EXPORT_SYMBOL(crash_shutdown_register);
 323
 324int crash_shutdown_unregister(crash_shutdown_t handler)
 325{
 326        unsigned int i, rc;
 327
 328        spin_lock(&crash_handlers_lock);
 329        for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
 330                if (crash_shutdown_handles[i] == handler)
 331                        break;
 332
 333        if (i == CRASH_HANDLER_MAX) {
 334                printk(KERN_ERR "Crash shutdown handle not found\n");
 335                rc = 1;
 336        } else {
 337                /* Shift handles down */
 338                for (; crash_shutdown_handles[i]; i++)
 339                        crash_shutdown_handles[i] =
 340                                crash_shutdown_handles[i+1];
 341                rc = 0;
 342        }
 343
 344        spin_unlock(&crash_handlers_lock);
 345        return rc;
 346}
 347EXPORT_SYMBOL(crash_shutdown_unregister);
 348
 349static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
 350
 351static int handle_fault(struct pt_regs *regs)
 352{
 353        longjmp(crash_shutdown_buf, 1);
 354        return 0;
 355}
 356
 357void default_machine_crash_shutdown(struct pt_regs *regs)
 358{
 359        unsigned int i;
 360        int (*old_handler)(struct pt_regs *regs);
 361
 362
 363        /*
 364         * This function is only called after the system
 365         * has panicked or is otherwise in a critical state.
 366         * The minimum amount of code to allow a kexec'd kernel
 367         * to run successfully needs to happen here.
 368         *
 369         * In practice this means stopping other cpus in
 370         * an SMP system.
 371         * The kernel is broken so disable interrupts.
 372         */
 373        hard_irq_disable();
 374
 375        for_each_irq(i) {
 376                struct irq_desc *desc = irq_desc + i;
 377
 378                if (desc->status & IRQ_INPROGRESS)
 379                        desc->chip->eoi(i);
 380
 381                if (!(desc->status & IRQ_DISABLED))
 382                        desc->chip->disable(i);
 383        }
 384
 385        /*
 386         * Call registered shutdown routines savely.  Swap out
 387         * __debugger_fault_handler, and replace on exit.
 388         */
 389        old_handler = __debugger_fault_handler;
 390        __debugger_fault_handler = handle_fault;
 391        for (i = 0; crash_shutdown_handles[i]; i++) {
 392                if (setjmp(crash_shutdown_buf) == 0) {
 393                        /*
 394                         * Insert syncs and delay to ensure
 395                         * instructions in the dangerous region don't
 396                         * leak away from this protected region.
 397                         */
 398                        asm volatile("sync; isync");
 399                        /* dangerous region */
 400                        crash_shutdown_handles[i]();
 401                        asm volatile("sync; isync");
 402                }
 403        }
 404        __debugger_fault_handler = old_handler;
 405
 406        /*
 407         * Make a note of crashing cpu. Will be used in machine_kexec
 408         * such that another IPI will not be sent.
 409         */
 410        crashing_cpu = smp_processor_id();
 411        crash_save_cpu(regs, crashing_cpu);
 412        crash_kexec_prepare_cpus(crashing_cpu);
 413        cpu_set(crashing_cpu, cpus_in_crash);
 414        crash_kexec_stop_spus();
 415        if (ppc_md.kexec_cpu_down)
 416                ppc_md.kexec_cpu_down(1, 0);
 417}
 418
/* lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995. */