linux/arch/s390/kernel/nmi.c
// SPDX-License-Identifier: GPL-2.0
/*
 *   Machine check handler
 *
 *    Copyright IBM Corp. 2000, 2009
 *    Author(s): Ingo Adlung <adlung@de.ibm.com>,
 *               Martin Schwidefsky <schwidefsky@de.ibm.com>,
 *               Cornelia Huck <cornelia.huck@de.ibm.com>,
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 */

#include <linux/kernel_stat.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/hardirq.h>
#include <linux/log2.h>
#include <linux/kprobes.h>
#include <linux/kmemleak.h>
#include <linux/time.h>
#include <linux/module.h>
#include <linux/sched/signal.h>

#include <linux/export.h>
#include <asm/lowcore.h>
#include <asm/smp.h>
#include <asm/stp.h>
#include <asm/cputime.h>
#include <asm/nmi.h>
#include <asm/crw.h>
#include <asm/switch_to.h>
#include <asm/ctl_reg.h>
#include <asm/asm-offsets.h>
#include <linux/kvm_host.h>

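/*
 * Machine check conditions accumulated per CPU by the machine check
 * interrupt handler and consumed later by __s390_handle_mcck().
 */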
struct mcck_struct {
        unsigned int kill_task : 1;
        unsigned int channel_report : 1;
        unsigned int warning : 1;
        unsigned int stp_queue : 1;
        unsigned long mcck_code;
};

static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
static struct kmem_cache *mcesa_cache;
static unsigned long mcesa_origin_lc;

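/*
 * A machine check extended save area (MCESA) is only needed if the
 * vector or guarded storage facilities are installed.
 */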
static inline int nmi_needs_mcesa(void)
{
        return MACHINE_HAS_VX || MACHINE_HAS_GS;
}

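/*
 * With guarded storage the full sized save area is required; the
 * vector registers alone fit into the minimum sized area.
 */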
static inline unsigned long nmi_get_mcesa_size(void)
{
        if (MACHINE_HAS_GS)
                return MCESA_MAX_SIZE;
        return MCESA_MIN_SIZE;
}

/*
 * The initial machine check extended save area for the boot CPU.
 * It will be replaced by nmi_init() with an allocated structure.
 * The structure is required for machine checks happening early in
 * the boot process.
 */
static struct mcesa boot_mcesa __initdata __aligned(MCESA_MAX_SIZE);

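/*
 * Point the boot CPU's lowcore at boot_mcesa. With guarded storage
 * present, the low bits of the mcesad lowcore entry encode the size
 * of the save area as a power of two.
 */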
void __init nmi_alloc_boot_cpu(struct lowcore *lc)
{
        if (!nmi_needs_mcesa())
                return;
        lc->mcesad = (unsigned long) &boot_mcesa;
        if (MACHINE_HAS_GS)
                lc->mcesad |= ilog2(MCESA_MAX_SIZE);
}

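/*
 * Create the save area cache and replace boot_mcesa with a regular
 * allocation on the boot CPU. Lowcore protection is briefly disabled
 * so that the mcesad lowcore entry can be updated.
 */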
static int __init nmi_init(void)
{
        unsigned long origin, cr0, size;

        if (!nmi_needs_mcesa())
                return 0;
        size = nmi_get_mcesa_size();
        if (size > MCESA_MIN_SIZE)
                mcesa_origin_lc = ilog2(size);
        /* create slab cache for the machine-check-extended-save-areas */
        mcesa_cache = kmem_cache_create("nmi_save_areas", size, size, 0, NULL);
        if (!mcesa_cache)
                panic("Couldn't create nmi save area cache");
        origin = (unsigned long) kmem_cache_alloc(mcesa_cache, GFP_KERNEL);
        if (!origin)
                panic("Couldn't allocate nmi save area");
        /* The pointer is stored with mcesa_bits ORed in */
        kmemleak_not_leak((void *) origin);
        __ctl_store(cr0, 0, 0);
        __ctl_clear_bit(0, 28); /* disable lowcore protection */
        /* Replace boot_mcesa on the boot CPU */
        S390_lowcore.mcesad = origin | mcesa_origin_lc;
        __ctl_load(cr0, 0, 0);
        return 0;
}
early_initcall(nmi_init);

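/* Allocate the machine check extended save area for a new CPU. */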
int nmi_alloc_per_cpu(struct lowcore *lc)
{
        unsigned long origin;

        if (!nmi_needs_mcesa())
                return 0;
        origin = (unsigned long) kmem_cache_alloc(mcesa_cache, GFP_KERNEL);
        if (!origin)
                return -ENOMEM;
        /* The pointer is stored with mcesa_bits ORed in */
        kmemleak_not_leak((void *) origin);
        lc->mcesad = origin | mcesa_origin_lc;
        return 0;
}

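/* Free a CPU's machine check extended save area. */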
void nmi_free_per_cpu(struct lowcore *lc)
{
        if (!nmi_needs_mcesa())
                return;
        kmem_cache_free(mcesa_cache, (void *)(lc->mcesad & MCESA_ORIGIN_MASK));
}

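/*
 * Stop the machine: bring all other CPUs to an emergency stop and put
 * this CPU into a disabled wait. This function does not return.
 */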
static notrace void s390_handle_damage(void)
{
        smp_emergency_stop();
        disabled_wait();
        while (1);
}
NOKPROBE_SYMBOL(s390_handle_damage);

/*
 * Main machine check handler function. Will be called with interrupts disabled
 * and machine checks enabled.
 */
void __s390_handle_mcck(void)
{
        struct mcck_struct mcck;

        /*
         * Disable machine checks and get the current state of accumulated
         * machine checks. Afterwards delete the old state and enable machine
         * checks again.
         */
        local_mcck_disable();
        mcck = *this_cpu_ptr(&cpu_mcck);
        memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck));
        local_mcck_enable();

        if (mcck.channel_report)
                crw_handle_channel_report();
        /*
         * On bare iron a warning condition may remain pending for a
         * prolonged period, actually until the machine is powered off
         * or the problem goes away. Therefore we simply stop listening
         * for WARNING machine checks and avoid being interrupted
         * continuously. One caveat: this must be done per processor,
         * so the smp version of ctl_clear_bit() cannot be used. Under
         * z/VM only one interrupt per virtually presented machine check
         * is delivered, but we may still get one interrupt per
         * (virtual) cpu even though one would suffice.
         */
        if (mcck.warning) {     /* WARNING pending ? */
                static int mchchk_wng_posted = 0;

                /* Use single cpu clear, as we cannot handle smp here. */
                __ctl_clear_bit(14, 24);        /* Disable WARNING MCH */
                if (xchg(&mchchk_wng_posted, 1) == 0)
                        kill_cad_pid(SIGPWR, 1);
        }
        if (mcck.stp_queue)
                stp_queue_work();
        if (mcck.kill_task) {
                local_irq_enable();
                printk(KERN_EMERG "mcck: Terminating task because of machine "
                       "malfunction (code 0x%016lx).\n", mcck.mcck_code);
                printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
                       current->comm, current->pid);
                do_exit(SIGSEGV);
        }
}

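/*
 * noinstr entry point: keep the hardirq tracing state consistent
 * around the instrumentable part of the handler.
 */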
void noinstr s390_handle_mcck(void)
{
        trace_hardirqs_off();
        __s390_handle_mcck();
        trace_hardirqs_on();
}

/*
 * Returns 0 if all required register contents could be validated,
 * 1 if the interrupted (user space) task has to be terminated.
 */
static int notrace s390_check_registers(union mci mci, int umode)
{
        union ctlreg2 cr2;
        int kill_task;

        kill_task = 0;

        if (!mci.gr) {
                /*
                 * General purpose registers couldn't be restored and have
                 * unknown contents. Stop system or terminate process.
                 */
                if (!umode)
                        s390_handle_damage();
                kill_task = 1;
        }
        /* Check control registers */
        if (!mci.cr) {
                /*
                 * Control registers have unknown contents.
                 * Can't recover and therefore stopping machine.
                 */
                s390_handle_damage();
        }
        if (!mci.fp) {
                /*
                 * Floating point registers can't be restored. If the
                 * kernel currently uses floating point registers the
                 * system is stopped. If the process has its floating
                 * point registers loaded it is terminated.
                 */
                if (S390_lowcore.fpu_flags & KERNEL_VXR_V0V7)
                        s390_handle_damage();
                if (!test_cpu_flag(CIF_FPU))
                        kill_task = 1;
        }
        if (!mci.fc) {
                /*
                 * Floating point control register can't be restored.
                 * If the kernel currently uses the floating point
                 * registers and needs the FPC register the system is
                 * stopped. If the process has its floating point
                 * registers loaded it is terminated.
                 */
                if (S390_lowcore.fpu_flags & KERNEL_FPC)
                        s390_handle_damage();
                if (!test_cpu_flag(CIF_FPU))
                        kill_task = 1;
        }

        if (MACHINE_HAS_VX) {
                if (!mci.vr) {
                        /*
                         * Vector registers can't be restored. If the kernel
                         * currently uses vector registers the system is
                         * stopped. If the process has its vector registers
                         * loaded it is terminated.
                         */
                        if (S390_lowcore.fpu_flags & KERNEL_VXR)
                                s390_handle_damage();
                        if (!test_cpu_flag(CIF_FPU))
                                kill_task = 1;
                }
        }
        /* Check if access registers are valid */
        if (!mci.ar) {
                /*
                 * Access registers have unknown contents.
                 * Terminating task.
                 */
                kill_task = 1;
        }
        /* Check guarded storage registers */
        cr2.val = S390_lowcore.cregs_save_area[2];
        if (cr2.gse) {
                if (!mci.gs) {
                        /*
                         * Guarded storage registers can't be restored
                         * and the current process uses guarded storage.
                         * It has to be terminated.
                         */
                        kill_task = 1;
                }
        }
        /* Check if old PSW is valid */
        if (!mci.wp) {
                /*
                 * Can't tell if we come from user or kernel mode
                 * -> stopping machine.
                 */
                s390_handle_damage();
        }
        /* Check for invalid kernel instruction address */
        if (!mci.ia && !umode) {
                /*
                 * The instruction address got lost while running
                 * in the kernel -> stopping machine.
                 */
                s390_handle_damage();
        }

        if (!mci.ms || !mci.pm || !mci.ia)
                kill_task = 1;

        return kill_task;
}
NOKPROBE_SYMBOL(s390_check_registers);

/*
 * Back up the guest's machine check info to its description block
 */
static void notrace s390_backup_mcck_info(struct pt_regs *regs)
{
        struct mcck_volatile_info *mcck_backup;
        struct sie_page *sie_page;

        /* r14 contains the sie block, which was set in sie64a */
        struct kvm_s390_sie_block *sie_block =
                        (struct kvm_s390_sie_block *) regs->gprs[14];

        if (sie_block == NULL)
                /* Something's seriously wrong, stop system. */
                s390_handle_damage();

        sie_page = container_of(sie_block, struct sie_page, sie_block);
        mcck_backup = &sie_page->mcck_info;
        mcck_backup->mcic = S390_lowcore.mcck_interruption_code &
                                ~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE);
        mcck_backup->ext_damage_code = S390_lowcore.external_damage_code;
        mcck_backup->failing_storage_address
                        = S390_lowcore.failing_storage_address;
}
NOKPROBE_SYMBOL(s390_backup_mcck_info);

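/*
 * Rate limit for recoverable instruction processing damage: the
 * machine is stopped if more than MAX_IPD_COUNT such machine checks
 * arrive within MAX_IPD_TIME.
 */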
#define MAX_IPD_COUNT   29
#define MAX_IPD_TIME    (5 * 60 * USEC_PER_SEC) /* 5 minutes */

#define ED_STP_ISLAND   6       /* External damage STP island check */
#define ED_STP_SYNC     7       /* External damage STP sync check */

#define MCCK_CODE_NO_GUEST      (MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE)

/*
 * Machine check interrupt handler. Returns 1 if the machine check was
 * taken in user mode and left work pending, so that the caller can
 * handle it before returning to user space; returns 0 otherwise.
 */
int notrace s390_do_machine_check(struct pt_regs *regs)
{
        static int ipd_count;
        static DEFINE_SPINLOCK(ipd_lock);
        static unsigned long long last_ipd;
        struct mcck_struct *mcck;
        unsigned long long tmp;
        union mci mci;
        unsigned long mcck_dam_code;
        int mcck_pending = 0;

        nmi_enter();

        if (user_mode(regs))
                update_timer_mcck();
        inc_irq_stat(NMI_NMI);
        mci.val = S390_lowcore.mcck_interruption_code;
        mcck = this_cpu_ptr(&cpu_mcck);

        if (mci.sd) {
                /* System damage -> stopping machine */
                s390_handle_damage();
        }

        /*
         * Reinject machine checks for instruction processing damage,
         * including delayed access exceptions, into the guest instead
         * of damaging the host if they happen while the guest is
         * running.
         */
        if (mci.pd && !test_cpu_flag(CIF_MCCK_GUEST)) {
                if (mci.b) {
                        /* Processing backup -> verify if we can survive this */
                        u64 z_mcic, o_mcic, t_mcic;
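                        /*
                         * All bits set in z_mcic must be zero and all
                         * bits set in o_mcic must be one, otherwise
                         * the machine check cannot be survived.
                         */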
                        z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
                        o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
                                  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
                                  1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
                                  1ULL<<16);
                        t_mcic = mci.val;

                        if (((t_mcic & z_mcic) != 0) ||
                            ((t_mcic & o_mcic) != o_mcic)) {
                                s390_handle_damage();
                        }

                        /*
                         * Nullifying exigent condition, therefore we might
                         * retry this instruction.
                         */
                        spin_lock(&ipd_lock);
                        tmp = get_tod_clock();
                        if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME)
                                ipd_count++;
                        else
                                ipd_count = 1;
                        last_ipd = tmp;
                        if (ipd_count == MAX_IPD_COUNT)
                                s390_handle_damage();
                        spin_unlock(&ipd_lock);
                } else {
                        /* Processing damage -> stopping machine */
                        s390_handle_damage();
                }
        }
        if (s390_check_registers(mci, user_mode(regs))) {
                /*
                 * Couldn't restore all register contents for the
                 * user space process -> mark task for termination.
                 */
                mcck->kill_task = 1;
                mcck->mcck_code = mci.val;
                mcck_pending = 1;
        }

        /*
         * Back up the machine check's info if it happened while the
         * guest was running.
         */
        if (test_cpu_flag(CIF_MCCK_GUEST))
                s390_backup_mcck_info(regs);

        if (mci.cd) {
                /* Timing facility damage */
                s390_handle_damage();
        }
        if (mci.ed && mci.ec) {
                /* External damage */
                if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
                        mcck->stp_queue |= stp_sync_check();
                if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
                        mcck->stp_queue |= stp_island_check();
                mcck_pending = 1;
        }

        /*
         * Reinject storage related machine checks into the guest if they
         * happen when the guest is running.
         */
        if (!test_cpu_flag(CIF_MCCK_GUEST)) {
                if (mci.se)
                        /* Storage error uncorrected */
                        s390_handle_damage();
                if (mci.ke)
                        /* Storage key-error uncorrected */
                        s390_handle_damage();
                if (mci.ds && mci.fa)
                        /* Storage degradation */
                        s390_handle_damage();
        }
        if (mci.cp) {
                /* Channel report word pending */
                mcck->channel_report = 1;
                mcck_pending = 1;
        }
        if (mci.w) {
                /* Warning pending */
                mcck->warning = 1;
                mcck_pending = 1;
        }

        /*
         * If there are only Channel Report Pending and External Damage
         * machine checks, they will not be reinjected into the guest
         * because they refer to host conditions only.
         */
        mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK);
        if (test_cpu_flag(CIF_MCCK_GUEST) &&
            (mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) {
                /* Set exit reason code for host's later handling */
                *((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR;
        }
        clear_cpu_flag(CIF_MCCK_GUEST);

        if (user_mode(regs) && mcck_pending) {
                nmi_exit();
                return 1;
        }

        if (mcck_pending)
                schedule_mcck_handler();

        nmi_exit();
        return 0;
}
NOKPROBE_SYMBOL(s390_do_machine_check);

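/*
 * Enable the machine check subclasses this handler wants to see by
 * setting the corresponding bits in control register 14.
 */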
static int __init machine_check_init(void)
{
        ctl_set_bit(14, 25);    /* enable external damage MCH */
        ctl_set_bit(14, 27);    /* enable system recovery MCH */
        ctl_set_bit(14, 24);    /* enable warning MCH */
        return 0;
}
early_initcall(machine_check_init);