linux/arch/powerpc/platforms/powernv/idle.c
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * PowerNV cpuidle code
   4 *
   5 * Copyright 2015 IBM Corp.
   6 */
   7
   8#include <linux/types.h>
   9#include <linux/mm.h>
  10#include <linux/slab.h>
  11#include <linux/of.h>
  12#include <linux/device.h>
  13#include <linux/cpu.h>
  14
  15#include <asm/asm-prototypes.h>
  16#include <asm/firmware.h>
  17#include <asm/interrupt.h>
  18#include <asm/machdep.h>
  19#include <asm/opal.h>
  20#include <asm/cputhreads.h>
  21#include <asm/cpuidle.h>
  22#include <asm/code-patching.h>
  23#include <asm/smp.h>
  24#include <asm/runlatch.h>
  25#include <asm/dbell.h>
  26
  27#include "powernv.h"
  28#include "subcore.h"
  29
  30/* Power ISA 3.0 allows for stop states 0x0 - 0xF */
  31#define MAX_STOP_STATE  0xF
  32
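     /*
      * Register identifiers understood by the OPAL stop-api
      * (opal_slw_set_reg). MSR is not an SPR, so a pseudo-identifier is
      * used for it; PSSCR is addressed by its SPR number.
      */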
  33#define P9_STOP_SPR_MSR 2000
  34#define P9_STOP_SPR_PSSCR      855
  35
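     /* Bitwise OR of the OPAL_PM_* flags of all idle states found in the DT */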
  36static u32 supported_cpuidle_states;
  37struct pnv_idle_states_t *pnv_idle_states;
  38int nr_pnv_idle_states;
  39
  40/*
  41 * The default stop state that will be used by ppc_md.power_save
   42 * function on platforms that support the stop instruction.
  43 */
  44static u64 pnv_default_stop_val;
  45static u64 pnv_default_stop_mask;
  46static bool default_stop_found;
  47
  48/*
   49 * First stop state levels at which SPR and TB loss can occur.
  50 */
  51static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
  52static u64 deep_spr_loss_state = MAX_STOP_STATE + 1;
  53
  54/*
  55 * psscr value and mask of the deepest stop idle state.
  56 * Used when a cpu is offlined.
  57 */
  58static u64 pnv_deepest_stop_psscr_val;
  59static u64 pnv_deepest_stop_psscr_mask;
  60static u64 pnv_deepest_stop_flag;
  61static bool deepest_stop_found;
  62
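     /* Idle type (nap/sleep/winkle) used by power7_offline() when a CPU is offlined */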
  63static unsigned long power7_offline_type;
  64
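     /*
      * Program the stop-api with the SPR values that firmware must restore
      * when a thread wakes from an idle state that loses full context.
      */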
  65static int pnv_save_sprs_for_deep_states(void)
  66{
  67        int cpu;
  68        int rc;
  69
  70        /*
  71         * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
   72 * all cpus at boot. Get these reg values of the current cpu and use the
  73         * same across all cpus.
  74         */
  75        uint64_t lpcr_val       = mfspr(SPRN_LPCR);
  76        uint64_t hid0_val       = mfspr(SPRN_HID0);
  77        uint64_t hmeer_val      = mfspr(SPRN_HMEER);
  78        uint64_t msr_val = MSR_IDLE;
  79        uint64_t psscr_val = pnv_deepest_stop_psscr_val;
  80
  81        for_each_present_cpu(cpu) {
  82                uint64_t pir = get_hard_smp_processor_id(cpu);
  83                uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu];
  84
  85                rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
  86                if (rc != 0)
  87                        return rc;
  88
  89                rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
  90                if (rc != 0)
  91                        return rc;
  92
  93                if (cpu_has_feature(CPU_FTR_ARCH_300)) {
  94                        rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val);
  95                        if (rc)
  96                                return rc;
  97
  98                        rc = opal_slw_set_reg(pir,
  99                                              P9_STOP_SPR_PSSCR, psscr_val);
 100
 101                        if (rc)
 102                                return rc;
 103                }
 104
 105                /* HIDs are per core registers */
 106                if (cpu_thread_in_core(cpu) == 0) {
 107
 108                        rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
 109                        if (rc != 0)
 110                                return rc;
 111
 112                        rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
 113                        if (rc != 0)
 114                                return rc;
 115
  116                        /* Only p8 needs to set extra HID registers */
 117                        if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
 118                                uint64_t hid1_val = mfspr(SPRN_HID1);
 119                                uint64_t hid4_val = mfspr(SPRN_HID4);
 120                                uint64_t hid5_val = mfspr(SPRN_HID5);
 121
 122                                rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
 123                                if (rc != 0)
 124                                        return rc;
 125
 126                                rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
 127                                if (rc != 0)
 128                                        return rc;
 129
 130                                rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
 131                                if (rc != 0)
 132                                        return rc;
 133                        }
 134                }
 135        }
 136
 137        return 0;
 138}
 139
 140u32 pnv_get_supported_cpuidle_states(void)
 141{
 142        return supported_cpuidle_states;
 143}
 144EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
 145
 146static void pnv_fastsleep_workaround_apply(void *info)
 147
 148{
 149        int rc;
 150        int *err = info;
 151
 152        rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
 153                                        OPAL_CONFIG_IDLE_APPLY);
 154        if (rc)
 155                *err = 1;
 156}
 157
 158static bool power7_fastsleep_workaround_entry = true;
 159static bool power7_fastsleep_workaround_exit = true;
 160
 161/*
 162 * Used to store fastsleep workaround state
 163 * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
 164 * 1 - Workaround applied once, never undone.
 165 */
 166static u8 fastsleep_workaround_applyonce;
 167
 168static ssize_t show_fastsleep_workaround_applyonce(struct device *dev,
 169                struct device_attribute *attr, char *buf)
 170{
 171        return sprintf(buf, "%u\n", fastsleep_workaround_applyonce);
 172}
 173
 174static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
 175                struct device_attribute *attr, const char *buf,
 176                size_t count)
 177{
 178        cpumask_t primary_thread_mask;
 179        int err;
 180        u8 val;
 181
 182        if (kstrtou8(buf, 0, &val) || val != 1)
 183                return -EINVAL;
 184
 185        if (fastsleep_workaround_applyonce == 1)
 186                return count;
 187
 188        /*
 189         * fastsleep_workaround_applyonce = 1 implies
 190         * fastsleep workaround needs to be left in 'applied' state on all
  191         * the cores. Do this by:
  192         * 1. Disable the 'undo' workaround in the fastsleep exit path
  193         * 2. Send IPIs to all the cores which have at least one online thread
  194         * 3. Disable the 'apply' workaround in the fastsleep entry path
  195         *
  196         * There is no need to send an IPI to cores which have all threads
  197         * offlined, as the last thread of the core entering fastsleep or a
  198         * deeper state would have applied the workaround.
 199         */
 200        power7_fastsleep_workaround_exit = false;
 201
 202        get_online_cpus();
 203        primary_thread_mask = cpu_online_cores_map();
 204        on_each_cpu_mask(&primary_thread_mask,
 205                                pnv_fastsleep_workaround_apply,
 206                                &err, 1);
 207        put_online_cpus();
 208        if (err) {
  209                pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply\n");
 210                goto fail;
 211        }
 212
 213        power7_fastsleep_workaround_entry = false;
 214
 215        fastsleep_workaround_applyonce = 1;
 216
 217        return count;
 218fail:
 219        return -EIO;
 220}
 221
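     /*
      * Exposed as /sys/devices/system/cpu/fastsleep_workaround_applyonce.
      * Writing 1 applies the workaround once on all cores and disables the
      * per-entry/exit toggling; it cannot be undone at runtime.
      */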
 222static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
 223                        show_fastsleep_workaround_applyonce,
 224                        store_fastsleep_workaround_applyonce);
 225
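     /*
      * The first thread's paca holds a per-core idle_state word: a set
      * thread bit means that thread is running, a clear bit means it has
      * gone idle. NR_PNV_CORE_IDLE_LOCK_BIT in the same word serializes
      * updates to it and the SPR restore sequences on wakeup.
      */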
 226static inline void atomic_start_thread_idle(void)
 227{
 228        int cpu = raw_smp_processor_id();
 229        int first = cpu_first_thread_sibling(cpu);
 230        int thread_nr = cpu_thread_in_core(cpu);
 231        unsigned long *state = &paca_ptrs[first]->idle_state;
 232
 233        clear_bit(thread_nr, state);
 234}
 235
 236static inline void atomic_stop_thread_idle(void)
 237{
 238        int cpu = raw_smp_processor_id();
 239        int first = cpu_first_thread_sibling(cpu);
 240        int thread_nr = cpu_thread_in_core(cpu);
 241        unsigned long *state = &paca_ptrs[first]->idle_state;
 242
 243        set_bit(thread_nr, state);
 244}
 245
 246static inline void atomic_lock_thread_idle(void)
 247{
 248        int cpu = raw_smp_processor_id();
 249        int first = cpu_first_thread_sibling(cpu);
 250        unsigned long *state = &paca_ptrs[first]->idle_state;
 251
 252        while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state)))
 253                barrier();
 254}
 255
 256static inline void atomic_unlock_and_stop_thread_idle(void)
 257{
 258        int cpu = raw_smp_processor_id();
 259        int first = cpu_first_thread_sibling(cpu);
 260        unsigned long thread = 1UL << cpu_thread_in_core(cpu);
 261        unsigned long *state = &paca_ptrs[first]->idle_state;
 262        u64 s = READ_ONCE(*state);
 263        u64 new, tmp;
 264
 265        BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT));
 266        BUG_ON(s & thread);
 267
 268again:
 269        new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT;
 270        tmp = cmpxchg(state, s, new);
 271        if (unlikely(tmp != s)) {
 272                s = tmp;
 273                goto again;
 274        }
 275}
 276
 277static inline void atomic_unlock_thread_idle(void)
 278{
 279        int cpu = raw_smp_processor_id();
 280        int first = cpu_first_thread_sibling(cpu);
 281        unsigned long *state = &paca_ptrs[first]->idle_state;
 282
 283        BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state));
 284        clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state);
 285}
 286
 287/* P7 and P8 */
 288struct p7_sprs {
 289        /* per core */
 290        u64 tscr;
 291        u64 worc;
 292
 293        /* per subcore */
 294        u64 sdr1;
 295        u64 rpr;
 296
 297        /* per thread */
 298        u64 lpcr;
 299        u64 hfscr;
 300        u64 fscr;
 301        u64 purr;
 302        u64 spurr;
 303        u64 dscr;
 304        u64 wort;
 305
 306        /* per thread SPRs that get lost in shallow states */
 307        u64 amr;
 308        u64 iamr;
 309        u64 amor;
 310        u64 uamor;
 311};
 312
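     /*
      * Enter the requested P7/P8 idle type (nap, fast sleep or winkle).
      * Called with the MMU off (MSR_IDLE); returns the SRR1 wakeup reason.
      */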
 313static unsigned long power7_idle_insn(unsigned long type)
 314{
 315        int cpu = raw_smp_processor_id();
 316        int first = cpu_first_thread_sibling(cpu);
 317        unsigned long *state = &paca_ptrs[first]->idle_state;
 318        unsigned long thread = 1UL << cpu_thread_in_core(cpu);
 319        unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
 320        unsigned long srr1;
 321        bool full_winkle;
 322        struct p7_sprs sprs = {}; /* avoid false use-uninitialised */
 323        bool sprs_saved = false;
 324        int rc;
 325
 326        if (unlikely(type != PNV_THREAD_NAP)) {
 327                atomic_lock_thread_idle();
 328
 329                BUG_ON(!(*state & thread));
 330                *state &= ~thread;
 331
 332                if (power7_fastsleep_workaround_entry) {
 333                        if ((*state & core_thread_mask) == 0) {
 334                                rc = opal_config_cpu_idle_state(
 335                                                OPAL_CONFIG_IDLE_FASTSLEEP,
 336                                                OPAL_CONFIG_IDLE_APPLY);
 337                                BUG_ON(rc);
 338                        }
 339                }
 340
 341                if (type == PNV_THREAD_WINKLE) {
 342                        sprs.tscr       = mfspr(SPRN_TSCR);
 343                        sprs.worc       = mfspr(SPRN_WORC);
 344
 345                        sprs.sdr1       = mfspr(SPRN_SDR1);
 346                        sprs.rpr        = mfspr(SPRN_RPR);
 347
 348                        sprs.lpcr       = mfspr(SPRN_LPCR);
 349                        if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
 350                                sprs.hfscr      = mfspr(SPRN_HFSCR);
 351                                sprs.fscr       = mfspr(SPRN_FSCR);
 352                        }
 353                        sprs.purr       = mfspr(SPRN_PURR);
 354                        sprs.spurr      = mfspr(SPRN_SPURR);
 355                        sprs.dscr       = mfspr(SPRN_DSCR);
 356                        sprs.wort       = mfspr(SPRN_WORT);
 357
 358                        sprs_saved = true;
 359
 360                        /*
 361                         * Increment winkle counter and set all winkle bits if
 362                         * all threads are winkling. This allows wakeup side to
 363                         * distinguish between fast sleep and winkle state
 364                         * loss. Fast sleep still has to resync the timebase so
 365                         * this may not be a really big win.
 366                         */
 367                        *state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
 368                        if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS)
 369                                        >> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT
 370                                        == threads_per_core)
 371                                *state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS;
 372                        WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
 373                }
 374
 375                atomic_unlock_thread_idle();
 376        }
 377
 378        if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
 379                sprs.amr        = mfspr(SPRN_AMR);
 380                sprs.iamr       = mfspr(SPRN_IAMR);
 381                sprs.amor       = mfspr(SPRN_AMOR);
 382                sprs.uamor      = mfspr(SPRN_UAMOR);
 383        }
 384
 385        local_paca->thread_idle_state = type;
 386        srr1 = isa206_idle_insn_mayloss(type);          /* go idle */
 387        local_paca->thread_idle_state = PNV_THREAD_RUNNING;
 388
 389        WARN_ON_ONCE(!srr1);
 390        WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
 391
 392        if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
 393                if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
 394                        /*
 395                         * We don't need an isync after the mtsprs here because
 396                         * the upcoming mtmsrd is execution synchronizing.
 397                         */
 398                        mtspr(SPRN_AMR,         sprs.amr);
 399                        mtspr(SPRN_IAMR,        sprs.iamr);
 400                        mtspr(SPRN_AMOR,        sprs.amor);
 401                        mtspr(SPRN_UAMOR,       sprs.uamor);
 402                }
 403        }
 404
 405        if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
 406                hmi_exception_realmode(NULL);
 407
 408        if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) {
 409                if (unlikely(type != PNV_THREAD_NAP)) {
 410                        atomic_lock_thread_idle();
 411                        if (type == PNV_THREAD_WINKLE) {
 412                                WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
 413                                *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
 414                                *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
 415                        }
 416                        atomic_unlock_and_stop_thread_idle();
 417                }
 418                return srr1;
 419        }
 420
 421        /* HV state loss */
 422        BUG_ON(type == PNV_THREAD_NAP);
 423
 424        atomic_lock_thread_idle();
 425
 426        full_winkle = false;
 427        if (type == PNV_THREAD_WINKLE) {
 428                WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
 429                *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
 430                if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) {
 431                        *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
 432                        full_winkle = true;
 433                        BUG_ON(!sprs_saved);
 434                }
 435        }
 436
 437        WARN_ON(*state & thread);
 438
 439        if ((*state & core_thread_mask) != 0)
 440                goto core_woken;
 441
 442        /* Per-core SPRs */
 443        if (full_winkle) {
 444                mtspr(SPRN_TSCR,        sprs.tscr);
 445                mtspr(SPRN_WORC,        sprs.worc);
 446        }
 447
 448        if (power7_fastsleep_workaround_exit) {
 449                rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
 450                                                OPAL_CONFIG_IDLE_UNDO);
 451                BUG_ON(rc);
 452        }
 453
 454        /* TB */
 455        if (opal_resync_timebase() != OPAL_SUCCESS)
 456                BUG();
 457
 458core_woken:
 459        if (!full_winkle)
 460                goto subcore_woken;
 461
 462        if ((*state & local_paca->subcore_sibling_mask) != 0)
 463                goto subcore_woken;
 464
 465        /* Per-subcore SPRs */
 466        mtspr(SPRN_SDR1,        sprs.sdr1);
 467        mtspr(SPRN_RPR,         sprs.rpr);
 468
 469subcore_woken:
 470        /*
 471         * isync after restoring shared SPRs and before unlocking. Unlock
 472         * only contains hwsync which does not necessarily do the right
 473         * thing for SPRs.
 474         */
 475        isync();
 476        atomic_unlock_and_stop_thread_idle();
 477
 478        /* Fast sleep does not lose SPRs */
 479        if (!full_winkle)
 480                return srr1;
 481
 482        /* Per-thread SPRs */
 483        mtspr(SPRN_LPCR,        sprs.lpcr);
 484        if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
 485                mtspr(SPRN_HFSCR,       sprs.hfscr);
 486                mtspr(SPRN_FSCR,        sprs.fscr);
 487        }
 488        mtspr(SPRN_PURR,        sprs.purr);
 489        mtspr(SPRN_SPURR,       sprs.spurr);
 490        mtspr(SPRN_DSCR,        sprs.dscr);
 491        mtspr(SPRN_WORT,        sprs.wort);
 492
 493        mtspr(SPRN_SPRG3,       local_paca->sprg_vdso);
 494
 495        /*
 496         * The SLB has to be restored here, but it sometimes still
 497         * contains entries, so the __ variant must be used to prevent
 498         * multi hits.
 499         */
 500        __slb_restore_bolted_realmode();
 501
 502        return srr1;
 503}
 504
 505extern unsigned long idle_kvm_start_guest(unsigned long srr1);
 506
 507#ifdef CONFIG_HOTPLUG_CPU
 508static unsigned long power7_offline(void)
 509{
 510        unsigned long srr1;
 511
 512        mtmsr(MSR_IDLE);
 513
 514#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 515        /* Tell KVM we're entering idle. */
 516        /******************************************************/
 517        /*  N O T E   W E L L    ! ! !    N O T E   W E L L   */
 518        /* The following store to HSTATE_HWTHREAD_STATE(r13)  */
 519        /* MUST occur in real mode, i.e. with the MMU off,    */
 520        /* and the MMU must stay off until we clear this flag */
 521        /* and test HSTATE_HWTHREAD_REQ(r13) in               */
 522        /* pnv_powersave_wakeup in this file.                 */
 523        /* The reason is that another thread can switch the   */
 524        /* MMU to a guest context whenever this flag is set   */
 525        /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on,    */
 526        /* that would potentially cause this thread to start  */
 527        /* executing instructions from guest memory in        */
 528        /* hypervisor mode, leading to a host crash or data   */
 529        /* corruption, or worse.                              */
 530        /******************************************************/
 531        local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
 532#endif
 533
 534        __ppc64_runlatch_off();
 535        srr1 = power7_idle_insn(power7_offline_type);
 536        __ppc64_runlatch_on();
 537
 538#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 539        local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
 540        /* Order setting hwthread_state vs. testing hwthread_req */
 541        smp_mb();
 542        if (local_paca->kvm_hstate.hwthread_req)
 543                srr1 = idle_kvm_start_guest(srr1);
 544#endif
 545
 546        mtmsr(MSR_KERNEL);
 547
 548        return srr1;
 549}
 550#endif
 551
 552void power7_idle_type(unsigned long type)
 553{
 554        unsigned long srr1;
 555
 556        if (!prep_irq_for_idle_irqsoff())
 557                return;
 558
 559        mtmsr(MSR_IDLE);
 560        __ppc64_runlatch_off();
 561        srr1 = power7_idle_insn(type);
 562        __ppc64_runlatch_on();
 563        mtmsr(MSR_KERNEL);
 564
 565        fini_irq_for_idle_irqsoff();
 566        irq_set_pending_from_srr1(srr1);
 567}
 568
 569static void power7_idle(void)
 570{
 571        if (!powersave_nap)
 572                return;
 573
 574        power7_idle_type(PNV_THREAD_NAP);
 575}
 576
 577struct p9_sprs {
 578        /* per core */
 579        u64 ptcr;
 580        u64 rpr;
 581        u64 tscr;
 582        u64 ldbar;
 583
 584        /* per thread */
 585        u64 lpcr;
 586        u64 hfscr;
 587        u64 fscr;
 588        u64 pid;
 589        u64 purr;
 590        u64 spurr;
 591        u64 dscr;
 592        u64 wort;
 593        u64 ciabr;
 594
 595        u64 mmcra;
 596        u32 mmcr0;
 597        u32 mmcr1;
 598        u64 mmcr2;
 599
 600        /* per thread SPRs that get lost in shallow states */
 601        u64 amr;
 602        u64 iamr;
 603        u64 amor;
 604        u64 uamor;
 605};
 606
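     /*
      * Enter a stop state described by psscr on POWER9. Deep states save
      * and restore per-thread, per-core and PMU SPRs around the stop
      * instruction; returns the SRR1 wakeup reason (zero for a synchronous
      * wakeup from the EC=ESL=0 path).
      */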
 607static unsigned long power9_idle_stop(unsigned long psscr)
 608{
 609        int cpu = raw_smp_processor_id();
 610        int first = cpu_first_thread_sibling(cpu);
 611        unsigned long *state = &paca_ptrs[first]->idle_state;
 612        unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
 613        unsigned long srr1;
 614        unsigned long pls;
 615        unsigned long mmcr0 = 0;
 616        unsigned long mmcra = 0;
 617        struct p9_sprs sprs = {}; /* avoid false used-uninitialised */
 618        bool sprs_saved = false;
 619
 620        if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
 621                /* EC=ESL=0 case */
 622
 623                /*
 624                 * Wake synchronously. SRESET via xscom may still cause
 625                 * a 0x100 powersave wakeup with SRR1 reason!
 626                 */
 627                srr1 = isa300_idle_stop_noloss(psscr);          /* go idle */
 628                if (likely(!srr1))
 629                        return 0;
 630
 631                /*
 632                 * Registers not saved, can't recover!
 633                 * This would be a hardware bug
 634                 */
 635                BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
 636
 637                goto out;
 638        }
 639
 640        /* EC=ESL=1 case */
 641#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 642        if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) {
 643                local_paca->requested_psscr = psscr;
 644                /* order setting requested_psscr vs testing dont_stop */
 645                smp_mb();
 646                if (atomic_read(&local_paca->dont_stop)) {
 647                        local_paca->requested_psscr = 0;
 648                        return 0;
 649                }
 650        }
 651#endif
 652
 653        if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
 654                 /*
 655                  * POWER9 DD2 can incorrectly set PMAO when waking up
 656                  * after a state-loss idle. Saving and restoring MMCR0
 657                  * over idle is a workaround.
 658                  */
 659                mmcr0           = mfspr(SPRN_MMCR0);
 660        }
 661
 662        if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
 663                sprs.lpcr       = mfspr(SPRN_LPCR);
 664                sprs.hfscr      = mfspr(SPRN_HFSCR);
 665                sprs.fscr       = mfspr(SPRN_FSCR);
 666                sprs.pid        = mfspr(SPRN_PID);
 667                sprs.purr       = mfspr(SPRN_PURR);
 668                sprs.spurr      = mfspr(SPRN_SPURR);
 669                sprs.dscr       = mfspr(SPRN_DSCR);
 670                sprs.wort       = mfspr(SPRN_WORT);
 671                sprs.ciabr      = mfspr(SPRN_CIABR);
 672
 673                sprs.mmcra      = mfspr(SPRN_MMCRA);
 674                sprs.mmcr0      = mfspr(SPRN_MMCR0);
 675                sprs.mmcr1      = mfspr(SPRN_MMCR1);
 676                sprs.mmcr2      = mfspr(SPRN_MMCR2);
 677
 678                sprs.ptcr       = mfspr(SPRN_PTCR);
 679                sprs.rpr        = mfspr(SPRN_RPR);
 680                sprs.tscr       = mfspr(SPRN_TSCR);
 681                if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
 682                        sprs.ldbar = mfspr(SPRN_LDBAR);
 683
 684                sprs_saved = true;
 685
 686                atomic_start_thread_idle();
 687        }
 688
 689        sprs.amr        = mfspr(SPRN_AMR);
 690        sprs.iamr       = mfspr(SPRN_IAMR);
 691        sprs.amor       = mfspr(SPRN_AMOR);
 692        sprs.uamor      = mfspr(SPRN_UAMOR);
 693
 694        srr1 = isa300_idle_stop_mayloss(psscr);         /* go idle */
 695
 696#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 697        local_paca->requested_psscr = 0;
 698#endif
 699
 700        psscr = mfspr(SPRN_PSSCR);
 701
 702        WARN_ON_ONCE(!srr1);
 703        WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
 704
 705        if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
 706                /*
 707                 * We don't need an isync after the mtsprs here because the
 708                 * upcoming mtmsrd is execution synchronizing.
 709                 */
 710                mtspr(SPRN_AMR,         sprs.amr);
 711                mtspr(SPRN_IAMR,        sprs.iamr);
 712                mtspr(SPRN_AMOR,        sprs.amor);
 713                mtspr(SPRN_UAMOR,       sprs.uamor);
 714
 715                /*
 716                 * Workaround for POWER9 DD2.0, if we lost resources, the ERAT
 717                 * might have been corrupted and needs flushing. We also need
 718                 * to reload MMCR0 (see mmcr0 comment above).
 719                 */
 720                if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
 721                        asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT);
 722                        mtspr(SPRN_MMCR0, mmcr0);
 723                }
 724
 725                /*
 726                 * DD2.2 and earlier need to set then clear bit 60 in MMCRA
 727                 * to ensure the PMU starts running.
 728                 */
 729                mmcra = mfspr(SPRN_MMCRA);
 730                mmcra |= PPC_BIT(60);
 731                mtspr(SPRN_MMCRA, mmcra);
 732                mmcra &= ~PPC_BIT(60);
 733                mtspr(SPRN_MMCRA, mmcra);
 734        }
 735
 736        if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
 737                hmi_exception_realmode(NULL);
 738
 739        /*
 740         * On POWER9, SRR1 bits do not match exactly as expected.
 741         * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
 742         * just always test PSSCR for SPR/TB state loss.
 743         */
 744        pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
 745        if (likely(pls < deep_spr_loss_state)) {
 746                if (sprs_saved)
 747                        atomic_stop_thread_idle();
 748                goto out;
 749        }
 750
 751        /* HV state loss */
 752        BUG_ON(!sprs_saved);
 753
 754        atomic_lock_thread_idle();
 755
 756        if ((*state & core_thread_mask) != 0)
 757                goto core_woken;
 758
 759        /* Per-core SPRs */
 760        mtspr(SPRN_PTCR,        sprs.ptcr);
 761        mtspr(SPRN_RPR,         sprs.rpr);
 762        mtspr(SPRN_TSCR,        sprs.tscr);
 763
 764        if (pls >= pnv_first_tb_loss_level) {
 765                /* TB loss */
 766                if (opal_resync_timebase() != OPAL_SUCCESS)
 767                        BUG();
 768        }
 769
 770        /*
 771         * isync after restoring shared SPRs and before unlocking. Unlock
 772         * only contains hwsync which does not necessarily do the right
 773         * thing for SPRs.
 774         */
 775        isync();
 776
 777core_woken:
 778        atomic_unlock_and_stop_thread_idle();
 779
 780        /* Per-thread SPRs */
 781        mtspr(SPRN_LPCR,        sprs.lpcr);
 782        mtspr(SPRN_HFSCR,       sprs.hfscr);
 783        mtspr(SPRN_FSCR,        sprs.fscr);
 784        mtspr(SPRN_PID,         sprs.pid);
 785        mtspr(SPRN_PURR,        sprs.purr);
 786        mtspr(SPRN_SPURR,       sprs.spurr);
 787        mtspr(SPRN_DSCR,        sprs.dscr);
 788        mtspr(SPRN_WORT,        sprs.wort);
 789        mtspr(SPRN_CIABR,       sprs.ciabr);
 790
 791        mtspr(SPRN_MMCRA,       sprs.mmcra);
 792        mtspr(SPRN_MMCR0,       sprs.mmcr0);
 793        mtspr(SPRN_MMCR1,       sprs.mmcr1);
 794        mtspr(SPRN_MMCR2,       sprs.mmcr2);
 795        if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
 796                mtspr(SPRN_LDBAR, sprs.ldbar);
 797
 798        mtspr(SPRN_SPRG3,       local_paca->sprg_vdso);
 799
 800        if (!radix_enabled())
 801                __slb_restore_bolted_realmode();
 802
 803out:
 804        mtmsr(MSR_KERNEL);
 805
 806        return srr1;
 807}
 808
 809#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 810/*
 811 * This is used in working around bugs in thread reconfiguration
 812 * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
 813 * memory and the way that XER[SO] is checkpointed.
  814 * This function forces the core into SMT4 by asking all other
  815 * threads not to stop, and by sending a message to any that are
  816 * already in a stop state.
 817 * Must be called with preemption disabled.
 818 */
 819void pnv_power9_force_smt4_catch(void)
 820{
 821        int cpu, cpu0, thr;
 822        int awake_threads = 1;          /* this thread is awake */
 823        int poke_threads = 0;
 824        int need_awake = threads_per_core;
 825
 826        cpu = smp_processor_id();
 827        cpu0 = cpu & ~(threads_per_core - 1);
 828        for (thr = 0; thr < threads_per_core; ++thr) {
 829                if (cpu != cpu0 + thr)
 830                        atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
 831        }
 832        /* order setting dont_stop vs testing requested_psscr */
 833        smp_mb();
 834        for (thr = 0; thr < threads_per_core; ++thr) {
 835                if (!paca_ptrs[cpu0+thr]->requested_psscr)
 836                        ++awake_threads;
 837                else
 838                        poke_threads |= (1 << thr);
 839        }
 840
 841        /* If at least 3 threads are awake, the core is in SMT4 already */
 842        if (awake_threads < need_awake) {
 843                /* We have to wake some threads; we'll use msgsnd */
 844                for (thr = 0; thr < threads_per_core; ++thr) {
 845                        if (poke_threads & (1 << thr)) {
 846                                ppc_msgsnd_sync();
 847                                ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
 848                                           paca_ptrs[cpu0+thr]->hw_cpu_id);
 849                        }
 850                }
 851                /* now spin until at least 3 threads are awake */
 852                do {
 853                        for (thr = 0; thr < threads_per_core; ++thr) {
 854                                if ((poke_threads & (1 << thr)) &&
 855                                    !paca_ptrs[cpu0+thr]->requested_psscr) {
 856                                        ++awake_threads;
 857                                        poke_threads &= ~(1 << thr);
 858                                }
 859                        }
 860                } while (awake_threads < need_awake);
 861        }
 862}
 863EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch);
 864
 865void pnv_power9_force_smt4_release(void)
 866{
 867        int cpu, cpu0, thr;
 868
 869        cpu = smp_processor_id();
 870        cpu0 = cpu & ~(threads_per_core - 1);
 871
 872        /* clear all the dont_stop flags */
 873        for (thr = 0; thr < threads_per_core; ++thr) {
 874                if (cpu != cpu0 + thr)
 875                        atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop);
 876        }
 877}
 878EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
 879#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 880
 881struct p10_sprs {
 882        /*
 883         * SPRs that get lost in shallow states:
 884         *
 885         * P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1
 886         * isa300 idle routines restore CR, LR.
 887         * CTR is volatile
 888         * idle thread doesn't use FP or VEC
 889         * kernel doesn't use TAR
 890         * HSPRG1 is only live in HV interrupt entry
 891         * SPRG2 is only live in KVM guests, KVM handles it.
 892         */
 893};
 894
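     /*
      * POWER10 variant of the stop entry path. Deep-state SPR save/restore
      * is not implemented yet (see the XXX markers below), so deep stop
      * states are filtered out in pnv_arch300_idle_init().
      */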
 895static unsigned long power10_idle_stop(unsigned long psscr)
 896{
 897        int cpu = raw_smp_processor_id();
 898        int first = cpu_first_thread_sibling(cpu);
 899        unsigned long *state = &paca_ptrs[first]->idle_state;
 900        unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
 901        unsigned long srr1;
 902        unsigned long pls;
 903//      struct p10_sprs sprs = {}; /* avoid false used-uninitialised */
 904        bool sprs_saved = false;
 905
 906        if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
 907                /* EC=ESL=0 case */
 908
 909                /*
 910                 * Wake synchronously. SRESET via xscom may still cause
 911                 * a 0x100 powersave wakeup with SRR1 reason!
 912                 */
 913                srr1 = isa300_idle_stop_noloss(psscr);          /* go idle */
 914                if (likely(!srr1))
 915                        return 0;
 916
 917                /*
 918                 * Registers not saved, can't recover!
 919                 * This would be a hardware bug
 920                 */
 921                BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
 922
 923                goto out;
 924        }
 925
 926        /* EC=ESL=1 case */
 927        if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
 928                /* XXX: save SPRs for deep state loss here. */
 929
 930                sprs_saved = true;
 931
 932                atomic_start_thread_idle();
 933        }
 934
 935        srr1 = isa300_idle_stop_mayloss(psscr);         /* go idle */
 936
 937        psscr = mfspr(SPRN_PSSCR);
 938
 939        WARN_ON_ONCE(!srr1);
 940        WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
 941
 942        if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
 943                hmi_exception_realmode(NULL);
 944
 945        /*
 946         * On POWER10, SRR1 bits do not match exactly as expected.
 947         * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
 948         * just always test PSSCR for SPR/TB state loss.
 949         */
 950        pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
 951        if (likely(pls < deep_spr_loss_state)) {
 952                if (sprs_saved)
 953                        atomic_stop_thread_idle();
 954                goto out;
 955        }
 956
 957        /* HV state loss */
 958        BUG_ON(!sprs_saved);
 959
 960        atomic_lock_thread_idle();
 961
 962        if ((*state & core_thread_mask) != 0)
 963                goto core_woken;
 964
 965        /* XXX: restore per-core SPRs here */
 966
 967        if (pls >= pnv_first_tb_loss_level) {
 968                /* TB loss */
 969                if (opal_resync_timebase() != OPAL_SUCCESS)
 970                        BUG();
 971        }
 972
 973        /*
 974         * isync after restoring shared SPRs and before unlocking. Unlock
 975         * only contains hwsync which does not necessarily do the right
 976         * thing for SPRs.
 977         */
 978        isync();
 979
 980core_woken:
 981        atomic_unlock_and_stop_thread_idle();
 982
 983        /* XXX: restore per-thread SPRs here */
 984
 985        if (!radix_enabled())
 986                __slb_restore_bolted_realmode();
 987
 988out:
 989        mtmsr(MSR_KERNEL);
 990
 991        return srr1;
 992}
 993
 994#ifdef CONFIG_HOTPLUG_CPU
 995static unsigned long arch300_offline_stop(unsigned long psscr)
 996{
 997        unsigned long srr1;
 998
 999        if (cpu_has_feature(CPU_FTR_ARCH_31))
1000                srr1 = power10_idle_stop(psscr);
1001        else
1002                srr1 = power9_idle_stop(psscr);
1003
1004        return srr1;
1005}
1006#endif
1007
1008void arch300_idle_type(unsigned long stop_psscr_val,
1009                                      unsigned long stop_psscr_mask)
1010{
1011        unsigned long psscr;
1012        unsigned long srr1;
1013
1014        if (!prep_irq_for_idle_irqsoff())
1015                return;
1016
1017        psscr = mfspr(SPRN_PSSCR);
1018        psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
1019
1020        __ppc64_runlatch_off();
1021        if (cpu_has_feature(CPU_FTR_ARCH_31))
1022                srr1 = power10_idle_stop(psscr);
1023        else
1024                srr1 = power9_idle_stop(psscr);
1025        __ppc64_runlatch_on();
1026
1027        fini_irq_for_idle_irqsoff();
1028
1029        irq_set_pending_from_srr1(srr1);
1030}
1031
1032/*
1033 * Used for ppc_md.power_save which needs a function with no parameters
1034 */
1035static void arch300_idle(void)
1036{
1037        arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
1038}
1039
1040#ifdef CONFIG_HOTPLUG_CPU
1041
1042void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
1043{
1044        u64 pir = get_hard_smp_processor_id(cpu);
1045
1046        mtspr(SPRN_LPCR, lpcr_val);
1047
1048        /*
1049         * Program the LPCR via stop-api only if the deepest stop state
1050         * can lose hypervisor context.
1051         */
1052        if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
1053                opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
1054}
1055
1056/*
 1057 * pnv_cpu_offline: Put the CPU into the deepest available platform
 1058 * idle state when it is offlined.
 1059 * Must be called with interrupts hard disabled and no lazy IRQ pending.
1060 */
1061unsigned long pnv_cpu_offline(unsigned int cpu)
1062{
1063        unsigned long srr1;
1064
1065        __ppc64_runlatch_off();
1066
1067        if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
1068                unsigned long psscr;
1069
1070                psscr = mfspr(SPRN_PSSCR);
1071                psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
1072                                                pnv_deepest_stop_psscr_val;
1073                srr1 = arch300_offline_stop(psscr);
1074        } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
1075                srr1 = power7_offline();
1076        } else {
1077                /* This is the fallback method. We emulate snooze */
1078                while (!generic_check_cpu_restart(cpu)) {
1079                        HMT_low();
1080                        HMT_very_low();
1081                }
1082                srr1 = 0;
1083                HMT_medium();
1084        }
1085
1086        __ppc64_runlatch_on();
1087
1088        return srr1;
1089}
1090#endif
1091
1092/*
1093 * Power ISA 3.0 idle initialization.
1094 *
 1095 * Power ISA 3.0 defines a new SPR, the Processor Stop Status and Control
 1096 * Register (PSSCR), to control idle behavior.
1097 *
1098 * PSSCR layout:
1099 * ----------------------------------------------------------
1100 * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
1101 * ----------------------------------------------------------
1102 * 0      4     41   42    43   44     48    54   56    60
1103 *
1104 * PSSCR key fields:
1105 *      Bits 0:3  - Power-Saving Level Status (PLS). This field indicates the
 1106 *      lowest power-saving state the thread entered since the stop
 1107 *      instruction was last executed.
1108 *
 1109 *      Bit 41 - Status Disable (SD)
1110 *      0 - Shows PLS entries
1111 *      1 - PLS entries are all 0
1112 *
1113 *      Bit 42 - Enable State Loss
1114 *      0 - No state is lost irrespective of other fields
1115 *      1 - Allows state loss
1116 *
1117 *      Bit 43 - Exit Criterion
1118 *      0 - Exit from power-save mode on any interrupt
1119 *      1 - Exit from power-save mode controlled by LPCR's PECE bits
1120 *
1121 *      Bits 44:47 - Power-Saving Level Limit
1122 *      This limits the power-saving level that can be entered into.
1123 *
1124 *      Bits 60:63 - Requested Level
 1125 *      Used to specify which power-saving level must be entered on executing
 1126 *      the stop instruction.
1127 */
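     /*
      * Illustrative example (hypothetical values; firmware supplies the real
      * ones via the device tree): a stop state requesting level 2 with state
      * loss allowed would use psscr_val = PSSCR_EC | PSSCR_ESL | 2, i.e.
      * EC = ESL = 1 and RL (bits 60:63) = 2, with psscr_mask covering the
      * same fields.
      */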
1128
1129int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
1130{
1131        int err = 0;
1132
1133        /*
1134         * psscr_mask == 0xf indicates an older firmware.
1135         * Set remaining fields of psscr to the default values.
1136         * See NOTE above definition of PSSCR_HV_DEFAULT_VAL
1137         */
1138        if (*psscr_mask == 0xf) {
1139                *psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL;
1140                *psscr_mask = PSSCR_HV_DEFAULT_MASK;
1141                return err;
1142        }
1143
1144        /*
1145         * New firmware is expected to set the psscr_val bits correctly.
1146         * Validate that the following invariants are correctly maintained by
1147         * the new firmware.
1148         * - ESL bit value matches the EC bit value.
1149         * - ESL bit is set for all the deep stop states.
1150         */
1151        if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) {
1152                err = ERR_EC_ESL_MISMATCH;
1153        } else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
1154                GET_PSSCR_ESL(*psscr_val) == 0) {
1155                err = ERR_DEEP_STATE_ESL_MISMATCH;
1156        }
1157
1158        return err;
1159}
1160
 1161/*
 1162 * pnv_arch300_idle_init: Initializes the default idle state, the first
 1163 *                        deep idle state and the deepest idle state on
 1164 *                        ISA 3.0 CPUs.
 1165 */
1171static void __init pnv_arch300_idle_init(void)
1172{
1173        u64 max_residency_ns = 0;
1174        int i;
1175
  1176        /* stop is not really architected, we only have p9 and p10 drivers */
1177        if (!pvr_version_is(PVR_POWER10) && !pvr_version_is(PVR_POWER9))
1178                return;
1179
1180        /*
1181         * pnv_deepest_stop_{val,mask} should be set to values corresponding to
1182         * the deepest stop state.
1183         *
1184         * pnv_default_stop_{val,mask} should be set to values corresponding to
1185         * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state.
1186         */
1187        pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
1188        deep_spr_loss_state = MAX_STOP_STATE + 1;
1189        for (i = 0; i < nr_pnv_idle_states; i++) {
1190                int err;
1191                struct pnv_idle_states_t *state = &pnv_idle_states[i];
1192                u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;
1193
1194                /* No deep loss driver implemented for POWER10 yet */
1195                if (pvr_version_is(PVR_POWER10) &&
1196                                state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT))
1197                        continue;
1198
1199                if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
1200                     (pnv_first_tb_loss_level > psscr_rl))
1201                        pnv_first_tb_loss_level = psscr_rl;
1202
1203                if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
1204                     (deep_spr_loss_state > psscr_rl))
1205                        deep_spr_loss_state = psscr_rl;
1206
1207                /*
1208                 * The idle code does not deal with TB loss occurring
1209                 * in a shallower state than SPR loss, so force it to
1210                 * behave like SPRs are lost if TB is lost. POWER9 would
  1211                 * never encounter this, but a POWER8 core would if it
1212                 * implemented the stop instruction. So this is for forward
1213                 * compatibility.
1214                 */
1215                if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
1216                     (deep_spr_loss_state > psscr_rl))
1217                        deep_spr_loss_state = psscr_rl;
1218
1219                err = validate_psscr_val_mask(&state->psscr_val,
1220                                              &state->psscr_mask,
1221                                              state->flags);
1222                if (err) {
1223                        report_invalid_psscr_val(state->psscr_val, err);
1224                        continue;
1225                }
1226
1227                state->valid = true;
1228
1229                if (max_residency_ns < state->residency_ns) {
1230                        max_residency_ns = state->residency_ns;
1231                        pnv_deepest_stop_psscr_val = state->psscr_val;
1232                        pnv_deepest_stop_psscr_mask = state->psscr_mask;
1233                        pnv_deepest_stop_flag = state->flags;
1234                        deepest_stop_found = true;
1235                }
1236
1237                if (!default_stop_found &&
1238                    (state->flags & OPAL_PM_STOP_INST_FAST)) {
1239                        pnv_default_stop_val = state->psscr_val;
1240                        pnv_default_stop_mask = state->psscr_mask;
1241                        default_stop_found = true;
1242                        WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT);
1243                }
1244        }
1245
1246        if (unlikely(!default_stop_found)) {
1247                pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
1248        } else {
1249                ppc_md.power_save = arch300_idle;
 1250                pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx, mask = 0x%016llx\n",
1251                        pnv_default_stop_val, pnv_default_stop_mask);
1252        }
1253
1254        if (unlikely(!deepest_stop_found)) {
 1255                pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait\n");
1256        } else {
 1257                pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx, mask = 0x%016llx\n",
1258                        pnv_deepest_stop_psscr_val,
1259                        pnv_deepest_stop_psscr_mask);
1260        }
1261
1262        pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n",
1263                deep_spr_loss_state);
1264
1265        pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n",
1266                pnv_first_tb_loss_level);
1267}
1268
1269static void __init pnv_disable_deep_states(void)
1270{
1271        /*
1272         * The stop-api is unable to restore hypervisor
1273         * resources on wakeup from platform idle states which
1274         * lose full context. So disable such states.
1275         */
1276        supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
1277        pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
1278        pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
1279
1280        if (cpu_has_feature(CPU_FTR_ARCH_300) &&
1281            (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
1282                /*
1283                 * Use the default stop state for CPU-Hotplug
1284                 * if available.
1285                 */
1286                if (default_stop_found) {
1287                        pnv_deepest_stop_psscr_val = pnv_default_stop_val;
1288                        pnv_deepest_stop_psscr_mask = pnv_default_stop_mask;
1289                        pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
1290                                pnv_deepest_stop_psscr_val);
1291                } else { /* Fallback to snooze loop for CPU-Hotplug */
1292                        deepest_stop_found = false;
1293                        pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
1294                }
1295        }
1296}
1297
1298/*
1299 * Probe device tree for supported idle states
1300 */
1301static void __init pnv_probe_idle_states(void)
1302{
1303        int i;
1304
1305        if (nr_pnv_idle_states < 0) {
1306                pr_warn("cpuidle-powernv: no idle states found in the DT\n");
1307                return;
1308        }
1309
1310        if (cpu_has_feature(CPU_FTR_ARCH_300))
1311                pnv_arch300_idle_init();
1312
1313        for (i = 0; i < nr_pnv_idle_states; i++)
1314                supported_cpuidle_states |= pnv_idle_states[i].flags;
1315}
1316
1317/*
 1318 * This function parses the device tree and populates all the information
 1319 * into the pnv_idle_states structure. It also sets up nr_pnv_idle_states,
 1320 * the number of cpuidle states discovered through the device tree.
1321 */
1322
1323static int pnv_parse_cpuidle_dt(void)
1324{
1325        struct device_node *np;
1326        int nr_idle_states, i;
1327        int rc = 0;
1328        u32 *temp_u32;
1329        u64 *temp_u64;
1330        const char **temp_string;
1331
1332        np = of_find_node_by_path("/ibm,opal/power-mgt");
1333        if (!np) {
1334                pr_warn("opal: PowerMgmt Node not found\n");
1335                return -ENODEV;
1336        }
1337        nr_idle_states = of_property_count_u32_elems(np,
1338                                                "ibm,cpu-idle-state-flags");
1339
1340        pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states),
1341                                  GFP_KERNEL);
1342        temp_u32 = kcalloc(nr_idle_states, sizeof(u32),  GFP_KERNEL);
1343        temp_u64 = kcalloc(nr_idle_states, sizeof(u64),  GFP_KERNEL);
1344        temp_string = kcalloc(nr_idle_states, sizeof(char *),  GFP_KERNEL);
1345
1346        if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) {
1347                pr_err("Could not allocate memory for dt parsing\n");
1348                rc = -ENOMEM;
1349                goto out;
1350        }
1351
1352        /* Read flags */
1353        if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags",
1354                                       temp_u32, nr_idle_states)) {
1355                pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
1356                rc = -EINVAL;
1357                goto out;
1358        }
1359        for (i = 0; i < nr_idle_states; i++)
1360                pnv_idle_states[i].flags = temp_u32[i];
1361
1362        /* Read latencies */
1363        if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns",
1364                                       temp_u32, nr_idle_states)) {
1365                pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n");
1366                rc = -EINVAL;
1367                goto out;
1368        }
1369        for (i = 0; i < nr_idle_states; i++)
1370                pnv_idle_states[i].latency_ns = temp_u32[i];
1371
1372        /* Read residencies */
1373        if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns",
1374                                       temp_u32, nr_idle_states)) {
1375                pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
1376                rc = -EINVAL;
1377                goto out;
1378        }
1379        for (i = 0; i < nr_idle_states; i++)
1380                pnv_idle_states[i].residency_ns = temp_u32[i];
1381
1382        /* For power9 and later */
1383        if (cpu_has_feature(CPU_FTR_ARCH_300)) {
  1384                /* Read pm_ctrl_val */
1385                if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr",
1386                                               temp_u64, nr_idle_states)) {
1387                        pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
1388                        rc = -EINVAL;
1389                        goto out;
1390                }
1391                for (i = 0; i < nr_idle_states; i++)
1392                        pnv_idle_states[i].psscr_val = temp_u64[i];
1393
  1394                /* Read pm_ctrl_mask */
1395                if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask",
1396                                               temp_u64, nr_idle_states)) {
1397                        pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
1398                        rc = -EINVAL;
1399                        goto out;
1400                }
1401                for (i = 0; i < nr_idle_states; i++)
1402                        pnv_idle_states[i].psscr_mask = temp_u64[i];
1403        }
1404
1405        /*
 1406         * The power8 specific properties ibm,cpu-idle-state-pmicr-mask and
 1407         * ibm,cpu-idle-state-pmicr-val were never used and there is no
 1408         * plan to use them in the near future. Hence, these properties are not parsed.
1409         */
1410
1411        if (of_property_read_string_array(np, "ibm,cpu-idle-state-names",
1412                                          temp_string, nr_idle_states) < 0) {
1413                pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n");
1414                rc = -EINVAL;
1415                goto out;
1416        }
1417        for (i = 0; i < nr_idle_states; i++)
1418                strlcpy(pnv_idle_states[i].name, temp_string[i],
1419                        PNV_IDLE_NAME_LEN);
1420        nr_pnv_idle_states = nr_idle_states;
1421        rc = 0;
1422out:
1423        kfree(temp_u32);
1424        kfree(temp_u64);
1425        kfree(temp_string);
1426        return rc;
1427}
1428
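     /*
      * Boot-time setup: initialize the per-CPU paca idle fields, parse the
      * idle states from the device tree, and select the ppc_md.power_save
      * and CPU-offline idle entry points for this processor family.
      */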
1429static int __init pnv_init_idle_states(void)
1430{
1431        int cpu;
1432        int rc = 0;
1433
1434        /* Set up PACA fields */
1435        for_each_present_cpu(cpu) {
1436                struct paca_struct *p = paca_ptrs[cpu];
1437
1438                p->idle_state = 0;
1439                if (cpu == cpu_first_thread_sibling(cpu))
1440                        p->idle_state = (1 << threads_per_core) - 1;
1441
1442                if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
1443                        /* P7/P8 nap */
1444                        p->thread_idle_state = PNV_THREAD_RUNNING;
1445                } else if (pvr_version_is(PVR_POWER9)) {
1446                        /* P9 stop workarounds */
1447#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
1448                        p->requested_psscr = 0;
1449                        atomic_set(&p->dont_stop, 0);
1450#endif
1451                }
1452        }
1453
 1454        /* In case we error out, nr_pnv_idle_states will be zero */
1455        nr_pnv_idle_states = 0;
1456        supported_cpuidle_states = 0;
1457
1458        if (cpuidle_disable != IDLE_NO_OVERRIDE)
1459                goto out;
1460        rc = pnv_parse_cpuidle_dt();
1461        if (rc)
1462                return rc;
1463        pnv_probe_idle_states();
1464
1465        if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
1466                if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
1467                        power7_fastsleep_workaround_entry = false;
1468                        power7_fastsleep_workaround_exit = false;
1469                } else {
1470                        /*
 1471                         * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
 1472                         * a workaround is needed to use fastsleep. Provide a
 1473                         * sysfs control to choose how this workaround is to be
 1474                         * applied.
1475                         */
1476                        device_create_file(cpu_subsys.dev_root,
1477                                &dev_attr_fastsleep_workaround_applyonce);
1478                }
1479
1480                update_subcore_sibling_mask();
1481
1482                if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) {
1483                        ppc_md.power_save = power7_idle;
1484                        power7_offline_type = PNV_THREAD_NAP;
1485                }
1486
1487                if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) &&
1488                           (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT))
1489                        power7_offline_type = PNV_THREAD_WINKLE;
1490                else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) ||
1491                           (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1))
1492                        power7_offline_type = PNV_THREAD_SLEEP;
1493        }
1494
1495        if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
1496                if (pnv_save_sprs_for_deep_states())
1497                        pnv_disable_deep_states();
1498        }
1499
1500out:
1501        return 0;
1502}
1503machine_subsys_initcall(powernv, pnv_init_idle_states);
1504