linux/kernel/cpu.c
   1/* CPU control.
   2 * (C) 2001, 2002, 2003, 2004 Rusty Russell
   3 *
   4 * This code is licenced under the GPL.
   5 */
   6#include <linux/sched/mm.h>
   7#include <linux/proc_fs.h>
   8#include <linux/smp.h>
   9#include <linux/init.h>
  10#include <linux/notifier.h>
  11#include <linux/sched/signal.h>
  12#include <linux/sched/hotplug.h>
  13#include <linux/sched/isolation.h>
  14#include <linux/sched/task.h>
  15#include <linux/sched/smt.h>
  16#include <linux/unistd.h>
  17#include <linux/cpu.h>
  18#include <linux/oom.h>
  19#include <linux/rcupdate.h>
  20#include <linux/export.h>
  21#include <linux/bug.h>
  22#include <linux/kthread.h>
  23#include <linux/stop_machine.h>
  24#include <linux/mutex.h>
  25#include <linux/gfp.h>
  26#include <linux/suspend.h>
  27#include <linux/lockdep.h>
  28#include <linux/tick.h>
  29#include <linux/irq.h>
  30#include <linux/nmi.h>
  31#include <linux/smpboot.h>
  32#include <linux/relay.h>
  33#include <linux/slab.h>
  34#include <linux/scs.h>
  35#include <linux/percpu-rwsem.h>
  36#include <linux/cpuset.h>
  37#include <linux/random.h>
  38#include <linux/cc_platform.h>
  39
  40#include <trace/events/power.h>
  41#define CREATE_TRACE_POINTS
  42#include <trace/events/cpuhp.h>
  43
  44#include "smpboot.h"
  45
  46/**
  47 * struct cpuhp_cpu_state - Per cpu hotplug state storage
  48 * @state:      The current cpu state
  49 * @target:     The target state
  50 * @fail:       Current CPU hotplug callback state
  51 * @thread:     Pointer to the hotplug thread
  52 * @should_run: Thread should execute
  53 * @rollback:   Perform a rollback
  54 * @single:     Single callback invocation
  55 * @bringup:    Single callback bringup or teardown selector
  56 * @cpu:        CPU number
  57 * @node:       Remote CPU node; for multi-instance, do a
  58 *              single entry callback for install/remove
  59 * @last:       For multi-instance rollback, remember how far we got
  60 * @cb_state:   The state for a single callback (install/uninstall)
  61 * @result:     Result of the operation
  62 * @done_up:    Signal completion to the issuer of the task for cpu-up
  63 * @done_down:  Signal completion to the issuer of the task for cpu-down
  64 */
  65struct cpuhp_cpu_state {
  66        enum cpuhp_state        state;
  67        enum cpuhp_state        target;
  68        enum cpuhp_state        fail;
  69#ifdef CONFIG_SMP
  70        struct task_struct      *thread;
  71        bool                    should_run;
  72        bool                    rollback;
  73        bool                    single;
  74        bool                    bringup;
  75        struct hlist_node       *node;
  76        struct hlist_node       *last;
  77        enum cpuhp_state        cb_state;
  78        int                     result;
  79        struct completion       done_up;
  80        struct completion       done_down;
  81#endif
  82};
  83
  84static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
  85        .fail = CPUHP_INVALID,
  86};
  87
  88#ifdef CONFIG_SMP
  89cpumask_t cpus_booted_once_mask;
  90#endif
  91
  92#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
  93static struct lockdep_map cpuhp_state_up_map =
  94        STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
  95static struct lockdep_map cpuhp_state_down_map =
  96        STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
  97
  98
  99static inline void cpuhp_lock_acquire(bool bringup)
 100{
 101        lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
 102}
 103
 104static inline void cpuhp_lock_release(bool bringup)
 105{
 106        lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
 107}
 108#else
 109
 110static inline void cpuhp_lock_acquire(bool bringup) { }
 111static inline void cpuhp_lock_release(bool bringup) { }
 112
 113#endif
 114
 115/**
 116 * struct cpuhp_step - Hotplug state machine step
 117 * @name:       Name of the step
 118 * @startup:    Startup function of the step
 119 * @teardown:   Teardown function of the step
 120 * @cant_stop:  Bringup/teardown can't be stopped at this step
 121 * @multi_instance:     State has multiple instances which get added afterwards
 122 */
 123struct cpuhp_step {
 124        const char              *name;
 125        union {
 126                int             (*single)(unsigned int cpu);
 127                int             (*multi)(unsigned int cpu,
 128                                         struct hlist_node *node);
 129        } startup;
 130        union {
 131                int             (*single)(unsigned int cpu);
 132                int             (*multi)(unsigned int cpu,
 133                                         struct hlist_node *node);
 134        } teardown;
 135        /* private: */
 136        struct hlist_head       list;
 137        /* public: */
 138        bool                    cant_stop;
 139        bool                    multi_instance;
 140};
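/*
 * Illustrative sketch, not part of this file: subsystems normally do not
 * populate struct cpuhp_step directly but register startup/teardown
 * callbacks through cpuhp_setup_state() and friends. The callback names
 * below are hypothetical.
 *
 *	static int foo_online(unsigned int cpu)
 *	{
 *		return 0;
 *	}
 *
 *	static int foo_offline(unsigned int cpu)
 *	{
 *		return 0;
 *	}
 *
 *	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "foo:online",
 *				foo_online, foo_offline);
 *
 * A negative return value from either callback aborts the transition and
 * triggers the rollback handling implemented below.
 */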
 141
 142static DEFINE_MUTEX(cpuhp_state_mutex);
 143static struct cpuhp_step cpuhp_hp_states[];
 144
 145static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
 146{
 147        return cpuhp_hp_states + state;
 148}
 149
 150static bool cpuhp_step_empty(bool bringup, struct cpuhp_step *step)
 151{
 152        return bringup ? !step->startup.single : !step->teardown.single;
 153}
 154
 155/**
 156 * cpuhp_invoke_callback - Invoke the callbacks for a given state
 157 * @cpu:        The cpu for which the callback should be invoked
 158 * @state:      The state to do callbacks for
 159 * @bringup:    True if the bringup callback should be invoked
 160 * @node:       For multi-instance, do a single entry callback for install/remove
 161 * @lastp:      For multi-instance rollback, remember how far we got
 162 *
 163 * Called from cpu hotplug and from the state register machinery.
 164 *
 165 * Return: %0 on success or a negative errno code
 166 */
 167static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
 168                                 bool bringup, struct hlist_node *node,
 169                                 struct hlist_node **lastp)
 170{
 171        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 172        struct cpuhp_step *step = cpuhp_get_step(state);
 173        int (*cbm)(unsigned int cpu, struct hlist_node *node);
 174        int (*cb)(unsigned int cpu);
 175        int ret, cnt;
 176
 177        if (st->fail == state) {
 178                st->fail = CPUHP_INVALID;
 179                return -EAGAIN;
 180        }
 181
 182        if (cpuhp_step_empty(bringup, step)) {
 183                WARN_ON_ONCE(1);
 184                return 0;
 185        }
 186
 187        if (!step->multi_instance) {
 188                WARN_ON_ONCE(lastp && *lastp);
 189                cb = bringup ? step->startup.single : step->teardown.single;
 190
 191                trace_cpuhp_enter(cpu, st->target, state, cb);
 192                ret = cb(cpu);
 193                trace_cpuhp_exit(cpu, st->state, state, ret);
 194                return ret;
 195        }
 196        cbm = bringup ? step->startup.multi : step->teardown.multi;
 197
 198        /* Single invocation for instance add/remove */
 199        if (node) {
 200                WARN_ON_ONCE(lastp && *lastp);
 201                trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
 202                ret = cbm(cpu, node);
 203                trace_cpuhp_exit(cpu, st->state, state, ret);
 204                return ret;
 205        }
 206
 207        /* State transition. Invoke on all instances */
 208        cnt = 0;
 209        hlist_for_each(node, &step->list) {
 210                if (lastp && node == *lastp)
 211                        break;
 212
 213                trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
 214                ret = cbm(cpu, node);
 215                trace_cpuhp_exit(cpu, st->state, state, ret);
 216                if (ret) {
 217                        if (!lastp)
 218                                goto err;
 219
 220                        *lastp = node;
 221                        return ret;
 222                }
 223                cnt++;
 224        }
 225        if (lastp)
 226                *lastp = NULL;
 227        return 0;
 228err:
 229        /* Rollback the instances if one failed */
 230        cbm = !bringup ? step->startup.multi : step->teardown.multi;
 231        if (!cbm)
 232                return ret;
 233
 234        hlist_for_each(node, &step->list) {
 235                if (!cnt--)
 236                        break;
 237
 238                trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
 239                ret = cbm(cpu, node);
 240                trace_cpuhp_exit(cpu, st->state, state, ret);
 241                /*
  242                 * Rollback must not fail.
 243                 */
 244                WARN_ON_ONCE(ret);
 245        }
 246        return ret;
 247}
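/*
 * Illustrative sketch, not part of this file: the multi-instance handling
 * above is what backs cpuhp_setup_state_multi() + cpuhp_state_add_instance().
 * A driver embeds a struct hlist_node in its per-device data and gets that
 * node passed back to its per-instance callbacks. All names below are
 * hypothetical.
 *
 *	struct foo_dev {
 *		struct hlist_node node;
 *	};
 *
 *	static int foo_dev_online(unsigned int cpu, struct hlist_node *node)
 *	{
 *		struct foo_dev *fd = hlist_entry(node, struct foo_dev, node);
 *
 *		return foo_dev_bind_cpu(fd, cpu);
 *	}
 *
 *	state = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "foo:online",
 *					foo_dev_online, NULL);
 *	ret = cpuhp_state_add_instance(state, &fd->node);
 */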
 248
 249#ifdef CONFIG_SMP
 250static bool cpuhp_is_ap_state(enum cpuhp_state state)
 251{
 252        /*
 253         * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
 254         * purposes as that state is handled explicitly in cpu_down.
 255         */
 256        return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
 257}
 258
 259static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
 260{
 261        struct completion *done = bringup ? &st->done_up : &st->done_down;
 262        wait_for_completion(done);
 263}
 264
 265static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
 266{
 267        struct completion *done = bringup ? &st->done_up : &st->done_down;
 268        complete(done);
 269}
 270
 271/*
  272 * The former STARTING/DYING states run with IRQs disabled and must not fail.
 273 */
 274static bool cpuhp_is_atomic_state(enum cpuhp_state state)
 275{
 276        return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
 277}
 278
 279/* Serializes the updates to cpu_online_mask, cpu_present_mask */
 280static DEFINE_MUTEX(cpu_add_remove_lock);
 281bool cpuhp_tasks_frozen;
 282EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
 283
 284/*
 285 * The following two APIs (cpu_maps_update_begin/done) must be used when
 286 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
 287 */
 288void cpu_maps_update_begin(void)
 289{
 290        mutex_lock(&cpu_add_remove_lock);
 291}
 292
 293void cpu_maps_update_done(void)
 294{
 295        mutex_unlock(&cpu_add_remove_lock);
 296}
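/*
 * Illustrative sketch, not part of this file: code that updates the present
 * mask wraps the change in this pair, e.g. a (hypothetical) hot-add path:
 *
 *	cpu_maps_update_begin();
 *	set_cpu_present(cpu, true);
 *	cpu_maps_update_done();
 */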
 297
 298/*
 299 * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
 300 * Should always be manipulated under cpu_add_remove_lock
 301 */
 302static int cpu_hotplug_disabled;
 303
 304#ifdef CONFIG_HOTPLUG_CPU
 305
 306DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
 307
 308void cpus_read_lock(void)
 309{
 310        percpu_down_read(&cpu_hotplug_lock);
 311}
 312EXPORT_SYMBOL_GPL(cpus_read_lock);
 313
 314int cpus_read_trylock(void)
 315{
 316        return percpu_down_read_trylock(&cpu_hotplug_lock);
 317}
 318EXPORT_SYMBOL_GPL(cpus_read_trylock);
 319
 320void cpus_read_unlock(void)
 321{
 322        percpu_up_read(&cpu_hotplug_lock);
 323}
 324EXPORT_SYMBOL_GPL(cpus_read_unlock);
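/*
 * Illustrative sketch, not part of this file: a typical reader section that
 * needs the set of online CPUs to stay stable while walking it. The per-CPU
 * helper is hypothetical.
 *
 *	cpus_read_lock();
 *	for_each_online_cpu(cpu)
 *		foo_init_cpu(cpu);
 *	cpus_read_unlock();
 */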
 325
 326void cpus_write_lock(void)
 327{
 328        percpu_down_write(&cpu_hotplug_lock);
 329}
 330
 331void cpus_write_unlock(void)
 332{
 333        percpu_up_write(&cpu_hotplug_lock);
 334}
 335
 336void lockdep_assert_cpus_held(void)
 337{
 338        /*
 339         * We can't have hotplug operations before userspace starts running,
 340         * and some init codepaths will knowingly not take the hotplug lock.
 341         * This is all valid, so mute lockdep until it makes sense to report
 342         * unheld locks.
 343         */
 344        if (system_state < SYSTEM_RUNNING)
 345                return;
 346
 347        percpu_rwsem_assert_held(&cpu_hotplug_lock);
 348}
 349
 350#ifdef CONFIG_LOCKDEP
 351int lockdep_is_cpus_held(void)
 352{
 353        return percpu_rwsem_is_held(&cpu_hotplug_lock);
 354}
 355#endif
 356
 357static void lockdep_acquire_cpus_lock(void)
 358{
 359        rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
 360}
 361
 362static void lockdep_release_cpus_lock(void)
 363{
 364        rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
 365}
 366
 367/*
 368 * Wait for currently running CPU hotplug operations to complete (if any) and
 369 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
 370 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
 371 * hotplug path before performing hotplug operations. So acquiring that lock
 372 * guarantees mutual exclusion from any currently running hotplug operations.
 373 */
 374void cpu_hotplug_disable(void)
 375{
 376        cpu_maps_update_begin();
 377        cpu_hotplug_disabled++;
 378        cpu_maps_update_done();
 379}
 380EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
 381
 382static void __cpu_hotplug_enable(void)
 383{
 384        if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
 385                return;
 386        cpu_hotplug_disabled--;
 387}
 388
 389void cpu_hotplug_enable(void)
 390{
 391        cpu_maps_update_begin();
 392        __cpu_hotplug_enable();
 393        cpu_maps_update_done();
 394}
 395EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
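/*
 * Illustrative sketch, not part of this file: callers which must keep CPUs
 * from coming or going for a while pair the two calls above. Unlike
 * cpus_read_lock(), this does not block hotplug operations; it makes
 * cpu_up()/cpu_down() fail with -EBUSY instead (see cpu_hotplug_disabled
 * above). The helper in the middle is hypothetical.
 *
 *	cpu_hotplug_disable();
 *	foo_do_work_that_must_not_race_with_hotplug();
 *	cpu_hotplug_enable();
 */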
 396
 397#else
 398
 399static void lockdep_acquire_cpus_lock(void)
 400{
 401}
 402
 403static void lockdep_release_cpus_lock(void)
 404{
 405}
 406
 407#endif  /* CONFIG_HOTPLUG_CPU */
 408
 409/*
 410 * Architectures that need SMT-specific errata handling during SMT hotplug
 411 * should override this.
 412 */
 413void __weak arch_smt_update(void) { }
 414
 415#ifdef CONFIG_HOTPLUG_SMT
 416enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
 417
 418void __init cpu_smt_disable(bool force)
 419{
 420        if (!cpu_smt_possible())
 421                return;
 422
 423        if (force) {
 424                pr_info("SMT: Force disabled\n");
 425                cpu_smt_control = CPU_SMT_FORCE_DISABLED;
 426        } else {
 427                pr_info("SMT: disabled\n");
 428                cpu_smt_control = CPU_SMT_DISABLED;
 429        }
 430}
 431
 432/*
 433 * The decision whether SMT is supported can only be done after the full
 434 * CPU identification. Called from architecture code.
 435 */
 436void __init cpu_smt_check_topology(void)
 437{
 438        if (!topology_smt_supported())
 439                cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
 440}
 441
 442static int __init smt_cmdline_disable(char *str)
 443{
 444        cpu_smt_disable(str && !strcmp(str, "force"));
 445        return 0;
 446}
 447early_param("nosmt", smt_cmdline_disable);
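/*
 * Boot command line usage handled right above (illustrative summary):
 *
 *	nosmt		SMT disabled; assuming the usual sysfs SMT control is
 *			built in, it can typically be re-enabled at runtime
 *	nosmt=force	SMT disabled irreversibly for this boot
 */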
 448
 449static inline bool cpu_smt_allowed(unsigned int cpu)
 450{
 451        if (cpu_smt_control == CPU_SMT_ENABLED)
 452                return true;
 453
 454        if (topology_is_primary_thread(cpu))
 455                return true;
 456
 457        /*
 458         * On x86 it's required to boot all logical CPUs at least once so
 459         * that the init code can get a chance to set CR4.MCE on each
  460         * CPU. Otherwise, a broadcast MCE observing CR4.MCE=0b on any
  461         * core will shut down the machine.
 462         */
 463        return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
 464}
 465
  466/* Returns true if SMT is supported and not forcefully (irreversibly) disabled */
 467bool cpu_smt_possible(void)
 468{
 469        return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
 470                cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
 471}
 472EXPORT_SYMBOL_GPL(cpu_smt_possible);
 473#else
 474static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
 475#endif
 476
 477static inline enum cpuhp_state
 478cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target)
 479{
 480        enum cpuhp_state prev_state = st->state;
 481        bool bringup = st->state < target;
 482
 483        st->rollback = false;
 484        st->last = NULL;
 485
 486        st->target = target;
 487        st->single = false;
 488        st->bringup = bringup;
 489        if (cpu_dying(cpu) != !bringup)
 490                set_cpu_dying(cpu, !bringup);
 491
 492        return prev_state;
 493}
 494
 495static inline void
 496cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st,
 497                  enum cpuhp_state prev_state)
 498{
 499        bool bringup = !st->bringup;
 500
 501        st->target = prev_state;
 502
 503        /*
  504         * Already rolling back. No need to invert the bringup value or to change
 505         * the current state.
 506         */
 507        if (st->rollback)
 508                return;
 509
 510        st->rollback = true;
 511
 512        /*
 513         * If we have st->last we need to undo partial multi_instance of this
 514         * state first. Otherwise start undo at the previous state.
 515         */
 516        if (!st->last) {
 517                if (st->bringup)
 518                        st->state--;
 519                else
 520                        st->state++;
 521        }
 522
 523        st->bringup = bringup;
 524        if (cpu_dying(cpu) != !bringup)
 525                set_cpu_dying(cpu, !bringup);
 526}
 527
 528/* Regular hotplug invocation of the AP hotplug thread */
 529static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
 530{
 531        if (!st->single && st->state == st->target)
 532                return;
 533
 534        st->result = 0;
 535        /*
 536         * Make sure the above stores are visible before should_run becomes
  537         * true. Paired with the smp_mb() in cpuhp_thread_fun().
 538         */
 539        smp_mb();
 540        st->should_run = true;
 541        wake_up_process(st->thread);
 542        wait_for_ap_thread(st, st->bringup);
 543}
 544
 545static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st,
 546                         enum cpuhp_state target)
 547{
 548        enum cpuhp_state prev_state;
 549        int ret;
 550
 551        prev_state = cpuhp_set_state(cpu, st, target);
 552        __cpuhp_kick_ap(st);
 553        if ((ret = st->result)) {
 554                cpuhp_reset_state(cpu, st, prev_state);
 555                __cpuhp_kick_ap(st);
 556        }
 557
 558        return ret;
 559}
 560
 561static int bringup_wait_for_ap(unsigned int cpu)
 562{
 563        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 564
 565        /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
 566        wait_for_ap_thread(st, true);
 567        if (WARN_ON_ONCE((!cpu_online(cpu))))
 568                return -ECANCELED;
 569
 570        /* Unpark the hotplug thread of the target cpu */
 571        kthread_unpark(st->thread);
 572
 573        /*
  574         * SMT soft disabling on X86 requires bringing the CPU out of the
 575         * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit.  The
 576         * CPU marked itself as booted_once in notify_cpu_starting() so the
 577         * cpu_smt_allowed() check will now return false if this is not the
 578         * primary sibling.
 579         */
 580        if (!cpu_smt_allowed(cpu))
 581                return -ECANCELED;
 582
 583        if (st->target <= CPUHP_AP_ONLINE_IDLE)
 584                return 0;
 585
 586        return cpuhp_kick_ap(cpu, st, st->target);
 587}
 588
 589static int bringup_cpu(unsigned int cpu)
 590{
 591        struct task_struct *idle = idle_thread_get(cpu);
 592        int ret;
 593
 594        /*
 595         * Reset stale stack state from the last time this CPU was online.
 596         */
 597        scs_task_reset(idle);
 598        kasan_unpoison_task_stack(idle);
 599
 600        /*
 601         * Some architectures have to walk the irq descriptors to
  602         * set up the vector space for the cpu which comes online.
 603         * Prevent irq alloc/free across the bringup.
 604         */
 605        irq_lock_sparse();
 606
 607        /* Arch-specific enabling code. */
 608        ret = __cpu_up(cpu, idle);
 609        irq_unlock_sparse();
 610        if (ret)
 611                return ret;
 612        return bringup_wait_for_ap(cpu);
 613}
 614
 615static int finish_cpu(unsigned int cpu)
 616{
 617        struct task_struct *idle = idle_thread_get(cpu);
 618        struct mm_struct *mm = idle->active_mm;
 619
 620        /*
 621         * idle_task_exit() will have switched to &init_mm, now
 622         * clean up any remaining active_mm state.
 623         */
 624        if (mm != &init_mm)
 625                idle->active_mm = &init_mm;
 626        mmdrop(mm);
 627        return 0;
 628}
 629
 630/*
 631 * Hotplug state machine related functions
 632 */
 633
 634/*
 635 * Get the next state to run. Empty ones will be skipped. Returns true if a
 636 * state must be run.
 637 *
 638 * st->state will be modified ahead of time, to match state_to_run, as if it
  639 * had already run.
 640 */
 641static bool cpuhp_next_state(bool bringup,
 642                             enum cpuhp_state *state_to_run,
 643                             struct cpuhp_cpu_state *st,
 644                             enum cpuhp_state target)
 645{
 646        do {
 647                if (bringup) {
 648                        if (st->state >= target)
 649                                return false;
 650
 651                        *state_to_run = ++st->state;
 652                } else {
 653                        if (st->state <= target)
 654                                return false;
 655
 656                        *state_to_run = st->state--;
 657                }
 658
 659                if (!cpuhp_step_empty(bringup, cpuhp_get_step(*state_to_run)))
 660                        break;
 661        } while (true);
 662
 663        return true;
 664}
 665
 666static int cpuhp_invoke_callback_range(bool bringup,
 667                                       unsigned int cpu,
 668                                       struct cpuhp_cpu_state *st,
 669                                       enum cpuhp_state target)
 670{
 671        enum cpuhp_state state;
 672        int err = 0;
 673
 674        while (cpuhp_next_state(bringup, &state, st, target)) {
 675                err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL);
 676                if (err)
 677                        break;
 678        }
 679
 680        return err;
 681}
 682
 683static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
 684{
 685        if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
 686                return true;
 687        /*
 688         * When CPU hotplug is disabled, then taking the CPU down is not
 689         * possible because takedown_cpu() and the architecture and
 690         * subsystem specific mechanisms are not available. So the CPU
 691         * which would be completely unplugged again needs to stay around
 692         * in the current state.
 693         */
 694        return st->state <= CPUHP_BRINGUP_CPU;
 695}
 696
 697static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
 698                              enum cpuhp_state target)
 699{
 700        enum cpuhp_state prev_state = st->state;
 701        int ret = 0;
 702
 703        ret = cpuhp_invoke_callback_range(true, cpu, st, target);
 704        if (ret) {
 705                pr_debug("CPU UP failed (%d) CPU %u state %s (%d)\n",
 706                         ret, cpu, cpuhp_get_step(st->state)->name,
 707                         st->state);
 708
 709                cpuhp_reset_state(cpu, st, prev_state);
 710                if (can_rollback_cpu(st))
 711                        WARN_ON(cpuhp_invoke_callback_range(false, cpu, st,
 712                                                            prev_state));
 713        }
 714        return ret;
 715}
 716
 717/*
 718 * The cpu hotplug threads manage the bringup and teardown of the cpus
 719 */
 720static int cpuhp_should_run(unsigned int cpu)
 721{
 722        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
 723
 724        return st->should_run;
 725}
 726
 727/*
 728 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
 729 * callbacks when a state gets [un]installed at runtime.
 730 *
 731 * Each invocation of this function by the smpboot thread does a single AP
 732 * state callback.
 733 *
 734 * It has 3 modes of operation:
 735 *  - single: runs st->cb_state
 736 *  - up:     runs ++st->state, while st->state < st->target
 737 *  - down:   runs st->state--, while st->state > st->target
 738 *
 739 * When complete or on error, should_run is cleared and the completion is fired.
 740 */
 741static void cpuhp_thread_fun(unsigned int cpu)
 742{
 743        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
 744        bool bringup = st->bringup;
 745        enum cpuhp_state state;
 746
 747        if (WARN_ON_ONCE(!st->should_run))
 748                return;
 749
 750        /*
 751         * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
 752         * that if we see ->should_run we also see the rest of the state.
 753         */
 754        smp_mb();
 755
 756        /*
  757         * The BP holds the hotplug lock, but we're now running on the AP;
  758         * ensure that anybody asserting the lock is held will actually
  759         * find it so.
 760         */
 761        lockdep_acquire_cpus_lock();
 762        cpuhp_lock_acquire(bringup);
 763
 764        if (st->single) {
 765                state = st->cb_state;
 766                st->should_run = false;
 767        } else {
 768                st->should_run = cpuhp_next_state(bringup, &state, st, st->target);
 769                if (!st->should_run)
 770                        goto end;
 771        }
 772
 773        WARN_ON_ONCE(!cpuhp_is_ap_state(state));
 774
 775        if (cpuhp_is_atomic_state(state)) {
 776                local_irq_disable();
 777                st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
 778                local_irq_enable();
 779
 780                /*
 781                 * STARTING/DYING must not fail!
 782                 */
 783                WARN_ON_ONCE(st->result);
 784        } else {
 785                st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
 786        }
 787
 788        if (st->result) {
 789                /*
  790                 * If we fail on a rollback, we're up a creek without a
  791                 * paddle: no way forward, no way back. We lose, thanks for
  792                 * playing.
 793                 */
 794                WARN_ON_ONCE(st->rollback);
 795                st->should_run = false;
 796        }
 797
 798end:
 799        cpuhp_lock_release(bringup);
 800        lockdep_release_cpus_lock();
 801
 802        if (!st->should_run)
 803                complete_ap_thread(st, bringup);
 804}
 805
 806/* Invoke a single callback on a remote cpu */
 807static int
 808cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
 809                         struct hlist_node *node)
 810{
 811        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 812        int ret;
 813
 814        if (!cpu_online(cpu))
 815                return 0;
 816
 817        cpuhp_lock_acquire(false);
 818        cpuhp_lock_release(false);
 819
 820        cpuhp_lock_acquire(true);
 821        cpuhp_lock_release(true);
 822
 823        /*
 824         * If we are up and running, use the hotplug thread. For early calls
 825         * we invoke the thread function directly.
 826         */
 827        if (!st->thread)
 828                return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
 829
 830        st->rollback = false;
 831        st->last = NULL;
 832
 833        st->node = node;
 834        st->bringup = bringup;
 835        st->cb_state = state;
 836        st->single = true;
 837
 838        __cpuhp_kick_ap(st);
 839
 840        /*
  841         * If we failed and made partial progress, roll it back.
 842         */
 843        if ((ret = st->result) && st->last) {
 844                st->rollback = true;
 845                st->bringup = !bringup;
 846
 847                __cpuhp_kick_ap(st);
 848        }
 849
 850        /*
  851         * Clean up the leftovers so the next hotplug operation won't use stale
 852         * data.
 853         */
 854        st->node = st->last = NULL;
 855        return ret;
 856}
 857
 858static int cpuhp_kick_ap_work(unsigned int cpu)
 859{
 860        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 861        enum cpuhp_state prev_state = st->state;
 862        int ret;
 863
 864        cpuhp_lock_acquire(false);
 865        cpuhp_lock_release(false);
 866
 867        cpuhp_lock_acquire(true);
 868        cpuhp_lock_release(true);
 869
 870        trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
 871        ret = cpuhp_kick_ap(cpu, st, st->target);
 872        trace_cpuhp_exit(cpu, st->state, prev_state, ret);
 873
 874        return ret;
 875}
 876
 877static struct smp_hotplug_thread cpuhp_threads = {
 878        .store                  = &cpuhp_state.thread,
 879        .thread_should_run      = cpuhp_should_run,
 880        .thread_fn              = cpuhp_thread_fun,
 881        .thread_comm            = "cpuhp/%u",
 882        .selfparking            = true,
 883};
 884
 885static __init void cpuhp_init_state(void)
 886{
 887        struct cpuhp_cpu_state *st;
 888        int cpu;
 889
 890        for_each_possible_cpu(cpu) {
 891                st = per_cpu_ptr(&cpuhp_state, cpu);
 892                init_completion(&st->done_up);
 893                init_completion(&st->done_down);
 894        }
 895}
 896
 897void __init cpuhp_threads_init(void)
 898{
 899        cpuhp_init_state();
 900        BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
 901        kthread_unpark(this_cpu_read(cpuhp_state.thread));
 902}
 903
 904/*
 905 *
 906 * Serialize hotplug trainwrecks outside of the cpu_hotplug_lock
 907 * protected region.
 908 *
 909 * The operation is still serialized against concurrent CPU hotplug via
 910 * cpu_add_remove_lock, i.e. CPU map protection.  But it is _not_
 911 * serialized against other hotplug related activity like adding or
 912 * removing of state callbacks and state instances, which invoke either the
 913 * startup or the teardown callback of the affected state.
 914 *
 915 * This is required for subsystems which are unfixable vs. CPU hotplug and
 916 * evade lock inversion problems by scheduling work which has to be
 917 * completed _before_ cpu_up()/_cpu_down() returns.
 918 *
 919 * Don't even think about adding anything to this for any new code or even
  920 * drivers. Its only purpose is to keep existing lock order trainwrecks
 921 * working.
 922 *
 923 * For cpu_down() there might be valid reasons to finish cleanups which are
 924 * not required to be done under cpu_hotplug_lock, but that's a different
  925 * story and would not be invoked via this.
 926 */
 927static void cpu_up_down_serialize_trainwrecks(bool tasks_frozen)
 928{
 929        /*
 930         * cpusets delegate hotplug operations to a worker to "solve" the
 931         * lock order problems. Wait for the worker, but only if tasks are
 932         * _not_ frozen (suspend, hibernate) as that would wait forever.
 933         *
 934         * The wait is required because otherwise the hotplug operation
 935         * returns with inconsistent state, which could even be observed in
 936         * user space when a new CPU is brought up. The CPU plug uevent
 937         * would be delivered and user space reacting on it would fail to
 938         * move tasks to the newly plugged CPU up to the point where the
 939         * work has finished because up to that point the newly plugged CPU
 940         * is not assignable in cpusets/cgroups. On unplug that's not
 941         * necessarily a visible issue, but it is still inconsistent state,
 942         * which is the real problem which needs to be "fixed". This can't
 943         * prevent the transient state between scheduling the work and
 944         * returning from waiting for it.
 945         */
 946        if (!tasks_frozen)
 947                cpuset_wait_for_hotplug();
 948}
 949
 950#ifdef CONFIG_HOTPLUG_CPU
 951#ifndef arch_clear_mm_cpumask_cpu
 952#define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
 953#endif
 954
 955/**
 956 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
 957 * @cpu: a CPU id
 958 *
 959 * This function walks all processes, finds a valid mm struct for each one and
 960 * then clears a corresponding bit in mm's cpumask.  While this all sounds
 961 * trivial, there are various non-obvious corner cases, which this function
 962 * tries to solve in a safe manner.
 963 *
 964 * Also note that the function uses a somewhat relaxed locking scheme, so it may
 965 * be called only for an already offlined CPU.
 966 */
 967void clear_tasks_mm_cpumask(int cpu)
 968{
 969        struct task_struct *p;
 970
 971        /*
 972         * This function is called after the cpu is taken down and marked
  973         * offline, so it's not like new tasks will ever get this cpu set in
 974         * their mm mask. -- Peter Zijlstra
 975         * Thus, we may use rcu_read_lock() here, instead of grabbing
 976         * full-fledged tasklist_lock.
 977         */
 978        WARN_ON(cpu_online(cpu));
 979        rcu_read_lock();
 980        for_each_process(p) {
 981                struct task_struct *t;
 982
 983                /*
 984                 * Main thread might exit, but other threads may still have
 985                 * a valid mm. Find one.
 986                 */
 987                t = find_lock_task_mm(p);
 988                if (!t)
 989                        continue;
 990                arch_clear_mm_cpumask_cpu(cpu, t->mm);
 991                task_unlock(t);
 992        }
 993        rcu_read_unlock();
 994}
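/*
 * Illustrative sketch, not part of this file: architectures typically call
 * clear_tasks_mm_cpumask() from their CPU teardown path once the CPU has
 * been marked offline, roughly like this (simplified, details vary per
 * architecture):
 *
 *	int __cpu_disable(void)
 *	{
 *		unsigned int cpu = smp_processor_id();
 *
 *		set_cpu_online(cpu, false);
 *		irq_migrate_all_off_this_cpu();
 *		clear_tasks_mm_cpumask(cpu);
 *		return 0;
 *	}
 */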
 995
 996/* Take this CPU down. */
 997static int take_cpu_down(void *_param)
 998{
 999        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1000        enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
1001        int err, cpu = smp_processor_id();
1002        int ret;
1003
1004        /* Ensure this CPU doesn't handle any more interrupts. */
1005        err = __cpu_disable();
1006        if (err < 0)
1007                return err;
1008
1009        /*
1010         * Must be called from CPUHP_TEARDOWN_CPU, which means, as we are going
1011         * down, that the current state is CPUHP_TEARDOWN_CPU - 1.
1012         */
1013        WARN_ON(st->state != (CPUHP_TEARDOWN_CPU - 1));
1014
1015        /* Invoke the former CPU_DYING callbacks */
1016        ret = cpuhp_invoke_callback_range(false, cpu, st, target);
1017
1018        /*
1019         * DYING must not fail!
1020         */
1021        WARN_ON_ONCE(ret);
1022
1023        /* Give up timekeeping duties */
1024        tick_handover_do_timer();
1025        /* Remove CPU from timer broadcasting */
1026        tick_offline_cpu(cpu);
1027        /* Park the stopper thread */
1028        stop_machine_park(cpu);
1029        return 0;
1030}
1031
1032static int takedown_cpu(unsigned int cpu)
1033{
1034        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1035        int err;
1036
1037        /* Park the smpboot threads */
1038        kthread_park(st->thread);
1039
1040        /*
1041         * Prevent irq alloc/free while the dying cpu reorganizes the
1042         * interrupt affinities.
1043         */
1044        irq_lock_sparse();
1045
1046        /*
1047         * So now all preempt/rcu users must observe !cpu_active().
1048         */
1049        err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
1050        if (err) {
1051                /* CPU refused to die */
1052                irq_unlock_sparse();
1053                /* Unpark the hotplug thread so we can rollback there */
1054                kthread_unpark(st->thread);
1055                return err;
1056        }
1057        BUG_ON(cpu_online(cpu));
1058
1059        /*
1060         * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
1061         * all runnable tasks from the CPU, there's only the idle task left now
1062         * that the migration thread is done doing the stop_machine thing.
1063         *
1064         * Wait for the stop thread to go away.
1065         */
1066        wait_for_ap_thread(st, false);
1067        BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
1068
1069        /* Interrupts are moved away from the dying cpu, reenable alloc/free */
1070        irq_unlock_sparse();
1071
1072        hotplug_cpu__broadcast_tick_pull(cpu);
1073        /* This actually kills the CPU. */
1074        __cpu_die(cpu);
1075
1076        tick_cleanup_dead_cpu(cpu);
1077        rcutree_migrate_callbacks(cpu);
1078        return 0;
1079}
1080
1081static void cpuhp_complete_idle_dead(void *arg)
1082{
1083        struct cpuhp_cpu_state *st = arg;
1084
1085        complete_ap_thread(st, false);
1086}
1087
1088void cpuhp_report_idle_dead(void)
1089{
1090        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1091
1092        BUG_ON(st->state != CPUHP_AP_OFFLINE);
1093        rcu_report_dead(smp_processor_id());
1094        st->state = CPUHP_AP_IDLE_DEAD;
1095        /*
1096         * We cannot call complete after rcu_report_dead() so we delegate it
1097         * to an online cpu.
1098         */
1099        smp_call_function_single(cpumask_first(cpu_online_mask),
1100                                 cpuhp_complete_idle_dead, st, 0);
1101}
1102
1103static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
1104                                enum cpuhp_state target)
1105{
1106        enum cpuhp_state prev_state = st->state;
1107        int ret = 0;
1108
1109        ret = cpuhp_invoke_callback_range(false, cpu, st, target);
1110        if (ret) {
1111                pr_debug("CPU DOWN failed (%d) CPU %u state %s (%d)\n",
1112                         ret, cpu, cpuhp_get_step(st->state)->name,
1113                         st->state);
1114
1115                cpuhp_reset_state(cpu, st, prev_state);
1116
1117                if (st->state < prev_state)
1118                        WARN_ON(cpuhp_invoke_callback_range(true, cpu, st,
1119                                                            prev_state));
1120        }
1121
1122        return ret;
1123}
1124
1125/* Requires cpu_add_remove_lock to be held */
1126static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
1127                           enum cpuhp_state target)
1128{
1129        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1130        int prev_state, ret = 0;
1131
1132        if (num_online_cpus() == 1)
1133                return -EBUSY;
1134
1135        if (!cpu_present(cpu))
1136                return -EINVAL;
1137
1138        cpus_write_lock();
1139
1140        cpuhp_tasks_frozen = tasks_frozen;
1141
1142        prev_state = cpuhp_set_state(cpu, st, target);
1143        /*
1144         * If the current CPU state is in the range of the AP hotplug thread,
1145         * then we need to kick the thread.
1146         */
1147        if (st->state > CPUHP_TEARDOWN_CPU) {
1148                st->target = max((int)target, CPUHP_TEARDOWN_CPU);
1149                ret = cpuhp_kick_ap_work(cpu);
1150                /*
1151                 * The AP side has done the error rollback already. Just
1152                 * return the error code..
1153                 */
1154                if (ret)
1155                        goto out;
1156
1157                /*
1158                 * We might have stopped still in the range of the AP hotplug
1159                 * thread. Nothing to do anymore.
1160                 */
1161                if (st->state > CPUHP_TEARDOWN_CPU)
1162                        goto out;
1163
1164                st->target = target;
1165        }
1166        /*
1167         * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
1168         * to do the further cleanups.
1169         */
1170        ret = cpuhp_down_callbacks(cpu, st, target);
1171        if (ret && st->state < prev_state) {
1172                if (st->state == CPUHP_TEARDOWN_CPU) {
1173                        cpuhp_reset_state(cpu, st, prev_state);
1174                        __cpuhp_kick_ap(st);
1175                } else {
1176                        WARN(1, "DEAD callback error for CPU%d", cpu);
1177                }
1178        }
1179
1180out:
1181        cpus_write_unlock();
1182        /*
1183         * Do post unplug cleanup. This is still protected against
1184         * concurrent CPU hotplug via cpu_add_remove_lock.
1185         */
1186        lockup_detector_cleanup();
1187        arch_smt_update();
1188        cpu_up_down_serialize_trainwrecks(tasks_frozen);
1189        return ret;
1190}
1191
1192static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
1193{
1194        /*
1195         * If the platform does not support hotplug, report it explicitly to
1196         * differentiate it from a transient offlining failure.
1197         */
1198        if (cc_platform_has(CC_ATTR_HOTPLUG_DISABLED))
1199                return -EOPNOTSUPP;
1200        if (cpu_hotplug_disabled)
1201                return -EBUSY;
1202        return _cpu_down(cpu, 0, target);
1203}
1204
1205static int cpu_down(unsigned int cpu, enum cpuhp_state target)
1206{
1207        int err;
1208
1209        cpu_maps_update_begin();
1210        err = cpu_down_maps_locked(cpu, target);
1211        cpu_maps_update_done();
1212        return err;
1213}
1214
1215/**
1216 * cpu_device_down - Bring down a cpu device
1217 * @dev: Pointer to the cpu device to offline
1218 *
1219 * This function is meant to be used by device core cpu subsystem only.
1220 *
1221 * Other subsystems should use remove_cpu() instead.
1222 *
1223 * Return: %0 on success or a negative errno code
1224 */
1225int cpu_device_down(struct device *dev)
1226{
1227        return cpu_down(dev->id, CPUHP_OFFLINE);
1228}
1229
1230int remove_cpu(unsigned int cpu)
1231{
1232        int ret;
1233
1234        lock_device_hotplug();
1235        ret = device_offline(get_cpu_device(cpu));
1236        unlock_device_hotplug();
1237
1238        return ret;
1239}
1240EXPORT_SYMBOL_GPL(remove_cpu);
1241
1242void smp_shutdown_nonboot_cpus(unsigned int primary_cpu)
1243{
1244        unsigned int cpu;
1245        int error;
1246
1247        cpu_maps_update_begin();
1248
1249        /*
1250         * Make certain the cpu I'm about to reboot on is online.
1251         *
 1252         * This is in line with what migrate_to_reboot_cpu() already does.
1253         */
1254        if (!cpu_online(primary_cpu))
1255                primary_cpu = cpumask_first(cpu_online_mask);
1256
1257        for_each_online_cpu(cpu) {
1258                if (cpu == primary_cpu)
1259                        continue;
1260
1261                error = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
1262                if (error) {
 1263                        pr_err("Failed to offline CPU%d - error=%d\n",
1264                                cpu, error);
1265                        break;
1266                }
1267        }
1268
1269        /*
1270         * Ensure all but the reboot CPU are offline.
1271         */
1272        BUG_ON(num_online_cpus() > 1);
1273
1274        /*
1275         * Make sure the CPUs won't be enabled by someone else after this
1276         * point. Kexec will reboot to a new kernel shortly resetting
1277         * everything along the way.
1278         */
1279        cpu_hotplug_disabled++;
1280
1281        cpu_maps_update_done();
1282}
1283
1284#else
1285#define takedown_cpu            NULL
1286#endif /*CONFIG_HOTPLUG_CPU*/
1287
1288/**
1289 * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
1290 * @cpu: cpu that just started
1291 *
1292 * It must be called by the arch code on the new cpu, before the new cpu
1293 * enables interrupts and before the "boot" cpu returns from __cpu_up().
1294 */
1295void notify_cpu_starting(unsigned int cpu)
1296{
1297        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1298        enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
1299        int ret;
1300
1301        rcu_cpu_starting(cpu);  /* Enables RCU usage on this CPU. */
1302        cpumask_set_cpu(cpu, &cpus_booted_once_mask);
1303        ret = cpuhp_invoke_callback_range(true, cpu, st, target);
1304
1305        /*
1306         * STARTING must not fail!
1307         */
1308        WARN_ON_ONCE(ret);
1309}
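/*
 * Illustrative sketch, not part of this file: the ordering requirement
 * documented above means an architecture's secondary startup path looks
 * roughly like this (simplified, names vary per architecture):
 *
 *	void secondary_start_kernel(void)
 *	{
 *		unsigned int cpu = smp_processor_id();
 *
 *		arch_low_level_init(cpu);	(hypothetical arch setup)
 *		notify_cpu_starting(cpu);	(IRQs still disabled here)
 *		set_cpu_online(cpu, true);
 *		local_irq_enable();
 *		cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 *	}
 */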
1310
1311/*
1312 * Called from the idle task. Wake up the controlling task which brings the
1313 * hotplug thread of the upcoming CPU up and then delegates the rest of the
1314 * online bringup to the hotplug thread.
1315 */
1316void cpuhp_online_idle(enum cpuhp_state state)
1317{
1318        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1319
1320        /* Happens for the boot cpu */
1321        if (state != CPUHP_AP_ONLINE_IDLE)
1322                return;
1323
1324        /*
 1325         * Unpark the stopper thread before we start the idle loop (and start
1326         * scheduling); this ensures the stopper task is always available.
1327         */
1328        stop_machine_unpark(smp_processor_id());
1329
1330        st->state = CPUHP_AP_ONLINE_IDLE;
1331        complete_ap_thread(st, true);
1332}
1333
1334/* Requires cpu_add_remove_lock to be held */
1335static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1336{
1337        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1338        struct task_struct *idle;
1339        int ret = 0;
1340
1341        cpus_write_lock();
1342
1343        if (!cpu_present(cpu)) {
1344                ret = -EINVAL;
1345                goto out;
1346        }
1347
1348        /*
1349         * The caller of cpu_up() might have raced with another
1350         * caller. Nothing to do.
1351         */
1352        if (st->state >= target)
1353                goto out;
1354
1355        if (st->state == CPUHP_OFFLINE) {
1356                /* Let it fail before we try to bring the cpu up */
1357                idle = idle_thread_get(cpu);
1358                if (IS_ERR(idle)) {
1359                        ret = PTR_ERR(idle);
1360                        goto out;
1361                }
1362        }
1363
1364        cpuhp_tasks_frozen = tasks_frozen;
1365
1366        cpuhp_set_state(cpu, st, target);
1367        /*
1368         * If the current CPU state is in the range of the AP hotplug thread,
1369         * then we need to kick the thread once more.
1370         */
1371        if (st->state > CPUHP_BRINGUP_CPU) {
1372                ret = cpuhp_kick_ap_work(cpu);
1373                /*
1374                 * The AP side has done the error rollback already. Just
1375                 * return the error code..
1376                 */
1377                if (ret)
1378                        goto out;
1379        }
1380
1381        /*
1382         * Try to reach the target state. We max out on the BP at
1383         * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
1384         * responsible for bringing it up to the target state.
1385         */
1386        target = min((int)target, CPUHP_BRINGUP_CPU);
1387        ret = cpuhp_up_callbacks(cpu, st, target);
1388out:
1389        cpus_write_unlock();
1390        arch_smt_update();
1391        cpu_up_down_serialize_trainwrecks(tasks_frozen);
1392        return ret;
1393}
1394
1395static int cpu_up(unsigned int cpu, enum cpuhp_state target)
1396{
1397        int err = 0;
1398
1399        if (!cpu_possible(cpu)) {
1400                pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
1401                       cpu);
1402#if defined(CONFIG_IA64)
1403                pr_err("please check additional_cpus= boot parameter\n");
1404#endif
1405                return -EINVAL;
1406        }
1407
1408        err = try_online_node(cpu_to_node(cpu));
1409        if (err)
1410                return err;
1411
1412        cpu_maps_update_begin();
1413
1414        if (cpu_hotplug_disabled) {
1415                err = -EBUSY;
1416                goto out;
1417        }
1418        if (!cpu_smt_allowed(cpu)) {
1419                err = -EPERM;
1420                goto out;
1421        }
1422
1423        err = _cpu_up(cpu, 0, target);
1424out:
1425        cpu_maps_update_done();
1426        return err;
1427}
1428
1429/**
1430 * cpu_device_up - Bring up a cpu device
1431 * @dev: Pointer to the cpu device to online
1432 *
1433 * This function is meant to be used by device core cpu subsystem only.
1434 *
1435 * Other subsystems should use add_cpu() instead.
1436 *
1437 * Return: %0 on success or a negative errno code
1438 */
1439int cpu_device_up(struct device *dev)
1440{
1441        return cpu_up(dev->id, CPUHP_ONLINE);
1442}
1443
1444int add_cpu(unsigned int cpu)
1445{
1446        int ret;
1447
1448        lock_device_hotplug();
1449        ret = device_online(get_cpu_device(cpu));
1450        unlock_device_hotplug();
1451
1452        return ret;
1453}
1454EXPORT_SYMBOL_GPL(add_cpu);
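/*
 * Illustrative sketch, not part of this file: subsystems outside the cpu
 * device core take a CPU down and bring it back via the exported helpers
 * above, e.g. a (hypothetical) test path:
 *
 *	ret = remove_cpu(cpu);
 *	if (!ret) {
 *		foo_run_while_cpu_is_offline(cpu);
 *		ret = add_cpu(cpu);
 *	}
 */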
1455
1456/**
1457 * bringup_hibernate_cpu - Bring up the CPU that we hibernated on
1458 * @sleep_cpu: The cpu we hibernated on and should be brought up.
1459 *
1460 * On some architectures like arm64, we can hibernate on any CPU, but on
1461 * wake up the CPU we hibernated on might be offline as a side effect of
1462 * using maxcpus= for example.
1463 *
1464 * Return: %0 on success or a negative errno code
1465 */
1466int bringup_hibernate_cpu(unsigned int sleep_cpu)
1467{
1468        int ret;
1469
1470        if (!cpu_online(sleep_cpu)) {
1471                pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
1472                ret = cpu_up(sleep_cpu, CPUHP_ONLINE);
1473                if (ret) {
1474                        pr_err("Failed to bring hibernate-CPU up!\n");
1475                        return ret;
1476                }
1477        }
1478        return 0;
1479}
1480
1481void bringup_nonboot_cpus(unsigned int setup_max_cpus)
1482{
1483        unsigned int cpu;
1484
1485        for_each_present_cpu(cpu) {
1486                if (num_online_cpus() >= setup_max_cpus)
1487                        break;
1488                if (!cpu_online(cpu))
1489                        cpu_up(cpu, CPUHP_ONLINE);
1490        }
1491}
1492
1493#ifdef CONFIG_PM_SLEEP_SMP
1494static cpumask_var_t frozen_cpus;
1495
1496int freeze_secondary_cpus(int primary)
1497{
1498        int cpu, error = 0;
1499
1500        cpu_maps_update_begin();
1501        if (primary == -1) {
1502                primary = cpumask_first(cpu_online_mask);
1503                if (!housekeeping_cpu(primary, HK_TYPE_TIMER))
1504                        primary = housekeeping_any_cpu(HK_TYPE_TIMER);
1505        } else {
1506                if (!cpu_online(primary))
1507                        primary = cpumask_first(cpu_online_mask);
1508        }
1509
1510        /*
1511         * We take down all of the non-boot CPUs in one shot to avoid races
 1512         * with userspace trying to use CPU hotplug at the same time.
1513         */
1514        cpumask_clear(frozen_cpus);
1515
1516        pr_info("Disabling non-boot CPUs ...\n");
1517        for_each_online_cpu(cpu) {
1518                if (cpu == primary)
1519                        continue;
1520
1521                if (pm_wakeup_pending()) {
1522                        pr_info("Wakeup pending. Abort CPU freeze\n");
1523                        error = -EBUSY;
1524                        break;
1525                }
1526
1527                trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1528                error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1529                trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
1530                if (!error)
1531                        cpumask_set_cpu(cpu, frozen_cpus);
1532                else {
1533                        pr_err("Error taking CPU%d down: %d\n", cpu, error);
1534                        break;
1535                }
1536        }
1537
1538        if (!error)
1539                BUG_ON(num_online_cpus() > 1);
1540        else
1541                pr_err("Non-boot CPUs are not disabled\n");
1542
1543        /*
1544         * Make sure the CPUs won't be enabled by someone else. We need to do
1545         * this even in case of failure as all freeze_secondary_cpus() users are
1546         * supposed to do thaw_secondary_cpus() on the failure path.
1547         */
1548        cpu_hotplug_disabled++;
1549
1550        cpu_maps_update_done();
1551        return error;
1552}
1553
1554void __weak arch_thaw_secondary_cpus_begin(void)
1555{
1556}
1557
1558void __weak arch_thaw_secondary_cpus_end(void)
1559{
1560}
1561
1562void thaw_secondary_cpus(void)
1563{
1564        int cpu, error;
1565
1566        /* Allow everyone to use the CPU hotplug again */
1567        cpu_maps_update_begin();
1568        __cpu_hotplug_enable();
1569        if (cpumask_empty(frozen_cpus))
1570                goto out;
1571
1572        pr_info("Enabling non-boot CPUs ...\n");
1573
1574        arch_thaw_secondary_cpus_begin();
1575
1576        for_each_cpu(cpu, frozen_cpus) {
1577                trace_suspend_resume(TPS("CPU_ON"), cpu, true);
1578                error = _cpu_up(cpu, 1, CPUHP_ONLINE);
1579                trace_suspend_resume(TPS("CPU_ON"), cpu, false);
1580                if (!error) {
1581                        pr_info("CPU%d is up\n", cpu);
1582                        continue;
1583                }
1584                pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1585        }
1586
1587        arch_thaw_secondary_cpus_end();
1588
1589        cpumask_clear(frozen_cpus);
1590out:
1591        cpu_maps_update_done();
1592}
1593
1594static int __init alloc_frozen_cpus(void)
1595{
1596        if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
1597                return -ENOMEM;
1598        return 0;
1599}
1600core_initcall(alloc_frozen_cpus);
1601
1602/*
1603 * When callbacks for CPU hotplug notifications are being executed, we must
1604 * ensure that the state of the system with respect to the tasks being frozen
1605 * or not, as reported by the notification, remains unchanged *throughout the
1606 * duration* of the execution of the callbacks.
1607 * Hence we need to prevent the freezer from racing with regular CPU hotplug.
1608 *
1609 * This synchronization is implemented by mutually excluding regular CPU
1610 * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
1611 * Hibernate notifications.
1612 */
1613static int
1614cpu_hotplug_pm_callback(struct notifier_block *nb,
1615                        unsigned long action, void *ptr)
1616{
1617        switch (action) {
1618
1619        case PM_SUSPEND_PREPARE:
1620        case PM_HIBERNATION_PREPARE:
1621                cpu_hotplug_disable();
1622                break;
1623
1624        case PM_POST_SUSPEND:
1625        case PM_POST_HIBERNATION:
1626                cpu_hotplug_enable();
1627                break;
1628
1629        default:
1630                return NOTIFY_DONE;
1631        }
1632
1633        return NOTIFY_OK;
1634}
1635
1636
1637static int __init cpu_hotplug_pm_sync_init(void)
1638{
1639        /*
1640         * cpu_hotplug_pm_callback has higher priority than x86
1641         * bsp_pm_callback which depends on cpu_hotplug_pm_callback
1642         * to disable cpu hotplug to avoid cpu hotplug race.
1643         */
1644        pm_notifier(cpu_hotplug_pm_callback, 0);
1645        return 0;
1646}
1647core_initcall(cpu_hotplug_pm_sync_init);
1648
1649#endif /* CONFIG_PM_SLEEP_SMP */
1650
1651int __boot_cpu_id;
1652
1653#endif /* CONFIG_SMP */
1654
1655/* Boot processor state steps */
1656static struct cpuhp_step cpuhp_hp_states[] = {
1657        [CPUHP_OFFLINE] = {
1658                .name                   = "offline",
1659                .startup.single         = NULL,
1660                .teardown.single        = NULL,
1661        },
1662#ifdef CONFIG_SMP
 1663        [CPUHP_CREATE_THREADS] = {
1664                .name                   = "threads:prepare",
1665                .startup.single         = smpboot_create_threads,
1666                .teardown.single        = NULL,
1667                .cant_stop              = true,
1668        },
1669        [CPUHP_PERF_PREPARE] = {
1670                .name                   = "perf:prepare",
1671                .startup.single         = perf_event_init_cpu,
1672                .teardown.single        = perf_event_exit_cpu,
1673        },
1674        [CPUHP_RANDOM_PREPARE] = {
1675                .name                   = "random:prepare",
1676                .startup.single         = random_prepare_cpu,
1677                .teardown.single        = NULL,
1678        },
1679        [CPUHP_WORKQUEUE_PREP] = {
1680                .name                   = "workqueue:prepare",
1681                .startup.single         = workqueue_prepare_cpu,
1682                .teardown.single        = NULL,
1683        },
1684        [CPUHP_HRTIMERS_PREPARE] = {
1685                .name                   = "hrtimers:prepare",
1686                .startup.single         = hrtimers_prepare_cpu,
1687                .teardown.single        = hrtimers_dead_cpu,
1688        },
1689        [CPUHP_SMPCFD_PREPARE] = {
1690                .name                   = "smpcfd:prepare",
1691                .startup.single         = smpcfd_prepare_cpu,
1692                .teardown.single        = smpcfd_dead_cpu,
1693        },
1694        [CPUHP_RELAY_PREPARE] = {
1695                .name                   = "relay:prepare",
1696                .startup.single         = relay_prepare_cpu,
1697                .teardown.single        = NULL,
1698        },
1699        [CPUHP_SLAB_PREPARE] = {
1700                .name                   = "slab:prepare",
1701                .startup.single         = slab_prepare_cpu,
1702                .teardown.single        = slab_dead_cpu,
1703        },
1704        [CPUHP_RCUTREE_PREP] = {
1705                .name                   = "RCU/tree:prepare",
1706                .startup.single         = rcutree_prepare_cpu,
1707                .teardown.single        = rcutree_dead_cpu,
1708        },
1709        /*
1710         * On the tear-down path, timers_dead_cpu() must be invoked
1711         * before blk_mq_queue_reinit_notify() from notify_dead(),
1712         * otherwise an RCU stall occurs.
1713         */
1714        [CPUHP_TIMERS_PREPARE] = {
1715                .name                   = "timers:prepare",
1716                .startup.single         = timers_prepare_cpu,
1717                .teardown.single        = timers_dead_cpu,
1718        },
1719        /* Kicks the plugged cpu into life */
1720        [CPUHP_BRINGUP_CPU] = {
1721                .name                   = "cpu:bringup",
1722                .startup.single         = bringup_cpu,
1723                .teardown.single        = finish_cpu,
1724                .cant_stop              = true,
1725        },
1726        /* Final state before CPU kills itself */
1727        [CPUHP_AP_IDLE_DEAD] = {
1728                .name                   = "idle:dead",
1729        },
1730        /*
1731         * Last state before CPU enters the idle loop to die. Transient state
1732         * for synchronization.
1733         */
1734        [CPUHP_AP_OFFLINE] = {
1735                .name                   = "ap:offline",
1736                .cant_stop              = true,
1737        },
1738        /* First state is scheduler control. Interrupts are disabled */
1739        [CPUHP_AP_SCHED_STARTING] = {
1740                .name                   = "sched:starting",
1741                .startup.single         = sched_cpu_starting,
1742                .teardown.single        = sched_cpu_dying,
1743        },
1744        [CPUHP_AP_RCUTREE_DYING] = {
1745                .name                   = "RCU/tree:dying",
1746                .startup.single         = NULL,
1747                .teardown.single        = rcutree_dying_cpu,
1748        },
1749        [CPUHP_AP_SMPCFD_DYING] = {
1750                .name                   = "smpcfd:dying",
1751                .startup.single         = NULL,
1752                .teardown.single        = smpcfd_dying_cpu,
1753        },
1754        /* Entry state on starting. Interrupts enabled from here on.
1755         * Transient state for synchronization. */
1756        [CPUHP_AP_ONLINE] = {
1757                .name                   = "ap:online",
1758        },
1759        /*
1760         * Handled on the control processor until the plugged processor
1761         * manages this itself.
1762         */
1763        [CPUHP_TEARDOWN_CPU] = {
1764                .name                   = "cpu:teardown",
1765                .startup.single         = NULL,
1766                .teardown.single        = takedown_cpu,
1767                .cant_stop              = true,
1768        },
1769
1770        [CPUHP_AP_SCHED_WAIT_EMPTY] = {
1771                .name                   = "sched:waitempty",
1772                .startup.single         = NULL,
1773                .teardown.single        = sched_cpu_wait_empty,
1774        },
1775
1776        /* Handle smpboot threads park/unpark */
1777        [CPUHP_AP_SMPBOOT_THREADS] = {
1778                .name                   = "smpboot/threads:online",
1779                .startup.single         = smpboot_unpark_threads,
1780                .teardown.single        = smpboot_park_threads,
1781        },
1782        [CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
1783                .name                   = "irq/affinity:online",
1784                .startup.single         = irq_affinity_online_cpu,
1785                .teardown.single        = NULL,
1786        },
1787        [CPUHP_AP_PERF_ONLINE] = {
1788                .name                   = "perf:online",
1789                .startup.single         = perf_event_init_cpu,
1790                .teardown.single        = perf_event_exit_cpu,
1791        },
1792        [CPUHP_AP_WATCHDOG_ONLINE] = {
1793                .name                   = "lockup_detector:online",
1794                .startup.single         = lockup_detector_online_cpu,
1795                .teardown.single        = lockup_detector_offline_cpu,
1796        },
1797        [CPUHP_AP_WORKQUEUE_ONLINE] = {
1798                .name                   = "workqueue:online",
1799                .startup.single         = workqueue_online_cpu,
1800                .teardown.single        = workqueue_offline_cpu,
1801        },
1802        [CPUHP_AP_RANDOM_ONLINE] = {
1803                .name                   = "random:online",
1804                .startup.single         = random_online_cpu,
1805                .teardown.single        = NULL,
1806        },
1807        [CPUHP_AP_RCUTREE_ONLINE] = {
1808                .name                   = "RCU/tree:online",
1809                .startup.single         = rcutree_online_cpu,
1810                .teardown.single        = rcutree_offline_cpu,
1811        },
1812#endif
1813        /*
1814         * The dynamically registered state space is here
1815         */
1816
1817#ifdef CONFIG_SMP
1818        /* Last state is scheduler control setting the cpu active */
1819        [CPUHP_AP_ACTIVE] = {
1820                .name                   = "sched:active",
1821                .startup.single         = sched_cpu_activate,
1822                .teardown.single        = sched_cpu_deactivate,
1823        },
1824#endif
1825
1826        /* CPU is fully up and running. */
1827        [CPUHP_ONLINE] = {
1828                .name                   = "online",
1829                .startup.single         = NULL,
1830                .teardown.single        = NULL,
1831        },
1832};
1833
1834/* Sanity check for callbacks */
1835static int cpuhp_cb_check(enum cpuhp_state state)
1836{
1837        if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
1838                return -EINVAL;
1839        return 0;
1840}
1841
1842/*
1843 * Return a free slot from the requested dynamic state range. The states
1844 * are protected by the cpuhp_state_mutex and an empty slot is identified
1845 * by having no name assigned.
1846 */
1847static int cpuhp_reserve_state(enum cpuhp_state state)
1848{
1849        enum cpuhp_state i, end;
1850        struct cpuhp_step *step;
1851
1852        switch (state) {
1853        case CPUHP_AP_ONLINE_DYN:
1854                step = cpuhp_hp_states + CPUHP_AP_ONLINE_DYN;
1855                end = CPUHP_AP_ONLINE_DYN_END;
1856                break;
1857        case CPUHP_BP_PREPARE_DYN:
1858                step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN;
1859                end = CPUHP_BP_PREPARE_DYN_END;
1860                break;
1861        default:
1862                return -EINVAL;
1863        }
1864
1865        for (i = state; i <= end; i++, step++) {
1866                if (!step->name)
1867                        return i;
1868        }
1869        WARN(1, "No more dynamic states available for CPU hotplug\n");
1870        return -ENOSPC;
1871}
1872
1873static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
1874                                 int (*startup)(unsigned int cpu),
1875                                 int (*teardown)(unsigned int cpu),
1876                                 bool multi_instance)
1877{
1878        /* (Un)Install the callbacks for further cpu hotplug operations */
1879        struct cpuhp_step *sp;
1880        int ret = 0;
1881
1882        /*
1883         * If name is NULL, then the state gets removed.
1884         *
1885         * CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on
1886         * the first allocation from these dynamic ranges, so the removal
1887         * would trigger a new allocation and clear the wrong (already
1888         * empty) state, leaving the callbacks of the to be cleared state
1889         * dangling, which causes wreckage on the next hotplug operation.
1890         */
1891        if (name && (state == CPUHP_AP_ONLINE_DYN ||
1892                     state == CPUHP_BP_PREPARE_DYN)) {
1893                ret = cpuhp_reserve_state(state);
1894                if (ret < 0)
1895                        return ret;
1896                state = ret;
1897        }
1898        sp = cpuhp_get_step(state);
1899        if (name && sp->name)
1900                return -EBUSY;
1901
1902        sp->startup.single = startup;
1903        sp->teardown.single = teardown;
1904        sp->name = name;
1905        sp->multi_instance = multi_instance;
1906        INIT_HLIST_HEAD(&sp->list);
1907        return ret;
1908}
1909
1910static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
1911{
1912        return cpuhp_get_step(state)->teardown.single;
1913}
1914
1915/*
1916 * Call the startup/teardown function for a step either on the AP or
1917 * on the current CPU.
1918 */
1919static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
1920                            struct hlist_node *node)
1921{
1922        struct cpuhp_step *sp = cpuhp_get_step(state);
1923        int ret;
1924
1925        /*
1926         * If there's nothing to do, we're done.
1927         * Relies on the union for multi_instance.
1928         */
1929        if (cpuhp_step_empty(bringup, sp))
1930                return 0;
1931        /*
1932         * The non-AP-bound callbacks can fail on bringup. On teardown,
1933         * e.g. during module removal, we crash (BUG) for now.
1934         */
1935#ifdef CONFIG_SMP
1936        if (cpuhp_is_ap_state(state))
1937                ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
1938        else
1939                ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1940#else
1941        ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1942#endif
1943        BUG_ON(ret && !bringup);
1944        return ret;
1945}
1946
1947/*
1948 * Called on a recoverable callback failure during state setup or instance add.
1949 *
1950 * Note: The teardown callbacks for rollback are not allowed to fail!
1951 */
1952static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
1953                                   struct hlist_node *node)
1954{
1955        int cpu;
1956
1957        /* Roll back the already executed steps on the other cpus */
1958        for_each_present_cpu(cpu) {
1959                struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1960                int cpustate = st->state;
1961
1962                if (cpu >= failedcpu)
1963                        break;
1964
1965                /* Did we invoke the startup call on that cpu ? */
1966                if (cpustate >= state)
1967                        cpuhp_issue_call(cpu, state, false, node);
1968        }
1969}
1970
1971int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
1972                                          struct hlist_node *node,
1973                                          bool invoke)
1974{
1975        struct cpuhp_step *sp;
1976        int cpu;
1977        int ret;
1978
1979        lockdep_assert_cpus_held();
1980
1981        sp = cpuhp_get_step(state);
1982        if (!sp->multi_instance)
1983                return -EINVAL;
1984
1985        mutex_lock(&cpuhp_state_mutex);
1986
1987        if (!invoke || !sp->startup.multi)
1988                goto add_node;
1989
1990        /*
1991         * Try to call the startup callback for each present cpu
1992         * depending on the hotplug state of the cpu.
1993         */
1994        for_each_present_cpu(cpu) {
1995                struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1996                int cpustate = st->state;
1997
1998                if (cpustate < state)
1999                        continue;
2000
2001                ret = cpuhp_issue_call(cpu, state, true, node);
2002                if (ret) {
2003                        if (sp->teardown.multi)
2004                                cpuhp_rollback_install(cpu, state, node);
2005                        goto unlock;
2006                }
2007        }
2008add_node:
2009        ret = 0;
2010        hlist_add_head(node, &sp->list);
2011unlock:
2012        mutex_unlock(&cpuhp_state_mutex);
2013        return ret;
2014}
2015
2016int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
2017                               bool invoke)
2018{
2019        int ret;
2020
2021        cpus_read_lock();
2022        ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
2023        cpus_read_unlock();
2024        return ret;
2025}
2026EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
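/*
 * Editor's note: an illustrative sketch of the multi-instance usage via the
 * wrappers from <linux/cpuhotplug.h>. A driver embeds an hlist_node in its
 * per-device structure, registers one multi-instance state (callbacks are
 * not invoked at setup time because cpuhp_setup_state_multi() passes
 * invoke=false) and then adds one instance per device, which does invoke
 * the startup callback on all online CPUs. The foo_* names are hypothetical.
 *
 *	struct foo_device {
 *		struct hlist_node node;
 *	};
 *
 *	static enum cpuhp_state foo_online_state;
 *
 *	static int foo_cpu_online(unsigned int cpu, struct hlist_node *node)
 *	{
 *		struct foo_device *foo = hlist_entry(node, struct foo_device, node);
 *
 *		return foo_setup_on_cpu(foo, cpu);
 *	}
 *
 *	Registration, e.g. at module init:
 *
 *	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "foo:online",
 *				      foo_cpu_online, foo_cpu_offline);
 *	if (ret < 0)
 *		return ret;
 *	foo_online_state = ret;
 *
 *	Per-device probe:
 *
 *	ret = cpuhp_state_add_instance(foo_online_state, &foo->node);
 */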
2027
2028/**
2029 * __cpuhp_setup_state_cpuslocked - Set up the callbacks for a hotplug machine state
2030 * @state:              The state to set up
2031 * @name:               Name of the step
2032 * @invoke:             If true, the startup function is invoked for cpus where
2033 *                      cpu state >= @state
2034 * @startup:            startup callback function
2035 * @teardown:           teardown callback function
2036 * @multi_instance:     State is set up for multiple instances which get
2037 *                      added afterwards.
2038 *
2039 * The caller needs to hold cpus read locked while calling this function.
2040 * Return:
2041 *   On success:
2042 *      Positive state number if @state is CPUHP_AP_ONLINE_DYN;
2043 *      0 for all other states
2044 *   On failure: proper (negative) error code
2045 */
2046int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
2047                                   const char *name, bool invoke,
2048                                   int (*startup)(unsigned int cpu),
2049                                   int (*teardown)(unsigned int cpu),
2050                                   bool multi_instance)
2051{
2052        int cpu, ret = 0;
2053        bool dynstate;
2054
2055        lockdep_assert_cpus_held();
2056
2057        if (cpuhp_cb_check(state) || !name)
2058                return -EINVAL;
2059
2060        mutex_lock(&cpuhp_state_mutex);
2061
2062        ret = cpuhp_store_callbacks(state, name, startup, teardown,
2063                                    multi_instance);
2064
2065        dynstate = state == CPUHP_AP_ONLINE_DYN;
2066        if (ret > 0 && dynstate) {
2067                state = ret;
2068                ret = 0;
2069        }
2070
2071        if (ret || !invoke || !startup)
2072                goto out;
2073
2074        /*
2075         * Try to call the startup callback for each present cpu
2076         * depending on the hotplug state of the cpu.
2077         */
2078        for_each_present_cpu(cpu) {
2079                struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2080                int cpustate = st->state;
2081
2082                if (cpustate < state)
2083                        continue;
2084
2085                ret = cpuhp_issue_call(cpu, state, true, NULL);
2086                if (ret) {
2087                        if (teardown)
2088                                cpuhp_rollback_install(cpu, state, NULL);
2089                        cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
2090                        goto out;
2091                }
2092        }
2093out:
2094        mutex_unlock(&cpuhp_state_mutex);
2095        /*
2096         * If the requested state is CPUHP_AP_ONLINE_DYN, return the
2097         * dynamically allocated state in case of success.
2098         */
2099        if (!ret && dynstate)
2100                return state;
2101        return ret;
2102}
2103EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
2104
2105int __cpuhp_setup_state(enum cpuhp_state state,
2106                        const char *name, bool invoke,
2107                        int (*startup)(unsigned int cpu),
2108                        int (*teardown)(unsigned int cpu),
2109                        bool multi_instance)
2110{
2111        int ret;
2112
2113        cpus_read_lock();
2114        ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
2115                                             teardown, multi_instance);
2116        cpus_read_unlock();
2117        return ret;
2118}
2119EXPORT_SYMBOL(__cpuhp_setup_state);
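/*
 * Editor's note: an illustrative sketch of the common usage, via the
 * cpuhp_setup_state() wrapper from <linux/cpuhotplug.h>, which takes the
 * cpus read lock and passes invoke=true so the startup callback runs on all
 * CPUs that are already online. For CPUHP_AP_ONLINE_DYN the positive return
 * value is the dynamically allocated state and must be kept for later
 * removal. The foo_* names are hypothetical.
 *
 *	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "foo:online",
 *				foo_online, foo_offline);
 *	if (ret < 0)
 *		return ret;
 *	foo_hp_state = ret;
 */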
2120
2121int __cpuhp_state_remove_instance(enum cpuhp_state state,
2122                                  struct hlist_node *node, bool invoke)
2123{
2124        struct cpuhp_step *sp = cpuhp_get_step(state);
2125        int cpu;
2126
2127        BUG_ON(cpuhp_cb_check(state));
2128
2129        if (!sp->multi_instance)
2130                return -EINVAL;
2131
2132        cpus_read_lock();
2133        mutex_lock(&cpuhp_state_mutex);
2134
2135        if (!invoke || !cpuhp_get_teardown_cb(state))
2136                goto remove;
2137        /*
2138         * Call the teardown callback for each present cpu depending
2139         * on the hotplug state of the cpu. This function is not
2140         * allowed to fail currently!
2141         */
2142        for_each_present_cpu(cpu) {
2143                struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2144                int cpustate = st->state;
2145
2146                if (cpustate >= state)
2147                        cpuhp_issue_call(cpu, state, false, node);
2148        }
2149
2150remove:
2151        hlist_del(node);
2152        mutex_unlock(&cpuhp_state_mutex);
2153        cpus_read_unlock();
2154
2155        return 0;
2156}
2157EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
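/*
 * Editor's note: the counterpart to the instance-add sketch earlier. On
 * device removal the instance is torn down on all online CPUs via the
 * cpuhp_state_remove_instance() wrapper (invoke=true), or merely unhooked
 * with cpuhp_state_remove_instance_nocalls(). Names as in that sketch.
 *
 *	cpuhp_state_remove_instance(foo_online_state, &foo->node);
 */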
2158
2159/**
2160 * __cpuhp_remove_state_cpuslocked - Remove the callbacks for a hotplug machine state
2161 * @state:      The state to remove
2162 * @invoke:     If true, the teardown function is invoked for cpus where
2163 *              cpu state >= @state
2164 *
2165 * The caller needs to hold cpus read locked while calling this function.
2166 * The teardown callback is currently not allowed to fail. Think
2167 * about module removal!
2168 */
2169void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
2170{
2171        struct cpuhp_step *sp = cpuhp_get_step(state);
2172        int cpu;
2173
2174        BUG_ON(cpuhp_cb_check(state));
2175
2176        lockdep_assert_cpus_held();
2177
2178        mutex_lock(&cpuhp_state_mutex);
2179        if (sp->multi_instance) {
2180                WARN(!hlist_empty(&sp->list),
2181                     "Error: Removing state %d which has instances left.\n",
2182                     state);
2183                goto remove;
2184        }
2185
2186        if (!invoke || !cpuhp_get_teardown_cb(state))
2187                goto remove;
2188
2189        /*
2190         * Call the teardown callback for each present cpu depending
2191         * on the hotplug state of the cpu. This function is not
2192         * allowed to fail currently!
2193         */
2194        for_each_present_cpu(cpu) {
2195                struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2196                int cpustate = st->state;
2197
2198                if (cpustate >= state)
2199                        cpuhp_issue_call(cpu, state, false, NULL);
2200        }
2201remove:
2202        cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
2203        mutex_unlock(&cpuhp_state_mutex);
2204}
2205EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
2206
2207void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
2208{
2209        cpus_read_lock();
2210        __cpuhp_remove_state_cpuslocked(state, invoke);
2211        cpus_read_unlock();
2212}
2213EXPORT_SYMBOL(__cpuhp_remove_state);
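/*
 * Editor's note: an illustrative sketch of state removal. A module that
 * registered a dynamic state tears it down again on exit through the
 * cpuhp_remove_state() wrapper, which invokes the teardown callback on all
 * online CPUs before the slot is freed. foo_hp_state is the value returned
 * by the setup sketch earlier.
 *
 *	cpuhp_remove_state(foo_hp_state);
 */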
2214
2215#ifdef CONFIG_HOTPLUG_SMT
2216static void cpuhp_offline_cpu_device(unsigned int cpu)
2217{
2218        struct device *dev = get_cpu_device(cpu);
2219
2220        dev->offline = true;
2221        /* Tell user space about the state change */
2222        kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
2223}
2224
2225static void cpuhp_online_cpu_device(unsigned int cpu)
2226{
2227        struct device *dev = get_cpu_device(cpu);
2228
2229        dev->offline = false;
2230        /* Tell user space about the state change */
2231        kobject_uevent(&dev->kobj, KOBJ_ONLINE);
2232}
2233
2234int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
2235{
2236        int cpu, ret = 0;
2237
2238        cpu_maps_update_begin();
2239        for_each_online_cpu(cpu) {
2240                if (topology_is_primary_thread(cpu))
2241                        continue;
2242                ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
2243                if (ret)
2244                        break;
2245                /*
2246                 * As this needs to hold the cpu maps lock it's impossible
2247                 * to call device_offline() because that ends up calling
2248                 * cpu_down(), which takes the cpu maps lock. The cpu maps
2249                 * lock needs to be held as this might race against in-kernel
2250                 * abusers of the hotplug machinery (thermal management).
2251                 *
2252                 * So nothing would update the device's offline state. That
2253                 * would leave the sysfs entry stale and prevent onlining
2254                 * after SMT control has been changed to 'off' again. This is
2255                 * called under the sysfs hotplug lock, so it is properly
2256                 * serialized against the regular offline usage.
2257                 */
2258                cpuhp_offline_cpu_device(cpu);
2259        }
2260        if (!ret)
2261                cpu_smt_control = ctrlval;
2262        cpu_maps_update_done();
2263        return ret;
2264}
2265
2266int cpuhp_smt_enable(void)
2267{
2268        int cpu, ret = 0;
2269
2270        cpu_maps_update_begin();
2271        cpu_smt_control = CPU_SMT_ENABLED;
2272        for_each_present_cpu(cpu) {
2273                /* Skip online CPUs and CPUs on offline nodes */
2274                if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
2275                        continue;
2276                ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
2277                if (ret)
2278                        break;
2279                /* See comment in cpuhp_smt_disable() */
2280                cpuhp_online_cpu_device(cpu);
2281        }
2282        cpu_maps_update_done();
2283        return ret;
2284}
2285#endif
2286
2287#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
2288static ssize_t state_show(struct device *dev,
2289                          struct device_attribute *attr, char *buf)
2290{
2291        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2292
2293        return sprintf(buf, "%d\n", st->state);
2294}
2295static DEVICE_ATTR_RO(state);
2296
2297static ssize_t target_store(struct device *dev, struct device_attribute *attr,
2298                            const char *buf, size_t count)
2299{
2300        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2301        struct cpuhp_step *sp;
2302        int target, ret;
2303
2304        ret = kstrtoint(buf, 10, &target);
2305        if (ret)
2306                return ret;
2307
2308#ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
2309        if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
2310                return -EINVAL;
2311#else
2312        if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
2313                return -EINVAL;
2314#endif
2315
2316        ret = lock_device_hotplug_sysfs();
2317        if (ret)
2318                return ret;
2319
2320        mutex_lock(&cpuhp_state_mutex);
2321        sp = cpuhp_get_step(target);
2322        ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
2323        mutex_unlock(&cpuhp_state_mutex);
2324        if (ret)
2325                goto out;
2326
2327        if (st->state < target)
2328                ret = cpu_up(dev->id, target);
2329        else
2330                ret = cpu_down(dev->id, target);
2331out:
2332        unlock_device_hotplug();
2333        return ret ? ret : count;
2334}
2335
2336static ssize_t target_show(struct device *dev,
2337                           struct device_attribute *attr, char *buf)
2338{
2339        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2340
2341        return sprintf(buf, "%d\n", st->target);
2342}
2343static DEVICE_ATTR_RW(target);
2344
2345static ssize_t fail_store(struct device *dev, struct device_attribute *attr,
2346                          const char *buf, size_t count)
2347{
2348        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2349        struct cpuhp_step *sp;
2350        int fail, ret;
2351
2352        ret = kstrtoint(buf, 10, &fail);
2353        if (ret)
2354                return ret;
2355
2356        if (fail == CPUHP_INVALID) {
2357                st->fail = fail;
2358                return count;
2359        }
2360
2361        if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
2362                return -EINVAL;
2363
2364        /*
2365         * Cannot fail STARTING/DYING callbacks.
2366         */
2367        if (cpuhp_is_atomic_state(fail))
2368                return -EINVAL;
2369
2370        /*
2371         * DEAD callbacks cannot fail...
2372         * ... neither can CPUHP_BRINGUP_CPU during hotunplug. The latter
2373         * triggers the STARTING callbacks, so a failure in this state
2374         * would hinder rollback.
2375         */
2376        if (fail <= CPUHP_BRINGUP_CPU && st->state > CPUHP_BRINGUP_CPU)
2377                return -EINVAL;
2378
2379        /*
2380         * Cannot fail anything that doesn't have callbacks.
2381         */
2382        mutex_lock(&cpuhp_state_mutex);
2383        sp = cpuhp_get_step(fail);
2384        if (!sp->startup.single && !sp->teardown.single)
2385                ret = -EINVAL;
2386        mutex_unlock(&cpuhp_state_mutex);
2387        if (ret)
2388                return ret;
2389
2390        st->fail = fail;
2391
2392        return count;
2393}
2394
2395static ssize_t fail_show(struct device *dev,
2396                         struct device_attribute *attr, char *buf)
2397{
2398        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2399
2400        return sprintf(buf, "%d\n", st->fail);
2401}
2402
2403static DEVICE_ATTR_RW(fail);
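/*
 * Editor's note: an illustrative sketch of how the fault injection knob
 * above is typically exercised from user space (the paths follow from the
 * "hotplug" attribute groups defined below):
 *
 *	# cat /sys/devices/system/cpu/hotplug/states
 *	# echo <state nr> > /sys/devices/system/cpu/cpu1/hotplug/fail
 *	# echo 0 > /sys/devices/system/cpu/cpu1/hotplug/target
 *
 * The next hotplug operation that reaches the selected state then behaves
 * as if that state's callback had failed, which exercises the rollback
 * handling. Writing CPUHP_INVALID (-1) clears the injection again.
 */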
2404
2405static struct attribute *cpuhp_cpu_attrs[] = {
2406        &dev_attr_state.attr,
2407        &dev_attr_target.attr,
2408        &dev_attr_fail.attr,
2409        NULL
2410};
2411
2412static const struct attribute_group cpuhp_cpu_attr_group = {
2413        .attrs = cpuhp_cpu_attrs,
2414        .name = "hotplug",
2415        NULL
2416};
2417
2418static ssize_t states_show(struct device *dev,
2419                                 struct device_attribute *attr, char *buf)
2420{
2421        ssize_t cur, res = 0;
2422        int i;
2423
2424        mutex_lock(&cpuhp_state_mutex);
2425        for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
2426                struct cpuhp_step *sp = cpuhp_get_step(i);
2427
2428                if (sp->name) {
2429                        cur = sprintf(buf, "%3d: %s\n", i, sp->name);
2430                        buf += cur;
2431                        res += cur;
2432                }
2433        }
2434        mutex_unlock(&cpuhp_state_mutex);
2435        return res;
2436}
2437static DEVICE_ATTR_RO(states);
2438
2439static struct attribute *cpuhp_cpu_root_attrs[] = {
2440        &dev_attr_states.attr,
2441        NULL
2442};
2443
2444static const struct attribute_group cpuhp_cpu_root_attr_group = {
2445        .attrs = cpuhp_cpu_root_attrs,
2446        .name = "hotplug",
2447        NULL
2448};
2449
2450#ifdef CONFIG_HOTPLUG_SMT
2451
2452static ssize_t
2453__store_smt_control(struct device *dev, struct device_attribute *attr,
2454                    const char *buf, size_t count)
2455{
2456        int ctrlval, ret;
2457
2458        if (sysfs_streq(buf, "on"))
2459                ctrlval = CPU_SMT_ENABLED;
2460        else if (sysfs_streq(buf, "off"))
2461                ctrlval = CPU_SMT_DISABLED;
2462        else if (sysfs_streq(buf, "forceoff"))
2463                ctrlval = CPU_SMT_FORCE_DISABLED;
2464        else
2465                return -EINVAL;
2466
2467        if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
2468                return -EPERM;
2469
2470        if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
2471                return -ENODEV;
2472
2473        ret = lock_device_hotplug_sysfs();
2474        if (ret)
2475                return ret;
2476
2477        if (ctrlval != cpu_smt_control) {
2478                switch (ctrlval) {
2479                case CPU_SMT_ENABLED:
2480                        ret = cpuhp_smt_enable();
2481                        break;
2482                case CPU_SMT_DISABLED:
2483                case CPU_SMT_FORCE_DISABLED:
2484                        ret = cpuhp_smt_disable(ctrlval);
2485                        break;
2486                }
2487        }
2488
2489        unlock_device_hotplug();
2490        return ret ? ret : count;
2491}
2492
2493#else /* !CONFIG_HOTPLUG_SMT */
2494static ssize_t
2495__store_smt_control(struct device *dev, struct device_attribute *attr,
2496                    const char *buf, size_t count)
2497{
2498        return -ENODEV;
2499}
2500#endif /* CONFIG_HOTPLUG_SMT */
2501
2502static const char *smt_states[] = {
2503        [CPU_SMT_ENABLED]               = "on",
2504        [CPU_SMT_DISABLED]              = "off",
2505        [CPU_SMT_FORCE_DISABLED]        = "forceoff",
2506        [CPU_SMT_NOT_SUPPORTED]         = "notsupported",
2507        [CPU_SMT_NOT_IMPLEMENTED]       = "notimplemented",
2508};
2509
2510static ssize_t control_show(struct device *dev,
2511                            struct device_attribute *attr, char *buf)
2512{
2513        const char *state = smt_states[cpu_smt_control];
2514
2515        return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
2516}
2517
2518static ssize_t control_store(struct device *dev, struct device_attribute *attr,
2519                             const char *buf, size_t count)
2520{
2521        return __store_smt_control(dev, attr, buf, count);
2522}
2523static DEVICE_ATTR_RW(control);
2524
2525static ssize_t active_show(struct device *dev,
2526                           struct device_attribute *attr, char *buf)
2527{
2528        return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active());
2529}
2530static DEVICE_ATTR_RO(active);
2531
2532static struct attribute *cpuhp_smt_attrs[] = {
2533        &dev_attr_control.attr,
2534        &dev_attr_active.attr,
2535        NULL
2536};
2537
2538static const struct attribute_group cpuhp_smt_attr_group = {
2539        .attrs = cpuhp_smt_attrs,
2540        .name = "smt",
2541        NULL
2542};
2543
2544static int __init cpu_smt_sysfs_init(void)
2545{
2546        return sysfs_create_group(&cpu_subsys.dev_root->kobj,
2547                                  &cpuhp_smt_attr_group);
2548}
2549
2550static int __init cpuhp_sysfs_init(void)
2551{
2552        int cpu, ret;
2553
2554        ret = cpu_smt_sysfs_init();
2555        if (ret)
2556                return ret;
2557
2558        ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
2559                                 &cpuhp_cpu_root_attr_group);
2560        if (ret)
2561                return ret;
2562
2563        for_each_possible_cpu(cpu) {
2564                struct device *dev = get_cpu_device(cpu);
2565
2566                if (!dev)
2567                        continue;
2568                ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
2569                if (ret)
2570                        return ret;
2571        }
2572        return 0;
2573}
2574device_initcall(cpuhp_sysfs_init);
2575#endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */
2576
2577/*
2578 * cpu_bit_bitmap[] is a special, "compressed" data structure that
2579 * represents, for each bit number nr, the NR_CPUS-bit value 1 << nr.
2580 *
2581 * It is used by cpumask_of() to get a constant address for a CPU
2582 * mask value that has only a single bit set.
2583 */
2584
2585/* cpu_bit_bitmap[0] is empty - so we can back into it */
2586#define MASK_DECLARE_1(x)       [x+1][0] = (1UL << (x))
2587#define MASK_DECLARE_2(x)       MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
2588#define MASK_DECLARE_4(x)       MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
2589#define MASK_DECLARE_8(x)       MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
2590
2591const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
2592
2593        MASK_DECLARE_8(0),      MASK_DECLARE_8(8),
2594        MASK_DECLARE_8(16),     MASK_DECLARE_8(24),
2595#if BITS_PER_LONG > 32
2596        MASK_DECLARE_8(32),     MASK_DECLARE_8(40),
2597        MASK_DECLARE_8(48),     MASK_DECLARE_8(56),
2598#endif
2599};
2600EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
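/*
 * Editor's note: a worked example of the "compression" trick above,
 * assuming 64-bit longs and NR_CPUS = 128 (i.e. two longs per row).
 * cpumask_of(67) is computed by get_cpu_mask() in <linux/cpumask.h> as:
 *
 *	const unsigned long *p = cpu_bit_bitmap[1 + 67 % 64];
 *	p -= 67 / 64;
 *
 * The first line picks row 4, whose word 0 is 1UL << 3; the second backs up
 * by one long. The mask therefore reads its word 0 from the all-zero
 * trailing long of row 3 and its word 1 from the first long of row 4, so
 * exactly bit 67 is set. One (BITS_PER_LONG + 1) x BITS_TO_LONGS(NR_CPUS)
 * table thus provides a constant single-bit mask for every possible CPU.
 */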
2601
2602const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
2603EXPORT_SYMBOL(cpu_all_bits);
2604
2605#ifdef CONFIG_INIT_ALL_POSSIBLE
2606struct cpumask __cpu_possible_mask __read_mostly
2607        = {CPU_BITS_ALL};
2608#else
2609struct cpumask __cpu_possible_mask __read_mostly;
2610#endif
2611EXPORT_SYMBOL(__cpu_possible_mask);
2612
2613struct cpumask __cpu_online_mask __read_mostly;
2614EXPORT_SYMBOL(__cpu_online_mask);
2615
2616struct cpumask __cpu_present_mask __read_mostly;
2617EXPORT_SYMBOL(__cpu_present_mask);
2618
2619struct cpumask __cpu_active_mask __read_mostly;
2620EXPORT_SYMBOL(__cpu_active_mask);
2621
2622struct cpumask __cpu_dying_mask __read_mostly;
2623EXPORT_SYMBOL(__cpu_dying_mask);
2624
2625atomic_t __num_online_cpus __read_mostly;
2626EXPORT_SYMBOL(__num_online_cpus);
2627
2628void init_cpu_present(const struct cpumask *src)
2629{
2630        cpumask_copy(&__cpu_present_mask, src);
2631}
2632
2633void init_cpu_possible(const struct cpumask *src)
2634{
2635        cpumask_copy(&__cpu_possible_mask, src);
2636}
2637
2638void init_cpu_online(const struct cpumask *src)
2639{
2640        cpumask_copy(&__cpu_online_mask, src);
2641}
2642
2643void set_cpu_online(unsigned int cpu, bool online)
2644{
2645        /*
2646         * atomic_inc/dec() is required to handle the horrid abuse of this
2647         * function by the reboot and kexec code which invoke it from
2648         * IPI/NMI broadcasts when shutting down CPUs. Invocation from
2649         * regular CPU hotplug is properly serialized.
2650         *
2651         * Note that __num_online_cpus being an atomic_t does not
2652         * protect readers which are not serialized against
2653         * concurrent hotplug operations.
2654         */
2655        if (online) {
2656                if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
2657                        atomic_inc(&__num_online_cpus);
2658        } else {
2659                if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask))
2660                        atomic_dec(&__num_online_cpus);
2661        }
2662}
2663
2664/*
2665 * Activate the first processor.
2666 */
2667void __init boot_cpu_init(void)
2668{
2669        int cpu = smp_processor_id();
2670
2671        /* Mark the boot cpu "present", "online" etc. for the SMP and UP case */
2672        set_cpu_online(cpu, true);
2673        set_cpu_active(cpu, true);
2674        set_cpu_present(cpu, true);
2675        set_cpu_possible(cpu, true);
2676
2677#ifdef CONFIG_SMP
2678        __boot_cpu_id = cpu;
2679#endif
2680}
2681
2682/*
2683 * Must be called _AFTER_ setting up the per_cpu areas
2684 */
2685void __init boot_cpu_hotplug_init(void)
2686{
2687#ifdef CONFIG_SMP
2688        cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
2689#endif
2690        this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
2691}
2692
2693/*
2694 * These are used for a global "mitigations=" cmdline option for toggling
2695 * optional CPU mitigations.
2696 */
2697enum cpu_mitigations {
2698        CPU_MITIGATIONS_OFF,
2699        CPU_MITIGATIONS_AUTO,
2700        CPU_MITIGATIONS_AUTO_NOSMT,
2701};
2702
2703static enum cpu_mitigations cpu_mitigations __ro_after_init =
2704        CPU_MITIGATIONS_AUTO;
2705
2706static int __init mitigations_parse_cmdline(char *arg)
2707{
2708        if (!strcmp(arg, "off"))
2709                cpu_mitigations = CPU_MITIGATIONS_OFF;
2710        else if (!strcmp(arg, "auto"))
2711                cpu_mitigations = CPU_MITIGATIONS_AUTO;
2712        else if (!strcmp(arg, "auto,nosmt"))
2713                cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
2714        else
2715                pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
2716                        arg);
2717
2718        return 0;
2719}
2720early_param("mitigations", mitigations_parse_cmdline);
2721
2722/* mitigations=off */
2723bool cpu_mitigations_off(void)
2724{
2725        return cpu_mitigations == CPU_MITIGATIONS_OFF;
2726}
2727EXPORT_SYMBOL_GPL(cpu_mitigations_off);
2728
2729/* mitigations=auto,nosmt */
2730bool cpu_mitigations_auto_nosmt(void)
2731{
2732        return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
2733}
2734EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
2735
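/*
 * Editor's note: an illustrative sketch (not taken from any particular
 * architecture) of how these helpers are consumed. Mitigation selection
 * code typically checks them early during boot; mitigation_needs_smt_off
 * is a hypothetical flag standing in for a real vulnerability check.
 *
 *	if (cpu_mitigations_off())
 *		return;
 *	if (cpu_mitigations_auto_nosmt() && mitigation_needs_smt_off)
 *		cpu_smt_disable(false);
 */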