linux/init/main.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *  linux/init/main.c
   4 *
   5 *  Copyright (C) 1991, 1992  Linus Torvalds
   6 *
   7 *  GK 2/5/95  -  Changed to support mounting root fs via NFS
   8 *  Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96
   9 *  Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96
  10 *  Simplified starting of init:  Michael A. Griffith <grif@acm.org>
  11 */
  12
  13#define DEBUG           /* Enable initcall_debug */
  14
  15#include <linux/types.h>
  16#include <linux/extable.h>
  17#include <linux/module.h>
  18#include <linux/proc_fs.h>
  19#include <linux/binfmts.h>
  20#include <linux/kernel.h>
  21#include <linux/syscalls.h>
  22#include <linux/stackprotector.h>
  23#include <linux/string.h>
  24#include <linux/ctype.h>
  25#include <linux/delay.h>
  26#include <linux/ioport.h>
  27#include <linux/init.h>
  28#include <linux/initrd.h>
  29#include <linux/memblock.h>
  30#include <linux/acpi.h>
  31#include <linux/bootconfig.h>
  32#include <linux/console.h>
  33#include <linux/nmi.h>
  34#include <linux/percpu.h>
  35#include <linux/kmod.h>
  36#include <linux/kprobes.h>
  37#include <linux/kmsan.h>
  38#include <linux/vmalloc.h>
  39#include <linux/kernel_stat.h>
  40#include <linux/start_kernel.h>
  41#include <linux/security.h>
  42#include <linux/smp.h>
  43#include <linux/profile.h>
  44#include <linux/kfence.h>
  45#include <linux/rcupdate.h>
  46#include <linux/srcu.h>
  47#include <linux/moduleparam.h>
  48#include <linux/kallsyms.h>
  49#include <linux/buildid.h>
  50#include <linux/writeback.h>
  51#include <linux/cpu.h>
  52#include <linux/cpuset.h>
  53#include <linux/cgroup.h>
  54#include <linux/efi.h>
  55#include <linux/tick.h>
  56#include <linux/sched/isolation.h>
  57#include <linux/interrupt.h>
  58#include <linux/taskstats_kern.h>
  59#include <linux/delayacct.h>
  60#include <linux/unistd.h>
  61#include <linux/utsname.h>
  62#include <linux/rmap.h>
  63#include <linux/mempolicy.h>
  64#include <linux/key.h>
  65#include <linux/debug_locks.h>
  66#include <linux/debugobjects.h>
  67#include <linux/lockdep.h>
  68#include <linux/kmemleak.h>
  69#include <linux/padata.h>
  70#include <linux/pid_namespace.h>
  71#include <linux/device/driver.h>
  72#include <linux/kthread.h>
  73#include <linux/sched.h>
  74#include <linux/sched/init.h>
  75#include <linux/signal.h>
  76#include <linux/idr.h>
  77#include <linux/kgdb.h>
  78#include <linux/ftrace.h>
  79#include <linux/async.h>
  80#include <linux/shmem_fs.h>
  81#include <linux/slab.h>
  82#include <linux/perf_event.h>
  83#include <linux/ptrace.h>
  84#include <linux/pti.h>
  85#include <linux/blkdev.h>
  86#include <linux/sched/clock.h>
  87#include <linux/sched/task.h>
  88#include <linux/sched/task_stack.h>
  89#include <linux/context_tracking.h>
  90#include <linux/random.h>
  91#include <linux/list.h>
  92#include <linux/integrity.h>
  93#include <linux/proc_ns.h>
  94#include <linux/io.h>
  95#include <linux/cache.h>
  96#include <linux/rodata_test.h>
  97#include <linux/jump_label.h>
  98#include <linux/kcsan.h>
  99#include <linux/init_syscalls.h>
 100#include <linux/stackdepot.h>
 101#include <linux/randomize_kstack.h>
 102#include <net/net_namespace.h>
 103
 104#include <asm/io.h>
 105#include <asm/setup.h>
 106#include <asm/sections.h>
 107#include <asm/cacheflush.h>
 108
 109#define CREATE_TRACE_POINTS
 110#include <trace/events/initcall.h>
 111
 112#include <kunit/test.h>
 113
 114static int kernel_init(void *);
 115
 116/*
 117 * Debug helper: via this flag we know that we are in 'early bootup code'
 118 * where only the boot processor is running with IRQ disabled.  This means
 119 * two things - IRQ must not be enabled before the flag is cleared and some
 120 * operations which are not allowed with IRQ disabled are allowed while the
 121 * flag is set.
 122 */
 123bool early_boot_irqs_disabled __read_mostly;
 124
 125enum system_states system_state __read_mostly;
 126EXPORT_SYMBOL(system_state);
 127
 128/*
 129 * Boot command-line arguments
 130 */
 131#define MAX_INIT_ARGS CONFIG_INIT_ENV_ARG_LIMIT
 132#define MAX_INIT_ENVS CONFIG_INIT_ENV_ARG_LIMIT
 133
 134/* Default late time init is NULL. archs can override this later. */
 135void (*__initdata late_time_init)(void);
 136
 137/* Untouched command line saved by arch-specific code. */
 138char __initdata boot_command_line[COMMAND_LINE_SIZE];
 139/* Untouched saved command line (eg. for /proc) */
 140char *saved_command_line __ro_after_init;
 141unsigned int saved_command_line_len __ro_after_init;
 142/* Command line for parameter parsing */
 143static char *static_command_line;
 144/* Untouched extra command line */
 145static char *extra_command_line;
 146/* Extra init arguments */
 147static char *extra_init_args;
 148
 149#ifdef CONFIG_BOOT_CONFIG
 150/* Is bootconfig on command line? */
 151static bool bootconfig_found;
 152static size_t initargs_offs;
 153#else
 154# define bootconfig_found false
 155# define initargs_offs 0
 156#endif
 157
 158static char *execute_command;
 159static char *ramdisk_execute_command = "/init";
 160
 161/*
 162 * Used to generate warnings if static_key manipulation functions are used
 163 * before jump_label_init is called.
 164 */
 165bool static_key_initialized __read_mostly;
 166EXPORT_SYMBOL_GPL(static_key_initialized);
 167
 168/*
 169 * If set, this is an indication to the drivers that reset the underlying
 170 * device before going ahead with the initialization otherwise driver might
 171 * rely on the BIOS and skip the reset operation.
 172 *
 173 * This is useful if kernel is booting in an unreliable environment.
 174 * For ex. kdump situation where previous kernel has crashed, BIOS has been
 175 * skipped and devices will be in unknown state.
 176 */
 177unsigned int reset_devices;
 178EXPORT_SYMBOL(reset_devices);
 179
 180static int __init set_reset_devices(char *str)
 181{
 182        reset_devices = 1;
 183        return 1;
 184}
 185
 186__setup("reset_devices", set_reset_devices);
 187
 188static const char *argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
 189const char *envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
 190static const char *panic_later, *panic_param;
 191
 192static bool __init obsolete_checksetup(char *line)
 193{
 194        const struct obs_kernel_param *p;
 195        bool had_early_param = false;
 196
 197        p = __setup_start;
 198        do {
 199                int n = strlen(p->str);
 200                if (parameqn(line, p->str, n)) {
 201                        if (p->early) {
 202                                /* Already done in parse_early_param?
 203                                 * (Needs exact match on param part).
 204                                 * Keep iterating, as we can have early
 205                                 * params and __setups of same names 8( */
 206                                if (line[n] == '\0' || line[n] == '=')
 207                                        had_early_param = true;
 208                        } else if (!p->setup_func) {
 209                                pr_warn("Parameter %s is obsolete, ignored\n",
 210                                        p->str);
 211                                return true;
 212                        } else if (p->setup_func(line + n))
 213                                return true;
 214                }
 215                p++;
 216        } while (p < __setup_end);
 217
 218        return had_early_param;
 219}
 220
 221/*
 222 * This should be approx 2 Bo*oMips to start (note initial shift), and will
 223 * still work even if initially too large, it will just take slightly longer
 224 */
 225unsigned long loops_per_jiffy = (1<<12);
 226EXPORT_SYMBOL(loops_per_jiffy);
 227
 228static int __init debug_kernel(char *str)
 229{
 230        console_loglevel = CONSOLE_LOGLEVEL_DEBUG;
 231        return 0;
 232}
 233
 234static int __init quiet_kernel(char *str)
 235{
 236        console_loglevel = CONSOLE_LOGLEVEL_QUIET;
 237        return 0;
 238}
 239
 240early_param("debug", debug_kernel);
 241early_param("quiet", quiet_kernel);
 242
 243static int __init loglevel(char *str)
 244{
 245        int newlevel;
 246
 247        /*
 248         * Only update loglevel value when a correct setting was passed,
 249         * to prevent blind crashes (when loglevel being set to 0) that
 250         * are quite hard to debug
 251         */
 252        if (get_option(&str, &newlevel)) {
 253                console_loglevel = newlevel;
 254                return 0;
 255        }
 256
 257        return -EINVAL;
 258}
 259
 260early_param("loglevel", loglevel);
 261
 262#ifdef CONFIG_BLK_DEV_INITRD
 263static void * __init get_boot_config_from_initrd(size_t *_size)
 264{
 265        u32 size, csum;
 266        char *data;
 267        u32 *hdr;
 268        int i;
 269
 270        if (!initrd_end)
 271                return NULL;
 272
 273        data = (char *)initrd_end - BOOTCONFIG_MAGIC_LEN;
 274        /*
 275         * Since Grub may align the size of initrd to 4, we must
 276         * check the preceding 3 bytes as well.
 277         */
 278        for (i = 0; i < 4; i++) {
 279                if (!memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN))
 280                        goto found;
 281                data--;
 282        }
 283        return NULL;
 284
 285found:
 286        hdr = (u32 *)(data - 8);
 287        size = le32_to_cpu(hdr[0]);
 288        csum = le32_to_cpu(hdr[1]);
 289
 290        data = ((void *)hdr) - size;
 291        if ((unsigned long)data < initrd_start) {
 292                pr_err("bootconfig size %d is greater than initrd size %ld\n",
 293                        size, initrd_end - initrd_start);
 294                return NULL;
 295        }
 296
 297        if (xbc_calc_checksum(data, size) != csum) {
 298                pr_err("bootconfig checksum failed\n");
 299                return NULL;
 300        }
 301
 302        /* Remove bootconfig from initramfs/initrd */
 303        initrd_end = (unsigned long)data;
 304        if (_size)
 305                *_size = size;
 306
 307        return data;
 308}
 309#else
 310static void * __init get_boot_config_from_initrd(size_t *_size)
 311{
 312        return NULL;
 313}
 314#endif
 315
 316#ifdef CONFIG_BOOT_CONFIG
 317
 318static char xbc_namebuf[XBC_KEYLEN_MAX] __initdata;
 319
 320#define rest(dst, end) ((end) > (dst) ? (end) - (dst) : 0)
 321
 322static int __init xbc_snprint_cmdline(char *buf, size_t size,
 323                                      struct xbc_node *root)
 324{
 325        struct xbc_node *knode, *vnode;
 326        char *end = buf + size;
 327        const char *val;
 328        int ret;
 329
 330        xbc_node_for_each_key_value(root, knode, val) {
 331                ret = xbc_node_compose_key_after(root, knode,
 332                                        xbc_namebuf, XBC_KEYLEN_MAX);
 333                if (ret < 0)
 334                        return ret;
 335
 336                vnode = xbc_node_get_child(knode);
 337                if (!vnode) {
 338                        ret = snprintf(buf, rest(buf, end), "%s ", xbc_namebuf);
 339                        if (ret < 0)
 340                                return ret;
 341                        buf += ret;
 342                        continue;
 343                }
 344                xbc_array_for_each_value(vnode, val) {
 345                        ret = snprintf(buf, rest(buf, end), "%s=\"%s\" ",
 346                                       xbc_namebuf, val);
 347                        if (ret < 0)
 348                                return ret;
 349                        buf += ret;
 350                }
 351        }
 352
 353        return buf - (end - size);
 354}
 355#undef rest
 356
 357/* Make an extra command line under given key word */
 358static char * __init xbc_make_cmdline(const char *key)
 359{
 360        struct xbc_node *root;
 361        char *new_cmdline;
 362        int ret, len = 0;
 363
 364        root = xbc_find_node(key);
 365        if (!root)
 366                return NULL;
 367
 368        /* Count required buffer size */
 369        len = xbc_snprint_cmdline(NULL, 0, root);
 370        if (len <= 0)
 371                return NULL;
 372
 373        new_cmdline = memblock_alloc(len + 1, SMP_CACHE_BYTES);
 374        if (!new_cmdline) {
 375                pr_err("Failed to allocate memory for extra kernel cmdline.\n");
 376                return NULL;
 377        }
 378
 379        ret = xbc_snprint_cmdline(new_cmdline, len + 1, root);
 380        if (ret < 0 || ret > len) {
 381                pr_err("Failed to print extra kernel cmdline.\n");
 382                memblock_free(new_cmdline, len + 1);
 383                return NULL;
 384        }
 385
 386        return new_cmdline;
 387}
 388
 389static int __init bootconfig_params(char *param, char *val,
 390                                    const char *unused, void *arg)
 391{
 392        if (strcmp(param, "bootconfig") == 0) {
 393                bootconfig_found = true;
 394        }
 395        return 0;
 396}
 397
 398static int __init warn_bootconfig(char *str)
 399{
 400        /* The 'bootconfig' has been handled by bootconfig_params(). */
 401        return 0;
 402}
 403
 404static void __init setup_boot_config(void)
 405{
 406        static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
 407        const char *msg, *data;
 408        int pos, ret;
 409        size_t size;
 410        char *err;
 411
 412        /* Cut out the bootconfig data even if we have no bootconfig option */
 413        data = get_boot_config_from_initrd(&size);
 414        /* If there is no bootconfig in initrd, try embedded one. */
 415        if (!data)
 416                data = xbc_get_embedded_bootconfig(&size);
 417
 418        strscpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
 419        err = parse_args("bootconfig", tmp_cmdline, NULL, 0, 0, 0, NULL,
 420                         bootconfig_params);
 421
 422        if (IS_ERR(err) || !(bootconfig_found || IS_ENABLED(CONFIG_BOOT_CONFIG_FORCE)))
 423                return;
 424
 425        /* parse_args() stops at the next param of '--' and returns an address */
 426        if (err)
 427                initargs_offs = err - tmp_cmdline;
 428
 429        if (!data) {
 430                /* If user intended to use bootconfig, show an error level message */
 431                if (bootconfig_found)
 432                        pr_err("'bootconfig' found on command line, but no bootconfig found\n");
 433                else
 434                        pr_info("No bootconfig data provided, so skipping bootconfig");
 435                return;
 436        }
 437
 438        if (size >= XBC_DATA_MAX) {
 439                pr_err("bootconfig size %ld greater than max size %d\n",
 440                        (long)size, XBC_DATA_MAX);
 441                return;
 442        }
 443
 444        ret = xbc_init(data, size, &msg, &pos);
 445        if (ret < 0) {
 446                if (pos < 0)
 447                        pr_err("Failed to init bootconfig: %s.\n", msg);
 448                else
 449                        pr_err("Failed to parse bootconfig: %s at %d.\n",
 450                                msg, pos);
 451        } else {
 452                xbc_get_info(&ret, NULL);
 453                pr_info("Load bootconfig: %ld bytes %d nodes\n", (long)size, ret);
 454                /* keys starting with "kernel." are passed via cmdline */
 455                extra_command_line = xbc_make_cmdline("kernel");
 456                /* Also, "init." keys are init arguments */
 457                extra_init_args = xbc_make_cmdline("init");
 458        }
 459        return;
 460}
 461
 462static void __init exit_boot_config(void)
 463{
 464        xbc_exit();
 465}
 466
 467#else   /* !CONFIG_BOOT_CONFIG */
 468
 469static void __init setup_boot_config(void)
 470{
 471        /* Remove bootconfig data from initrd */
 472        get_boot_config_from_initrd(NULL);
 473}
 474
 475static int __init warn_bootconfig(char *str)
 476{
 477        pr_warn("WARNING: 'bootconfig' found on the kernel command line but CONFIG_BOOT_CONFIG is not set.\n");
 478        return 0;
 479}
 480
 481#define exit_boot_config()      do {} while (0)
 482
 483#endif  /* CONFIG_BOOT_CONFIG */
 484
 485early_param("bootconfig", warn_bootconfig);
 486
 487/* Change NUL term back to "=", to make "param" the whole string. */
 488static void __init repair_env_string(char *param, char *val)
 489{
 490        if (val) {
 491                /* param=val or param="val"? */
 492                if (val == param+strlen(param)+1)
 493                        val[-1] = '=';
 494                else if (val == param+strlen(param)+2) {
 495                        val[-2] = '=';
 496                        memmove(val-1, val, strlen(val)+1);
 497                } else
 498                        BUG();
 499        }
 500}
 501
 502/* Anything after -- gets handed straight to init. */
 503static int __init set_init_arg(char *param, char *val,
 504                               const char *unused, void *arg)
 505{
 506        unsigned int i;
 507
 508        if (panic_later)
 509                return 0;
 510
 511        repair_env_string(param, val);
 512
 513        for (i = 0; argv_init[i]; i++) {
 514                if (i == MAX_INIT_ARGS) {
 515                        panic_later = "init";
 516                        panic_param = param;
 517                        return 0;
 518                }
 519        }
 520        argv_init[i] = param;
 521        return 0;
 522}
 523
 524/*
 525 * Unknown boot options get handed to init, unless they look like
 526 * unused parameters (modprobe will find them in /proc/cmdline).
 527 */
 528static int __init unknown_bootoption(char *param, char *val,
 529                                     const char *unused, void *arg)
 530{
 531        size_t len = strlen(param);
 532
 533        /* Handle params aliased to sysctls */
 534        if (sysctl_is_alias(param))
 535                return 0;
 536
 537        repair_env_string(param, val);
 538
 539        /* Handle obsolete-style parameters */
 540        if (obsolete_checksetup(param))
 541                return 0;
 542
 543        /* Unused module parameter. */
 544        if (strnchr(param, len, '.'))
 545                return 0;
 546
 547        if (panic_later)
 548                return 0;
 549
 550        if (val) {
 551                /* Environment option */
 552                unsigned int i;
 553                for (i = 0; envp_init[i]; i++) {
 554                        if (i == MAX_INIT_ENVS) {
 555                                panic_later = "env";
 556                                panic_param = param;
 557                        }
 558                        if (!strncmp(param, envp_init[i], len+1))
 559                                break;
 560                }
 561                envp_init[i] = param;
 562        } else {
 563                /* Command line option */
 564                unsigned int i;
 565                for (i = 0; argv_init[i]; i++) {
 566                        if (i == MAX_INIT_ARGS) {
 567                                panic_later = "init";
 568                                panic_param = param;
 569                        }
 570                }
 571                argv_init[i] = param;
 572        }
 573        return 0;
 574}
 575
 576static int __init init_setup(char *str)
 577{
 578        unsigned int i;
 579
 580        execute_command = str;
 581        /*
 582         * In case LILO is going to boot us with default command line,
 583         * it prepends "auto" before the whole cmdline which makes
 584         * the shell think it should execute a script with such name.
 585         * So we ignore all arguments entered _before_ init=... [MJ]
 586         */
 587        for (i = 1; i < MAX_INIT_ARGS; i++)
 588                argv_init[i] = NULL;
 589        return 1;
 590}
 591__setup("init=", init_setup);
 592
 593static int __init rdinit_setup(char *str)
 594{
 595        unsigned int i;
 596
 597        ramdisk_execute_command = str;
 598        /* See "auto" comment in init_setup */
 599        for (i = 1; i < MAX_INIT_ARGS; i++)
 600                argv_init[i] = NULL;
 601        return 1;
 602}
 603__setup("rdinit=", rdinit_setup);
 604
 605#ifndef CONFIG_SMP
 606static const unsigned int setup_max_cpus = NR_CPUS;
 607static inline void setup_nr_cpu_ids(void) { }
 608static inline void smp_prepare_cpus(unsigned int maxcpus) { }
 609#endif
 610
 611/*
 612 * We need to store the untouched command line for future reference.
 613 * We also need to store the touched command line since the parameter
 614 * parsing is performed in place, and we should allow a component to
 615 * store reference of name/value for future reference.
 616 */
 617static void __init setup_command_line(char *command_line)
 618{
 619        size_t len, xlen = 0, ilen = 0;
 620
 621        if (extra_command_line)
 622                xlen = strlen(extra_command_line);
 623        if (extra_init_args)
 624                ilen = strlen(extra_init_args) + 4; /* for " -- " */
 625
 626        len = xlen + strlen(boot_command_line) + 1;
 627
 628        saved_command_line = memblock_alloc(len + ilen, SMP_CACHE_BYTES);
 629        if (!saved_command_line)
 630                panic("%s: Failed to allocate %zu bytes\n", __func__, len + ilen);
 631
 632        static_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
 633        if (!static_command_line)
 634                panic("%s: Failed to allocate %zu bytes\n", __func__, len);
 635
 636        if (xlen) {
 637                /*
 638                 * We have to put extra_command_line before boot command
 639                 * lines because there could be dashes (separator of init
 640                 * command line) in the command lines.
 641                 */
 642                strcpy(saved_command_line, extra_command_line);
 643                strcpy(static_command_line, extra_command_line);
 644        }
 645        strcpy(saved_command_line + xlen, boot_command_line);
 646        strcpy(static_command_line + xlen, command_line);
 647
 648        if (ilen) {
 649                /*
 650                 * Append supplemental init boot args to saved_command_line
 651                 * so that user can check what command line options passed
 652                 * to init.
 653                 * The order should always be
 654                 * " -- "[bootconfig init-param][cmdline init-param]
 655                 */
 656                if (initargs_offs) {
 657                        len = xlen + initargs_offs;
 658                        strcpy(saved_command_line + len, extra_init_args);
 659                        len += ilen - 4;        /* strlen(extra_init_args) */
 660                        strcpy(saved_command_line + len,
 661                                boot_command_line + initargs_offs - 1);
 662                } else {
 663                        len = strlen(saved_command_line);
 664                        strcpy(saved_command_line + len, " -- ");
 665                        len += 4;
 666                        strcpy(saved_command_line + len, extra_init_args);
 667                }
 668        }
 669
 670        saved_command_line_len = strlen(saved_command_line);
 671}
 672
 673/*
 674 * We need to finalize in a non-__init function or else race conditions
 675 * between the root thread and the init thread may cause start_kernel to
 676 * be reaped by free_initmem before the root thread has proceeded to
 677 * cpu_idle.
 678 *
 679 * gcc-3.4 accidentally inlines this function, so use noinline.
 680 */
 681
 682static __initdata DECLARE_COMPLETION(kthreadd_done);
 683
 684noinline void __ref __noreturn rest_init(void)
 685{
 686        struct task_struct *tsk;
 687        int pid;
 688
 689        rcu_scheduler_starting();
 690        /*
 691         * We need to spawn init first so that it obtains pid 1, however
 692         * the init task will end up wanting to create kthreads, which, if
 693         * we schedule it before we create kthreadd, will OOPS.
 694         */
 695        pid = user_mode_thread(kernel_init, NULL, CLONE_FS);
 696        /*
 697         * Pin init on the boot CPU. Task migration is not properly working
 698         * until sched_init_smp() has been run. It will set the allowed
 699         * CPUs for init to the non isolated CPUs.
 700         */
 701        rcu_read_lock();
 702        tsk = find_task_by_pid_ns(pid, &init_pid_ns);
 703        tsk->flags |= PF_NO_SETAFFINITY;
 704        set_cpus_allowed_ptr(tsk, cpumask_of(smp_processor_id()));
 705        rcu_read_unlock();
 706
 707        numa_default_policy();
 708        pid = kernel_thread(kthreadd, NULL, NULL, CLONE_FS | CLONE_FILES);
 709        rcu_read_lock();
 710        kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
 711        rcu_read_unlock();
 712
 713        /*
 714         * Enable might_sleep() and smp_processor_id() checks.
 715         * They cannot be enabled earlier because with CONFIG_PREEMPTION=y
 716         * kernel_thread() would trigger might_sleep() splats. With
 717         * CONFIG_PREEMPT_VOLUNTARY=y the init task might have scheduled
 718         * already, but it's stuck on the kthreadd_done completion.
 719         */
 720        system_state = SYSTEM_SCHEDULING;
 721
 722        complete(&kthreadd_done);
 723
 724        /*
 725         * The boot idle thread must execute schedule()
 726         * at least once to get things moving:
 727         */
 728        schedule_preempt_disabled();
 729        /* Call into cpu_idle with preempt disabled */
 730        cpu_startup_entry(CPUHP_ONLINE);
 731}
 732
 733/* Check for early params. */
 734static int __init do_early_param(char *param, char *val,
 735                                 const char *unused, void *arg)
 736{
 737        const struct obs_kernel_param *p;
 738
 739        for (p = __setup_start; p < __setup_end; p++) {
 740                if ((p->early && parameq(param, p->str)) ||
 741                    (strcmp(param, "console") == 0 &&
 742                     strcmp(p->str, "earlycon") == 0)
 743                ) {
 744                        if (p->setup_func(val) != 0)
 745                                pr_warn("Malformed early option '%s'\n", param);
 746                }
 747        }
 748        /* We accept everything at this stage. */
 749        return 0;
 750}
 751
 752void __init parse_early_options(char *cmdline)
 753{
 754        parse_args("early options", cmdline, NULL, 0, 0, 0, NULL,
 755                   do_early_param);
 756}
 757
 758/* Arch code calls this early on, or if not, just before other parsing. */
 759void __init parse_early_param(void)
 760{
 761        static int done __initdata;
 762        static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
 763
 764        if (done)
 765                return;
 766
 767        /* All fall through to do_early_param. */
 768        strscpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
 769        parse_early_options(tmp_cmdline);
 770        done = 1;
 771}
 772
 773void __init __weak arch_post_acpi_subsys_init(void) { }
 774
 775void __init __weak smp_setup_processor_id(void)
 776{
 777}
 778
 779# if THREAD_SIZE >= PAGE_SIZE
 780void __init __weak thread_stack_cache_init(void)
 781{
 782}
 783#endif
 784
 785void __init __weak poking_init(void) { }
 786
 787void __init __weak pgtable_cache_init(void) { }
 788
 789void __init __weak trap_init(void) { }
 790
 791bool initcall_debug;
 792core_param(initcall_debug, initcall_debug, bool, 0644);
 793
 794#ifdef TRACEPOINTS_ENABLED
 795static void __init initcall_debug_enable(void);
 796#else
 797static inline void initcall_debug_enable(void)
 798{
 799}
 800#endif
 801
 802#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
 803DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
 804                           randomize_kstack_offset);
 805DEFINE_PER_CPU(u32, kstack_offset);
 806
 807static int __init early_randomize_kstack_offset(char *buf)
 808{
 809        int ret;
 810        bool bool_result;
 811
 812        ret = kstrtobool(buf, &bool_result);
 813        if (ret)
 814                return ret;
 815
 816        if (bool_result)
 817                static_branch_enable(&randomize_kstack_offset);
 818        else
 819                static_branch_disable(&randomize_kstack_offset);
 820        return 0;
 821}
 822early_param("randomize_kstack_offset", early_randomize_kstack_offset);
 823#endif
 824
 825void __init __weak __noreturn arch_call_rest_init(void)
 826{
 827        rest_init();
 828}
 829
 830static void __init print_unknown_bootoptions(void)
 831{
 832        char *unknown_options;
 833        char *end;
 834        const char *const *p;
 835        size_t len;
 836
 837        if (panic_later || (!argv_init[1] && !envp_init[2]))
 838                return;
 839
 840        /*
 841         * Determine how many options we have to print out, plus a space
 842         * before each
 843         */
 844        len = 1; /* null terminator */
 845        for (p = &argv_init[1]; *p; p++) {
 846                len++;
 847                len += strlen(*p);
 848        }
 849        for (p = &envp_init[2]; *p; p++) {
 850                len++;
 851                len += strlen(*p);
 852        }
 853
 854        unknown_options = memblock_alloc(len, SMP_CACHE_BYTES);
 855        if (!unknown_options) {
 856                pr_err("%s: Failed to allocate %zu bytes\n",
 857                        __func__, len);
 858                return;
 859        }
 860        end = unknown_options;
 861
 862        for (p = &argv_init[1]; *p; p++)
 863                end += sprintf(end, " %s", *p);
 864        for (p = &envp_init[2]; *p; p++)
 865                end += sprintf(end, " %s", *p);
 866
 867        /* Start at unknown_options[1] to skip the initial space */
 868        pr_notice("Unknown kernel command line parameters \"%s\", will be passed to user space.\n",
 869                &unknown_options[1]);
 870        memblock_free(unknown_options, len);
 871}
 872
/*
 * start_kernel() - boot-time initialization sequence.
 *
 * Runs the early setup steps with interrupts disabled (from the
 * local_irq_disable() near the top until the local_irq_enable() that
 * follows call_function_init()), then continues subsystem setup with
 * interrupts enabled and finally hands over to arch_call_rest_init().
 *
 * The function is declared __noreturn.  The order of the calls below is
 * significant; the inline comments record the known ordering constraints.
 */
asmlinkage __visible __init __no_sanitize_address __noreturn __no_stack_protector
void start_kernel(void)
{
        char *command_line;
        char *after_dashes;

        set_task_stack_end_magic(&init_task);
        smp_setup_processor_id();
        debug_objects_early_init();
        init_vmlinux_build_id();

        cgroup_init_early();

        local_irq_disable();
        early_boot_irqs_disabled = true;

        /*
         * Interrupts are still disabled. Do necessary setups, then
         * enable them.
         */
        boot_cpu_init();
        page_address_init();
        pr_notice("%s", linux_banner);
        early_security_init();
        setup_arch(&command_line);
        setup_boot_config();
        setup_command_line(command_line);
        setup_nr_cpu_ids();
        setup_per_cpu_areas();
        smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
        boot_cpu_hotplug_init();

        pr_notice("Kernel command line: %s\n", saved_command_line);
        /* parameters may set static keys */
        jump_label_init();
        parse_early_param();
        after_dashes = parse_args("Booting kernel",
                                  static_command_line, __start___param,
                                  __stop___param - __start___param,
                                  -1, -1, NULL, &unknown_bootoption);
        print_unknown_bootoptions();
        /*
         * A valid (non-NULL, non-error) after_dashes string is parsed
         * again, this time handing every item to set_init_arg().
         */
        if (!IS_ERR_OR_NULL(after_dashes))
                parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
                           NULL, set_init_arg);
        if (extra_init_args)
                parse_args("Setting extra init args", extra_init_args,
                           NULL, 0, -1, -1, NULL, set_init_arg);

        /* Architectural and non-timekeeping rng init, before allocator init */
        random_init_early(command_line);

        /*
         * These use large bootmem allocations and must precede
         * initialization of page allocator
         */
        setup_log_buf(0);
        vfs_caches_init_early();
        sort_main_extable();
        trap_init();
        mm_core_init();
        poking_init();
        ftrace_init();

        /* trace_printk can be enabled here */
        early_trace_init();

        /*
         * Set up the scheduler prior starting any interrupts (such as the
         * timer interrupt). Full topology setup happens at smp_init()
         * time - but meanwhile we still have a functioning scheduler.
         */
        sched_init();

        /* Interrupts must still be off here; warn and re-disable if not. */
        if (WARN(!irqs_disabled(),
                 "Interrupts were enabled *very* early, fixing it\n"))
                local_irq_disable();
        radix_tree_init();
        maple_tree_init();

        /*
         * Set up housekeeping before setting up workqueues to allow the unbound
         * workqueue to take non-housekeeping into account.
         */
        housekeeping_init();

        /*
         * Allow workqueue creation and work item queueing/cancelling
         * early.  Work item execution depends on kthreads and starts after
         * workqueue_init().
         */
        workqueue_init_early();

        rcu_init();

        /* Trace events are available after this */
        trace_init();

        if (initcall_debug)
                initcall_debug_enable();

        context_tracking_init();
        /* init some links before init_ISA_irqs() */
        early_irq_init();
        init_IRQ();
        tick_init();
        rcu_init_nohz();
        init_timers();
        srcu_init();
        hrtimers_init();
        softirq_init();
        timekeeping_init();
        time_init();

        /* This must be after timekeeping is initialized */
        random_init();

        /* These make use of the fully initialized rng */
        kfence_init();
        boot_init_stack_canary();

        perf_event_init();
        profile_init();
        call_function_init();
        WARN(!irqs_disabled(), "Interrupts were enabled early\n");

        /* End of the interrupts-disabled phase of boot. */
        early_boot_irqs_disabled = false;
        local_irq_enable();

        kmem_cache_init_late();

        /*
         * HACK ALERT! This is early. We're enabling the console before
         * we've done PCI setups etc, and console_init() must be aware of
         * this. But we do want output early, in case something goes wrong.
         */
        console_init();
        /* A panic recorded in panic_later is raised only after console_init(). */
        if (panic_later)
                panic("Too many boot %s vars at `%s'", panic_later,
                      panic_param);

        lockdep_init();

        /*
         * Need to run this when irqs are enabled, because it wants
         * to self-test [hard/soft]-irqs on/off lock inversion bugs
         * too:
         */
        locking_selftest();

#ifdef CONFIG_BLK_DEV_INITRD
        /*
         * Drop the initrd (initrd_start = 0) when its first page frame lies
         * below min_low_pfn and initrd_below_start_ok is not set.
         */
        if (initrd_start && !initrd_below_start_ok &&
            page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
                pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
                    page_to_pfn(virt_to_page((void *)initrd_start)),
                    min_low_pfn);
                initrd_start = 0;
        }
#endif
        setup_per_cpu_pageset();
        numa_policy_init();
        acpi_early_init();
        /* late_time_init is an optional hook; call it only when set. */
        if (late_time_init)
                late_time_init();
        sched_clock_init();
        calibrate_delay();

        arch_cpu_finalize_init();

        pid_idr_init();
        anon_vma_init();
#ifdef CONFIG_X86
        if (efi_enabled(EFI_RUNTIME_SERVICES))
                efi_enter_virtual_mode();
#endif
        thread_stack_cache_init();
        cred_init();
        fork_init();
        proc_caches_init();
        uts_ns_init();
        key_init();
        security_init();
        dbg_late_init();
        net_ns_init();
        vfs_caches_init();
        pagecache_init();
        signals_init();
        seq_file_init();
        proc_root_init();
        nsfs_init();
        cpuset_init();
        cgroup_init();
        taskstats_init_early();
        delayacct_init();

        acpi_subsystem_init();
        arch_post_acpi_subsys_init();
        kcsan_init();

        /* Do the rest non-__init'ed, we're now alive */
        arch_call_rest_init();

        /*
         * Avoid stack canaries in callers of boot_init_stack_canary for gcc-10
         * and older.
         */
#if !__has_attribute(__no_stack_protector__)
        prevent_tail_call_optimization();
#endif
}
1082
1083/* Call all constructor functions linked into the kernel. */
1084static void __init do_ctors(void)
1085{
1086/*
1087 * For UML, the constructors have already been called by the
1088 * normal setup code as it's just a normal ELF binary, so we
1089 * cannot do it again - but we do need CONFIG_CONSTRUCTORS
1090 * even on UML for modules.
1091 */
1092#if defined(CONFIG_CONSTRUCTORS) && !defined(CONFIG_UML)
1093        ctor_fn_t *fn = (ctor_fn_t *) __ctors_start;
1094
1095        for (; fn < (ctor_fn_t *) __ctors_end; fn++)
1096                (*fn)();
1097#endif
1098}
1099
1100#ifdef CONFIG_KALLSYMS
/*
 * One blacklisted initcall, as requested via "initcall_blacklist=".
 *
 * @next: link in the blacklisted_initcalls list
 * @buf:  NUL-terminated function name, a private copy of one
 *        comma-separated item of the boot parameter
 */
struct blacklist_entry {
        struct list_head next;
        char *buf;
};

/* Filled by initcall_blacklist(), searched by initcall_blacklisted(). */
static __initdata_or_module LIST_HEAD(blacklisted_initcalls);
1107
1108static int __init initcall_blacklist(char *str)
1109{
1110        char *str_entry;
1111        struct blacklist_entry *entry;
1112
1113        /* str argument is a comma-separated list of functions */
1114        do {
1115                str_entry = strsep(&str, ",");
1116                if (str_entry) {
1117                        pr_debug("blacklisting initcall %s\n", str_entry);
1118                        entry = memblock_alloc(sizeof(*entry),
1119                                               SMP_CACHE_BYTES);
1120                        if (!entry)
1121                                panic("%s: Failed to allocate %zu bytes\n",
1122                                      __func__, sizeof(*entry));
1123                        entry->buf = memblock_alloc(strlen(str_entry) + 1,
1124                                                    SMP_CACHE_BYTES);
1125                        if (!entry->buf)
1126                                panic("%s: Failed to allocate %zu bytes\n",
1127                                      __func__, strlen(str_entry) + 1);
1128                        strcpy(entry->buf, str_entry);
1129                        list_add(&entry->next, &blacklisted_initcalls);
1130                }
1131        } while (str_entry);
1132
1133        return 1;
1134}
1135
1136static bool __init_or_module initcall_blacklisted(initcall_t fn)
1137{
1138        struct blacklist_entry *entry;
1139        char fn_name[KSYM_SYMBOL_LEN];
1140        unsigned long addr;
1141
1142        if (list_empty(&blacklisted_initcalls))
1143                return false;
1144
1145        addr = (unsigned long) dereference_function_descriptor(fn);
1146        sprint_symbol_no_offset(fn_name, addr);
1147
1148        /*
1149         * fn will be "function_name [module_name]" where [module_name] is not
1150         * displayed for built-in init functions.  Strip off the [module_name].
1151         */
1152        strreplace(fn_name, ' ', '\0');
1153
1154        list_for_each_entry(entry, &blacklisted_initcalls, next) {
1155                if (!strcmp(fn_name, entry->buf)) {
1156                        pr_debug("initcall %s blacklisted\n", fn_name);
1157                        return true;
1158                }
1159        }
1160
1161        return false;
1162}
1163#else
1164static int __init initcall_blacklist(char *str)
1165{
1166        pr_warn("initcall_blacklist requires CONFIG_KALLSYMS\n");
1167        return 0;
1168}
1169
/*
 * Without CONFIG_KALLSYMS no initcall can be blacklisted, so every
 * initcall is reported as allowed.
 */
static bool __init_or_module initcall_blacklisted(initcall_t fn)
{
        return false;
}
1174#endif
/* Route the "initcall_blacklist=" boot option to initcall_blacklist(). */
__setup("initcall_blacklist=", initcall_blacklist);
1176
1177static __init_or_module void
1178trace_initcall_start_cb(void *data, initcall_t fn)
1179{
1180        ktime_t *calltime = data;
1181
1182        printk(KERN_DEBUG "calling  %pS @ %i\n", fn, task_pid_nr(current));
1183        *calltime = ktime_get();
1184}
1185
1186static __init_or_module void
1187trace_initcall_finish_cb(void *data, initcall_t fn, int ret)
1188{
1189        ktime_t rettime, *calltime = data;
1190
1191        rettime = ktime_get();
1192        printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n",
1193                 fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime));
1194}
1195
/*
 * Start timestamp shared by the initcall start/finish debug callbacks;
 * its address is what they receive as their @data argument.
 */
static ktime_t initcall_calltime;
1197
1198#ifdef TRACEPOINTS_ENABLED
1199static void __init initcall_debug_enable(void)
1200{
1201        int ret;
1202
1203        ret = register_trace_initcall_start(trace_initcall_start_cb,
1204                                            &initcall_calltime);
1205        ret |= register_trace_initcall_finish(trace_initcall_finish_cb,
1206                                              &initcall_calltime);
1207        WARN(ret, "Failed to register initcall tracepoints\n");
1208}
1209# define do_trace_initcall_start        trace_initcall_start
1210# define do_trace_initcall_finish       trace_initcall_finish
1211#else
1212static inline void do_trace_initcall_start(initcall_t fn)
1213{
1214        if (!initcall_debug)
1215                return;
1216        trace_initcall_start_cb(&initcall_calltime, fn);
1217}
1218static inline void do_trace_initcall_finish(initcall_t fn, int ret)
1219{
1220        if (!initcall_debug)
1221                return;
1222        trace_initcall_finish_cb(&initcall_calltime, fn, ret);
1223}
1224#endif /* !TRACEPOINTS_ENABLED */
1225
1226int __init_or_module do_one_initcall(initcall_t fn)
1227{
1228        int count = preempt_count();
1229        char msgbuf[64];
1230        int ret;
1231
1232        if (initcall_blacklisted(fn))
1233                return -EPERM;
1234
1235        do_trace_initcall_start(fn);
1236        ret = fn();
1237        do_trace_initcall_finish(fn, ret);
1238
1239        msgbuf[0] = 0;
1240
1241        if (preempt_count() != count) {
1242                sprintf(msgbuf, "preemption imbalance ");
1243                preempt_count_set(count);
1244        }
1245        if (irqs_disabled()) {
1246                strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf));
1247                local_irq_enable();
1248        }
1249        WARN(msgbuf[0], "initcall %pS returned with %s\n", fn, msgbuf);
1250
1251        add_latent_entropy();
1252        return ret;
1253}
1254
1255
/*
 * Start of the initcall entry table of each level, indexed by level.
 * The final element, __initcall_end, is not a level of its own: it only
 * bounds the last one, so that level N covers the entries in
 * [initcall_levels[N], initcall_levels[N + 1]).
 */
static initcall_entry_t *initcall_levels[] __initdata = {
        __initcall0_start,
        __initcall1_start,
        __initcall2_start,
        __initcall3_start,
        __initcall4_start,
        __initcall5_start,
        __initcall6_start,
        __initcall7_start,
        __initcall_end,
};
1267
/*
 * Human-readable name of each initcall level, indexed like
 * initcall_levels[] (which has one extra trailing end marker).
 *
 * Keep these in sync with initcalls in include/linux/init.h
 */
static const char *initcall_level_names[] __initdata = {
        "pure",
        "core",
        "postcore",
        "arch",
        "subsys",
        "fs",
        "device",
        "late",
};
1279
/*
 * Unknown-parameter callback handed to parse_args() by
 * do_initcall_level().  It ignores all of its arguments and returns 0
 * for every parameter.
 */
static int __init ignore_unknown_bootoption(char *param, char *val,
                               const char *unused, void *arg)
{
        return 0;
}
1285
1286static void __init do_initcall_level(int level, char *command_line)
1287{
1288        initcall_entry_t *fn;
1289
1290        parse_args(initcall_level_names[level],
1291                   command_line, __start___param,
1292                   __stop___param - __start___param,
1293                   level, level,
1294                   NULL, ignore_unknown_bootoption);
1295
1296        trace_initcall_level(initcall_level_names[level]);
1297        for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
1298                do_one_initcall(initcall_from_entry(fn));
1299}
1300
1301static void __init do_initcalls(void)
1302{
1303        int level;
1304        size_t len = saved_command_line_len + 1;
1305        char *command_line;
1306
1307        command_line = kzalloc(len, GFP_KERNEL);
1308        if (!command_line)
1309                panic("%s: Failed to allocate %zu bytes\n", __func__, len);
1310
1311        for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) {
1312                /* Parser modifies command_line, restore it each time */
1313                strcpy(command_line, saved_command_line);
1314                do_initcall_level(level, command_line);
1315        }
1316
1317        kfree(command_line);
1318}
1319
/*
 * Ok, the machine is now initialized. None of the devices
 * have been touched yet, but the CPU subsystem is up and
 * running, and memory and process management works.
 *
 * Now we can finally start doing some real work..
 *
 * The steps run in this order: cpuset SMP setup, driver core setup,
 * IRQ proc setup, constructors, and finally all initcall levels.
 */
static void __init do_basic_setup(void)
{
        cpuset_init_smp();
        driver_init();
        init_irq_proc();
        do_ctors();
        do_initcalls();
}
1335
1336static void __init do_pre_smp_initcalls(void)
1337{
1338        initcall_entry_t *fn;
1339
1340        trace_initcall_level("early");
1341        for (fn = __initcall_start; fn < __initcall0_start; fn++)
1342                do_one_initcall(initcall_from_entry(fn));
1343}
1344
1345static int run_init_process(const char *init_filename)
1346{
1347        const char *const *p;
1348
1349        argv_init[0] = init_filename;
1350        pr_info("Run %s as init process\n", init_filename);
1351        pr_debug("  with arguments:\n");
1352        for (p = argv_init; *p; p++)
1353                pr_debug("    %s\n", *p);
1354        pr_debug("  with environment:\n");
1355        for (p = envp_init; *p; p++)
1356                pr_debug("    %s\n", *p);
1357        return kernel_execve(init_filename, argv_init, envp_init);
1358}
1359
/*
 * Like run_init_process(), but additionally logs an error when the
 * attempt fails for any reason other than -ENOENT.
 *
 * Returns the result of run_init_process() unchanged.
 */
static int try_to_run_init_process(const char *init_filename)
{
        const int err = run_init_process(init_filename);

        /* Success and "no such file" are both silent. */
        if (err == 0 || err == -ENOENT)
                return err;

        pr_err("Starting init: %s exists but couldn't execute it (error %d)\n",
               init_filename, err);
        return err;
}
1373
1374static noinline void __init kernel_init_freeable(void);
1375
1376#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX)
/*
 * Whether read-only protection of kernel data is requested.  Defaults to
 * true and can be changed with the "rodata=" early parameter.
 */
bool rodata_enabled __ro_after_init = true;

/*
 * Fallback used when no arch-specific parser is provided: it never
 * claims the option, so the generic parsing in set_debug_rodata() runs.
 */
#ifndef arch_parse_debug_rodata
static inline bool arch_parse_debug_rodata(char *str) { return false; }
#endif
1382
1383static int __init set_debug_rodata(char *str)
1384{
1385        if (arch_parse_debug_rodata(str))
1386                return 0;
1387
1388        if (str && !strcmp(str, "on"))
1389                rodata_enabled = true;
1390        else if (str && !strcmp(str, "off"))
1391                rodata_enabled = false;
1392        else
1393                pr_warn("Invalid option string for rodata: '%s'\n", str);
1394        return 0;
1395}
1396early_param("rodata", set_debug_rodata);
1397#endif
1398
/*
 * mark_readonly() - apply read-only protection to kernel memory, or
 * report why that is not being done.  Which of the three variants is
 * built depends on the kernel configuration.
 */
#ifdef CONFIG_STRICT_KERNEL_RWX
static void mark_readonly(void)
{
        if (!rodata_enabled) {
                pr_info("Kernel memory protection disabled.\n");
                return;
        }

        /*
         * load_module() leaves W+X mappings behind that are cleaned up
         * through call_rcu().  Wait for that queued work first, so the
         * search for insecure W+X pages does not report false positives.
         */
        rcu_barrier();
        mark_rodata_ro();
        rodata_test();
}
#elif defined(CONFIG_ARCH_HAS_STRICT_KERNEL_RWX)
/* The architecture supports the protection, but it was not configured. */
static inline void mark_readonly(void)
{
        pr_warn("Kernel memory protection not selected by kernel config.\n");
}
#else
/* The architecture offers no such protection at all. */
static inline void mark_readonly(void)
{
        pr_warn("This architecture does not have kernel memory protection.\n");
}
#endif
1426
/*
 * Default implementation: delegate to free_initmem_default() with the
 * POISON_FREE_INITMEM poison value.  Declared __weak, so another
 * definition of free_initmem() can take its place at link time.
 */
void __weak free_initmem(void)
{
        free_initmem_default(POISON_FREE_INITMEM);
}
1431
1432static int __ref kernel_init(void *unused)
1433{
1434        int ret;
1435
1436        /*
1437         * Wait until kthreadd is all set-up.
1438         */
1439        wait_for_completion(&kthreadd_done);
1440
1441        kernel_init_freeable();
1442        /* need to finish all async __init code before freeing the memory */
1443        async_synchronize_full();
1444
1445        system_state = SYSTEM_FREEING_INITMEM;
1446        kprobe_free_init_mem();
1447        ftrace_free_init_mem();
1448        kgdb_free_init_mem();
1449        exit_boot_config();
1450        free_initmem();
1451        mark_readonly();
1452
1453        /*
1454         * Kernel mappings are now finalized - update the userspace page-table
1455         * to finalize PTI.
1456         */
1457        pti_finalize();
1458
1459        system_state = SYSTEM_RUNNING;
1460        numa_default_policy();
1461
1462        rcu_end_inkernel_boot();
1463
1464        do_sysctl_args();
1465
1466        if (ramdisk_execute_command) {
1467                ret = run_init_process(ramdisk_execute_command);
1468                if (!ret)
1469                        return 0;
1470                pr_err("Failed to execute %s (error %d)\n",
1471                       ramdisk_execute_command, ret);
1472        }
1473
1474        /*
1475         * We try each of these until one succeeds.
1476         *
1477         * The Bourne shell can be used instead of init if we are
1478         * trying to recover a really broken machine.
1479         */
1480        if (execute_command) {
1481                ret = run_init_process(execute_command);
1482                if (!ret)
1483                        return 0;
1484                panic("Requested init %s failed (error %d).",
1485                      execute_command, ret);
1486        }
1487
1488        if (CONFIG_DEFAULT_INIT[0] != '\0') {
1489                ret = run_init_process(CONFIG_DEFAULT_INIT);
1490                if (ret)
1491                        pr_err("Default init %s failed (error %d)\n",
1492                               CONFIG_DEFAULT_INIT, ret);
1493                else
1494                        return 0;
1495        }
1496
1497        if (!try_to_run_init_process("/sbin/init") ||
1498            !try_to_run_init_process("/etc/init") ||
1499            !try_to_run_init_process("/bin/init") ||
1500            !try_to_run_init_process("/bin/sh"))
1501                return 0;
1502
1503        panic("No working init found.  Try passing init= option to kernel. "
1504              "See Linux Documentation/admin-guide/init.rst for guidance.");
1505}
1506
1507/* Open /dev/console, for stdin/stdout/stderr, this should never fail */
1508void __init console_on_rootfs(void)
1509{
1510        struct file *file = filp_open("/dev/console", O_RDWR, 0);
1511
1512        if (IS_ERR(file)) {
1513                pr_err("Warning: unable to open an initial console.\n");
1514                return;
1515        }
1516        init_dup(file);
1517        init_dup(file);
1518        init_dup(file);
1519        fput(file);
1520}
1521
/*
 * kernel_init_freeable() - the __init part of kernel_init().
 *
 * Brings up SMP, workqueues and the remaining subsystems, runs the early
 * initcalls followed by do_basic_setup(), opens the initial console and
 * decides whether an early user-space init (ramdisk_execute_command) is
 * usable; if it is not, prepare_namespace() is called instead.
 *
 * The order of the calls below is significant.
 */
static noinline void __init kernel_init_freeable(void)
{
        /* Now the scheduler is fully set up and can do blocking allocations */
        gfp_allowed_mask = __GFP_BITS_MASK;

        /*
         * init can allocate pages on any node
         */
        set_mems_allowed(node_states[N_MEMORY]);

        /* Take a reference on the current task's pid and record it in cad_pid. */
        cad_pid = get_pid(task_pid(current));

        smp_prepare_cpus(setup_max_cpus);

        workqueue_init();

        init_mm_internals();

        rcu_init_tasks_generic();
        do_pre_smp_initcalls();
        lockup_detector_init();

        smp_init();
        sched_init_smp();

        workqueue_init_topology();
        padata_init();
        page_alloc_init_late();

        do_basic_setup();

        kunit_run_all_tests();

        wait_for_initramfs();
        console_on_rootfs();

        /*
         * check if there is an early userspace init.  If yes, let it do all
         * the work
         */
        if (init_eaccess(ramdisk_execute_command) != 0) {
                /* Not accessible: forget it and prepare the namespace instead. */
                ramdisk_execute_command = NULL;
                prepare_namespace();
        }

        /*
         * Ok, we have completed the initial bootup, and
         * we're essentially up and running. Get rid of the
         * initmem segments and start the user-mode stuff..
         *
         * rootfs is available now, try loading the public keys
         * and default modules
         */

        integrity_load_keys();
}
1578