linux/kernel/crash_core.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * crash.c - kernel crash support code.
   4 * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
   5 */
   6
   7#include <linux/buildid.h>
   8#include <linux/init.h>
   9#include <linux/utsname.h>
  10#include <linux/vmalloc.h>
  11#include <linux/sizes.h>
  12#include <linux/kexec.h>
  13#include <linux/memory.h>
  14#include <linux/cpuhotplug.h>
  15#include <linux/memblock.h>
  16#include <linux/kexec.h>
  17#include <linux/kmemleak.h>
  18
  19#include <asm/page.h>
  20#include <asm/sections.h>
  21
  22#include <crypto/sha1.h>
  23
  24#include "kallsyms_internal.h"
  25#include "kexec_internal.h"
  26
  27/* Per cpu memory for storing cpu states in case of system crash. */
  28note_buf_t __percpu *crash_notes;
  29
  30/* vmcoreinfo stuff */
  31unsigned char *vmcoreinfo_data;
  32size_t vmcoreinfo_size;
  33u32 *vmcoreinfo_note;
  34
  35/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
  36static unsigned char *vmcoreinfo_data_safecopy;
  37
  38/* Location of the reserved area for the crash kernel */
  39struct resource crashk_res = {
  40        .name  = "Crash kernel",
  41        .start = 0,
  42        .end   = 0,
  43        .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
  44        .desc  = IORES_DESC_CRASH_KERNEL
  45};
  46struct resource crashk_low_res = {
  47        .name  = "Crash kernel",
  48        .start = 0,
  49        .end   = 0,
  50        .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
  51        .desc  = IORES_DESC_CRASH_KERNEL
  52};
  53
  54/*
  55 * parsing the "crashkernel" commandline
  56 *
  57 * this code is intended to be called from architecture specific code
  58 */
  59
  60
  61/*
  62 * This function parses command lines in the format
  63 *
  64 *   crashkernel=ramsize-range:size[,...][@offset]
  65 *
  66 * The function returns 0 on success and -EINVAL on failure.
  67 */
  68static int __init parse_crashkernel_mem(char *cmdline,
  69                                        unsigned long long system_ram,
  70                                        unsigned long long *crash_size,
  71                                        unsigned long long *crash_base)
  72{
  73        char *cur = cmdline, *tmp;
  74        unsigned long long total_mem = system_ram;
  75
  76        /*
  77         * Firmware sometimes reserves some memory regions for its own use,
  78         * so the system memory size is less than the actual physical memory
  79         * size. Work around this by rounding up the total size to 128M,
  80         * which is enough for most test cases.
  81         */
  82        total_mem = roundup(total_mem, SZ_128M);
  83
  84        /* for each entry of the comma-separated list */
  85        do {
  86                unsigned long long start, end = ULLONG_MAX, size;
  87
  88                /* get the start of the range */
  89                start = memparse(cur, &tmp);
  90                if (cur == tmp) {
  91                        pr_warn("crashkernel: Memory value expected\n");
  92                        return -EINVAL;
  93                }
  94                cur = tmp;
  95                if (*cur != '-') {
  96                        pr_warn("crashkernel: '-' expected\n");
  97                        return -EINVAL;
  98                }
  99                cur++;
 100
 101                /* if no ':' is here, than we read the end */
 102                if (*cur != ':') {
 103                        end = memparse(cur, &tmp);
 104                        if (cur == tmp) {
 105                                pr_warn("crashkernel: Memory value expected\n");
 106                                return -EINVAL;
 107                        }
 108                        cur = tmp;
 109                        if (end <= start) {
 110                                pr_warn("crashkernel: end <= start\n");
 111                                return -EINVAL;
 112                        }
 113                }
 114
 115                if (*cur != ':') {
 116                        pr_warn("crashkernel: ':' expected\n");
 117                        return -EINVAL;
 118                }
 119                cur++;
 120
 121                size = memparse(cur, &tmp);
 122                if (cur == tmp) {
 123                        pr_warn("Memory value expected\n");
 124                        return -EINVAL;
 125                }
 126                cur = tmp;
 127                if (size >= total_mem) {
 128                        pr_warn("crashkernel: invalid size\n");
 129                        return -EINVAL;
 130                }
 131
 132                /* match ? */
 133                if (total_mem >= start && total_mem < end) {
 134                        *crash_size = size;
 135                        break;
 136                }
 137        } while (*cur++ == ',');
 138
 139        if (*crash_size > 0) {
 140                while (*cur && *cur != ' ' && *cur != '@')
 141                        cur++;
 142                if (*cur == '@') {
 143                        cur++;
 144                        *crash_base = memparse(cur, &tmp);
 145                        if (cur == tmp) {
 146                                pr_warn("Memory value expected after '@'\n");
 147                                return -EINVAL;
 148                        }
 149                }
 150        } else
 151                pr_info("crashkernel size resulted in zero bytes\n");
 152
 153        return 0;
 154}
 155
 156/*
 157 * That function parses "simple" (old) crashkernel command lines like
 158 *
 159 *      crashkernel=size[@offset]
 160 *
 161 * It returns 0 on success and -EINVAL on failure.
 162 */
 163static int __init parse_crashkernel_simple(char *cmdline,
 164                                           unsigned long long *crash_size,
 165                                           unsigned long long *crash_base)
 166{
 167        char *cur = cmdline;
 168
 169        *crash_size = memparse(cmdline, &cur);
 170        if (cmdline == cur) {
 171                pr_warn("crashkernel: memory value expected\n");
 172                return -EINVAL;
 173        }
 174
 175        if (*cur == '@')
 176                *crash_base = memparse(cur+1, &cur);
 177        else if (*cur != ' ' && *cur != '\0') {
 178                pr_warn("crashkernel: unrecognized char: %c\n", *cur);
 179                return -EINVAL;
 180        }
 181
 182        return 0;
 183}
 184
 185#define SUFFIX_HIGH 0
 186#define SUFFIX_LOW  1
 187#define SUFFIX_NULL 2
 188static __initdata char *suffix_tbl[] = {
 189        [SUFFIX_HIGH] = ",high",
 190        [SUFFIX_LOW]  = ",low",
 191        [SUFFIX_NULL] = NULL,
 192};
 193
 194/*
 195 * That function parses "suffix"  crashkernel command lines like
 196 *
 197 *      crashkernel=size,[high|low]
 198 *
 199 * It returns 0 on success and -EINVAL on failure.
 200 */
 201static int __init parse_crashkernel_suffix(char *cmdline,
 202                                           unsigned long long *crash_size,
 203                                           const char *suffix)
 204{
 205        char *cur = cmdline;
 206
 207        *crash_size = memparse(cmdline, &cur);
 208        if (cmdline == cur) {
 209                pr_warn("crashkernel: memory value expected\n");
 210                return -EINVAL;
 211        }
 212
 213        /* check with suffix */
 214        if (strncmp(cur, suffix, strlen(suffix))) {
 215                pr_warn("crashkernel: unrecognized char: %c\n", *cur);
 216                return -EINVAL;
 217        }
 218        cur += strlen(suffix);
 219        if (*cur != ' ' && *cur != '\0') {
 220                pr_warn("crashkernel: unrecognized char: %c\n", *cur);
 221                return -EINVAL;
 222        }
 223
 224        return 0;
 225}
 226
 227static __init char *get_last_crashkernel(char *cmdline,
 228                             const char *name,
 229                             const char *suffix)
 230{
 231        char *p = cmdline, *ck_cmdline = NULL;
 232
 233        /* find crashkernel and use the last one if there are more */
 234        p = strstr(p, name);
 235        while (p) {
 236                char *end_p = strchr(p, ' ');
 237                char *q;
 238
 239                if (!end_p)
 240                        end_p = p + strlen(p);
 241
 242                if (!suffix) {
 243                        int i;
 244
 245                        /* skip the one with any known suffix */
 246                        for (i = 0; suffix_tbl[i]; i++) {
 247                                q = end_p - strlen(suffix_tbl[i]);
 248                                if (!strncmp(q, suffix_tbl[i],
 249                                             strlen(suffix_tbl[i])))
 250                                        goto next;
 251                        }
 252                        ck_cmdline = p;
 253                } else {
 254                        q = end_p - strlen(suffix);
 255                        if (!strncmp(q, suffix, strlen(suffix)))
 256                                ck_cmdline = p;
 257                }
 258next:
 259                p = strstr(p+1, name);
 260        }
 261
 262        return ck_cmdline;
 263}
 264
 265static int __init __parse_crashkernel(char *cmdline,
 266                             unsigned long long system_ram,
 267                             unsigned long long *crash_size,
 268                             unsigned long long *crash_base,
 269                             const char *suffix)
 270{
 271        char *first_colon, *first_space;
 272        char *ck_cmdline;
 273        char *name = "crashkernel=";
 274
 275        BUG_ON(!crash_size || !crash_base);
 276        *crash_size = 0;
 277        *crash_base = 0;
 278
 279        ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
 280        if (!ck_cmdline)
 281                return -ENOENT;
 282
 283        ck_cmdline += strlen(name);
 284
 285        if (suffix)
 286                return parse_crashkernel_suffix(ck_cmdline, crash_size,
 287                                suffix);
 288        /*
 289         * if the commandline contains a ':', then that's the extended
 290         * syntax -- if not, it must be the classic syntax
 291         */
 292        first_colon = strchr(ck_cmdline, ':');
 293        first_space = strchr(ck_cmdline, ' ');
 294        if (first_colon && (!first_space || first_colon < first_space))
 295                return parse_crashkernel_mem(ck_cmdline, system_ram,
 296                                crash_size, crash_base);
 297
 298        return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
 299}
 300
 301/*
 302 * That function is the entry point for command line parsing and should be
 303 * called from the arch-specific code.
 304 *
 305 * If crashkernel=,high|low is supported on architecture, non-NULL values
 306 * should be passed to parameters 'low_size' and 'high'.
 307 */
 308int __init parse_crashkernel(char *cmdline,
 309                             unsigned long long system_ram,
 310                             unsigned long long *crash_size,
 311                             unsigned long long *crash_base,
 312                             unsigned long long *low_size,
 313                             bool *high)
 314{
 315        int ret;
 316
 317        /* crashkernel=X[@offset] */
 318        ret = __parse_crashkernel(cmdline, system_ram, crash_size,
 319                                crash_base, NULL);
 320#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
 321        /*
 322         * If non-NULL 'high' passed in and no normal crashkernel
 323         * setting detected, try parsing crashkernel=,high|low.
 324         */
 325        if (high && ret == -ENOENT) {
 326                ret = __parse_crashkernel(cmdline, 0, crash_size,
 327                                crash_base, suffix_tbl[SUFFIX_HIGH]);
 328                if (ret || !*crash_size)
 329                        return -EINVAL;
 330
 331                /*
 332                 * crashkernel=Y,low can be specified or not, but invalid value
 333                 * is not allowed.
 334                 */
 335                ret = __parse_crashkernel(cmdline, 0, low_size,
 336                                crash_base, suffix_tbl[SUFFIX_LOW]);
 337                if (ret == -ENOENT) {
 338                        *low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
 339                        ret = 0;
 340                } else if (ret) {
 341                        return ret;
 342                }
 343
 344                *high = true;
 345        }
 346#endif
 347        if (!*crash_size)
 348                ret = -EINVAL;
 349
 350        return ret;
 351}
 352
 353/*
 354 * Add a dummy early_param handler to mark crashkernel= as a known command line
 355 * parameter and suppress incorrect warnings in init/main.c.
 356 */
 357static int __init parse_crashkernel_dummy(char *arg)
 358{
 359        return 0;
 360}
 361early_param("crashkernel", parse_crashkernel_dummy);
 362
 363#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
 364static int __init reserve_crashkernel_low(unsigned long long low_size)
 365{
 366#ifdef CONFIG_64BIT
 367        unsigned long long low_base;
 368
 369        low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
 370        if (!low_base) {
 371                pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
 372                return -ENOMEM;
 373        }
 374
 375        pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n",
 376                low_base, low_base + low_size, low_size >> 20);
 377
 378        crashk_low_res.start = low_base;
 379        crashk_low_res.end   = low_base + low_size - 1;
 380        insert_resource(&iomem_resource, &crashk_low_res);
 381#endif
 382        return 0;
 383}
 384
 385void __init reserve_crashkernel_generic(char *cmdline,
 386                             unsigned long long crash_size,
 387                             unsigned long long crash_base,
 388                             unsigned long long crash_low_size,
 389                             bool high)
 390{
 391        unsigned long long search_end = CRASH_ADDR_LOW_MAX, search_base = 0;
 392        bool fixed_base = false;
 393
 394        /* User specifies base address explicitly. */
 395        if (crash_base) {
 396                fixed_base = true;
 397                search_base = crash_base;
 398                search_end = crash_base + crash_size;
 399        } else if (high) {
 400                search_base = CRASH_ADDR_LOW_MAX;
 401                search_end = CRASH_ADDR_HIGH_MAX;
 402        }
 403
 404retry:
 405        crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
 406                                               search_base, search_end);
 407        if (!crash_base) {
 408                /*
 409                 * For crashkernel=size[KMG]@offset[KMG], print out failure
 410                 * message if can't reserve the specified region.
 411                 */
 412                if (fixed_base) {
 413                        pr_warn("crashkernel reservation failed - memory is in use.\n");
 414                        return;
 415                }
 416
 417                /*
 418                 * For crashkernel=size[KMG], if the first attempt was for
 419                 * low memory, fall back to high memory, the minimum required
 420                 * low memory will be reserved later.
 421                 */
 422                if (!high && search_end == CRASH_ADDR_LOW_MAX) {
 423                        search_end = CRASH_ADDR_HIGH_MAX;
 424                        search_base = CRASH_ADDR_LOW_MAX;
 425                        crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
 426                        goto retry;
 427                }
 428
 429                /*
 430                 * For crashkernel=size[KMG],high, if the first attempt was
 431                 * for high memory, fall back to low memory.
 432                 */
 433                if (high && search_end == CRASH_ADDR_HIGH_MAX) {
 434                        search_end = CRASH_ADDR_LOW_MAX;
 435                        search_base = 0;
 436                        goto retry;
 437                }
 438                pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
 439                        crash_size);
 440                return;
 441        }
 442
 443        if ((crash_base >= CRASH_ADDR_LOW_MAX) &&
 444             crash_low_size && reserve_crashkernel_low(crash_low_size)) {
 445                memblock_phys_free(crash_base, crash_size);
 446                return;
 447        }
 448
 449        pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
 450                crash_base, crash_base + crash_size, crash_size >> 20);
 451
 452        /*
 453         * The crashkernel memory will be removed from the kernel linear
 454         * map. Inform kmemleak so that it won't try to access it.
 455         */
 456        kmemleak_ignore_phys(crash_base);
 457        if (crashk_low_res.end)
 458                kmemleak_ignore_phys(crashk_low_res.start);
 459
 460        crashk_res.start = crash_base;
 461        crashk_res.end = crash_base + crash_size - 1;
 462        insert_resource(&iomem_resource, &crashk_res);
 463}
 464#endif
 465
 466int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
 467                          void **addr, unsigned long *sz)
 468{
 469        Elf64_Ehdr *ehdr;
 470        Elf64_Phdr *phdr;
 471        unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz;
 472        unsigned char *buf;
 473        unsigned int cpu, i;
 474        unsigned long long notes_addr;
 475        unsigned long mstart, mend;
 476
 477        /* extra phdr for vmcoreinfo ELF note */
 478        nr_phdr = nr_cpus + 1;
 479        nr_phdr += mem->nr_ranges;
 480
 481        /*
 482         * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping
 483         * area (for example, ffffffff80000000 - ffffffffa0000000 on x86_64).
 484         * I think this is required by tools like gdb. So same physical
 485         * memory will be mapped in two ELF headers. One will contain kernel
 486         * text virtual addresses and other will have __va(physical) addresses.
 487         */
 488
 489        nr_phdr++;
 490        elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr);
 491        elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN);
 492
 493        buf = vzalloc(elf_sz);
 494        if (!buf)
 495                return -ENOMEM;
 496
 497        ehdr = (Elf64_Ehdr *)buf;
 498        phdr = (Elf64_Phdr *)(ehdr + 1);
 499        memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
 500        ehdr->e_ident[EI_CLASS] = ELFCLASS64;
 501        ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
 502        ehdr->e_ident[EI_VERSION] = EV_CURRENT;
 503        ehdr->e_ident[EI_OSABI] = ELF_OSABI;
 504        memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
 505        ehdr->e_type = ET_CORE;
 506        ehdr->e_machine = ELF_ARCH;
 507        ehdr->e_version = EV_CURRENT;
 508        ehdr->e_phoff = sizeof(Elf64_Ehdr);
 509        ehdr->e_ehsize = sizeof(Elf64_Ehdr);
 510        ehdr->e_phentsize = sizeof(Elf64_Phdr);
 511
 512        /* Prepare one phdr of type PT_NOTE for each possible CPU */
 513        for_each_possible_cpu(cpu) {
 514                phdr->p_type = PT_NOTE;
 515                notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
 516                phdr->p_offset = phdr->p_paddr = notes_addr;
 517                phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t);
 518                (ehdr->e_phnum)++;
 519                phdr++;
 520        }
 521
 522        /* Prepare one PT_NOTE header for vmcoreinfo */
 523        phdr->p_type = PT_NOTE;
 524        phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
 525        phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
 526        (ehdr->e_phnum)++;
 527        phdr++;
 528
 529        /* Prepare PT_LOAD type program header for kernel text region */
 530        if (need_kernel_map) {
 531                phdr->p_type = PT_LOAD;
 532                phdr->p_flags = PF_R|PF_W|PF_X;
 533                phdr->p_vaddr = (unsigned long) _text;
 534                phdr->p_filesz = phdr->p_memsz = _end - _text;
 535                phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
 536                ehdr->e_phnum++;
 537                phdr++;
 538        }
 539
 540        /* Go through all the ranges in mem->ranges[] and prepare phdr */
 541        for (i = 0; i < mem->nr_ranges; i++) {
 542                mstart = mem->ranges[i].start;
 543                mend = mem->ranges[i].end;
 544
 545                phdr->p_type = PT_LOAD;
 546                phdr->p_flags = PF_R|PF_W|PF_X;
 547                phdr->p_offset  = mstart;
 548
 549                phdr->p_paddr = mstart;
 550                phdr->p_vaddr = (unsigned long) __va(mstart);
 551                phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
 552                phdr->p_align = 0;
 553                ehdr->e_phnum++;
 554                pr_debug("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
 555                        phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
 556                        ehdr->e_phnum, phdr->p_offset);
 557                phdr++;
 558        }
 559
 560        *addr = buf;
 561        *sz = elf_sz;
 562        return 0;
 563}
 564
 565int crash_exclude_mem_range(struct crash_mem *mem,
 566                            unsigned long long mstart, unsigned long long mend)
 567{
 568        int i, j;
 569        unsigned long long start, end, p_start, p_end;
 570        struct range temp_range = {0, 0};
 571
 572        for (i = 0; i < mem->nr_ranges; i++) {
 573                start = mem->ranges[i].start;
 574                end = mem->ranges[i].end;
 575                p_start = mstart;
 576                p_end = mend;
 577
 578                if (mstart > end || mend < start)
 579                        continue;
 580
 581                /* Truncate any area outside of range */
 582                if (mstart < start)
 583                        p_start = start;
 584                if (mend > end)
 585                        p_end = end;
 586
 587                /* Found completely overlapping range */
 588                if (p_start == start && p_end == end) {
 589                        mem->ranges[i].start = 0;
 590                        mem->ranges[i].end = 0;
 591                        if (i < mem->nr_ranges - 1) {
 592                                /* Shift rest of the ranges to left */
 593                                for (j = i; j < mem->nr_ranges - 1; j++) {
 594                                        mem->ranges[j].start =
 595                                                mem->ranges[j+1].start;
 596                                        mem->ranges[j].end =
 597                                                        mem->ranges[j+1].end;
 598                                }
 599
 600                                /*
 601                                 * Continue to check if there are another overlapping ranges
 602                                 * from the current position because of shifting the above
 603                                 * mem ranges.
 604                                 */
 605                                i--;
 606                                mem->nr_ranges--;
 607                                continue;
 608                        }
 609                        mem->nr_ranges--;
 610                        return 0;
 611                }
 612
 613                if (p_start > start && p_end < end) {
 614                        /* Split original range */
 615                        mem->ranges[i].end = p_start - 1;
 616                        temp_range.start = p_end + 1;
 617                        temp_range.end = end;
 618                } else if (p_start != start)
 619                        mem->ranges[i].end = p_start - 1;
 620                else
 621                        mem->ranges[i].start = p_end + 1;
 622                break;
 623        }
 624
 625        /* If a split happened, add the split to array */
 626        if (!temp_range.end)
 627                return 0;
 628
 629        /* Split happened */
 630        if (i == mem->max_nr_ranges - 1)
 631                return -ENOMEM;
 632
 633        /* Location where new range should go */
 634        j = i + 1;
 635        if (j < mem->nr_ranges) {
 636                /* Move over all ranges one slot towards the end */
 637                for (i = mem->nr_ranges - 1; i >= j; i--)
 638                        mem->ranges[i + 1] = mem->ranges[i];
 639        }
 640
 641        mem->ranges[j].start = temp_range.start;
 642        mem->ranges[j].end = temp_range.end;
 643        mem->nr_ranges++;
 644        return 0;
 645}
 646
 647Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
 648                          void *data, size_t data_len)
 649{
 650        struct elf_note *note = (struct elf_note *)buf;
 651
 652        note->n_namesz = strlen(name) + 1;
 653        note->n_descsz = data_len;
 654        note->n_type   = type;
 655        buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf_Word));
 656        memcpy(buf, name, note->n_namesz);
 657        buf += DIV_ROUND_UP(note->n_namesz, sizeof(Elf_Word));
 658        memcpy(buf, data, data_len);
 659        buf += DIV_ROUND_UP(data_len, sizeof(Elf_Word));
 660
 661        return buf;
 662}
 663
 664void final_note(Elf_Word *buf)
 665{
 666        memset(buf, 0, sizeof(struct elf_note));
 667}
 668
 669static void update_vmcoreinfo_note(void)
 670{
 671        u32 *buf = vmcoreinfo_note;
 672
 673        if (!vmcoreinfo_size)
 674                return;
 675        buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
 676                              vmcoreinfo_size);
 677        final_note(buf);
 678}
 679
 680void crash_update_vmcoreinfo_safecopy(void *ptr)
 681{
 682        if (ptr)
 683                memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size);
 684
 685        vmcoreinfo_data_safecopy = ptr;
 686}
 687
 688void crash_save_vmcoreinfo(void)
 689{
 690        if (!vmcoreinfo_note)
 691                return;
 692
 693        /* Use the safe copy to generate vmcoreinfo note if have */
 694        if (vmcoreinfo_data_safecopy)
 695                vmcoreinfo_data = vmcoreinfo_data_safecopy;
 696
 697        vmcoreinfo_append_str("CRASHTIME=%lld\n", ktime_get_real_seconds());
 698        update_vmcoreinfo_note();
 699}
 700
 701void vmcoreinfo_append_str(const char *fmt, ...)
 702{
 703        va_list args;
 704        char buf[0x50];
 705        size_t r;
 706
 707        va_start(args, fmt);
 708        r = vscnprintf(buf, sizeof(buf), fmt, args);
 709        va_end(args);
 710
 711        r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size);
 712
 713        memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
 714
 715        vmcoreinfo_size += r;
 716
 717        WARN_ONCE(vmcoreinfo_size == VMCOREINFO_BYTES,
 718                  "vmcoreinfo data exceeds allocated size, truncating");
 719}
 720
 721/*
 722 * provide an empty default implementation here -- architecture
 723 * code may override this
 724 */
 725void __weak arch_crash_save_vmcoreinfo(void)
 726{}
 727
 728phys_addr_t __weak paddr_vmcoreinfo_note(void)
 729{
 730        return __pa(vmcoreinfo_note);
 731}
 732EXPORT_SYMBOL(paddr_vmcoreinfo_note);
 733
 734static int __init crash_save_vmcoreinfo_init(void)
 735{
 736        vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
 737        if (!vmcoreinfo_data) {
 738                pr_warn("Memory allocation for vmcoreinfo_data failed\n");
 739                return -ENOMEM;
 740        }
 741
 742        vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
 743                                                GFP_KERNEL | __GFP_ZERO);
 744        if (!vmcoreinfo_note) {
 745                free_page((unsigned long)vmcoreinfo_data);
 746                vmcoreinfo_data = NULL;
 747                pr_warn("Memory allocation for vmcoreinfo_note failed\n");
 748                return -ENOMEM;
 749        }
 750
 751        VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
 752        VMCOREINFO_BUILD_ID();
 753        VMCOREINFO_PAGESIZE(PAGE_SIZE);
 754
 755        VMCOREINFO_SYMBOL(init_uts_ns);
 756        VMCOREINFO_OFFSET(uts_namespace, name);
 757        VMCOREINFO_SYMBOL(node_online_map);
 758#ifdef CONFIG_MMU
 759        VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir);
 760#endif
 761        VMCOREINFO_SYMBOL(_stext);
 762        VMCOREINFO_SYMBOL(vmap_area_list);
 763
 764#ifndef CONFIG_NUMA
 765        VMCOREINFO_SYMBOL(mem_map);
 766        VMCOREINFO_SYMBOL(contig_page_data);
 767#endif
 768#ifdef CONFIG_SPARSEMEM
 769        VMCOREINFO_SYMBOL_ARRAY(mem_section);
 770        VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
 771        VMCOREINFO_STRUCT_SIZE(mem_section);
 772        VMCOREINFO_OFFSET(mem_section, section_mem_map);
 773        VMCOREINFO_NUMBER(SECTION_SIZE_BITS);
 774        VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS);
 775#endif
 776        VMCOREINFO_STRUCT_SIZE(page);
 777        VMCOREINFO_STRUCT_SIZE(pglist_data);
 778        VMCOREINFO_STRUCT_SIZE(zone);
 779        VMCOREINFO_STRUCT_SIZE(free_area);
 780        VMCOREINFO_STRUCT_SIZE(list_head);
 781        VMCOREINFO_SIZE(nodemask_t);
 782        VMCOREINFO_OFFSET(page, flags);
 783        VMCOREINFO_OFFSET(page, _refcount);
 784        VMCOREINFO_OFFSET(page, mapping);
 785        VMCOREINFO_OFFSET(page, lru);
 786        VMCOREINFO_OFFSET(page, _mapcount);
 787        VMCOREINFO_OFFSET(page, private);
 788        VMCOREINFO_OFFSET(page, compound_head);
 789        VMCOREINFO_OFFSET(pglist_data, node_zones);
 790        VMCOREINFO_OFFSET(pglist_data, nr_zones);
 791#ifdef CONFIG_FLATMEM
 792        VMCOREINFO_OFFSET(pglist_data, node_mem_map);
 793#endif
 794        VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
 795        VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
 796        VMCOREINFO_OFFSET(pglist_data, node_id);
 797        VMCOREINFO_OFFSET(zone, free_area);
 798        VMCOREINFO_OFFSET(zone, vm_stat);
 799        VMCOREINFO_OFFSET(zone, spanned_pages);
 800        VMCOREINFO_OFFSET(free_area, free_list);
 801        VMCOREINFO_OFFSET(list_head, next);
 802        VMCOREINFO_OFFSET(list_head, prev);
 803        VMCOREINFO_OFFSET(vmap_area, va_start);
 804        VMCOREINFO_OFFSET(vmap_area, list);
 805        VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER + 1);
 806        log_buf_vmcoreinfo_setup();
 807        VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
 808        VMCOREINFO_NUMBER(NR_FREE_PAGES);
 809        VMCOREINFO_NUMBER(PG_lru);
 810        VMCOREINFO_NUMBER(PG_private);
 811        VMCOREINFO_NUMBER(PG_swapcache);
 812        VMCOREINFO_NUMBER(PG_swapbacked);
 813        VMCOREINFO_NUMBER(PG_slab);
 814#ifdef CONFIG_MEMORY_FAILURE
 815        VMCOREINFO_NUMBER(PG_hwpoison);
 816#endif
 817        VMCOREINFO_NUMBER(PG_head_mask);
 818#define PAGE_BUDDY_MAPCOUNT_VALUE       (~PG_buddy)
 819        VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
 820#ifdef CONFIG_HUGETLB_PAGE
 821        VMCOREINFO_NUMBER(PG_hugetlb);
 822#define PAGE_OFFLINE_MAPCOUNT_VALUE     (~PG_offline)
 823        VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE);
 824#endif
 825
 826#ifdef CONFIG_KALLSYMS
 827        VMCOREINFO_SYMBOL(kallsyms_names);
 828        VMCOREINFO_SYMBOL(kallsyms_num_syms);
 829        VMCOREINFO_SYMBOL(kallsyms_token_table);
 830        VMCOREINFO_SYMBOL(kallsyms_token_index);
 831#ifdef CONFIG_KALLSYMS_BASE_RELATIVE
 832        VMCOREINFO_SYMBOL(kallsyms_offsets);
 833        VMCOREINFO_SYMBOL(kallsyms_relative_base);
 834#else
 835        VMCOREINFO_SYMBOL(kallsyms_addresses);
 836#endif /* CONFIG_KALLSYMS_BASE_RELATIVE */
 837#endif /* CONFIG_KALLSYMS */
 838
 839        arch_crash_save_vmcoreinfo();
 840        update_vmcoreinfo_note();
 841
 842        return 0;
 843}
 844
 845subsys_initcall(crash_save_vmcoreinfo_init);
 846
 847static int __init crash_notes_memory_init(void)
 848{
 849        /* Allocate memory for saving cpu registers. */
 850        size_t size, align;
 851
 852        /*
 853         * crash_notes could be allocated across 2 vmalloc pages when percpu
 854         * is vmalloc based . vmalloc doesn't guarantee 2 continuous vmalloc
 855         * pages are also on 2 continuous physical pages. In this case the
 856         * 2nd part of crash_notes in 2nd page could be lost since only the
 857         * starting address and size of crash_notes are exported through sysfs.
 858         * Here round up the size of crash_notes to the nearest power of two
 859         * and pass it to __alloc_percpu as align value. This can make sure
 860         * crash_notes is allocated inside one physical page.
 861         */
 862        size = sizeof(note_buf_t);
 863        align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE);
 864
 865        /*
 866         * Break compile if size is bigger than PAGE_SIZE since crash_notes
 867         * definitely will be in 2 pages with that.
 868         */
 869        BUILD_BUG_ON(size > PAGE_SIZE);
 870
 871        crash_notes = __alloc_percpu(size, align);
 872        if (!crash_notes) {
 873                pr_warn("Memory allocation for saving cpu register states failed\n");
 874                return -ENOMEM;
 875        }
 876        return 0;
 877}
 878subsys_initcall(crash_notes_memory_init);
 879
 880#ifdef CONFIG_CRASH_HOTPLUG
/* Messages printed by the crash hotplug code below get a "crash hp: " prefix. */
#undef pr_fmt
#define pr_fmt(fmt) "crash hp: " fmt

/*
 * Unlike kexec/kdump loading, unloading, jumping and shrinking, which
 * usually happen rarely, many crash hotplug events may be notified within
 * one short period, e.g. when one memory board is hot added and its memory
 * regions are onlined. The mutex __crash_hotplug_lock is therefore used
 * specifically to serialize the crash hotplug handling.
 */
DEFINE_MUTEX(__crash_hotplug_lock);
/* Convenience wrappers for taking and releasing __crash_hotplug_lock. */
#define crash_hotplug_lock() mutex_lock(&__crash_hotplug_lock)
#define crash_hotplug_unlock() mutex_unlock(&__crash_hotplug_lock)
 894
 895/*
 896 * This routine utilized when the crash_hotplug sysfs node is read.
 897 * It reflects the kernel's ability/permission to update the crash
 898 * elfcorehdr directly.
 899 */
 900int crash_check_update_elfcorehdr(void)
 901{
 902        int rc = 0;
 903
 904        crash_hotplug_lock();
 905        /* Obtain lock while reading crash information */
 906        if (!kexec_trylock()) {
 907                pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
 908                crash_hotplug_unlock();
 909                return 0;
 910        }
 911        if (kexec_crash_image) {
 912                if (kexec_crash_image->file_mode)
 913                        rc = 1;
 914                else
 915                        rc = kexec_crash_image->update_elfcorehdr;
 916        }
 917        /* Release lock now that update complete */
 918        kexec_unlock();
 919        crash_hotplug_unlock();
 920
 921        return rc;
 922}
 923
 924/*
 925 * To accurately reflect hot un/plug changes of cpu and memory resources
 926 * (including onling and offlining of those resources), the elfcorehdr
 927 * (which is passed to the crash kernel via the elfcorehdr= parameter)
 928 * must be updated with the new list of CPUs and memories.
 929 *
 930 * In order to make changes to elfcorehdr, two conditions are needed:
 931 * First, the segment containing the elfcorehdr must be large enough
 932 * to permit a growing number of resources; the elfcorehdr memory size
 933 * is based on NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES.
 934 * Second, purgatory must explicitly exclude the elfcorehdr from the
 935 * list of segments it checks (since the elfcorehdr changes and thus
 936 * would require an update to purgatory itself to update the digest).
 937 */
 938static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
 939{
 940        struct kimage *image;
 941
 942        crash_hotplug_lock();
 943        /* Obtain lock while changing crash information */
 944        if (!kexec_trylock()) {
 945                pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
 946                crash_hotplug_unlock();
 947                return;
 948        }
 949
 950        /* Check kdump is not loaded */
 951        if (!kexec_crash_image)
 952                goto out;
 953
 954        image = kexec_crash_image;
 955
 956        /* Check that updating elfcorehdr is permitted */
 957        if (!(image->file_mode || image->update_elfcorehdr))
 958                goto out;
 959
 960        if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
 961                hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
 962                pr_debug("hp_action %u, cpu %u\n", hp_action, cpu);
 963        else
 964                pr_debug("hp_action %u\n", hp_action);
 965
 966        /*
 967         * The elfcorehdr_index is set to -1 when the struct kimage
 968         * is allocated. Find the segment containing the elfcorehdr,
 969         * if not already found.
 970         */
 971        if (image->elfcorehdr_index < 0) {
 972                unsigned long mem;
 973                unsigned char *ptr;
 974                unsigned int n;
 975
 976                for (n = 0; n < image->nr_segments; n++) {
 977                        mem = image->segment[n].mem;
 978                        ptr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT));
 979                        if (ptr) {
 980                                /* The segment containing elfcorehdr */
 981                                if (memcmp(ptr, ELFMAG, SELFMAG) == 0)
 982                                        image->elfcorehdr_index = (int)n;
 983                                kunmap_local(ptr);
 984                        }
 985                }
 986        }
 987
 988        if (image->elfcorehdr_index < 0) {
 989                pr_err("unable to locate elfcorehdr segment");
 990                goto out;
 991        }
 992
 993        /* Needed in order for the segments to be updated */
 994        arch_kexec_unprotect_crashkres();
 995
 996        /* Differentiate between normal load and hotplug update */
 997        image->hp_action = hp_action;
 998
 999        /* Now invoke arch-specific update handler */
1000        arch_crash_handle_hotplug_event(image);
1001
1002        /* No longer handling a hotplug event */
1003        image->hp_action = KEXEC_CRASH_HP_NONE;
1004        image->elfcorehdr_updated = true;
1005
1006        /* Change back to read-only */
1007        arch_kexec_protect_crashkres();
1008
1009        /* Errors in the callback is not a reason to rollback state */
1010out:
1011        /* Release lock now that update complete */
1012        kexec_unlock();
1013        crash_hotplug_unlock();
1014}
1015
1016static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
1017{
1018        switch (val) {
1019        case MEM_ONLINE:
1020                crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY,
1021                        KEXEC_CRASH_HP_INVALID_CPU);
1022                break;
1023
1024        case MEM_OFFLINE:
1025                crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY,
1026                        KEXEC_CRASH_HP_INVALID_CPU);
1027                break;
1028        }
1029        return NOTIFY_OK;
1030}
1031
1032static struct notifier_block crash_memhp_nb = {
1033        .notifier_call = crash_memhp_notifier,
1034        .priority = 0
1035};
1036
/*
 * CPU hotplug callback passed to cpuhp_setup_state_nocalls() by
 * crash_hotplug_init(). Reports @cpu to crash_handle_hotplug_event() as a
 * KEXEC_CRASH_HP_ADD_CPU event. Always returns 0.
 */
static int crash_cpuhp_online(unsigned int cpu)
{
        crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu);
        return 0;
}
1042
/*
 * CPU hotplug callback passed to cpuhp_setup_state_nocalls() by
 * crash_hotplug_init(). Reports @cpu to crash_handle_hotplug_event() as a
 * KEXEC_CRASH_HP_REMOVE_CPU event. Always returns 0.
 */
static int crash_cpuhp_offline(unsigned int cpu)
{
        crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu);
        return 0;
}
1048
1049static int __init crash_hotplug_init(void)
1050{
1051        int result = 0;
1052
1053        if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
1054                register_memory_notifier(&crash_memhp_nb);
1055
1056        if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
1057                result = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN,
1058                        "crash/cpuhp", crash_cpuhp_online, crash_cpuhp_offline);
1059        }
1060
1061        return result;
1062}
1063
1064subsys_initcall(crash_hotplug_init);
1065#endif
1066