linux/arch/x86/kernel/alternative.c
<<
>>
Prefs
   1#include <linux/module.h>
   2#include <linux/sched.h>
   3#include <linux/mutex.h>
   4#include <linux/list.h>
   5#include <linux/stringify.h>
   6#include <linux/kprobes.h>
   7#include <linux/mm.h>
   8#include <linux/vmalloc.h>
   9#include <linux/memory.h>
  10#include <linux/stop_machine.h>
  11#include <linux/slab.h>
  12#include <asm/alternative.h>
  13#include <asm/sections.h>
  14#include <asm/pgtable.h>
  15#include <asm/mce.h>
  16#include <asm/nmi.h>
  17#include <asm/cacheflush.h>
  18#include <asm/tlbflush.h>
  19#include <asm/io.h>
  20#include <asm/fixmap.h>
  21
  22#define MAX_PATCH_LEN (255-1)
  23
/*
 * smp_alt_once: when non-zero, the SMP-alternatives entry points in this
 * file take their "patch once" paths instead of recording modules for
 * later UP<->SMP switching.  With CONFIG_HOTPLUG_CPU it is a run-time
 * flag that the "smp-alt-boot" command-line option sets; without
 * CONFIG_HOTPLUG_CPU it is the constant 1.
 */
#ifdef CONFIG_HOTPLUG_CPU
static int smp_alt_once;

/* "smp-alt-boot" handler: set smp_alt_once; the option value is ignored. */
static int __init bootonly(char *str)
{
        smp_alt_once = 1;
        return 1;
}
__setup("smp-alt-boot", bootonly);
#else
#define smp_alt_once 1
#endif
  36
/* Non-zero enables the DPRINTK() debug output used in this file. */
static int __initdata_or_module debug_alternative;

/* "debug-alternative" handler: turn the debug output on; value is ignored. */
static int __init debug_alt(char *str)
{
        debug_alternative = 1;
        return 1;
}
__setup("debug-alternative", debug_alt);
  45
/*
 * Set by the "noreplace-smp" command-line option.  When non-zero,
 * alternatives_smp_unlock(), alternatives_smp_module_add(),
 * alternatives_smp_module_del() and alternatives_smp_switch() return
 * without doing anything.
 */
static int noreplace_smp;

/* "noreplace-smp" handler: set noreplace_smp; the option value is ignored. */
static int __init setup_noreplace_smp(char *str)
{
        noreplace_smp = 1;
        return 1;
}
__setup("noreplace-smp", setup_noreplace_smp);
  54
#ifdef CONFIG_PARAVIRT
/*
 * Set by the "noreplace-paravirt" command-line option.  When non-zero,
 * apply_paravirt() returns without patching any site.
 */
static int __initdata_or_module noreplace_paravirt = 0;

/* "noreplace-paravirt" handler: set the flag; the option value is ignored. */
static int __init setup_noreplace_paravirt(char *str)
{
        noreplace_paravirt = 1;
        return 1;
}
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif
  65
  66#define DPRINTK(fmt, args...) if (debug_alternative) \
  67        printk(KERN_DEBUG fmt, args)
  68
  69/*
  70 * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes
  71 * that correspond to that nop. Getting from one nop to the next, we
  72 * add to the array the offset that is equal to the sum of all sizes of
  73 * nops preceding the one we are after.
  74 *
  75 * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the
  76 * nice symmetry of sizes of the previous nops.
  77 */
  78#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64)
  79static const unsigned char intelnops[] =
  80{
  81        GENERIC_NOP1,
  82        GENERIC_NOP2,
  83        GENERIC_NOP3,
  84        GENERIC_NOP4,
  85        GENERIC_NOP5,
  86        GENERIC_NOP6,
  87        GENERIC_NOP7,
  88        GENERIC_NOP8,
  89        GENERIC_NOP5_ATOMIC
  90};
  91static const unsigned char * const intel_nops[ASM_NOP_MAX+2] =
  92{
  93        NULL,
  94        intelnops,
  95        intelnops + 1,
  96        intelnops + 1 + 2,
  97        intelnops + 1 + 2 + 3,
  98        intelnops + 1 + 2 + 3 + 4,
  99        intelnops + 1 + 2 + 3 + 4 + 5,
 100        intelnops + 1 + 2 + 3 + 4 + 5 + 6,
 101        intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
 102        intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
 103};
 104#endif
 105
 106#ifdef K8_NOP1
 107static const unsigned char k8nops[] =
 108{
 109        K8_NOP1,
 110        K8_NOP2,
 111        K8_NOP3,
 112        K8_NOP4,
 113        K8_NOP5,
 114        K8_NOP6,
 115        K8_NOP7,
 116        K8_NOP8,
 117        K8_NOP5_ATOMIC
 118};
 119static const unsigned char * const k8_nops[ASM_NOP_MAX+2] =
 120{
 121        NULL,
 122        k8nops,
 123        k8nops + 1,
 124        k8nops + 1 + 2,
 125        k8nops + 1 + 2 + 3,
 126        k8nops + 1 + 2 + 3 + 4,
 127        k8nops + 1 + 2 + 3 + 4 + 5,
 128        k8nops + 1 + 2 + 3 + 4 + 5 + 6,
 129        k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
 130        k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
 131};
 132#endif
 133
 134#if defined(K7_NOP1) && !defined(CONFIG_X86_64)
 135static const unsigned char k7nops[] =
 136{
 137        K7_NOP1,
 138        K7_NOP2,
 139        K7_NOP3,
 140        K7_NOP4,
 141        K7_NOP5,
 142        K7_NOP6,
 143        K7_NOP7,
 144        K7_NOP8,
 145        K7_NOP5_ATOMIC
 146};
 147static const unsigned char * const k7_nops[ASM_NOP_MAX+2] =
 148{
 149        NULL,
 150        k7nops,
 151        k7nops + 1,
 152        k7nops + 1 + 2,
 153        k7nops + 1 + 2 + 3,
 154        k7nops + 1 + 2 + 3 + 4,
 155        k7nops + 1 + 2 + 3 + 4 + 5,
 156        k7nops + 1 + 2 + 3 + 4 + 5 + 6,
 157        k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
 158        k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
 159};
 160#endif
 161
 162#ifdef P6_NOP1
 163static const unsigned char  __initconst_or_module p6nops[] =
 164{
 165        P6_NOP1,
 166        P6_NOP2,
 167        P6_NOP3,
 168        P6_NOP4,
 169        P6_NOP5,
 170        P6_NOP6,
 171        P6_NOP7,
 172        P6_NOP8,
 173        P6_NOP5_ATOMIC
 174};
 175static const unsigned char * const p6_nops[ASM_NOP_MAX+2] =
 176{
 177        NULL,
 178        p6nops,
 179        p6nops + 1,
 180        p6nops + 1 + 2,
 181        p6nops + 1 + 2 + 3,
 182        p6nops + 1 + 2 + 3 + 4,
 183        p6nops + 1 + 2 + 3 + 4 + 5,
 184        p6nops + 1 + 2 + 3 + 4 + 5 + 6,
 185        p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
 186        p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
 187};
 188#endif
 189
/*
 * ideal_nops: the nop lookup table currently in use; add_nops() indexes
 * it by nop length.  Initialize it to a safe default here;
 * arch_init_ideal_nops() may later point it at a different table.
 */
#ifdef CONFIG_X86_64
const unsigned char * const *ideal_nops = p6_nops;
#else
const unsigned char * const *ideal_nops = intel_nops;
#endif
 196
 197void __init arch_init_ideal_nops(void)
 198{
 199        switch (boot_cpu_data.x86_vendor) {
 200        case X86_VENDOR_INTEL:
 201                /*
 202                 * Due to a decoder implementation quirk, some
 203                 * specific Intel CPUs actually perform better with
 204                 * the "k8_nops" than with the SDM-recommended NOPs.
 205                 */
 206                if (boot_cpu_data.x86 == 6 &&
 207                    boot_cpu_data.x86_model >= 0x0f &&
 208                    boot_cpu_data.x86_model != 0x1c &&
 209                    boot_cpu_data.x86_model != 0x26 &&
 210                    boot_cpu_data.x86_model != 0x27 &&
 211                    boot_cpu_data.x86_model < 0x30) {
 212                        ideal_nops = k8_nops;
 213                } else if (boot_cpu_has(X86_FEATURE_NOPL)) {
 214                           ideal_nops = p6_nops;
 215                } else {
 216#ifdef CONFIG_X86_64
 217                        ideal_nops = k8_nops;
 218#else
 219                        ideal_nops = intel_nops;
 220#endif
 221                }
 222
 223        default:
 224#ifdef CONFIG_X86_64
 225                ideal_nops = k8_nops;
 226#else
 227                if (boot_cpu_has(X86_FEATURE_K8))
 228                        ideal_nops = k8_nops;
 229                else if (boot_cpu_has(X86_FEATURE_K7))
 230                        ideal_nops = k7_nops;
 231                else
 232                        ideal_nops = intel_nops;
 233#endif
 234        }
 235}
 236
 237/* Use this to add nops to a buffer, then text_poke the whole buffer. */
 238static void __init_or_module add_nops(void *insns, unsigned int len)
 239{
 240        while (len > 0) {
 241                unsigned int noplen = len;
 242                if (noplen > ASM_NOP_MAX)
 243                        noplen = ASM_NOP_MAX;
 244                memcpy(insns, ideal_nops[noplen], noplen);
 245                insns += noplen;
 246                len -= noplen;
 247        }
 248}
 249
/* Bounds of the core kernel's alt_instr table; see alternative_instructions(). */
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
/* Bounds of the core kernel's table of s32 lock-prefix offsets. */
extern s32 __smp_locks[], __smp_locks_end[];
/* Forward declaration; defined further down in this file. */
void *text_poke_early(void *addr, const void *opcode, size_t len);
 253
 254/* Replace instructions with better alternatives for this CPU type.
 255   This runs before SMP is initialized to avoid SMP problems with
 256   self modifying code. This implies that asymmetric systems where
 257   APs have less capabilities than the boot processor are not handled.
 258   Tough. Make sure you disable such features by hand. */
 259
 260void __init_or_module apply_alternatives(struct alt_instr *start,
 261                                         struct alt_instr *end)
 262{
 263        struct alt_instr *a;
 264        u8 *instr, *replacement;
 265        u8 insnbuf[MAX_PATCH_LEN];
 266
 267        DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
 268        /*
 269         * The scan order should be from start to end. A later scanned
 270         * alternative code can overwrite a previous scanned alternative code.
 271         * Some kernel functions (e.g. memcpy, memset, etc) use this order to
 272         * patch code.
 273         *
 274         * So be careful if you want to change the scan order to any other
 275         * order.
 276         */
 277        for (a = start; a < end; a++) {
 278                instr = (u8 *)&a->instr_offset + a->instr_offset;
 279                replacement = (u8 *)&a->repl_offset + a->repl_offset;
 280                BUG_ON(a->replacementlen > a->instrlen);
 281                BUG_ON(a->instrlen > sizeof(insnbuf));
 282                BUG_ON(a->cpuid >= NCAPINTS*32);
 283                if (!boot_cpu_has(a->cpuid))
 284                        continue;
 285
 286                memcpy(insnbuf, replacement, a->replacementlen);
 287
 288                /* 0xe8 is a relative jump; fix the offset. */
 289                if (*insnbuf == 0xe8 && a->replacementlen == 5)
 290                    *(s32 *)(insnbuf + 1) += replacement - instr;
 291
 292                add_nops(insnbuf + a->replacementlen,
 293                         a->instrlen - a->replacementlen);
 294
 295                text_poke_early(instr, insnbuf, a->instrlen);
 296        }
 297}
 298
 299#ifdef CONFIG_SMP
 300
 301static void alternatives_smp_lock(const s32 *start, const s32 *end,
 302                                  u8 *text, u8 *text_end)
 303{
 304        const s32 *poff;
 305
 306        mutex_lock(&text_mutex);
 307        for (poff = start; poff < end; poff++) {
 308                u8 *ptr = (u8 *)poff + *poff;
 309
 310                if (!*poff || ptr < text || ptr >= text_end)
 311                        continue;
 312                /* turn DS segment override prefix into lock prefix */
 313                if (*ptr == 0x3e)
 314                        text_poke(ptr, ((unsigned char []){0xf0}), 1);
 315        };
 316        mutex_unlock(&text_mutex);
 317}
 318
 319static void alternatives_smp_unlock(const s32 *start, const s32 *end,
 320                                    u8 *text, u8 *text_end)
 321{
 322        const s32 *poff;
 323
 324        if (noreplace_smp)
 325                return;
 326
 327        mutex_lock(&text_mutex);
 328        for (poff = start; poff < end; poff++) {
 329                u8 *ptr = (u8 *)poff + *poff;
 330
 331                if (!*poff || ptr < text || ptr >= text_end)
 332                        continue;
 333                /* turn lock prefix into DS segment override prefix */
 334                if (*ptr == 0xf0)
 335                        text_poke(ptr, ((unsigned char []){0x3E}), 1);
 336        };
 337        mutex_unlock(&text_mutex);
 338}
 339
/*
 * One registered set of lock-prefix sites (the core kernel or a module),
 * kept on smp_alt_modules so alternatives_smp_switch() can re-patch it.
 */
struct smp_alt_module {
        /*
         * Owner of this entry: @mod is the key alternatives_smp_module_del()
         * matches on; @name is only used in debug output.
         */
        struct module   *mod;
        char            *name;

        /* ptrs to lock prefixes */
        const s32       *locks;
        const s32       *locks_end;

        /* .text segment, needed to avoid patching init code ;) */
        u8              *text;
        u8              *text_end;

        /* linkage on smp_alt_modules */
        struct list_head next;
};
/* All registered entries; add, del and switch modify/walk it under smp_alt. */
static LIST_HEAD(smp_alt_modules);
static DEFINE_MUTEX(smp_alt);
static int smp_mode = 1;        /* protected by smp_alt */
 358
 359void __init_or_module alternatives_smp_module_add(struct module *mod,
 360                                                  char *name,
 361                                                  void *locks, void *locks_end,
 362                                                  void *text,  void *text_end)
 363{
 364        struct smp_alt_module *smp;
 365
 366        if (noreplace_smp)
 367                return;
 368
 369        if (smp_alt_once) {
 370                if (boot_cpu_has(X86_FEATURE_UP))
 371                        alternatives_smp_unlock(locks, locks_end,
 372                                                text, text_end);
 373                return;
 374        }
 375
 376        smp = kzalloc(sizeof(*smp), GFP_KERNEL);
 377        if (NULL == smp)
 378                return; /* we'll run the (safe but slow) SMP code then ... */
 379
 380        smp->mod        = mod;
 381        smp->name       = name;
 382        smp->locks      = locks;
 383        smp->locks_end  = locks_end;
 384        smp->text       = text;
 385        smp->text_end   = text_end;
 386        DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
 387                __func__, smp->locks, smp->locks_end,
 388                smp->text, smp->text_end, smp->name);
 389
 390        mutex_lock(&smp_alt);
 391        list_add_tail(&smp->next, &smp_alt_modules);
 392        if (boot_cpu_has(X86_FEATURE_UP))
 393                alternatives_smp_unlock(smp->locks, smp->locks_end,
 394                                        smp->text, smp->text_end);
 395        mutex_unlock(&smp_alt);
 396}
 397
 398void __init_or_module alternatives_smp_module_del(struct module *mod)
 399{
 400        struct smp_alt_module *item;
 401
 402        if (smp_alt_once || noreplace_smp)
 403                return;
 404
 405        mutex_lock(&smp_alt);
 406        list_for_each_entry(item, &smp_alt_modules, next) {
 407                if (mod != item->mod)
 408                        continue;
 409                list_del(&item->next);
 410                mutex_unlock(&smp_alt);
 411                DPRINTK("%s: %s\n", __func__, item->name);
 412                kfree(item);
 413                return;
 414        }
 415        mutex_unlock(&smp_alt);
 416}
 417
 418bool skip_smp_alternatives;
 419void alternatives_smp_switch(int smp)
 420{
 421        struct smp_alt_module *mod;
 422
 423#ifdef CONFIG_LOCKDEP
 424        /*
 425         * Older binutils section handling bug prevented
 426         * alternatives-replacement from working reliably.
 427         *
 428         * If this still occurs then you should see a hang
 429         * or crash shortly after this line:
 430         */
 431        printk("lockdep: fixing up alternatives.\n");
 432#endif
 433
 434        if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
 435                return;
 436        BUG_ON(!smp && (num_online_cpus() > 1));
 437
 438        mutex_lock(&smp_alt);
 439
 440        /*
 441         * Avoid unnecessary switches because it forces JIT based VMs to
 442         * throw away all cached translations, which can be quite costly.
 443         */
 444        if (smp == smp_mode) {
 445                /* nothing */
 446        } else if (smp) {
 447                printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
 448                clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
 449                clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
 450                list_for_each_entry(mod, &smp_alt_modules, next)
 451                        alternatives_smp_lock(mod->locks, mod->locks_end,
 452                                              mod->text, mod->text_end);
 453        } else {
 454                printk(KERN_INFO "SMP alternatives: switching to UP code\n");
 455                set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
 456                set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
 457                list_for_each_entry(mod, &smp_alt_modules, next)
 458                        alternatives_smp_unlock(mod->locks, mod->locks_end,
 459                                                mod->text, mod->text_end);
 460        }
 461        smp_mode = smp;
 462        mutex_unlock(&smp_alt);
 463}
 464
 465/* Return 1 if the address range is reserved for smp-alternatives */
 466int alternatives_text_reserved(void *start, void *end)
 467{
 468        struct smp_alt_module *mod;
 469        const s32 *poff;
 470        u8 *text_start = start;
 471        u8 *text_end = end;
 472
 473        list_for_each_entry(mod, &smp_alt_modules, next) {
 474                if (mod->text > text_end || mod->text_end < text_start)
 475                        continue;
 476                for (poff = mod->locks; poff < mod->locks_end; poff++) {
 477                        const u8 *ptr = (const u8 *)poff + *poff;
 478
 479                        if (text_start <= ptr && text_end > ptr)
 480                                return 1;
 481                }
 482        }
 483
 484        return 0;
 485}
 486#endif
 487
 488#ifdef CONFIG_PARAVIRT
 489void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
 490                                     struct paravirt_patch_site *end)
 491{
 492        struct paravirt_patch_site *p;
 493        char insnbuf[MAX_PATCH_LEN];
 494
 495        if (noreplace_paravirt)
 496                return;
 497
 498        for (p = start; p < end; p++) {
 499                unsigned int used;
 500
 501                BUG_ON(p->len > MAX_PATCH_LEN);
 502                /* prep the buffer with the original instructions */
 503                memcpy(insnbuf, p->instr, p->len);
 504                used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
 505                                         (unsigned long)p->instr, p->len);
 506
 507                BUG_ON(used > p->len);
 508
 509                /* Pad the rest with nops */
 510                add_nops(insnbuf + used, p->len - used);
 511                text_poke_early(p->instr, insnbuf, p->len);
 512        }
 513}
 514extern struct paravirt_patch_site __start_parainstructions[],
 515        __stop_parainstructions[];
 516#endif  /* CONFIG_PARAVIRT */
 517
/*
 * Boot-time entry point: apply the core kernel's alternatives, set up
 * (or perform once) the SMP lock-prefix patching, apply the paravirt
 * patches, and free the lock-prefix table when it will not be needed
 * again.  NMIs are stopped for the duration.
 */
void __init alternative_instructions(void)
{
        /* The patching is not fully atomic, so try to avoid local interruptions
           that might execute the to be patched code.
           Other CPUs are not running. */
        stop_nmi();

        /*
         * Don't stop machine check exceptions while patching.
         * MCEs only happen when something got corrupted and in this
         * case we must do something about the corruption.
         * Ignoring it is worse than an unlikely patching race.
         * Also machine checks tend to be broadcast and if one CPU
         * goes into machine check the others follow quickly, so we don't
         * expect a machine check to cause undue problems during code
         * patching.
         */

        apply_alternatives(__alt_instructions, __alt_instructions_end);

        /* switch to patch-once-at-boottime-only mode and free the
         * tables in case we know the number of CPUs will never ever
         * change */
#ifdef CONFIG_HOTPLUG_CPU
        if (num_possible_cpus() < 2)
                smp_alt_once = 1;
#endif

#ifdef CONFIG_SMP
        if (smp_alt_once) {
                /* Patch-once mode: go to UP code only if one CPU is possible. */
                if (1 == num_possible_cpus()) {
                        printk(KERN_INFO "SMP alternatives: switching to UP code\n");
                        set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
                        set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);

                        alternatives_smp_unlock(__smp_locks, __smp_locks_end,
                                                _text, _etext);
                }
        } else {
                /* Record the core kernel so it can be switched later. */
                alternatives_smp_module_add(NULL, "core kernel",
                                            __smp_locks, __smp_locks_end,
                                            _text, _etext);

                /* Only switch to UP mode if we don't immediately boot others */
                if (num_present_cpus() == 1 || setup_max_cpus <= 1)
                        alternatives_smp_switch(0);
        }
#endif
        apply_paravirt(__parainstructions, __parainstructions_end);

        /* In patch-once mode the lock-prefix table is not used again. */
        if (smp_alt_once)
                free_init_pages("SMP alternatives",
                                (unsigned long)__smp_locks,
                                (unsigned long)__smp_locks_end);

        restart_nmi();
}
 575
/**
 * text_poke_early - Update instructions on a live kernel at boot time
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * Copies @len bytes from @opcode directly to @addr with local interrupts
 * disabled, then executes sync_core(). Returns @addr.
 *
 * When you use this code to patch more than one byte of an instruction
 * you need to make sure that other CPUs cannot execute this code in parallel.
 * Also no thread must be currently preempted in the middle of these
 * instructions. And on the local CPU you need to be protected against NMI or
 * MCE handlers seeing an inconsistent instruction while you patch.
 */
void *__init_or_module text_poke_early(void *addr, const void *opcode,
                                              size_t len)
{
        unsigned long flags;
        local_irq_save(flags);
        memcpy(addr, opcode, len);
        sync_core();
        local_irq_restore(flags);
        /* Could also do a CLFLUSH here to speed up CPU recovery; but
           that causes hangs on some VIA CPUs. */
        return addr;
}
 600
/**
 * text_poke - Update instructions on a live kernel
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * Only atomic text poke/set should be allowed when not doing early patching.
 * It means the size must be writable atomically and the address must be aligned
 * in a way that permits an atomic write.
 *
 * The write goes through temporary fixmap mappings of the page holding
 * @addr and, when it resolves to a page, of the page at @addr + PAGE_SIZE.
 * NOTE(review): this function performs no size or alignment check itself;
 * the constraints above are on the caller - confirm.
 *
 * Returns @addr.
 *
 * Note: Must be called under text_mutex.
 */
void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
{
        unsigned long flags;
        char *vaddr;
        struct page *pages[2];
        int i;

        /* Look up the backing pages: vmalloc lookup for non-core text. */
        if (!core_kernel_text((unsigned long)addr)) {
                pages[0] = vmalloc_to_page(addr);
                pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
        } else {
                pages[0] = virt_to_page(addr);
                WARN_ON(!PageReserved(pages[0]));
                pages[1] = virt_to_page(addr + PAGE_SIZE);
        }
        BUG_ON(!pages[0]);
        local_irq_save(flags);
        /* Map the page(s) at the text-poke fixmap slots. */
        set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
        if (pages[1])
                set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
        vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
        /* Write at the same in-page offset as @addr, via the alias. */
        memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
        clear_fixmap(FIX_TEXT_POKE0);
        if (pages[1])
                clear_fixmap(FIX_TEXT_POKE1);
        local_flush_tlb();
        sync_core();
        /* Could also do a CLFLUSH here to speed up CPU recovery; but
           that causes hangs on some VIA CPUs. */
        /* Verify through the original address that the bytes landed. */
        for (i = 0; i < len; i++)
                BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
        local_irq_restore(flags);
        return addr;
}
 648
/*
 * Cross-modifying kernel text with stop_machine().
 * This code originally comes from immediate value.
 */
/* Set to 1 before each stop_machine run; the CPU that drops it to 0 writes. */
static atomic_t stop_machine_first;
/* Set to 1 by the writing CPU once all pokes are done; others spin on it. */
static int wrote_text;

/* Argument bundle handed to stop_machine_text_poke(). */
struct text_poke_params {
        struct text_poke_param *params;  /* array of poke requests */
        int nparams;                     /* number of elements in @params */
};
 660
/*
 * Per-CPU stop_machine callback; @data is a struct text_poke_params.
 *
 * The CPU whose atomic_dec_and_test() on stop_machine_first succeeds
 * applies every requested poke with text_poke() and then sets
 * wrote_text.  Every other CPU spins until wrote_text is set.  All CPUs
 * then flush the icache range of each poke and execute sync_core().
 * Always returns 0.
 */
static int __kprobes stop_machine_text_poke(void *data)
{
        struct text_poke_params *tpp = data;
        struct text_poke_param *p;
        int i;

        if (atomic_dec_and_test(&stop_machine_first)) {
                for (i = 0; i < tpp->nparams; i++) {
                        p = &tpp->params[i];
                        text_poke(p->addr, p->opcode, p->len);
                }
                smp_wmb();      /* Make sure other cpus see that this has run */
                wrote_text = 1;
        } else {
                while (!wrote_text)
                        cpu_relax();
                smp_mb();       /* Load wrote_text before following execution */
        }

        for (i = 0; i < tpp->nparams; i++) {
                p = &tpp->params[i];
                flush_icache_range((unsigned long)p->addr,
                                   (unsigned long)p->addr + p->len);
        }
        /*
         * Intel Architecture Software Developer's Manual section 7.1.3 specifies
         * that a core serializing instruction such as "cpuid" should be
         * executed on _each_ core before the new instruction is made visible.
         */
        sync_core();
        return 0;
}
 693
 694/**
 695 * text_poke_smp - Update instructions on a live kernel on SMP
 696 * @addr: address to modify
 697 * @opcode: source of the copy
 698 * @len: length to copy
 699 *
 700 * Modify multi-byte instruction by using stop_machine() on SMP. This allows
 701 * user to poke/set multi-byte text on SMP. Only non-NMI/MCE code modifying
 702 * should be allowed, since stop_machine() does _not_ protect code against
 703 * NMI and MCE.
 704 *
 705 * Note: Must be called under get_online_cpus() and text_mutex.
 706 */
 707void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
 708{
 709        struct text_poke_params tpp;
 710        struct text_poke_param p;
 711
 712        p.addr = addr;
 713        p.opcode = opcode;
 714        p.len = len;
 715        tpp.params = &p;
 716        tpp.nparams = 1;
 717        atomic_set(&stop_machine_first, 1);
 718        wrote_text = 0;
 719        /* Use __stop_machine() because the caller already got online_cpus. */
 720        __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
 721        return addr;
 722}
 723
 724/**
 725 * text_poke_smp_batch - Update instructions on a live kernel on SMP
 726 * @params: an array of text_poke parameters
 727 * @n: the number of elements in params.
 728 *
 729 * Modify multi-byte instruction by using stop_machine() on SMP. Since the
 730 * stop_machine() is heavy task, it is better to aggregate text_poke requests
 731 * and do it once if possible.
 732 *
 733 * Note: Must be called under get_online_cpus() and text_mutex.
 734 */
 735void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
 736{
 737        struct text_poke_params tpp = {.params = params, .nparams = n};
 738
 739        atomic_set(&stop_machine_first, 1);
 740        wrote_text = 0;
 741        __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
 742}
 743
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.