linux/arch/x86/kernel/alternative.c
<<
>>
Prefs
   1#include <linux/module.h>
   2#include <linux/sched.h>
   3#include <linux/mutex.h>
   4#include <linux/list.h>
   5#include <linux/kprobes.h>
   6#include <linux/mm.h>
   7#include <linux/vmalloc.h>
   8#include <asm/alternative.h>
   9#include <asm/sections.h>
  10#include <asm/pgtable.h>
  11#include <asm/mce.h>
  12#include <asm/nmi.h>
  13#include <asm/vsyscall.h>
  14#include <asm/cacheflush.h>
  15#include <asm/io.h>
  16
/*
 * Size in bytes of the on-stack scratch buffers in which
 * apply_alternatives() and apply_paravirt() assemble a patch; both BUG
 * when a single patch site is longer than this.
 */
#define MAX_PATCH_LEN (255-1)
  18
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Non-zero selects "patch once" mode: alternatives_smp_module_add() does
 * not record modules, and alternatives_smp_switch() /
 * alternatives_smp_module_del() return immediately. Set by the
 * "smp-alt-boot" boot option, or by alternative_instructions() when fewer
 * than two CPUs are possible.
 */
static int smp_alt_once;

/* Handler for the "smp-alt-boot" boot option; @str is not looked at. */
static int __init bootonly(char *str)
{
	smp_alt_once = 1;
	return 1;
}
__setup("smp-alt-boot", bootonly);
#else
/* Without CONFIG_HOTPLUG_CPU the patch-once mode is unconditionally on. */
#define smp_alt_once 1
#endif
  31
/* Non-zero enables the DPRINTK() debug messages in this file. */
static int debug_alternative;

/* Handler for the "debug-alternative" boot option; @str is not looked at. */
static int __init debug_alt(char *str)
{
	debug_alternative = 1;
	return 1;
}
__setup("debug-alternative", debug_alt);
  40
/*
 * Non-zero disables all SMP lock-prefix patching: the module add/del,
 * switch and unlock helpers below return early when it is set.
 */
static int noreplace_smp;

/* Handler for the "noreplace-smp" boot option; @str is not looked at. */
static int __init setup_noreplace_smp(char *str)
{
	noreplace_smp = 1;
	return 1;
}
__setup("noreplace-smp", setup_noreplace_smp);
  49
  50#ifdef CONFIG_PARAVIRT
  51static int noreplace_paravirt = 0;
  52
  53static int __init setup_noreplace_paravirt(char *str)
  54{
  55        noreplace_paravirt = 1;
  56        return 1;
  57}
  58__setup("noreplace-paravirt", setup_noreplace_paravirt);
  59#endif
  60
  61#define DPRINTK(fmt, args...) if (debug_alternative) \
  62        printk(KERN_DEBUG fmt, args)
  63
  64#ifdef GENERIC_NOP1
  65/* Use inline assembly to define this because the nops are defined
  66   as inline assembly strings in the include files and we cannot
  67   get them easily into strings. */
  68asm("\t.section .rodata, \"a\"\nintelnops: "
  69        GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
  70        GENERIC_NOP7 GENERIC_NOP8
  71    "\t.previous");
  72extern const unsigned char intelnops[];
  73static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = {
  74        NULL,
  75        intelnops,
  76        intelnops + 1,
  77        intelnops + 1 + 2,
  78        intelnops + 1 + 2 + 3,
  79        intelnops + 1 + 2 + 3 + 4,
  80        intelnops + 1 + 2 + 3 + 4 + 5,
  81        intelnops + 1 + 2 + 3 + 4 + 5 + 6,
  82        intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
  83};
  84#endif
  85
  86#ifdef K8_NOP1
  87asm("\t.section .rodata, \"a\"\nk8nops: "
  88        K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
  89        K8_NOP7 K8_NOP8
  90    "\t.previous");
  91extern const unsigned char k8nops[];
  92static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = {
  93        NULL,
  94        k8nops,
  95        k8nops + 1,
  96        k8nops + 1 + 2,
  97        k8nops + 1 + 2 + 3,
  98        k8nops + 1 + 2 + 3 + 4,
  99        k8nops + 1 + 2 + 3 + 4 + 5,
 100        k8nops + 1 + 2 + 3 + 4 + 5 + 6,
 101        k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
 102};
 103#endif
 104
 105#ifdef K7_NOP1
 106asm("\t.section .rodata, \"a\"\nk7nops: "
 107        K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
 108        K7_NOP7 K7_NOP8
 109    "\t.previous");
 110extern const unsigned char k7nops[];
 111static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = {
 112        NULL,
 113        k7nops,
 114        k7nops + 1,
 115        k7nops + 1 + 2,
 116        k7nops + 1 + 2 + 3,
 117        k7nops + 1 + 2 + 3 + 4,
 118        k7nops + 1 + 2 + 3 + 4 + 5,
 119        k7nops + 1 + 2 + 3 + 4 + 5 + 6,
 120        k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
 121};
 122#endif
 123
 124#ifdef P6_NOP1
 125asm("\t.section .rodata, \"a\"\np6nops: "
 126        P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6
 127        P6_NOP7 P6_NOP8
 128    "\t.previous");
 129extern const unsigned char p6nops[];
 130static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
 131        NULL,
 132        p6nops,
 133        p6nops + 1,
 134        p6nops + 1 + 2,
 135        p6nops + 1 + 2 + 3,
 136        p6nops + 1 + 2 + 3 + 4,
 137        p6nops + 1 + 2 + 3 + 4 + 5,
 138        p6nops + 1 + 2 + 3 + 4 + 5 + 6,
 139        p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
 140};
 141#endif
 142
 143#ifdef CONFIG_X86_64
 144
 145extern char __vsyscall_0;
 146const unsigned char *const *find_nop_table(void)
 147{
 148        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
 149            boot_cpu_has(X86_FEATURE_NOPL))
 150                return p6_nops;
 151        else
 152                return k8_nops;
 153}
 154
 155#else /* CONFIG_X86_64 */
 156
 157const unsigned char *const *find_nop_table(void)
 158{
 159        if (boot_cpu_has(X86_FEATURE_K8))
 160                return k8_nops;
 161        else if (boot_cpu_has(X86_FEATURE_K7))
 162                return k7_nops;
 163        else if (boot_cpu_has(X86_FEATURE_NOPL))
 164                return p6_nops;
 165        else
 166                return intel_nops;
 167}
 168
 169#endif /* CONFIG_X86_64 */
 170
 171/* Use this to add nops to a buffer, then text_poke the whole buffer. */
 172void add_nops(void *insns, unsigned int len)
 173{
 174        const unsigned char *const *noptable = find_nop_table();
 175
 176        while (len > 0) {
 177                unsigned int noplen = len;
 178                if (noplen > ASM_NOP_MAX)
 179                        noplen = ASM_NOP_MAX;
 180                memcpy(insns, noptable[noplen], noplen);
 181                insns += noplen;
 182                len -= noplen;
 183        }
 184}
 185EXPORT_SYMBOL_GPL(add_nops);
 186
 187extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 188extern u8 *__smp_locks[], *__smp_locks_end[];
 189
 190/* Replace instructions with better alternatives for this CPU type.
 191   This runs before SMP is initialized to avoid SMP problems with
 192   self modifying code. This implies that assymetric systems where
 193   APs have less capabilities than the boot processor are not handled.
 194   Tough. Make sure you disable such features by hand. */
 195
 196void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 197{
 198        struct alt_instr *a;
 199        char insnbuf[MAX_PATCH_LEN];
 200
 201        DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
 202        for (a = start; a < end; a++) {
 203                u8 *instr = a->instr;
 204                BUG_ON(a->replacementlen > a->instrlen);
 205                BUG_ON(a->instrlen > sizeof(insnbuf));
 206                if (!boot_cpu_has(a->cpuid))
 207                        continue;
 208#ifdef CONFIG_X86_64
 209                /* vsyscall code is not mapped yet. resolve it manually. */
 210                if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
 211                        instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
 212                        DPRINTK("%s: vsyscall fixup: %p => %p\n",
 213                                __func__, a->instr, instr);
 214                }
 215#endif
 216                memcpy(insnbuf, a->replacement, a->replacementlen);
 217                add_nops(insnbuf + a->replacementlen,
 218                         a->instrlen - a->replacementlen);
 219                text_poke_early(instr, insnbuf, a->instrlen);
 220        }
 221}
 222
 223#ifdef CONFIG_SMP
 224
 225static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
 226{
 227        u8 **ptr;
 228
 229        for (ptr = start; ptr < end; ptr++) {
 230                if (*ptr < text)
 231                        continue;
 232                if (*ptr > text_end)
 233                        continue;
 234                /* turn DS segment override prefix into lock prefix */
 235                text_poke(*ptr, ((unsigned char []){0xf0}), 1);
 236        };
 237}
 238
 239static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
 240{
 241        u8 **ptr;
 242
 243        if (noreplace_smp)
 244                return;
 245
 246        for (ptr = start; ptr < end; ptr++) {
 247                if (*ptr < text)
 248                        continue;
 249                if (*ptr > text_end)
 250                        continue;
 251                /* turn lock prefix into DS segment override prefix */
 252                text_poke(*ptr, ((unsigned char []){0x3E}), 1);
 253        };
 254}
 255
/*
 * One record per set of lock-prefix tables registered through
 * alternatives_smp_module_add(), kept so that alternatives_smp_switch()
 * can re-patch the code later.
 */
struct smp_alt_module {
	/*
	 * Identity of the registrant: @mod is the key that
	 * alternatives_smp_module_del() matches on (NULL is passed for the
	 * core kernel), @name is only used in debug messages.
	 */
	struct module	*mod;
	char		*name;

	/* ptrs to lock prefixes */
	u8		**locks;
	u8		**locks_end;

	/* .text segment, needed to avoid patching init code ;) */
	u8		*text;
	u8		*text_end;

	/* link in smp_alt_modules */
	struct list_head next;
};
/* all registered records; modified with smp_alt held */
static LIST_HEAD(smp_alt_modules);
static DEFINE_MUTEX(smp_alt);
/* mode last selected by alternatives_smp_switch(): non-zero = SMP code */
static int smp_mode = 1;	/* protected by smp_alt */
 274
 275void alternatives_smp_module_add(struct module *mod, char *name,
 276                                 void *locks, void *locks_end,
 277                                 void *text,  void *text_end)
 278{
 279        struct smp_alt_module *smp;
 280
 281        if (noreplace_smp)
 282                return;
 283
 284        if (smp_alt_once) {
 285                if (boot_cpu_has(X86_FEATURE_UP))
 286                        alternatives_smp_unlock(locks, locks_end,
 287                                                text, text_end);
 288                return;
 289        }
 290
 291        smp = kzalloc(sizeof(*smp), GFP_KERNEL);
 292        if (NULL == smp)
 293                return; /* we'll run the (safe but slow) SMP code then ... */
 294
 295        smp->mod        = mod;
 296        smp->name       = name;
 297        smp->locks      = locks;
 298        smp->locks_end  = locks_end;
 299        smp->text       = text;
 300        smp->text_end   = text_end;
 301        DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
 302                __func__, smp->locks, smp->locks_end,
 303                smp->text, smp->text_end, smp->name);
 304
 305        mutex_lock(&smp_alt);
 306        list_add_tail(&smp->next, &smp_alt_modules);
 307        if (boot_cpu_has(X86_FEATURE_UP))
 308                alternatives_smp_unlock(smp->locks, smp->locks_end,
 309                                        smp->text, smp->text_end);
 310        mutex_unlock(&smp_alt);
 311}
 312
 313void alternatives_smp_module_del(struct module *mod)
 314{
 315        struct smp_alt_module *item;
 316
 317        if (smp_alt_once || noreplace_smp)
 318                return;
 319
 320        mutex_lock(&smp_alt);
 321        list_for_each_entry(item, &smp_alt_modules, next) {
 322                if (mod != item->mod)
 323                        continue;
 324                list_del(&item->next);
 325                mutex_unlock(&smp_alt);
 326                DPRINTK("%s: %s\n", __func__, item->name);
 327                kfree(item);
 328                return;
 329        }
 330        mutex_unlock(&smp_alt);
 331}
 332
 333void alternatives_smp_switch(int smp)
 334{
 335        struct smp_alt_module *mod;
 336
 337#ifdef CONFIG_LOCKDEP
 338        /*
 339         * Older binutils section handling bug prevented
 340         * alternatives-replacement from working reliably.
 341         *
 342         * If this still occurs then you should see a hang
 343         * or crash shortly after this line:
 344         */
 345        printk("lockdep: fixing up alternatives.\n");
 346#endif
 347
 348        if (noreplace_smp || smp_alt_once)
 349                return;
 350        BUG_ON(!smp && (num_online_cpus() > 1));
 351
 352        mutex_lock(&smp_alt);
 353
 354        /*
 355         * Avoid unnecessary switches because it forces JIT based VMs to
 356         * throw away all cached translations, which can be quite costly.
 357         */
 358        if (smp == smp_mode) {
 359                /* nothing */
 360        } else if (smp) {
 361                printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
 362                clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
 363                clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
 364                list_for_each_entry(mod, &smp_alt_modules, next)
 365                        alternatives_smp_lock(mod->locks, mod->locks_end,
 366                                              mod->text, mod->text_end);
 367        } else {
 368                printk(KERN_INFO "SMP alternatives: switching to UP code\n");
 369                set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
 370                set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
 371                list_for_each_entry(mod, &smp_alt_modules, next)
 372                        alternatives_smp_unlock(mod->locks, mod->locks_end,
 373                                                mod->text, mod->text_end);
 374        }
 375        smp_mode = smp;
 376        mutex_unlock(&smp_alt);
 377}
 378
 379#endif
 380
 381#ifdef CONFIG_PARAVIRT
 382void apply_paravirt(struct paravirt_patch_site *start,
 383                    struct paravirt_patch_site *end)
 384{
 385        struct paravirt_patch_site *p;
 386        char insnbuf[MAX_PATCH_LEN];
 387
 388        if (noreplace_paravirt)
 389                return;
 390
 391        for (p = start; p < end; p++) {
 392                unsigned int used;
 393
 394                BUG_ON(p->len > MAX_PATCH_LEN);
 395                /* prep the buffer with the original instructions */
 396                memcpy(insnbuf, p->instr, p->len);
 397                used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
 398                                         (unsigned long)p->instr, p->len);
 399
 400                BUG_ON(used > p->len);
 401
 402                /* Pad the rest with nops */
 403                add_nops(insnbuf + used, p->len - used);
 404                text_poke_early(p->instr, insnbuf, p->len);
 405        }
 406}
 407extern struct paravirt_patch_site __start_parainstructions[],
 408        __stop_parainstructions[];
 409#endif  /* CONFIG_PARAVIRT */
 410
 411void __init alternative_instructions(void)
 412{
 413        /* The patching is not fully atomic, so try to avoid local interruptions
 414           that might execute the to be patched code.
 415           Other CPUs are not running. */
 416        stop_nmi();
 417#ifdef CONFIG_X86_MCE
 418        stop_mce();
 419#endif
 420
 421        apply_alternatives(__alt_instructions, __alt_instructions_end);
 422
 423        /* switch to patch-once-at-boottime-only mode and free the
 424         * tables in case we know the number of CPUs will never ever
 425         * change */
 426#ifdef CONFIG_HOTPLUG_CPU
 427        if (num_possible_cpus() < 2)
 428                smp_alt_once = 1;
 429#endif
 430
 431#ifdef CONFIG_SMP
 432        if (smp_alt_once) {
 433                if (1 == num_possible_cpus()) {
 434                        printk(KERN_INFO "SMP alternatives: switching to UP code\n");
 435                        set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
 436                        set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
 437
 438                        alternatives_smp_unlock(__smp_locks, __smp_locks_end,
 439                                                _text, _etext);
 440                }
 441        } else {
 442                alternatives_smp_module_add(NULL, "core kernel",
 443                                            __smp_locks, __smp_locks_end,
 444                                            _text, _etext);
 445
 446                /* Only switch to UP mode if we don't immediately boot others */
 447                if (num_present_cpus() == 1 || setup_max_cpus <= 1)
 448                        alternatives_smp_switch(0);
 449        }
 450#endif
 451        apply_paravirt(__parainstructions, __parainstructions_end);
 452
 453        if (smp_alt_once)
 454                free_init_pages("SMP alternatives",
 455                                (unsigned long)__smp_locks,
 456                                (unsigned long)__smp_locks_end);
 457
 458        restart_nmi();
 459#ifdef CONFIG_X86_MCE
 460        restart_mce();
 461#endif
 462}
 463
/**
 * text_poke_early - Update instructions on a live kernel at boot time
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * When you use this code to patch more than one byte of an instruction
 * you need to make sure that other CPUs cannot execute this code in parallel.
 * Also no thread must be currently preempted in the middle of these
 * instructions. And on the local CPU you need to be protected against NMI
 * or MCE handlers seeing an inconsistent instruction while you patch.
 *
 * The bytes are copied straight to @addr with local interrupts disabled.
 * Returns @addr.
 */
void *text_poke_early(void *addr, const void *opcode, size_t len)
{
	unsigned long flags;
	local_irq_save(flags);
	memcpy(addr, opcode, len);
	local_irq_restore(flags);
	sync_core();
	/* Could also do a CLFLUSH here to speed up CPU recovery; but
	   that causes hangs on some VIA CPUs. */
	return addr;
}
 487
 488/**
 489 * text_poke - Update instructions on a live kernel
 490 * @addr: address to modify
 491 * @opcode: source of the copy
 492 * @len: length to copy
 493 *
 494 * Only atomic text poke/set should be allowed when not doing early patching.
 495 * It means the size must be writable atomically and the address must be aligned
 496 * in a way that permits an atomic write. It also makes sure we fit on a single
 497 * page.
 498 */
 499void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
 500{
 501        unsigned long flags;
 502        char *vaddr;
 503        int nr_pages = 2;
 504        struct page *pages[2];
 505        int i;
 506
 507        if (!core_kernel_text((unsigned long)addr)) {
 508                pages[0] = vmalloc_to_page(addr);
 509                pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
 510        } else {
 511                pages[0] = virt_to_page(addr);
 512                WARN_ON(!PageReserved(pages[0]));
 513                pages[1] = virt_to_page(addr + PAGE_SIZE);
 514        }
 515        BUG_ON(!pages[0]);
 516        if (!pages[1])
 517                nr_pages = 1;
 518        vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
 519        BUG_ON(!vaddr);
 520        local_irq_save(flags);
 521        memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
 522        local_irq_restore(flags);
 523        vunmap(vaddr);
 524        sync_core();
 525        /* Could also do a CLFLUSH here to speed up CPU recovery; but
 526           that causes hangs on some VIA CPUs. */
 527        for (i = 0; i < len; i++)
 528                BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
 529        return addr;
 530}
 531
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.