linux/arch/x86/kernel/head_64.S
<<
>>
Prefs
   1/*
   2 *  linux/arch/x86_64/kernel/head.S -- start in 32bit and switch to 64bit
   3 *
   4 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
   5 *  Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
   6 *  Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
   7 *  Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
   8 *  Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com>
   9 */
  10
  11
  12#include <linux/linkage.h>
  13#include <linux/threads.h>
  14#include <linux/init.h>
  15#include <asm/desc.h>
  16#include <asm/segment.h>
  17#include <asm/pgtable.h>
  18#include <asm/page.h>
  19#include <asm/msr.h>
  20#include <asm/cache.h>
  21#include <asm/processor-flags.h>
  22
  23#ifdef CONFIG_PARAVIRT
  24#include <asm/asm-offsets.h>
  25#include <asm/paravirt.h>
  26#else
  27#define GET_CR2_INTO_RCX movq %cr2, %rcx
  28#endif
  29
  30/* We are not able to switch in one step to the final KERNEL ADDRESS SPACE
  31 * because we need identity-mapped pages.
  32 *
  33 */
  34
  35#define pud_index(x)    (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
    /*
     * pud_index(x): slot of address x within a PUD-level table, i.e. x
     * shifted right by PUD_SHIFT and masked to PTRS_PER_PUD entries.
     */
  36
    /*
     * Assemble-time slot indices of __PAGE_OFFSET and __START_KERNEL_map
     * within the level-4 (pgd_index) and level-3 (pud_index) tables.
     * They are used in this file both to lay out the static page tables
     * and to locate the entries that startup_64 patches.
     */
  37L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET)
  38L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET)
  39L4_START_KERNEL = pgd_index(__START_KERNEL_map)
  40L3_START_KERNEL = pud_index(__START_KERNEL_map)
  41
  42        .text
  43        .section .text.head
  44        .code64
  45        .globl startup_64
  46startup_64:
  47
  48        /*
  49         * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
  50         * and someone has loaded an identity mapped page table
  51         * for us.  These identity mapped page tables map all of the
  52         * kernel pages and possibly all of memory.
  53         *
  54         * %esi holds a physical pointer to real_mode_data.
  55         *
  56         * We come here either directly from a 64bit bootloader, or from
  57         * arch/x86_64/boot/compressed/head.S.
  58         *
  59         * We only come here initially at boot nothing else comes here.
  60         *
  61         * Since we may be loaded at an address different from what we were
  62         * compiled to run at we first fixup the physical addresses in our page
  63         * tables and then reload them.
  64         *
         * Register use in startup_64, as visible in the code below:
         *   %rbp                      - load delta, added to every physical
         *                               address that is fixed up here
         *   %rax, %rbx, %rdx, %rdi, %r8 - scratch
         *   %rsi                      - never written here, so the
         *                               real_mode_data pointer survives
         */
  65
  66        /* Compute the delta between the address I am compiled to run at and the
  67         * address I am actually running at.
  68         */
            /*
             * %rbp = (run-time address of _text, taken RIP-relative)
             *      - (link-time physical address, _text - __START_KERNEL_map)
             */
  69        leaq    _text(%rip), %rbp
  70        subq    $_text - __START_KERNEL_map, %rbp
  71
  72        /* Is the address not 2M aligned? */
            /*
             * Keep only the delta bits that PMD_PAGE_MASK clears (the offset
             * within a PMD-sized page); any set bit means the delta is not a
             * whole number of such pages.
             */
  73        movq    %rbp, %rax
  74        andl    $~PMD_PAGE_MASK, %eax
  75        testl   %eax, %eax
  76        jnz     bad_address
  77
  78        /* Is the address too large? */
            /* Unsigned compare: run-time address of _text must be < PGDIR_SIZE. */
  79        leaq    _text(%rip), %rdx
  80        movq    $PGDIR_SIZE, %rax
  81        cmpq    %rax, %rdx
  82        jae     bad_address
  83
  84        /* Fixup the physical addresses in the page table
  85         */
            /*
             * Each addq below adds the load delta to one statically
             * initialised table entry.  The slots touched are the ones the
             * table definitions later in this file fill with a pointer to a
             * lower-level table.
             */
  86        addq    %rbp, init_level4_pgt + 0(%rip)
  87        addq    %rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip)
  88        addq    %rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip)
  89
  90        addq    %rbp, level3_ident_pgt + 0(%rip)
  91
  92        addq    %rbp, level3_kernel_pgt + (510*8)(%rip)
  93        addq    %rbp, level3_kernel_pgt + (511*8)(%rip)
  94
  95        addq    %rbp, level2_fixmap_pgt + (506*8)(%rip)
  96
  97        /* Add an Identity mapping if I am above 1G */
            /* %rdi = run-time address of _text masked with PMD_PAGE_MASK */
  98        leaq    _text(%rip), %rdi
  99        andq    $PMD_PAGE_MASK, %rdi
 100
            /*
             * %rax = PUD slot of that address.  Slot 0 of level3_ident_pgt
             * already points at level2_ident_pgt, so nothing to add then.
             */
 101        movq    %rdi, %rax
 102        shrq    $PUD_SHIFT, %rax
 103        andq    $(PTRS_PER_PUD - 1), %rax
 104        jz      ident_complete
 105
            /*
             * level3_ident_pgt[%rax] = run-time physical address of
             * level2_spare_pgt (link-time physical + delta in %rbp) with the
             * _KERNPG_TABLE flag bits added.
             */
 106        leaq    (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
 107        leaq    level3_ident_pgt(%rip), %rbx
 108        movq    %rdx, 0(%rbx, %rax, 8)
 109
            /*
             * level2_spare_pgt[PMD slot of %rdi] = %rdi plus the
             * __PAGE_KERNEL_IDENT_LARGE_EXEC flag bits, i.e. one large-page
             * entry covering the page the kernel text starts in.
             */
 110        movq    %rdi, %rax
 111        shrq    $PMD_SHIFT, %rax
 112        andq    $(PTRS_PER_PMD - 1), %rax
 113        leaq    __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx
 114        leaq    level2_spare_pgt(%rip), %rbx
 115        movq    %rdx, 0(%rbx, %rax, 8)
 116ident_complete:
 117
 118        /*
 119         * Fixup the kernel text+data virtual addresses. Note that
 120         * we might write invalid pmds, when the kernel is relocated
 121         * cleanup_highmap() fixes this up along with the mappings
 122         * beyond _end.
 123         */
 124
            /*
             * Walk all 4096 bytes of level2_kernel_pgt in 8-byte steps:
             * %rdi = current entry, %r8 = one past the last entry.
             */
 125        leaq    level2_kernel_pgt(%rip), %rdi
 126        leaq    4096(%rdi), %r8
 127        /* See if it is a valid page table entry (bit 0 set) */
 1281:      testq   $1, 0(%rdi)
 129        jz      2f
 130        addq    %rbp, 0(%rdi)
 131        /* Advance to the next 8-byte table entry */
 1322:      addq    $8, %rdi
 133        cmp     %r8, %rdi
 134        jne     1b
 135
 136        /* Fixup phys_base */
            /* phys_base is assembled as 0, so it ends up holding the load delta. */
 137        addq    %rbp, phys_base(%rip)
 138
 139#ifdef CONFIG_X86_TRAMPOLINE
            /* Same delta fixup for slots 0 and 511 of trampoline_level4_pgt. */
 140        addq    %rbp, trampoline_level4_pgt + 0(%rip)
 141        addq    %rbp, trampoline_level4_pgt + (511*8)(%rip)
 142#endif
 143
 144        /* Due to ENTRY(), sometimes the empty space gets filled with
 145         * zeros. Better take a jmp than relying on empty space being
 146         * filled with 0x90 (nop)
 147         */
 148        jmp secondary_startup_64
 149ENTRY(secondary_startup_64)
 150        /*
 151         * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
 152         * and someone has loaded a mapped page table.
 153         *
 154         * %esi holds a physical pointer to real_mode_data.
 155         *
 156         * We come here either from startup_64 (using physical addresses)
 157         * or from trampoline.S (using virtual addresses).
 158         *
 159         * Using virtual addresses from trampoline.S removes the need
 160         * to have any identity mapped pages in the kernel page table
 161         * after the boot processor executes this code.
 162         *
         * Summary of the steps below: program CR4, load CR3 with
         * init_level4_pgt, jump to the link-time virtual address, program
         * EFER and CR0, load the boot stack, clear EFLAGS, load the GDT
         * and segment registers, point GS base at empty_zero_page, then
         * far-return to the address stored in initial_code with
         * %edi = %esi.
         */
 163
 164        /* Enable PAE mode and PGE */
            /* The 32-bit movl zero-extends into %rax, so the rest of CR4 is written as 0. */
 165        movl    $(X86_CR4_PAE | X86_CR4_PGE), %eax
 166        movq    %rax, %cr4
 167
 168        /* Setup early boot stage 4 level pagetables. */
            /* CR3 = link-time physical address of init_level4_pgt + phys_base */
 169        movq    $(init_level4_pgt - __START_KERNEL_map), %rax
 170        addq    phys_base(%rip), %rax
 171        movq    %rax, %cr3
 172
 173        /* Ensure I am executing from virtual addresses */
            /*
             * $1f is the absolute (link-time) address of the local label; the
             * indirect jump moves %rip there wherever we were running before.
             */
 174        movq    $1f, %rax
 175        jmp     *%rax
 1761:
 177
 178        /* Check if nx is implemented */
            /*
             * CPUID leaf 0x80000001; EDX is parked in %edi because rdmsr
             * below overwrites %edx.
             */
 179        movl    $0x80000001, %eax
 180        cpuid
 181        movl    %edx,%edi
 182
 183        /* Setup EFER (Extended Feature Enable Register) */
            /*
             * Read-modify-write: rdmsr returns EFER in %edx:%eax, bits are set
             * in %eax, wrmsr writes %edx:%eax back.  EFER.NX is only set when
             * bit 20 of the saved CPUID EDX is set.
             */
 184        movl    $MSR_EFER, %ecx
 185        rdmsr
 186        btsl    $_EFER_SCE, %eax        /* Enable System Call */
 187        btl     $20,%edi                /* No Execute supported? */
 188        jnc     1f
 189        btsl    $_EFER_NX, %eax
 1901:      wrmsr                           /* Make changes effective */
 191
 192        /* Setup cr0 */
 193#define CR0_STATE       (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
 194                         X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
 195                         X86_CR0_PG)
            /* CR0 is loaded with exactly CR0_STATE; nothing from the old value is kept. */
 196        movl    $CR0_STATE, %eax
 197        /* Make changes effective */
 198        movq    %rax, %cr0
 199
 200        /* Setup a boot time stack */
            /* %rsp = the 64-bit value stored at stack_start */
 201        movq stack_start(%rip),%rsp
 202
 203        /* zero EFLAGS after setting rsp */
 204        pushq $0
 205        popfq
 206
 207        /*
 208         * We must switch to a new descriptor in kernel space for the GDT
 209         * because soon the kernel won't have access anymore to the userspace
 210         * addresses where we're currently running on. We have to do that here
 211         * because in 32bit we couldn't load a 64bit linear address.
 212         */
 213        lgdt    early_gdt_descr(%rip)
 214
 215        /* set up data segments. actually 0 would do too */
 216        movl $__KERNEL_DS,%eax
 217        movl %eax,%ds
 218        movl %eax,%ss
 219        movl %eax,%es
 220
 221        /*
 222         * We don't really need to load %fs or %gs, but load them anyway
 223         * to kill any stale realmode selectors.  This allows execution
 224         * under VT hardware.
 225         */
 226        movl %eax,%fs
 227        movl %eax,%gs
 228
 229        /* 
 230         * Set up a dummy PDA. This is just for some early bootup code
 231         * that does in_interrupt() 
 232         */ 
            /*
             * wrmsr takes the value in %edx:%eax: %rax holds the full address
             * of empty_zero_page (low half in %eax), %rdx gets its high half.
             */
 233        movl    $MSR_GS_BASE,%ecx
 234        movq    $empty_zero_page,%rax
 235        movq    %rax,%rdx
 236        shrq    $32,%rdx
 237        wrmsr   
 238
 239        /* esi is pointer to real mode structure with interesting info.
 240           pass it to C */
            /* %rdi is the first integer argument register in the SysV AMD64 ABI. */
 241        movl    %esi, %edi
 242        
 243        /* Finally jump to run C code and to be on real kernel address
 244         * Since we are running on identity-mapped space we have to jump
 245         * to the full 64bit address, this is only possible as indirect
 246         * jump.  In addition we need to ensure %cs is set so we make this
 247         * a far return.
 248         */
            /*
             * Stack built for lretq, top first: target %rip (from
             * initial_code), then %cs.  The 0 pushed first is left on the
             * stack as the target's return address.
             */
 249        movq    initial_code(%rip),%rax
 250        pushq   $0              # fake return address to stop unwinder
 251        pushq   $__KERNEL_CS    # set correct cs
 252        pushq   %rax            # target address in negative space
 253        lretq
 254
 255        /* SMP bootup changes these two */
            /*
             * initial_code: 64-bit address that secondary_startup_64 loads and
             * far-returns to; assembled as x86_64_start_kernel.
             */
 256        __REFDATA
 257        .align  8
 258        ENTRY(initial_code)
 259        .quad   x86_64_start_kernel
 260        __FINITDATA
 261
            /*
             * stack_start: value secondary_startup_64 loads into %rsp;
             * assembled as 8 bytes below init_thread_union + THREAD_SIZE.
             * NOTE(review): the purpose of the trailing .word 0 is not evident
             * from this file - confirm before removing.
             */
 262        ENTRY(stack_start)
 263        .quad  init_thread_union+THREAD_SIZE-8
 264        .word  0
 265
 266bad_address:
    /*
     * Reached from startup_64 when the load address fails the alignment or
     * size check: spin here forever.
     */
 267        jmp bad_address
 268
 269        .section ".init.text","ax"
 270#ifdef CONFIG_EARLY_PRINTK
    /*
     * early_idt_handlers: a table of NUM_EXCEPTION_VECTORS identical stubs
     * generated with .rept.  Stub number i loads i (its vector number) into
     * %esi and jumps to the common early_idt_handler.
     */
 271        .globl early_idt_handlers
 272early_idt_handlers:
 273        i = 0
 274        .rept NUM_EXCEPTION_VECTORS
 275        movl $i, %esi
 276        jmp early_idt_handler
 277        i = i + 1
 278        .endr
 279#endif
 280
 281ENTRY(early_idt_handler)
    /*
     * Common early exception handler.  It never returns: every path ends in
     * the hlt loop at label 1.
     *
     * With CONFIG_EARLY_PRINTK it expects the vector number in %esi (set by
     * the early_idt_handlers stubs) and prints early_idt_msg through
     * early_printk with these argument registers:
     *   %rdi = format, %rsi = vector, %rdx = cs, %rcx = ip,
     *   %r8  = error code (0 if the vector pushes none), %r9 = cr2
     * early_recursion_flag is incremented on each entry; once it equals 2
     * the handler goes straight to the hlt loop.
     *
     * Without CONFIG_EARLY_PRINTK the handler is only the hlt loop.
     */
 282#ifdef CONFIG_EARLY_PRINTK
 283        cmpl $2,early_recursion_flag(%rip)
 284        jz  1f
 285        incl early_recursion_flag(%rip)
 286        GET_CR2_INTO_RCX
 287        movq %rcx,%r9
 288        xorl %r8d,%r8d          # zero for error code
 289        movl %esi,%ecx          # get vector number
 290        # Test %ecx against mask of vectors that push error code.
            /*
             * Vectors above 31 are skipped.  Otherwise %eax = 1 << vector is
             * tested against 0x27d00, which has bits 8, 10-14 and 17 set.
             */
 291        cmpl $31,%ecx
 292        ja 0f
 293        movl $1,%eax
 294        salq %cl,%rax
 295        testl $0x27d00,%eax
 296        je 0f
 297        popq %r8                # get error code
 2980:      movq 0(%rsp),%rcx       # get ip
 299        movq 8(%rsp),%rdx       # get cs
            /* %eax = 0: no vector registers are passed to the variadic early_printk */
 300        xorl %eax,%eax
 301        leaq early_idt_msg(%rip),%rdi
 302        call early_printk
            /* Skip the stack dump and symbol lookup when this is the second entry. */
 303        cmpl $2,early_recursion_flag(%rip)
 304        jz  1f
 305        call dump_stack
 306#ifdef CONFIG_KALLSYMS  
            /* __print_symbol(early_idt_ripmsg, saved ip) */
 307        leaq early_idt_ripmsg(%rip),%rdi
 308        movq 0(%rsp),%rsi       # get rip again
 309        call __print_symbol
 310#endif
 311#endif /* EARLY_PRINTK */
 3121:      hlt
 313        jmp 1b
 314
 315#ifdef CONFIG_EARLY_PRINTK
    /*
     * Data used only by early_idt_handler: the 32-bit entry counter it
     * compares against 2, and the two format strings it prints.
     */
 316early_recursion_flag:
 317        .long 0
 318
 319early_idt_msg:
 320        .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n"
 321early_idt_ripmsg:
 322        .asciz "RIP %s\n"
 323#endif /* CONFIG_EARLY_PRINTK */
    /* Return to the section that was current before .init.text was selected. */
 324        .previous
 325
 326.balign PAGE_SIZE
 327
    /*
     * NEXT_PAGE(name): align to PAGE_SIZE and open a new ENTRY(name) symbol,
     * so each table defined with it starts on its own page boundary.
     */
 328#define NEXT_PAGE(name) \
 329        .balign PAGE_SIZE; \
 330ENTRY(name)
 331
 332/* Automate the creation of 1 to 1 mapping pmd entries */
    /*
     * PMDS(START, PERM, COUNT): emit COUNT quadwords; entry i has the value
     * START + (i << PMD_SHIFT) + PERM.  Uses the assembler symbol `i` as its
     * loop counter and leaves it set to COUNT.
     */
 333#define PMDS(START, PERM, COUNT)                        \
 334        i = 0 ;                                         \
 335        .rept (COUNT) ;                                 \
 336        .quad   (START) + (i << PMD_SHIFT) + (PERM) ;   \
 337        i = i + 1 ;                                     \
 338        .endr
 339
 340        /*
 341         * This default setting generates an ident mapping at address 0x100000
 342         * and a mapping for the kernel that precisely maps virtual address
 343         * 0xffffffff80000000 to physical address 0x000000. (always using
 344         * 2Mbyte large pages provided by PAE mode)
 345         *
         * init_level4_pgt is the table secondary_startup_64 loads into CR3.
         * It has three populated slots, each a link-time physical address
         * (symbol - __START_KERNEL_map) plus flag bits:
         *   slot 0               -> level3_ident_pgt
         *   slot L4_PAGE_OFFSET  -> level3_ident_pgt
         *   slot L4_START_KERNEL -> level3_kernel_pgt
         * The .org directives advance to each slot, filling the gap with 0.
         * startup_64 adds the load delta to exactly these three slots.
         */
 346NEXT_PAGE(init_level4_pgt)
 347        .quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
 348        .org    init_level4_pgt + L4_PAGE_OFFSET*8, 0
 349        .quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
 350        .org    init_level4_pgt + L4_START_KERNEL*8, 0
 351        /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
 352        .quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
 353
 354NEXT_PAGE(level3_ident_pgt)
    /*
     * Level-3 table for the identity mapping: slot 0 points at
     * level2_ident_pgt and the other 511 slots are assembled as 0.
     * startup_64 may fill one more slot with level2_spare_pgt.
     */
 355        .quad   level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
 356        .fill   511,8,0
 357
 358NEXT_PAGE(level3_kernel_pgt)
    /*
     * Level-3 table for the kernel mapping: L3_START_KERNEL empty slots,
     * then one entry for level2_kernel_pgt and one for level2_fixmap_pgt.
     * startup_64 patches these two entries as slots 510 and 511.
     */
 359        .fill   L3_START_KERNEL,8,0
 360        /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
 361        .quad   level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
 362        .quad   level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
 363
 364NEXT_PAGE(level2_fixmap_pgt)
    /*
     * 512 slots in total: 506 empty, slot 506 pointing at level1_fixmap_pgt
     * (the slot startup_64 patches), then 5 empty.
     */
 365        .fill   506,8,0
 366        .quad   level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
 367        /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
 368        .fill   5,8,0
 369
 370NEXT_PAGE(level1_fixmap_pgt)
    /* One page of 512 zeroed 8-byte entries; nothing in this file fills it. */
 371        .fill   512,8,0
 372
 373NEXT_PAGE(level2_ident_pgt)
 374        /* Since I easily can, map the first 1G.
 375         * Don't set NX because code runs from these pages.
 376         *
         * PTRS_PER_PMD entries are generated; entry i holds
         * (i << PMD_SHIFT) + __PAGE_KERNEL_IDENT_LARGE_EXEC, i.e. physical
         * addresses counted up from 0.
         */
 377        PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
 378
 379NEXT_PAGE(level2_kernel_pgt)
 380        /*
 381         * 512 MB kernel mapping. We spend a full page on this pagetable
 382         * anyway.
 383         *
 384         * The kernel code+data+bss must not be bigger than that.
 385         *
 386         * (NOTE: at +512MB starts the module area, see MODULES_VADDR.
 387         *  If you want to increase this then increase MODULES_VADDR
 388         *  too.)
 389         *
         * KERNEL_IMAGE_SIZE/PMD_SIZE entries are generated, entry i holding
         * (i << PMD_SHIFT) + __PAGE_KERNEL_LARGE_EXEC.  startup_64 later
         * walks the whole 4096-byte page and adds the load delta to every
         * entry that has bit 0 set.
         */
 390        PMDS(0, __PAGE_KERNEL_LARGE_EXEC,
 391                KERNEL_IMAGE_SIZE/PMD_SIZE)
 392
 393NEXT_PAGE(level2_spare_pgt)
    /*
     * Zero-filled level-2 table.  startup_64 hooks it into level3_ident_pgt
     * and writes one entry into it when the kernel text does not lie in PUD
     * slot 0.
     */
 394        .fill   512, 8, 0
 395
    /* The table-building helper macros are not needed past this point. */
 396#undef PMDS
 397#undef NEXT_PAGE
 398
 399        .data
 400        .align 16
    /*
     * early_gdt_descr: the operand of the lgdt in secondary_startup_64.
     * It is a 16-bit limit (GDT_ENTRIES*8-1) followed by a 64-bit base
     * (the address of per_cpu__gdt_page).
     */
 401        .globl early_gdt_descr
 402early_gdt_descr:
 403        .word   GDT_ENTRIES*8-1
 404        .quad   per_cpu__gdt_page
 405
    /*
     * phys_base: assembled as 0.  startup_64 adds the load delta to it, and
     * secondary_startup_64 adds it to the link-time physical address of
     * init_level4_pgt to form the CR3 value.
     */
 406ENTRY(phys_base)
 407        /* This must match the first entry in level2_kernel_pgt */
 408        .quad   0x0000000000000000
 409
 410#include "../../x86/xen/xen-head.S"
 411        
 412        .section .bss, "aw", @nobits
 413        .align L1_CACHE_BYTES
    /* idt_table: reserves 256 * 16 bytes, aligned to L1_CACHE_BYTES. */
 414ENTRY(idt_table)
 415        .skip 256 * 16
 416
 417        .section .bss.page_aligned, "aw", @nobits
 418        .align PAGE_SIZE
    /*
     * empty_zero_page: one PAGE_SIZE-aligned page of reserved space.
     * secondary_startup_64 writes its address to MSR_GS_BASE as the dummy PDA.
     */
 419ENTRY(empty_zero_page)
 420        .skip PAGE_SIZE
 421
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.