linux/arch/ppc/mm/fault.c
<<
>>
Prefs
   1/*
   2 *  PowerPC version
   3 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
   4 *
   5 *  Derived from "arch/i386/mm/fault.c"
   6 *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   7 *
   8 *  Modified by Cort Dougan and Paul Mackerras.
   9 *
  10 *  This program is free software; you can redistribute it and/or
  11 *  modify it under the terms of the GNU General Public License
  12 *  as published by the Free Software Foundation; either version
  13 *  2 of the License, or (at your option) any later version.
  14 */
  15
  16#include <linux/signal.h>
  17#include <linux/sched.h>
  18#include <linux/kernel.h>
  19#include <linux/errno.h>
  20#include <linux/string.h>
  21#include <linux/types.h>
  22#include <linux/ptrace.h>
  23#include <linux/mman.h>
  24#include <linux/mm.h>
  25#include <linux/interrupt.h>
  26#include <linux/highmem.h>
  27#include <linux/module.h>
  28
  29#include <asm/page.h>
  30#include <asm/pgtable.h>
  31#include <asm/mmu.h>
  32#include <asm/mmu_context.h>
  33#include <asm/system.h>
  34#include <asm/uaccess.h>
  35#include <asm/tlbflush.h>
  36
  37#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
  38extern void (*debugger)(struct pt_regs *);
  39extern void (*debugger_fault_handler)(struct pt_regs *);
  40extern int (*debugger_dabr_match)(struct pt_regs *);
  41int debugger_kernel_faults = 1;
  42#endif
  43
  44unsigned long htab_reloads;     /* updated by hashtable.S:hash_page() */
  45unsigned long htab_evicts;      /* updated by hashtable.S:hash_page() */
  46unsigned long htab_preloads;    /* updated by hashtable.S:add_hash_page() */
  47unsigned long pte_misses;       /* updated by do_page_fault() */
  48unsigned long pte_errors;       /* updated by do_page_fault() */
  49unsigned int probingmem;
  50
  51/*
  52 * Check whether the instruction at regs->nip is a store using
  53 * an update addressing form which will update r1.
  54 */
  55static int store_updates_sp(struct pt_regs *regs)
  56{
  57        unsigned int inst;
  58
  59        if (get_user(inst, (unsigned int __user *)regs->nip))
  60                return 0;
  61        /* check for 1 in the rA field */
  62        if (((inst >> 16) & 0x1f) != 1)
  63                return 0;
  64        /* check major opcode */
  65        switch (inst >> 26) {
  66        case 37:        /* stwu */
  67        case 39:        /* stbu */
  68        case 45:        /* sthu */
  69        case 53:        /* stfsu */
  70        case 55:        /* stfdu */
  71                return 1;
  72        case 31:
  73                /* check minor opcode */
  74                switch ((inst >> 1) & 0x3ff) {
  75                case 183:       /* stwux */
  76                case 247:       /* stbux */
  77                case 439:       /* sthux */
  78                case 695:       /* stfsux */
  79                case 759:       /* stfdux */
  80                        return 1;
  81                }
  82        }
  83        return 0;
  84}
  85
  86/*
  87 * For 600- and 800-family processors, the error_code parameter is DSISR
  88 * for a data fault, SRR1 for an instruction fault. For 400-family processors
  89 * the error_code parameter is ESR for a data fault, 0 for an instruction
  90 * fault.
  91 */
  92int do_page_fault(struct pt_regs *regs, unsigned long address,
  93                  unsigned long error_code)
  94{
  95        struct vm_area_struct * vma;
  96        struct mm_struct *mm = current->mm;
  97        siginfo_t info;
  98        int code = SEGV_MAPERR;
  99        int fault;
 100#if defined(CONFIG_4xx) || defined (CONFIG_BOOKE)
 101        int is_write = error_code & ESR_DST;
 102#else
 103        int is_write = 0;
 104
 105        /*
 106         * Fortunately the bit assignments in SRR1 for an instruction
 107         * fault and DSISR for a data fault are mostly the same for the
 108         * bits we are interested in.  But there are some bits which
 109         * indicate errors in DSISR but can validly be set in SRR1.
 110         */
 111        if (TRAP(regs) == 0x400)
 112                error_code &= 0x48200000;
 113        else
 114                is_write = error_code & 0x02000000;
 115#endif /* CONFIG_4xx || CONFIG_BOOKE */
 116
 117#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
 118        if (debugger_fault_handler && TRAP(regs) == 0x300) {
 119                debugger_fault_handler(regs);
 120                return 0;
 121        }
 122#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
 123        if (error_code & 0x00400000) {
 124                /* DABR match */
 125                if (debugger_dabr_match(regs))
 126                        return 0;
 127        }
 128#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
 129#endif /* CONFIG_XMON || CONFIG_KGDB */
 130
 131        if (in_atomic() || mm == NULL)
 132                return SIGSEGV;
 133
 134        down_read(&mm->mmap_sem);
 135        vma = find_vma(mm, address);
 136        if (!vma)
 137                goto bad_area;
 138        if (vma->vm_start <= address)
 139                goto good_area;
 140        if (!(vma->vm_flags & VM_GROWSDOWN))
 141                goto bad_area;
 142        if (!is_write)
 143                goto bad_area;
 144
 145        /*
 146         * N.B. The rs6000/xcoff ABI allows programs to access up to
 147         * a few hundred bytes below the stack pointer.
 148         * The kernel signal delivery code writes up to about 1.5kB
 149         * below the stack pointer (r1) before decrementing it.
 150         * The exec code can write slightly over 640kB to the stack
 151         * before setting the user r1.  Thus we allow the stack to
 152         * expand to 1MB without further checks.
 153         */
 154        if (address + 0x100000 < vma->vm_end) {
 155                /* get user regs even if this fault is in kernel mode */
 156                struct pt_regs *uregs = current->thread.regs;
 157                if (uregs == NULL)
 158                        goto bad_area;
 159
 160                /*
 161                 * A user-mode access to an address a long way below
 162                 * the stack pointer is only valid if the instruction
 163                 * is one which would update the stack pointer to the
 164                 * address accessed if the instruction completed,
 165                 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
 166                 * (or the byte, halfword, float or double forms).
 167                 *
 168                 * If we don't check this then any write to the area
 169                 * between the last mapped region and the stack will
 170                 * expand the stack rather than segfaulting.
 171                 */
 172                if (address + 2048 < uregs->gpr[1]
 173                    && (!user_mode(regs) || !store_updates_sp(regs)))
 174                        goto bad_area;
 175        }
 176        if (expand_stack(vma, address))
 177                goto bad_area;
 178
 179good_area:
 180        code = SEGV_ACCERR;
 181#if defined(CONFIG_6xx)
 182        if (error_code & 0x95700000)
 183                /* an error such as lwarx to I/O controller space,
 184                   address matching DABR, eciwx, etc. */
 185                goto bad_area;
 186#endif /* CONFIG_6xx */
 187#if defined(CONFIG_8xx)
 188        /* The MPC8xx seems to always set 0x80000000, which is
 189         * "undefined".  Of those that can be set, this is the only
 190         * one which seems bad.
 191         */
 192        if (error_code & 0x10000000)
 193                /* Guarded storage error. */
 194                goto bad_area;
 195#endif /* CONFIG_8xx */
 196
 197        /* a write */
 198        if (is_write) {
 199                if (!(vma->vm_flags & VM_WRITE))
 200                        goto bad_area;
 201#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
 202        /* an exec  - 4xx/Book-E allows for per-page execute permission */
 203        } else if (TRAP(regs) == 0x400) {
 204                pte_t *ptep;
 205                pmd_t *pmdp;
 206
 207#if 0
 208                /* It would be nice to actually enforce the VM execute
 209                   permission on CPUs which can do so, but far too
 210                   much stuff in userspace doesn't get the permissions
 211                   right, so we let any page be executed for now. */
 212                if (! (vma->vm_flags & VM_EXEC))
 213                        goto bad_area;
 214#endif
 215
 216                /* Since 4xx/Book-E supports per-page execute permission,
 217                 * we lazily flush dcache to icache. */
 218                ptep = NULL;
 219                if (get_pteptr(mm, address, &ptep, &pmdp)) {
 220                        spinlock_t *ptl = pte_lockptr(mm, pmdp);
 221                        spin_lock(ptl);
 222                        if (pte_present(*ptep)) {
 223                                struct page *page = pte_page(*ptep);
 224
 225                                if (!test_bit(PG_arch_1, &page->flags)) {
 226                                        flush_dcache_icache_page(page);
 227                                        set_bit(PG_arch_1, &page->flags);
 228                                }
 229                                pte_update(ptep, 0, _PAGE_HWEXEC);
 230                                _tlbie(address, mm->context.id);
 231                                pte_unmap_unlock(ptep, ptl);
 232                                up_read(&mm->mmap_sem);
 233                                return 0;
 234                        }
 235                        pte_unmap_unlock(ptep, ptl);
 236                }
 237#endif
 238        /* a read */
 239        } else {
 240                /* protection fault */
 241                if (error_code & 0x08000000)
 242                        goto bad_area;
 243                if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
 244                        goto bad_area;
 245        }
 246
 247        /*
 248         * If for any reason at all we couldn't handle the fault,
 249         * make sure we exit gracefully rather than endlessly redo
 250         * the fault.
 251         */
 252 survive:
 253        fault = handle_mm_fault(mm, vma, address, is_write);
 254        if (unlikely(fault & VM_FAULT_ERROR)) {
 255                if (fault & VM_FAULT_OOM)
 256                        goto out_of_memory;
 257                else if (fault & VM_FAULT_SIGBUS)
 258                        goto do_sigbus;
 259                BUG();
 260        }
 261        if (fault & VM_FAULT_MAJOR)
 262                current->maj_flt++;
 263        else
 264                current->min_flt++;
 265
 266        up_read(&mm->mmap_sem);
 267        /*
 268         * keep track of tlb+htab misses that are good addrs but
 269         * just need pte's created via handle_mm_fault()
 270         * -- Cort
 271         */
 272        pte_misses++;
 273        return 0;
 274
 275bad_area:
 276        up_read(&mm->mmap_sem);
 277        pte_errors++;
 278
 279        /* User mode accesses cause a SIGSEGV */
 280        if (user_mode(regs)) {
 281                _exception(SIGSEGV, regs, code, address);
 282                return 0;
 283        }
 284
 285        return SIGSEGV;
 286
 287/*
 288 * We ran out of memory, or some other thing happened to us that made
 289 * us unable to handle the page fault gracefully.
 290 */
 291out_of_memory:
 292        up_read(&mm->mmap_sem);
 293        if (is_global_init(current)) {
 294                yield();
 295                down_read(&mm->mmap_sem);
 296                goto survive;
 297        }
 298        printk("VM: killing process %s\n", current->comm);
 299        if (user_mode(regs))
 300                do_group_exit(SIGKILL);
 301        return SIGKILL;
 302
 303do_sigbus:
 304        up_read(&mm->mmap_sem);
 305        info.si_signo = SIGBUS;
 306        info.si_errno = 0;
 307        info.si_code = BUS_ADRERR;
 308        info.si_addr = (void __user *)address;
 309        force_sig_info (SIGBUS, &info, current);
 310        if (!user_mode(regs))
 311                return SIGBUS;
 312        return 0;
 313}
 314
 315/*
 316 * bad_page_fault is called when we have a bad access from the kernel.
 317 * It is called from the DSI and ISI handlers in head.S and from some
 318 * of the procedures in traps.c.
 319 */
 320void
 321bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
 322{
 323        const struct exception_table_entry *entry;
 324
 325        /* Are we prepared to handle this fault?  */
 326        if ((entry = search_exception_tables(regs->nip)) != NULL) {
 327                regs->nip = entry->fixup;
 328                return;
 329        }
 330
 331        /* kernel has accessed a bad area */
 332#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
 333        if (debugger_kernel_faults)
 334                debugger(regs);
 335#endif
 336        die("kernel access of bad area", regs, sig);
 337}
 338
 339#ifdef CONFIG_8xx
 340
 341/* The pgtable.h claims some functions generically exist, but I
 342 * can't find them......
 343 */
 344pte_t *va_to_pte(unsigned long address)
 345{
 346        pgd_t *dir;
 347        pmd_t *pmd;
 348        pte_t *pte;
 349
 350        if (address < TASK_SIZE)
 351                return NULL;
 352
 353        dir = pgd_offset(&init_mm, address);
 354        if (dir) {
 355                pmd = pmd_offset(dir, address & PAGE_MASK);
 356                if (pmd && pmd_present(*pmd)) {
 357                        pte = pte_offset_kernel(pmd, address & PAGE_MASK);
 358                        if (pte && pte_present(*pte))
 359                                return(pte);
 360                }
 361        }
 362        return NULL;
 363}
 364
 365unsigned long va_to_phys(unsigned long address)
 366{
 367        pte_t *pte;
 368
 369        pte = va_to_pte(address);
 370        if (pte)
 371                return(((unsigned long)(pte_val(*pte)) & PAGE_MASK) | (address & ~(PAGE_MASK)));
 372        return (0);
 373}
 374
 375void
 376print_8xx_pte(struct mm_struct *mm, unsigned long addr)
 377{
 378        pgd_t * pgd;
 379        pmd_t * pmd;
 380        pte_t * pte;
 381
 382        printk(" pte @ 0x%8lx: ", addr);
 383        pgd = pgd_offset(mm, addr & PAGE_MASK);
 384        if (pgd) {
 385                pmd = pmd_offset(pgd, addr & PAGE_MASK);
 386                if (pmd && pmd_present(*pmd)) {
 387                        pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
 388                        if (pte) {
 389                                printk(" (0x%08lx)->(0x%08lx)->0x%08lx\n",
 390                                        (long)pgd, (long)pte, (long)pte_val(*pte));
 391#define pp ((long)pte_val(*pte))                        
 392                                printk(" RPN: %05lx PP: %lx SPS: %lx SH: %lx "
 393                                       "CI: %lx v: %lx\n",
 394                                       pp>>12,    /* rpn */
 395                                       (pp>>10)&3, /* pp */
 396                                       (pp>>3)&1, /* small */
 397                                       (pp>>2)&1, /* shared */
 398                                       (pp>>1)&1, /* cache inhibit */
 399                                       pp&1       /* valid */
 400                                       );
 401#undef pp                       
 402                        }
 403                        else {
 404                                printk("no pte\n");
 405                        }
 406                }
 407                else {
 408                        printk("no pmd\n");
 409                }
 410        }
 411        else {
 412                printk("no pgd\n");
 413        }
 414}
 415
 416int
 417get_8xx_pte(struct mm_struct *mm, unsigned long addr)
 418{
 419        pgd_t * pgd;
 420        pmd_t * pmd;
 421        pte_t * pte;
 422        int     retval = 0;
 423
 424        pgd = pgd_offset(mm, addr & PAGE_MASK);
 425        if (pgd) {
 426                pmd = pmd_offset(pgd, addr & PAGE_MASK);
 427                if (pmd && pmd_present(*pmd)) {
 428                        pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
 429                        if (pte) {
 430                                retval = (int)pte_val(*pte);
 431                        }
 432                }
 433        }
 434        return(retval);
 435}
 436#endif /* CONFIG_8xx */
 437
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.