/*
 * linux/arch/cris/mm/fault.c
 *
 * Copyright (C) 2000, 2001 Axis Communications AB
 *
 * Authors: Bjorn Wesen
 *
 * $Log: fault.c,v $
 * Revision 1.23 2003/10/16 05:32:32 starvik
 * Only read TLB_SELECT if DEBUG
 *
 * Revision 1.22 2003/07/07 09:07:04 johana
 * Added special CONFIG_ETRAX_DEBUG_INTERRUPT handling here
 * to deal with a di in entry.S
 *
 * Revision 1.21 2002/05/28 14:24:56 bjornw
 * Corrected typo
 *
 * Revision 1.20 2001/11/22 13:34:06 bjornw
 * * Bug workaround (LX TR89): force a rerun of the whole of an interrupted
 * unaligned write, because the second half of the write will be corrupted
 * otherwise. Affected unaligned writes spanning not-yet mapped pages.
 * * Optimization: use the wr_rd bit in R_MMU_CAUSE to know whether a miss
 * was due to a read or a write (before we didn't know this until the next
 * restart of the interrupted instruction, thus wasting one fault-irq)
 *
 * Revision 1.19 2001/11/12 19:02:10 pkj
 * Fixed compiler warnings.
 *
 * Revision 1.18 2001/07/18 22:14:32 bjornw
 * Enable interrupts in the bulk of do_page_fault
 *
 * Revision 1.17 2001/07/18 13:07:23 bjornw
 * * Detect non-existant PTE's in vmalloc pmd synchronization
 * * Remove comment about fast-paths for VMALLOC_START etc, because all that
 * was totally bogus anyway it turned out :)
 * * Fix detection of vmalloc-area synchronization
 * * Add some comments
 *
 * Revision 1.16 2001/06/13 00:06:08 bjornw
 * current_pgd should be volatile
 *
 * Revision 1.15 2001/06/13 00:02:23 bjornw
 * Use a separate variable to store the current pgd to avoid races in schedule
 *
 * Revision 1.14 2001/05/16 17:41:07 hp
 * Last comment tweak further tweaked.
 *
 * Revision 1.13 2001/05/15 00:58:44 hp
 * Expand a bit on the comment why we compare address >= TASK_SIZE rather
 * than >= VMALLOC_START.
 *
 * Revision 1.12 2001/04/04 10:51:14 bjornw
 * mmap_sem is grabbed for reading
 *
 * Revision 1.11 2001/03/23 07:36:07 starvik
 * Corrected according to review remarks
 *
 * Revision 1.10 2001/03/21 16:10:11 bjornw
 * CRIS_FRAME_FIXUP not needed anymore, use FRAME_NORMAL
 *
 * Revision 1.9 2001/03/05 13:22:20 bjornw
 * Spell-fix and fix in vmalloc_fault handling
 *
 * Revision 1.8 2000/11/22 14:45:31 bjornw
 * * 2.4.0-test10 removed the set_pgdir instantaneous kernel global mapping
 * into all processes. Instead we fill in the missing PTE entries on demand.
 *
 * Revision 1.7 2000/11/21 16:39:09 bjornw
 * fixup switches frametype
 *
 * Revision 1.6 2000/11/17 16:54:08 bjornw
 * More detailed siginfo reporting
 *
 *
 */

#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/interrupt.h>

#include <asm/system.h>
#include <asm/segment.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/svinto.h>

extern void die_if_kernel(const char *,struct pt_regs *,long);

asmlinkage void do_invalid_op (struct pt_regs *, unsigned long);
asmlinkage void do_page_fault(unsigned long address, struct pt_regs *regs,
			      int error_code);

/* debug of low-level TLB reload
 *
 * D(x) expands to x only when DEBUG is defined; DEBUG is explicitly
 * undefined here, so by default every D(...) statement compiles away.
 */
#undef DEBUG

#ifdef DEBUG
#define D(x) x
#else
#define D(x)
#endif

/* debug of higher-level faults (defined empty here, so DPG(...) is a no-op) */
#define DPG(x)

/* current active page directory
 *
 * Both fault handlers below walk the page tables through this pointer
 * rather than through the current task's mm (see the comment in the
 * vmalloc_fault path of do_page_fault for the reason).
 */

volatile pgd_t *current_pgd;

/* fast TLB-fill fault handler
 * this is called from entry.S with interrupts disabled
 *
 * Decodes R_MMU_CAUSE into the page-aligned faulting address and the
 * miss_excp / we_excp / wr_rd flags. For a miss whose pte is present in
 * the page table reached through current_pgd, the pte is written to
 * R_TLB_LO and the handler returns. A bad pmd entry is reported, cleared
 * and the handler returns. Every other case is forwarded to
 * do_page_fault() with an errcode in the format documented there
 * (bit 0: protection fault, bit 1: write access).
 *
 * regs: register frame of the interrupted context. For write accesses,
 *       bit 5 of regs->csrinstr is cleared (TR89 workaround, see below).
 */

void
handle_mmu_bus_fault(struct pt_regs *regs)
{
	int cause;
#ifdef DEBUG
	int select;
	int index;
	int page_id;
	int acc, inv;
#endif
	int miss, we, writeac;
	pmd_t *pmd;
	pte_t pte;
	int errcode;
	unsigned long address;

#ifdef CONFIG_ETRAX_DEBUG_INTERRUPT /* The di is actually in entry.S */
	log_int(rdpc(), regs->dccr, 0);
#endif
	/* R_MMU_CAUSE is read once; all fields below come from this copy */
	cause = *R_MMU_CAUSE;

	/* only the page part is kept, so address is page-aligned */
	address = cause & PAGE_MASK; /* get faulting address */

#ifdef DEBUG
	select = *R_TLB_SELECT;
	page_id = IO_EXTRACT(R_MMU_CAUSE, page_id, cause);
	acc = IO_EXTRACT(R_MMU_CAUSE, acc_excp, cause);
	inv = IO_EXTRACT(R_MMU_CAUSE, inv_excp, cause);
	index = IO_EXTRACT(R_TLB_SELECT, index, select);
#endif
	miss = IO_EXTRACT(R_MMU_CAUSE, miss_excp, cause);
	we = IO_EXTRACT(R_MMU_CAUSE, we_excp, cause);
	writeac = IO_EXTRACT(R_MMU_CAUSE, wr_rd, cause);

	/* ETRAX 100LX TR89 bugfix: if the second half of an unaligned
	 * write causes a MMU-fault, it will not be restarted correctly.
	 * This could happen if a write crosses a page-boundary and the
	 * second page is not yet COW'ed or even loaded. The workaround
	 * is to clear the unaligned bit in the CPU status record, so
	 * that the CPU will rerun both the first and second halves of
	 * the instruction. This will not have any side effects unless
	 * the first half goes to any device or memory that can't be
	 * written twice, and which is mapped through the MMU.
	 *
	 * We only need to do this for writes.
	 */

	if(writeac)
		regs->csrinstr &= ~(1 << 5);

	/* Set errcode's R/W flag according to the mode which caused the
	 * fault
	 *
	 * Bit 0 stays clear here ("no page found"); this is the value
	 * handed to do_page_fault when the miss path jumps to dofault.
	 */

	errcode = writeac << 1;

	/* the DEBUG-only variables used here exist only when D() expands */
	D(printk("bus_fault from IRP 0x%lx: addr 0x%lx, miss %d, inv %d, we %d, acc %d, dx %d pid %d\n",
		 regs->irp, address, miss, inv, we, acc, index, page_id));

	/* for a miss, we need to reload the TLB entry */

	if (miss) {
		/* see if the pte exists at all
		 * refer through current_pgd, don't use mm->pgd
		 */

		pmd = (pmd_t *)(current_pgd + pgd_index(address));
		if (pmd_none(*pmd))
			goto dofault;
		if (pmd_bad(*pmd)) {
			/* Report and clear the bad entry, then return without
			 * calling do_page_fault.
			 * NOTE(review): presumably the access faults again and
			 * then takes the pmd_none path above - confirm.
			 */
			printk("bad pgdir entry 0x%lx at 0x%p\n", *(unsigned long*)pmd, pmd);
			pmd_clear(pmd);
			return;
		}
		pte = *pte_offset(pmd, address);
		if (!pte_present(pte))
			goto dofault;

#ifdef DEBUG
		printk(" found pte %lx pg %p ", pte_val(pte), pte_page(pte));
		if (pte_val(pte) & _PAGE_SILENT_WRITE)
			printk("Silent-W ");
		if (pte_val(pte) & _PAGE_KERNEL)
			printk("Kernel ");
		if (pte_val(pte) & _PAGE_SILENT_READ)
			printk("Silent-R ");
		if (pte_val(pte) & _PAGE_GLOBAL)
			printk("Global ");
		if (pte_val(pte) & _PAGE_PRESENT)
			printk("Present ");
		if (pte_val(pte) & _PAGE_ACCESSED)
			printk("Accessed ");
		if (pte_val(pte) & _PAGE_MODIFIED)
			printk("Modified ");
		if (pte_val(pte) & _PAGE_READ)
			printk("Readable ");
		if (pte_val(pte) & _PAGE_WRITE)
			printk("Writeable ");
		printk("\n");
#endif

		/* load up the chosen TLB entry
		 * this assumes the pte format is the same as the TLB_LO layout.
		 *
		 * the write to R_TLB_LO also writes the vpn and page_id fields from
		 * R_MMU_CAUSE, which we in this case obviously want to keep
		 */

		*R_TLB_LO = pte_val(pte);

		return;
	}

	/* Not a miss: flag a protection fault (bit 0) and take the write
	 * bit from we_excp. This replaces the errcode computed from wr_rd
	 * above; the dofault label below is entered directly only from the
	 * miss path, which keeps the earlier value.
	 */
	errcode = 1 | (we << 1);

 dofault:
	/* leave it to the MM system fault handler below */
	D(printk("do_page_fault %lx errcode %d\n", address, errcode));
	do_page_fault(address, regs, errcode);
}

/*
 * This routine handles page faults. It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 *
 * Notice that the address we're given is aligned to the page the fault
 * occurred in, since we only get the PFN in R_MMU_CAUSE not the complete
 * address.
 *
 * error_code:
 *	bit 0 == 0 means no page found, 1 means protection fault
 *	bit 1 == 0 means read, 1 means write
 *
 * The routine returns nothing. Depending on the outcome it:
 *  - returns normally after handle_mm_fault() reported a minor (1) or
 *    major (2) fault, or after the vmalloc synchronization succeeded;
 *  - sends SIGSEGV to the task for a bad user-mode access, or SIGBUS
 *    when handle_mm_fault() returns 0;
 *  - for a bad kernel-mode access, redirects regs->irp to a fixup found
 *    by search_exception_table(), or else prints an oops message and
 *    calls die_if_kernel() followed by do_exit(SIGKILL);
 *  - on any other handle_mm_fault() return value, treats the situation
 *    as out of memory (see the out_of_memory label).
 */

asmlinkage void
do_page_fault(unsigned long address, struct pt_regs *regs,
	      int error_code)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct * vma;
	int writeaccess;
	unsigned long fixup;
	siginfo_t info;

	tsk = current;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * NOTE2: This is done so that, when updating the vmalloc
	 * mappings we don't have to walk all processes pgdirs and
	 * add the high mappings all at once. Instead we do it as they
	 * are used. However vmalloc'ed page entries have the PAGE_GLOBAL
	 * bit set so sometimes the TLB can use a lingering entry.
	 *
	 * This verifies that the fault happens in kernel space
	 * and that the fault was not a protection error (error_code & 1).
	 */

	if (address >= VMALLOC_START &&
	    !(error_code & 1) &&
	    !user_mode(regs))
		goto vmalloc_fault;

	/* we can and should enable interrupts at this point */
	sti();

	mm = tsk->mm;
	writeaccess = error_code & 2;
	/* default signal code; changed to SEGV_ACCERR once a vma is found */
	info.si_code = SEGV_MAPERR;

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */

	if (in_interrupt() || !mm)
		goto no_context;

	/* mmap_sem is held for reading from here until one of the
	 * up_read() calls below; every exit path past this point goes
	 * through exactly one of them.
	 */
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	/* address is below the vma found: only acceptable if that vma
	 * can grow downwards
	 */
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (user_mode(regs)) {
		/*
		 * accessing the stack below usp is always a bug.
		 * we get page-aligned addresses so we can only check
		 * if we're within a page from usp, but that might be
		 * enough to catch brutal errors at least.
		 */
		if (address + PAGE_SIZE < rdusp())
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */

 good_area:
	info.si_code = SEGV_ACCERR;

	/* first do some preliminary protection checks */

	if (writeaccess) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	} else {
		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 *
	 * Return values as handled here: 1 is counted as a minor fault,
	 * 2 as a major fault, 0 leads to SIGBUS and anything else is
	 * treated as out of memory.
	 */

	switch (handle_mm_fault(mm, vma, address, writeaccess)) {
	case 1:
		tsk->min_flt++;
		break;
	case 2:
		tsk->maj_flt++;
		break;
	case 0:
		goto do_sigbus;
	default:
		goto out_of_memory;
	}

	up_read(&mm->mmap_sem);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */

 bad_area:
	up_read(&mm->mmap_sem);

	/* Entered directly (without the semaphore held) from the
	 * vmalloc_fault path. That path is taken only when
	 * !user_mode(regs), so it skips the SIGSEGV branch below and
	 * never reads info, which it has not initialized.
	 */
 bad_area_nosemaphore:
	DPG(show_registers(regs));

	/* User mode accesses just cause a SIGSEGV */

	if (user_mode(regs)) {
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		/* info.si_code has been set above */
		info.si_addr = (void *)address;
		force_sig_info(SIGSEGV, &info, tsk);
		return;
	}

 no_context:

	/* Are we prepared to handle this kernel fault?
	 *
	 * (The kernel has valid exception-points in the source
	 * when it accesses user-memory. When it fails in one
	 * of those points, we find it in a table and do a jump
	 * to some fixup code that loads an appropriate error
	 * code)
	 */

	if ((fixup = search_exception_table(regs->irp)) != 0) {
		/* Adjust the instruction pointer in the stackframe */

		regs->irp = fixup;

		/* We do not want to return by restoring the CPU-state
		 * anymore, so switch frame-types (see ptrace.h)
		 */

		regs->frametype = CRIS_FRAME_NORMAL;

		D(printk("doing fixup to 0x%lx\n", fixup));
		return;
	}

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */

	/* address is page-aligned, so this matches only the first page */
	if ((unsigned long) (address) < PAGE_SIZE)
		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
	else
		printk(KERN_ALERT "Unable to handle kernel access");
	printk(" at virtual address %08lx\n",address);

	die_if_kernel("Oops", regs, error_code);

	do_exit(SIGKILL);

	/*
	 * We ran out of memory, or some other thing happened to us that made
	 * us unable to handle the page fault gracefully.
	 */

 out_of_memory:
	up_read(&mm->mmap_sem);
	printk("VM: killing process %s\n", tsk->comm);
	if (user_mode(regs))
		do_exit(SIGKILL);
	goto no_context;

 do_sigbus:
	up_read(&mm->mmap_sem);

	/*
	 * Send a sigbus, regardless of whether we were in kernel
	 * or user mode.
	 */
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void *)address;
	force_sig_info(SIGBUS, &info, tsk);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
	return;

vmalloc_fault:
	{
		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Use current_pgd instead of tsk->active_mm->pgd
		 * since the latter might be unavailable if this
		 * code is executed in a misfortunately run irq
		 * (like inside schedule() between switch_mm and
		 * switch_to...).
		 */

		int offset = pgd_index(address);
		pgd_t *pgd, *pgd_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;

		/* pgd: entry in the active table; pgd_k: same slot in the
		 * reference table (init_mm.pgd)
		 */
		pgd = (pgd_t *)current_pgd + offset;
		pgd_k = init_mm.pgd + offset;

		/* Since we're two-level, we don't need to do both
		 * set_pgd and set_pmd (they do the same thing). If
		 * we go three-level at some point, do the right thing
		 * with pgd_present and set_pgd here.
		 *
		 * Also, since the vmalloc area is global, we don't
		 * need to copy individual PTE's, it is enough to
		 * copy the pgd pointer into the pte page of the
		 * root task. If that is there, we'll find our pte if
		 * it exists.
		 */

		pmd = pmd_offset(pgd, address);
		pmd_k = pmd_offset(pgd_k, address);

		/* nothing mapped in the reference table either: a genuinely
		 * bad kernel access (no semaphore is held on this path)
		 */
		if (!pmd_present(*pmd_k))
			goto bad_area_nosemaphore;

		set_pmd(pmd, *pmd_k);

		/* Make sure the actual PTE exists as well to
		 * catch kernel vmalloc-area accesses to non-mapped
		 * addresses. If we don't do this, this will just
		 * silently loop forever.
		 */

		pte_k = pte_offset(pmd_k, address);
		if (!pte_present(*pte_k))
			goto no_context;

		return;
	}
}

