linux-old/arch/s390/mm/fault.c
<<
>>
Prefs
   1/*
   2 *  arch/s390/mm/fault.c
   3 *
   4 *  S390 version
   5 *    Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
   6 *    Author(s): Hartmut Penner (hp@de.ibm.com)
   7 *               Ulrich Weigand (uweigand@de.ibm.com)
   8 *
   9 *  Derived from "arch/i386/mm/fault.c"
  10 *    Copyright (C) 1995  Linus Torvalds
  11 */
  12
  13#include <linux/config.h>
  14#include <linux/signal.h>
  15#include <linux/sched.h>
  16#include <linux/kernel.h>
  17#include <linux/errno.h>
  18#include <linux/string.h>
  19#include <linux/types.h>
  20#include <linux/ptrace.h>
  21#include <linux/mman.h>
  22#include <linux/mm.h>
  23#include <linux/smp.h>
  24#include <linux/smp_lock.h>
  25#include <linux/compatmac.h>
  26#include <linux/init.h>
  27#include <linux/console.h>
  28
  29#include <asm/system.h>
  30#include <asm/uaccess.h>
  31#include <asm/pgtable.h>
  32#include <asm/hardirq.h>
  33
  34#ifdef CONFIG_SYSCTL
  35extern int sysctl_userprocess_debug;
  36#endif
  37
  38extern void die(const char *,struct pt_regs *,long);
  39
  40extern spinlock_t timerlist_lock;
  41
  42/*
  43 * Unlock any spinlocks which will prevent us from getting the
  44 * message out (timerlist_lock is acquired through the
  45 * console unblank code)
  46 */
  47void bust_spinlocks(int yes)
  48{
  49        spin_lock_init(&timerlist_lock);
  50        if (yes) {
  51                oops_in_progress = 1;
  52        } else {
  53                int loglevel_save = console_loglevel;
  54                oops_in_progress = 0;
  55                console_unblank();
  56                /*
  57                 * OK, the message is on the console.  Now we call printk()
  58                 * without oops_in_progress set so that printk will give klogd
  59                 * a poke.  Hold onto your hats...
  60                 */
  61                console_loglevel = 15;
  62                printk(" ");
  63                console_loglevel = loglevel_save;
  64        }
  65}
  66
  67/*
  68 * Check which address space is addressed by the access
  69 * register in S390_lowcore.exc_access_id.
  70 * Returns 1 for user space and 0 for kernel space.
  71 */
  72static int __check_access_register(struct pt_regs *regs, int error_code)
  73{
  74        int areg = S390_lowcore.exc_access_id;
  75
  76        if (areg == 0)
  77                /* Access via access register 0 -> kernel address */
  78                return 0;
  79        if (regs && areg < NUM_ACRS && regs->acrs[areg] <= 1)
  80                /*
  81                 * access register contains 0 -> kernel address,
  82                 * access register contains 1 -> user space address
  83                 */
  84                return regs->acrs[areg];
  85
  86        /* Something unhealthy was done with the access registers... */
  87        die("page fault via unknown access register", regs, error_code);
  88        do_exit(SIGKILL);
  89        return 0;
  90}
  91
  92/*
  93 * Check which address space the address belongs to.
  94 * Returns 1 for user space and 0 for kernel space.
  95 */
  96static inline int check_user_space(struct pt_regs *regs, int error_code)
  97{
  98        /*
  99         * The lowest two bits of S390_lowcore.trans_exc_code indicate
 100         * which paging table was used:
 101         *   0: Primary Segment Table Descriptor
 102         *   1: STD determined via access register
 103         *   2: Secondary Segment Table Descriptor
 104         *   3: Home Segment Table Descriptor
 105         */
 106        int descriptor = S390_lowcore.trans_exc_code & 3;
 107        if (descriptor == 1)
 108                return __check_access_register(regs, error_code);
 109        return descriptor >> 1;
 110}
 111
 112/*
 113 * Send SIGSEGV to task.  This is an external routine
 114 * to keep the stack usage of do_page_fault small.
 115 */
 116static void force_sigsegv(struct pt_regs *regs, unsigned long error_code,
 117                          int si_code, unsigned long address)
 118{
 119        struct siginfo si;
 120
 121#if defined(CONFIG_SYSCTL) || defined(CONFIG_PROCESS_DEBUG)
 122#if defined(CONFIG_SYSCTL)
 123        if (sysctl_userprocess_debug)
 124#endif
 125        {
 126                printk("User process fault: interruption code 0x%lX\n",
 127                       error_code);
 128                printk("failing address: %lX\n", address);
 129                show_regs(regs);
 130        }
 131#endif
 132        si.si_signo = SIGSEGV;
 133        si.si_code = si_code;
 134        si.si_addr = (void *) address;
 135        force_sig_info(SIGSEGV, &si, current);
 136}
 137
 138/*
 139 * This routine handles page faults.  It determines the address,
 140 * and the problem, and then passes it off to one of the appropriate
 141 * routines.
 142 *
 143 * error_code:
  144 *   04       Protection           ->  Write-Protection  (suppression)
 145 *   10       Segment translation  ->  Not present       (nullification)
 146 *   11       Page translation     ->  Not present       (nullification)
 147 */
 148extern inline void do_exception(struct pt_regs *regs, unsigned long error_code)
 149{
 150        struct task_struct *tsk;
 151        struct mm_struct *mm;
 152        struct vm_area_struct * vma;
 153        unsigned long address;
 154        int user_address;
 155        unsigned long fixup;
 156        int si_code = SEGV_MAPERR;
 157
 158        tsk = current;
 159        mm = tsk->mm;
 160        
 161        /* 
 162         * Check for low-address protection.  This needs to be treated
 163         * as a special case because the translation exception code 
 164         * field is not guaranteed to contain valid data in this case.
 165         */
 166        if (error_code == 4 && !(S390_lowcore.trans_exc_code & 4)) {
 167
 168                /* Low-address protection hit in kernel mode means 
 169                   NULL pointer write access in kernel mode.  */
 170                if (!(regs->psw.mask & PSW_PROBLEM_STATE)) {
 171                        address = 0;
 172                        user_address = 0;
 173                        goto no_context;
 174                }
 175
 176                /* Low-address protection hit in user mode 'cannot happen'.  */
 177                die ("Low-address protection", regs, error_code);
 178                do_exit(SIGKILL);
 179        }
 180
 181        /* 
 182         * get the failing address 
 183         * more specific the segment and page table portion of 
 184         * the address 
 185         */
 186        address = S390_lowcore.trans_exc_code&0x7ffff000;
 187        user_address = check_user_space(regs, error_code);
 188
 189        /*
 190         * Verify that the fault happened in user space, that
 191         * we are not in an interrupt and that there is a 
 192         * user context.
 193         */
 194        if (user_address == 0 || in_interrupt() || !mm)
 195                goto no_context;
 196
 197        /*
 198         * When we get here, the fault happened in the current
 199         * task's user address space, so we can switch on the
 200         * interrupts again and then search the VMAs
 201         */
 202        __sti();
 203
 204        down_read(&mm->mmap_sem);
 205
 206        vma = find_vma(mm, address);
 207        if (!vma)
 208                goto bad_area;
 209        if (vma->vm_start <= address) 
 210                goto good_area;
 211        if (!(vma->vm_flags & VM_GROWSDOWN))
 212                goto bad_area;
 213        if (expand_stack(vma, address))
 214                goto bad_area;
 215/*
 216 * Ok, we have a good vm_area for this memory access, so
 217 * we can handle it..
 218 */
 219good_area:
 220        si_code = SEGV_ACCERR;
 221        if (error_code != 4) {
 222                /* page not present, check vm flags */
 223                if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
 224                        goto bad_area;
 225        } else {
 226                if (!(vma->vm_flags & VM_WRITE))
 227                        goto bad_area;
 228        }
 229
 230survive:
 231        /*
 232         * If for any reason at all we couldn't handle the fault,
 233         * make sure we exit gracefully rather than endlessly redo
 234         * the fault.
 235         */
 236        switch (handle_mm_fault(mm, vma, address, error_code == 4)) {
 237        case 1:
 238                tsk->min_flt++;
 239                break;
 240        case 2:
 241                tsk->maj_flt++;
 242                break;
 243        case 0:
 244                goto do_sigbus;
 245        default:
 246                goto out_of_memory;
 247        }
 248
 249        up_read(&mm->mmap_sem);
 250        return;
 251
 252/*
 253 * Something tried to access memory that isn't in our memory map..
 254 * Fix it, but check if it's kernel or user first..
 255 */
 256bad_area:
 257        up_read(&mm->mmap_sem);
 258
 259        /* User mode accesses just cause a SIGSEGV */
 260        if (regs->psw.mask & PSW_PROBLEM_STATE) {
 261                tsk->thread.prot_addr = address;
 262                tsk->thread.trap_no = error_code;
 263                force_sigsegv(regs, error_code, si_code, address);
 264                return;
 265        }
 266
 267no_context:
 268        /* Are we prepared to handle this kernel fault?  */
 269        if ((fixup = search_exception_table(regs->psw.addr)) != 0) {
 270                regs->psw.addr = fixup;
 271                return;
 272        }
 273
 274/*
 275 * Oops. The kernel tried to access some bad page. We'll have to
 276 * terminate things with extreme prejudice.
 277 */
 278        if (user_address == 0)
 279                printk(KERN_ALERT "Unable to handle kernel pointer dereference"
 280                       " at virtual kernel address %08lx\n", address);
 281        else
 282                printk(KERN_ALERT "Unable to handle kernel paging request"
 283                       " at virtual user address %08lx\n", address);
 284
 285        die("Oops", regs, error_code);
 286        do_exit(SIGKILL);
 287
 288
 289/*
 290 * We ran out of memory, or some other thing happened to us that made
 291 * us unable to handle the page fault gracefully.
 292*/
 293out_of_memory:
 294        if (tsk->pid == 1) {
 295                yield();
 296                goto survive;
 297        }
 298        up_read(&mm->mmap_sem);
 299        printk("VM: killing process %s\n", tsk->comm);
 300        if (regs->psw.mask & PSW_PROBLEM_STATE)
 301                do_exit(SIGKILL);
 302        goto no_context;
 303
 304do_sigbus:
 305        up_read(&mm->mmap_sem);
 306
 307        /*
 308         * Send a sigbus, regardless of whether we were in kernel
 309         * or user mode.
 310         */
 311        tsk->thread.prot_addr = address;
 312        tsk->thread.trap_no = error_code;
 313        force_sig(SIGBUS, tsk);
 314
 315        /* Kernel mode? Handle exceptions or die */
 316        if (!(regs->psw.mask & PSW_PROBLEM_STATE))
 317                goto no_context;
 318}
 319
 320void do_protection_exception(struct pt_regs *regs, unsigned long error_code)
 321{
 322        regs->psw.addr -= (error_code >> 16);
 323        do_exception(regs, 4);
 324}
 325
 326void do_segment_exception(struct pt_regs *regs, unsigned long error_code)
 327{
 328        do_exception(regs, 0x10);
 329}
 330
 331void do_page_exception(struct pt_regs *regs, unsigned long error_code)
 332{
 333        do_exception(regs, 0x11);
 334}
 335
 336typedef struct _pseudo_wait_t {
 337       struct _pseudo_wait_t *next;
 338       wait_queue_head_t queue;
 339       unsigned long address;
 340       int resolved;
 341} pseudo_wait_t;
 342
 343static pseudo_wait_t *pseudo_lock_queue = NULL;
 344static spinlock_t pseudo_wait_spinlock; /* spinlock to protect lock queue */
 345
 346/*
 347 * This routine handles 'pagex' pseudo page faults.
 348 */
 349asmlinkage void
 350do_pseudo_page_fault(struct pt_regs *regs, unsigned long error_code)
 351{
 352        pseudo_wait_t wait_struct;
 353        pseudo_wait_t *ptr, *last, *next;
 354        unsigned long address;
 355
 356        /*
 357         * get the failing address
 358         * more specific the segment and page table portion of
 359         * the address
 360         */
 361        address = S390_lowcore.trans_exc_code & 0xfffff000;
 362
 363        if (address & 0x80000000) {
 364                /* high bit set -> a page has been swapped in by VM */
 365                address &= 0x7fffffff;
 366                spin_lock(&pseudo_wait_spinlock);
 367                last = NULL;
 368                ptr = pseudo_lock_queue;
 369                while (ptr != NULL) {
 370                        next = ptr->next;
 371                        if (address == ptr->address) {
 372                                 /*
 373                                 * This is one of the processes waiting
 374                                 * for the page. Unchain from the queue.
 375                                 * There can be more than one process
 376                                 * waiting for the same page. VM presents
 377                                 * an initial and a completion interrupt for
 378                                 * every process that tries to access a 
 379                                 * page swapped out by VM. 
 380                                 */
 381                                if (last == NULL)
 382                                        pseudo_lock_queue = next;
 383                                else
 384                                        last->next = next;
 385                                /* now wake up the process */
 386                                ptr->resolved = 1;
 387                                wake_up(&ptr->queue);
 388                        } else
 389                                last = ptr;
 390                        ptr = next;
 391                }
 392                spin_unlock(&pseudo_wait_spinlock);
 393        } else {
 394                /* Pseudo page faults in kernel mode is a bad idea */
 395                if (!(regs->psw.mask & PSW_PROBLEM_STATE)) {
 396                        /*
 397                         * VM presents pseudo page faults if the interrupted
 398                         * state was not disabled for interrupts. So we can
 399                         * get pseudo page fault interrupts while running
 400                         * in kernel mode. We simply access the page here
 401                         * while we are running disabled. VM will then swap
 402                         * in the page synchronously.
 403                         */
 404                         if (check_user_space(regs, error_code) == 0)
 405                                 /* dereference a virtual kernel address */
 406                                 __asm__ __volatile__ (
 407                                         "  ic 0,0(%0)"
 408                                         : : "a" (address) : "0");
 409                         else
 410                                 /* dereference a virtual user address */
 411                                 __asm__ __volatile__ (
 412                                         "  la   2,0(%0)\n"
 413                                         "  sacf 512\n"
 414                                         "  ic   2,0(2)\n"
 415                                         "0:sacf 0\n"
 416                                         ".section __ex_table,\"a\"\n"
 417                                         "  .align 4\n"
 418                                         "  .long  0b,0b\n"
 419                                         ".previous"
 420                                         : : "a" (address) : "2" );
 421
 422                        return;
 423                }
 424                /* initialize and add element to pseudo_lock_queue */
 425                init_waitqueue_head (&wait_struct.queue);
 426                wait_struct.address = address;
 427                wait_struct.resolved = 0;
 428                spin_lock(&pseudo_wait_spinlock);
 429                wait_struct.next = pseudo_lock_queue;
 430                pseudo_lock_queue = &wait_struct;
 431                spin_unlock(&pseudo_wait_spinlock);
 432                /* go to sleep */
 433                wait_event(wait_struct.queue, wait_struct.resolved);
 434        }
 435}
 436
 437#ifdef CONFIG_PFAULT 
 438/*
 439 * 'pfault' pseudo page faults routines.
 440 */
 441static int pfault_disable = 0;
 442
 443static int __init nopfault(char *str)
 444{
 445        pfault_disable = 1;
 446        return 1;
 447}
 448
 449__setup("nopfault", nopfault);
 450
 451typedef struct {
 452        __u16 refdiagc;
 453        __u16 reffcode;
 454        __u16 refdwlen;
 455        __u16 refversn;
 456        __u64 refgaddr;
 457        __u64 refselmk;
 458        __u64 refcmpmk;
 459        __u64 reserved;
 460} __attribute__ ((packed)) pfault_refbk_t;
 461
 462int pfault_init(void)
 463{
 464        pfault_refbk_t refbk =
 465        { 0x258, 0, 5, 2, __LC_KERNEL_STACK, 1ULL << 48, 1ULL << 48, 0ULL };
 466        int rc;
 467
 468        if (pfault_disable)
 469                return -1;
 470        __asm__ __volatile__(
 471                "    diag  %1,%0,0x258\n"
 472                "0:  j     2f\n"
 473                "1:  la    %0,8\n"
 474                "2:\n"
 475                ".section __ex_table,\"a\"\n"
 476                "   .align 4\n"
 477                "   .long  0b,1b\n"
 478                ".previous"
 479                : "=d" (rc) : "a" (&refbk) : "cc" );
 480        __ctl_set_bit(0, 9);
 481        return rc;
 482}
 483
 484void pfault_fini(void)
 485{
 486        pfault_refbk_t refbk =
 487        { 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL };
 488
 489        if (pfault_disable)
 490                return;
 491        __ctl_clear_bit(0,9);
 492        __asm__ __volatile__(
 493                "    diag  %0,0,0x258\n"
 494                "0:\n"
 495                ".section __ex_table,\"a\"\n"
 496                "   .align 4\n"
 497                "   .long  0b,0b\n"
 498                ".previous"
 499                : : "a" (&refbk) : "cc" );
 500}
 501
 502asmlinkage void
 503pfault_interrupt(struct pt_regs *regs, __u16 error_code)
 504{
 505        struct task_struct *tsk;
 506        wait_queue_head_t queue;
 507        wait_queue_head_t *qp;
 508        __u16 subcode;
 509
 510        /*
 511         * Get the external interruption subcode & pfault
 512         * initial/completion signal bit. VM stores this 
 513         * in the 'cpu address' field associated with the
 514         * external interrupt. 
 515         */
 516        subcode = S390_lowcore.cpu_addr;
 517        if ((subcode & 0xff00) != 0x0200)
 518                return;
 519
 520        /*
 521         * Get the token (= address of kernel stack of affected task).
 522         */
 523        tsk = (struct task_struct *)
 524                (*((unsigned long *) __LC_PFAULT_INTPARM) - THREAD_SIZE);
 525        
 526        /*
 527         * We got all needed information from the lowcore and can
 528         * now safely switch on interrupts.
 529         */
 530        if (regs->psw.mask & PSW_PROBLEM_STATE)
 531                __sti();
 532
 533        if (subcode & 0x0080) {
 534                /* signal bit is set -> a page has been swapped in by VM */
 535                qp = (wait_queue_head_t *)
 536                        xchg(&tsk->thread.pfault_wait, -1);
 537                if (qp != NULL) {
 538                        /* Initial interrupt was faster than the completion
 539                         * interrupt. pfault_wait is valid. Set pfault_wait
 540                         * back to zero and wake up the process. This can
 541                         * safely be done because the task is still sleeping
 542                         * and can't procude new pfaults. */
 543                        tsk->thread.pfault_wait = 0ULL;
 544                        wake_up(qp);
 545                }
 546        } else {
 547                /* signal bit not set -> a real page is missing. */
 548                init_waitqueue_head (&queue);
 549                qp = (wait_queue_head_t *)
 550                        xchg(&tsk->thread.pfault_wait, (addr_t) &queue);
 551                if (qp != NULL) {
 552                        /* Completion interrupt was faster than the initial
 553                         * interrupt (swapped in a -1 for pfault_wait). Set
 554                         * pfault_wait back to zero and exit. This can be
 555                         * done safely because tsk is running in kernel 
 556                         * mode and can't produce new pfaults. */
 557                        tsk->thread.pfault_wait = 0ULL;
 558                }
 559
 560                /* go to sleep */
 561                wait_event(queue, tsk->thread.pfault_wait == 0ULL);
 562        }
 563}
 564#endif
 565
 566
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.