linux-bk/arch/um/kernel/tt/process_kern.c
<<
>>
Prefs
   1/* 
   2 * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
   3 * Licensed under the GPL
   4 */
   5
   6#include "linux/sched.h"
   7#include "linux/signal.h"
   8#include "linux/kernel.h"
   9#include "linux/interrupt.h"
  10#include "linux/ptrace.h"
  11#include "asm/system.h"
  12#include "asm/pgalloc.h"
  13#include "asm/ptrace.h"
  14#include "asm/tlbflush.h"
  15#include "irq_user.h"
  16#include "signal_user.h"
  17#include "kern_util.h"
  18#include "user_util.h"
  19#include "os.h"
  20#include "kern.h"
  21#include "sigcontext.h"
  22#include "time_user.h"
  23#include "mem_user.h"
  24#include "tlb.h"
  25#include "mode.h"
  26#include "init.h"
  27#include "tt.h"
  28
  29void *switch_to_tt(void *prev, void *next, void *last)
  30{
  31        struct task_struct *from, *to, *prev_sched;
  32        unsigned long flags;
  33        int err, vtalrm, alrm, prof, cpu;
  34        char c;
  35        /* jailing and SMP are incompatible, so this doesn't need to be 
  36         * made per-cpu 
  37         */
  38        static int reading;
  39
  40        from = prev;
  41        to = next;
  42
  43        to->thread.prev_sched = from;
  44
  45        cpu = from->thread_info->cpu;
  46        if(cpu == 0)
  47                forward_interrupts(to->thread.mode.tt.extern_pid);
  48#ifdef CONFIG_SMP
  49        forward_ipi(cpu_data[cpu].ipi_pipe[0], to->thread.mode.tt.extern_pid);
  50#endif
  51        local_irq_save(flags);
  52
  53        vtalrm = change_sig(SIGVTALRM, 0);
  54        alrm = change_sig(SIGALRM, 0);
  55        prof = change_sig(SIGPROF, 0);
  56
  57        forward_pending_sigio(to->thread.mode.tt.extern_pid);
  58
  59        c = 0;
  60        set_current(to);
  61
  62        reading = 0;
  63        err = os_write_file(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c));
  64        if(err != sizeof(c))
  65                panic("write of switch_pipe failed, err = %d", -err);
  66
  67        reading = 1;
  68        if((from->exit_state == EXIT_ZOMBIE) ||
  69           (from->exit_state == EXIT_DEAD))
  70                os_kill_process(os_getpid(), 0);
  71
  72        err = os_read_file(from->thread.mode.tt.switch_pipe[0], &c, sizeof(c));
  73        if(err != sizeof(c))
  74                panic("read of switch_pipe failed, errno = %d", -err);
  75
  76        /* If the process that we have just scheduled away from has exited,
  77         * then it needs to be killed here.  The reason is that, even though
  78         * it will kill itself when it next runs, that may be too late.  Its
  79         * stack will be freed, possibly before then, and if that happens,
  80         * we have a use-after-free situation.  So, it gets killed here
  81         * in case it has not already killed itself.
  82         */
  83        prev_sched = current->thread.prev_sched;
  84        if((prev_sched->exit_state == EXIT_ZOMBIE) ||
  85           (prev_sched->exit_state == EXIT_DEAD))
  86                os_kill_process(prev_sched->thread.mode.tt.extern_pid, 1);
  87
  88        /* This works around a nasty race with 'jail'.  If we are switching
  89         * between two threads of a threaded app and the incoming process 
  90         * runs before the outgoing process reaches the read, and it makes
  91         * it all the way out to userspace, then it will have write-protected 
  92         * the outgoing process stack.  Then, when the outgoing process 
  93         * returns from the write, it will segfault because it can no longer
  94         * write its own stack.  So, in order to avoid that, the incoming 
  95         * thread sits in a loop yielding until 'reading' is set.  This 
  96         * isn't entirely safe, since there may be a reschedule from a timer
  97         * happening between setting 'reading' and sleeping in read.  But,
  98         * it should get a whole quantum in which to reach the read and sleep,
  99         * which should be enough.
 100         */
 101
 102        if(jail){
 103                while(!reading) sched_yield();
 104        }
 105
 106        change_sig(SIGVTALRM, vtalrm);
 107        change_sig(SIGALRM, alrm);
 108        change_sig(SIGPROF, prof);
 109
 110        arch_switch();
 111
 112        flush_tlb_all();
 113        local_irq_restore(flags);
 114
 115        return(current->thread.prev_sched);
 116}
 117
 118void release_thread_tt(struct task_struct *task)
 119{
 120        int pid = task->thread.mode.tt.extern_pid;
 121
 122        if(os_getpid() != pid)
 123                os_kill_process(pid, 0);
 124}
 125
 126void exit_thread_tt(void)
 127{
 128        os_close_file(current->thread.mode.tt.switch_pipe[0]);
 129        os_close_file(current->thread.mode.tt.switch_pipe[1]);
 130}
 131
 132void suspend_new_thread(int fd)
 133{
 134        int err;
 135        char c;
 136
 137        os_stop_process(os_getpid());
 138        err = os_read_file(fd, &c, sizeof(c));
 139        if(err != sizeof(c))
 140                panic("read failed in suspend_new_thread, err = %d", -err);
 141}
 142
 143void schedule_tail(task_t *prev);
 144
 145static void new_thread_handler(int sig)
 146{
 147        unsigned long disable;
 148        int (*fn)(void *);
 149        void *arg;
 150
 151        fn = current->thread.request.u.thread.proc;
 152        arg = current->thread.request.u.thread.arg;
 153
 154        UPT_SC(&current->thread.regs.regs) = (void *) (&sig + 1);
 155        disable = (1 << (SIGVTALRM - 1)) | (1 << (SIGALRM - 1)) |
 156                (1 << (SIGIO - 1)) | (1 << (SIGPROF - 1));
 157        SC_SIGMASK(UPT_SC(&current->thread.regs.regs)) &= ~disable;
 158
 159        suspend_new_thread(current->thread.mode.tt.switch_pipe[0]);
 160
 161        force_flush_all();
 162        if(current->thread.prev_sched != NULL)
 163                schedule_tail(current->thread.prev_sched);
 164        current->thread.prev_sched = NULL;
 165
 166        init_new_thread_signals(1);
 167        enable_timer();
 168        free_page(current->thread.temp_stack);
 169        set_cmdline("(kernel thread)");
 170
 171        change_sig(SIGUSR1, 1);
 172        change_sig(SIGVTALRM, 1);
 173        change_sig(SIGPROF, 1);
 174        local_irq_enable();
 175        if(!run_kernel_thread(fn, arg, &current->thread.exec_buf))
 176                do_exit(0);
 177
 178        /* XXX No set_user_mode here because a newly execed process will
 179         * immediately segfault on its non-existent IP, coming straight back
 180         * to the signal handler, which will call set_user_mode on its way
 181         * out.  This should probably change since it's confusing.
 182         */
 183}
 184
 185static int new_thread_proc(void *stack)
 186{
 187        /* local_irq_disable is needed to block out signals until this thread is
 188         * properly scheduled.  Otherwise, the tracing thread will get mighty
 189         * upset about any signals that arrive before that.
 190         * This has the complication that it sets the saved signal mask in
 191         * the sigcontext to block signals.  This gets restored when this
 192         * thread (or a descendant, since they get a copy of this sigcontext)
 193         * returns to userspace.
 194         * So, this is compensated for elsewhere.
 195         * XXX There is still a small window until local_irq_disable() actually
 196         * finishes where signals are possible - shouldn't be a problem in
 197         * practice since SIGIO hasn't been forwarded here yet, and the
 198         * local_irq_disable should finish before a SIGVTALRM has time to be
 199         * delivered.
 200         */
 201
 202        local_irq_disable();
 203        init_new_thread_stack(stack, new_thread_handler);
 204        os_usr1_process(os_getpid());
 205        change_sig(SIGUSR1, 1);
 206        return(0);
 207}
 208
 209/* Signal masking - signals are blocked at the start of fork_tramp.  They
 210 * are re-enabled when finish_fork_handler is entered by fork_tramp hitting
 211 * itself with a SIGUSR1.  set_user_mode has to be run with SIGUSR1 off,
 212 * so it is blocked before it's called.  They are re-enabled on sigreturn
 213 * despite the fact that they were blocked when the SIGUSR1 was issued because
 214 * copy_thread copies the parent's sigcontext, including the signal mask
 215 * onto the signal frame.
 216 */
 217
 218void finish_fork_handler(int sig)
 219{
 220        UPT_SC(&current->thread.regs.regs) = (void *) (&sig + 1);
 221        suspend_new_thread(current->thread.mode.tt.switch_pipe[0]);
 222
 223        force_flush_all();
 224        if(current->thread.prev_sched != NULL)
 225                schedule_tail(current->thread.prev_sched);
 226        current->thread.prev_sched = NULL;
 227
 228        enable_timer();
 229        change_sig(SIGVTALRM, 1);
 230        local_irq_enable();
 231        if(current->mm != current->parent->mm)
 232                protect_memory(uml_reserved, high_physmem - uml_reserved, 1, 
 233                               1, 0, 1);
 234        task_protections((unsigned long) current_thread);
 235
 236        free_page(current->thread.temp_stack);
 237        local_irq_disable();
 238        change_sig(SIGUSR1, 0);
 239        set_user_mode(current);
 240}
 241
 242int fork_tramp(void *stack)
 243{
 244        local_irq_disable();
 245        arch_init_thread();
 246        init_new_thread_stack(stack, finish_fork_handler);
 247
 248        os_usr1_process(os_getpid());
 249        change_sig(SIGUSR1, 1);
 250        return(0);
 251}
 252
 253int copy_thread_tt(int nr, unsigned long clone_flags, unsigned long sp,
 254                   unsigned long stack_top, struct task_struct * p, 
 255                   struct pt_regs *regs)
 256{
 257        int (*tramp)(void *);
 258        int new_pid, err;
 259        unsigned long stack;
 260        
 261        if(current->thread.forking)
 262                tramp = fork_tramp;
 263        else {
 264                tramp = new_thread_proc;
 265                p->thread.request.u.thread = current->thread.request.u.thread;
 266        }
 267
 268        err = os_pipe(p->thread.mode.tt.switch_pipe, 1, 1);
 269        if(err < 0){
 270                printk("copy_thread : pipe failed, err = %d\n", -err);
 271                return(err);
 272        }
 273
 274        stack = alloc_stack(0, 0);
 275        if(stack == 0){
 276                printk(KERN_ERR "copy_thread : failed to allocate "
 277                       "temporary stack\n");
 278                return(-ENOMEM);
 279        }
 280
 281        clone_flags &= CLONE_VM;
 282        p->thread.temp_stack = stack;
 283        new_pid = start_fork_tramp(p->thread_info, stack, clone_flags, tramp);
 284        if(new_pid < 0){
 285                printk(KERN_ERR "copy_thread : clone failed - errno = %d\n", 
 286                       -new_pid);
 287                return(new_pid);
 288        }
 289
 290        if(current->thread.forking){
 291                sc_to_sc(UPT_SC(&p->thread.regs.regs), 
 292                         UPT_SC(&current->thread.regs.regs));
 293                SC_SET_SYSCALL_RETURN(UPT_SC(&p->thread.regs.regs), 0);
 294                if(sp != 0) SC_SP(UPT_SC(&p->thread.regs.regs)) = sp;
 295        }
 296        p->thread.mode.tt.extern_pid = new_pid;
 297
 298        current->thread.request.op = OP_FORK;
 299        current->thread.request.u.fork.pid = new_pid;
 300        os_usr1_process(os_getpid());
 301
 302        /* Enable the signal and then disable it to ensure that it is handled
 303         * here, and nowhere else.
 304         */
 305        change_sig(SIGUSR1, 1);
 306
 307        change_sig(SIGUSR1, 0);
 308        err = 0;
 309        return(err);
 310}
 311
 312void reboot_tt(void)
 313{
 314        current->thread.request.op = OP_REBOOT;
 315        os_usr1_process(os_getpid());
 316        change_sig(SIGUSR1, 1);
 317}
 318
 319void halt_tt(void)
 320{
 321        current->thread.request.op = OP_HALT;
 322        os_usr1_process(os_getpid());
 323        change_sig(SIGUSR1, 1);
 324}
 325
 326void kill_off_processes_tt(void)
 327{
 328        struct task_struct *p;
 329        int me;
 330
 331        me = os_getpid();
 332        for_each_process(p){
 333                if(p->thread.mode.tt.extern_pid != me) 
 334                        os_kill_process(p->thread.mode.tt.extern_pid, 0);
 335        }
 336        if(init_task.thread.mode.tt.extern_pid != me) 
 337                os_kill_process(init_task.thread.mode.tt.extern_pid, 0);
 338}
 339
 340void initial_thread_cb_tt(void (*proc)(void *), void *arg)
 341{
 342        if(os_getpid() == tracing_pid){
 343                (*proc)(arg);
 344        }
 345        else {
 346                current->thread.request.op = OP_CB;
 347                current->thread.request.u.cb.proc = proc;
 348                current->thread.request.u.cb.arg = arg;
 349                os_usr1_process(os_getpid());
 350                change_sig(SIGUSR1, 1);
 351
 352                change_sig(SIGUSR1, 0);
 353        }
 354}
 355
 356int do_proc_op(void *t, int proc_id)
 357{
 358        struct task_struct *task;
 359        struct thread_struct *thread;
 360        int op, pid;
 361
 362        task = t;
 363        thread = &task->thread;
 364        op = thread->request.op;
 365        switch(op){
 366        case OP_NONE:
 367        case OP_TRACE_ON:
 368                break;
 369        case OP_EXEC:
 370                pid = thread->request.u.exec.pid;
 371                do_exec(thread->mode.tt.extern_pid, pid);
 372                thread->mode.tt.extern_pid = pid;
 373                cpu_tasks[task->thread_info->cpu].pid = pid;
 374                break;
 375        case OP_FORK:
 376                attach_process(thread->request.u.fork.pid);
 377                break;
 378        case OP_CB:
 379                (*thread->request.u.cb.proc)(thread->request.u.cb.arg);
 380                break;
 381        case OP_REBOOT:
 382        case OP_HALT:
 383                break;
 384        default:
 385                tracer_panic("Bad op in do_proc_op");
 386                break;
 387        }
 388        thread->request.op = OP_NONE;
 389        return(op);
 390}
 391
 392void init_idle_tt(void)
 393{
 394        default_idle();
 395}
 396
 397/* Changed by jail_setup, which is a setup */
 398int jail = 0;
 399
 400int __init jail_setup(char *line, int *add)
 401{
 402        int ok = 1;
 403
 404        if(jail) return(0);
 405#ifdef CONFIG_SMP
 406        printf("'jail' may not used used in a kernel with CONFIG_SMP "
 407               "enabled\n");
 408        ok = 0;
 409#endif
 410#ifdef CONFIG_HOSTFS
 411        printf("'jail' may not used used in a kernel with CONFIG_HOSTFS "
 412               "enabled\n");
 413        ok = 0;
 414#endif
 415#ifdef CONFIG_MODULES
 416        printf("'jail' may not used used in a kernel with CONFIG_MODULES "
 417               "enabled\n");
 418        ok = 0;
 419#endif  
 420        if(!ok) exit(1);
 421
 422        /* CAP_SYS_RAWIO controls the ability to open /dev/mem and /dev/kmem.
 423         * Removing it from the bounding set eliminates the ability of anything
 424         * to acquire it, and thus read or write kernel memory.
 425         */
 426        cap_lower(cap_bset, CAP_SYS_RAWIO);
 427        jail = 1;
 428        return(0);
 429}
 430
 431__uml_setup("jail", jail_setup,
 432"jail\n"
 433"    Enables the protection of kernel memory from processes.\n\n"
 434);
 435
 436static void mprotect_kernel_mem(int w)
 437{
 438        unsigned long start, end;
 439        int pages;
 440
 441        if(!jail || (current == &init_task)) return;
 442
 443        pages = (1 << CONFIG_KERNEL_STACK_ORDER);
 444
 445        start = (unsigned long) current_thread + PAGE_SIZE;
 446        end = (unsigned long) current_thread + PAGE_SIZE * pages;
 447        protect_memory(uml_reserved, start - uml_reserved, 1, w, 1, 1);
 448        protect_memory(end, high_physmem - end, 1, w, 1, 1);
 449
 450        start = (unsigned long) UML_ROUND_DOWN(&_stext);
 451        end = (unsigned long) UML_ROUND_UP(&_etext);
 452        protect_memory(start, end - start, 1, w, 1, 1);
 453
 454        start = (unsigned long) UML_ROUND_DOWN(&_unprotected_end);
 455        end = (unsigned long) UML_ROUND_UP(&_edata);
 456        protect_memory(start, end - start, 1, w, 1, 1);
 457
 458        start = (unsigned long) UML_ROUND_DOWN(&__bss_start);
 459        end = (unsigned long) UML_ROUND_UP(brk_start);
 460        protect_memory(start, end - start, 1, w, 1, 1);
 461
 462        mprotect_kernel_vm(w);
 463}
 464
 465void unprotect_kernel_mem(void)
 466{
 467        mprotect_kernel_mem(1);
 468}
 469
 470void protect_kernel_mem(void)
 471{
 472        mprotect_kernel_mem(0);
 473}
 474
 475extern void start_kernel(void);
 476
 477static int start_kernel_proc(void *unused)
 478{
 479        int pid;
 480
 481        block_signals();
 482        pid = os_getpid();
 483
 484        cpu_tasks[0].pid = pid;
 485        cpu_tasks[0].task = current;
 486#ifdef CONFIG_SMP
 487        cpu_online_map = cpumask_of_cpu(0);
 488#endif
 489        if(debug) os_stop_process(pid);
 490        start_kernel();
 491        return(0);
 492}
 493
 494void set_tracing(void *task, int tracing)
 495{
 496        ((struct task_struct *) task)->thread.mode.tt.tracing = tracing;
 497}
 498
 499int is_tracing(void *t)
 500{
 501        return (((struct task_struct *) t)->thread.mode.tt.tracing);
 502}
 503
 504int set_user_mode(void *t)
 505{
 506        struct task_struct *task;
 507
 508        task = t ? t : current;
 509        if(task->thread.mode.tt.tracing) 
 510                return(1);
 511        task->thread.request.op = OP_TRACE_ON;
 512        os_usr1_process(os_getpid());
 513        return(0);
 514}
 515
 516void set_init_pid(int pid)
 517{
 518        int err;
 519
 520        init_task.thread.mode.tt.extern_pid = pid;
 521        err = os_pipe(init_task.thread.mode.tt.switch_pipe, 1, 1);
 522        if(err)
 523                panic("Can't create switch pipe for init_task, errno = %d",
 524                      -err);
 525}
 526
 527int start_uml_tt(void)
 528{
 529        void *sp;
 530        int pages;
 531
 532        pages = (1 << CONFIG_KERNEL_STACK_ORDER);
 533        sp = (void *) ((unsigned long) init_task.thread_info) +
 534                pages * PAGE_SIZE - sizeof(unsigned long);
 535        return(tracer(start_kernel_proc, sp));
 536}
 537
 538int external_pid_tt(struct task_struct *task)
 539{
 540        return(task->thread.mode.tt.extern_pid);
 541}
 542
 543int thread_pid_tt(struct task_struct *task)
 544{
 545        return(task->thread.mode.tt.extern_pid);
 546}
 547
 548int is_valid_pid(int pid)
 549{
 550        struct task_struct *task;
 551
 552        read_lock(&tasklist_lock);
 553        for_each_process(task){
 554                if(task->thread.mode.tt.extern_pid == pid){
 555                        read_unlock(&tasklist_lock);
 556                        return(1);
 557                }
 558        }
 559        read_unlock(&tasklist_lock);
 560        return(0);
 561}
 562
 563/*
 564 * Overrides for Emacs so that we follow Linus's tabbing style.
 565 * Emacs will notice this stuff at the end of the file and automatically
 566 * adjust the settings for this buffer only.  This must remain at the end
 567 * of the file.
 568 * ---------------------------------------------------------------------------
 569 * Local variables:
 570 * c-file-style: "linux"
 571 * End:
 572 */
 573
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.