linux/kernel/context_tracking.c
/*
 * Context tracking: Probe on high level context boundaries such as kernel
 * and userspace. This includes syscall and exception entry/exit.
 *
 * This is used by RCU to remove its dependency on the timer tick while a CPU
 * runs in userspace.
 *
 *  Started by Frederic Weisbecker:
 *
 * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
 *
 * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
 * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
 *
 */

#include <linux/context_tracking.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/hardirq.h>
#include <linux/export.h>

DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
#ifdef CONFIG_CONTEXT_TRACKING_FORCE
        .active = true,
#endif
};

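/*
 * For reference, a sketch of the per-CPU state used throughout this file,
 * as declared in <linux/context_tracking.h> around this point in the tree
 * (the exact layout here is an assumption and may differ):
 *
 *	struct context_tracking {
 *		bool active;
 *		enum ctx_state {
 *			IN_KERNEL = 0,
 *			IN_USER,
 *		} state;
 *	};
 *
 * "active" says whether this CPU participates in context tracking at all,
 * while "state" records whether the CPU is currently considered to be
 * running in kernel or in user context.
 */
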
/**
 * user_enter - Inform the context tracking that the CPU is going to
 *              enter userspace mode.
 *
 * This function must be called right before we switch from the kernel
 * to userspace, when it is guaranteed that the remaining kernel instructions
 * to execute won't use any RCU read-side critical section, because this
 * function puts RCU in an extended quiescent state.
 */
void user_enter(void)
{
        unsigned long flags;

        /*
         * Some contexts may involve an exception occurring in an irq,
         * leading to that nesting:
         * rcu_irq_enter() rcu_user_exit() rcu_user_enter() rcu_irq_exit()
         * This would mess up the dyntick_nesting count though. And rcu_irq_*()
         * helpers are enough to protect RCU uses inside the exception. So
         * just return immediately if we detect we are in an IRQ.
         */
        if (in_interrupt())
                return;

        /* Kernel threads aren't supposed to go to userspace */
        WARN_ON_ONCE(!current->mm);

        local_irq_save(flags);
        if (__this_cpu_read(context_tracking.active) &&
            __this_cpu_read(context_tracking.state) != IN_USER) {
                /*
                 * At this stage, only low level arch entry code remains and
                 * then we'll run in userspace. We can assume there won't be
                 * any RCU read-side critical section until the next call to
                 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
                 * on the tick.
                 */
                vtime_user_enter(current);
                rcu_user_enter();
                __this_cpu_write(context_tracking.state, IN_USER);
        }
        local_irq_restore(flags);
}
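
/*
 * Illustrative sketch (not part of this file): user_enter() is meant to be
 * called from the arch's return-to-userspace slow path, once the only work
 * left before userspace is low level entry code that takes no RCU read-side
 * critical sections. The function name below is hypothetical:
 *
 *	asmlinkage void example_syscall_trace_leave(struct pt_regs *regs)
 *	{
 *		audit_syscall_exit(regs);	// arch/audit work, for illustration
 *		user_enter();			// last high-level call before the
 *						// final return-to-user instructions
 *	}
 */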

#ifdef CONFIG_PREEMPT
/**
 * preempt_schedule_context - preempt_schedule called by tracing
 *
 * The tracing infrastructure uses preempt_enable_notrace to prevent
 * recursion and to avoid tracing the preempt enabling caused by the
 * tracing infrastructure itself. But as tracing can happen in areas
 * coming from userspace or just about to enter userspace, a preempt
 * enable can occur before user_exit() is called. This will cause the
 * scheduler to be called while the CPU is still tracked as running in
 * user mode.
 *
 * To prevent this, preempt_enable_notrace() will use this function
 * instead of preempt_schedule() to exit user context if needed before
 * calling the scheduler.
 */
void __sched notrace preempt_schedule_context(void)
{
        struct thread_info *ti = current_thread_info();
        enum ctx_state prev_ctx;

        if (likely(ti->preempt_count || irqs_disabled()))
                return;

        /*
         * Need to disable preemption in case user_exit() is traced
         * and the tracer calls preempt_enable_notrace() causing
         * an infinite recursion.
         */
        preempt_disable_notrace();
        prev_ctx = exception_enter();
        preempt_enable_no_resched_notrace();

        preempt_schedule();

        preempt_disable_notrace();
        exception_exit(prev_ctx);
        preempt_enable_notrace();
}
EXPORT_SYMBOL_GPL(preempt_schedule_context);
#endif /* CONFIG_PREEMPT */
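
/*
 * Illustrative sketch (not part of this file, names approximate): with
 * context tracking enabled, preempt_enable_notrace() is expected to route
 * its resched check through preempt_schedule_context() rather than
 * preempt_schedule(), roughly like:
 *
 *	#define preempt_enable_notrace() \
 *	do { \
 *		preempt_enable_no_resched_notrace(); \
 *		barrier(); \
 *		if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
 *			preempt_schedule_context(); \
 *	} while (0)
 *
 * so that a preemption triggered from tracing callbacks first leaves the
 * tracked user context via exception_enter()/exception_exit().
 */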

/**
 * user_exit - Inform the context tracking that the CPU is
 *             exiting userspace mode and entering the kernel.
 *
 * This function must be called after we entered the kernel from userspace,
 * before any use of RCU read-side critical sections. This potentially includes
 * any high level kernel code like syscalls, exceptions, signal handling, etc...
 *
 * This call supports re-entrancy. This way it can be called from any exception
 * handler without needing to know if we came from userspace or not.
 */
void user_exit(void)
{
        unsigned long flags;

        if (in_interrupt())
                return;

        local_irq_save(flags);
        if (__this_cpu_read(context_tracking.state) == IN_USER) {
                /*
                 * We are going to run code that may use RCU. Inform
                 * RCU core about that (ie: we may need the tick again).
                 */
                rcu_user_exit();
                vtime_user_exit(current);
                __this_cpu_write(context_tracking.state, IN_KERNEL);
        }
        local_irq_restore(flags);
}
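
/*
 * Illustrative sketch (not part of this file): user_exit() is meant to be
 * one of the first calls in the arch's syscall/exception entry slow path,
 * before any code that may use RCU. The names below are hypothetical:
 *
 *	asmlinkage void example_syscall_trace_enter(struct pt_regs *regs)
 *	{
 *		user_exit();			// leave RCU's extended quiescent state
 *		do_syscall_slowpath_work(regs);	// hypothetical tracing/audit work
 *	}
 *
 * Because user_exit() is re-entrant, exception handlers may call it without
 * knowing whether they interrupted user or kernel code.
 */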

/**
 * guest_enter - Mark the current task as running in guest mode, so that
 *               its CPU time from now on is accounted as guest time.
 */
void guest_enter(void)
{
        if (vtime_accounting_enabled())
                vtime_guest_enter(current);
        else
                __guest_enter();
}
EXPORT_SYMBOL_GPL(guest_enter);

/**
 * guest_exit - Mark the current task as having left guest mode, so that
 *              its CPU time is accounted as host kernel time again.
 */
void guest_exit(void)
{
        if (vtime_accounting_enabled())
                vtime_guest_exit(current);
        else
                __guest_exit();
}
EXPORT_SYMBOL_GPL(guest_exit);
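
/*
 * Illustrative sketch (not part of this file): a hypervisor such as KVM is
 * expected to bracket the actual guest run with these helpers so that guest
 * CPU time is accounted correctly. The names below are hypothetical:
 *
 *	static void example_vcpu_run(struct example_vcpu *vcpu)
 *	{
 *		guest_enter();			// start accounting time as guest time
 *		enter_guest_hardware(vcpu);	// hypothetical low-level VM entry
 *		guest_exit();			// back to host/kernel accounting
 *	}
 */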


/**
 * context_tracking_task_switch - context switch the syscall callbacks
 * @prev: the task that is being switched out
 * @next: the task that is being switched in
 *
 * The context tracking uses the syscall slow path to implement its user-kernel
 * boundary probes on syscalls. This way it doesn't impact the syscall fast
 * path on CPUs that don't do context tracking.
 *
 * But we need to clear the flag on the previous task because it may later
 * migrate to some CPU that doesn't do the context tracking. As such the TIF
 * flag may not be desired there.
 */
void context_tracking_task_switch(struct task_struct *prev,
                             struct task_struct *next)
{
        if (__this_cpu_read(context_tracking.active)) {
                clear_tsk_thread_flag(prev, TIF_NOHZ);
                set_tsk_thread_flag(next, TIF_NOHZ);
        }
}

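/*
 * Illustrative sketch (not part of this file): the scheduler's context switch
 * path is expected to call this hook so that TIF_NOHZ follows the task about
 * to run. Roughly (assumed to live in kernel/sched/core.c's context_switch()):
 *
 *	static inline void context_switch(struct rq *rq, struct task_struct *prev,
 *					  struct task_struct *next)
 *	{
 *		...
 *		context_tracking_task_switch(prev, next);
 *		switch_to(prev, next, prev);
 *		...
 *	}
 */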