linux/kernel/trace/trace_sysprof.c
<<
>>
Prefs
   1/*
   2 * trace stack traces
   3 *
   4 * Copyright (C) 2004-2008, Soeren Sandmann
   5 * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
   6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
   7 */
   8#include <linux/kallsyms.h>
   9#include <linux/debugfs.h>
  10#include <linux/hrtimer.h>
  11#include <linux/uaccess.h>
  12#include <linux/ftrace.h>
  13#include <linux/module.h>
  14#include <linux/irq.h>
  15#include <linux/fs.h>
  16
  17#include <asm/stacktrace.h>
  18
  19#include "trace.h"
  20
  21static struct trace_array       *sysprof_trace;
  22static int __read_mostly        tracer_enabled;
  23
  24/*
  25 * 1 msec sample interval by default:
  26 */
  27static unsigned long sample_period = 1000000;
  28static const unsigned int sample_max_depth = 512;
  29
  30static DEFINE_MUTEX(sample_timer_lock);
  31/*
  32 * Per CPU hrtimers that do the profiling:
  33 */
  34static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer);
  35
  36struct stack_frame {
  37        const void __user       *next_fp;
  38        unsigned long           return_address;
  39};
  40
  41static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
  42{
  43        int ret;
  44
  45        if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
  46                return 0;
  47
  48        ret = 1;
  49        pagefault_disable();
  50        if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
  51                ret = 0;
  52        pagefault_enable();
  53
  54        return ret;
  55}
  56
  /*
   * State handed to the stacktrace callbacks during a kernel stack walk:
   * where entries are written (@tr, @data) and how many have been
   * recorded so far (@pos).
   */
  struct backtrace_info {
          struct trace_array_cpu  *data;
          struct trace_array      *tr;
          int                     pos;
  };
  62
  /* stacktrace_ops .warning_symbol callback: warnings are dropped. */
  static void
  backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
  {
          /* Intentionally empty. */
          return;
  }
  68
  /* stacktrace_ops .warning callback: warnings are dropped. */
  static void backtrace_warning(void *data, char *msg)
  {
          /* Intentionally empty. */
          return;
  }
  73
  /*
   * stacktrace_ops .stack callback.  IRQ stacks are not handled for now,
   * so this always returns -1 regardless of @data and @name.
   */
  static int backtrace_stack(void *data, char *name)
  {
          return -1;
  }
  79
  80static void backtrace_address(void *data, unsigned long addr, int reliable)
  81{
  82        struct backtrace_info *info = data;
  83
  84        if (info->pos < sample_max_depth && reliable) {
  85                __trace_special(info->tr, info->data, 1, addr, 0);
  86
  87                info->pos++;
  88        }
  89}
  90
  91static const struct stacktrace_ops backtrace_ops = {
  92        .warning                = backtrace_warning,
  93        .warning_symbol         = backtrace_warning_symbol,
  94        .stack                  = backtrace_stack,
  95        .address                = backtrace_address,
  96        .walk_stack             = print_context_stack,
  97};
  98
  99static int
 100trace_kernel(struct pt_regs *regs, struct trace_array *tr,
 101             struct trace_array_cpu *data)
 102{
 103        struct backtrace_info info;
 104        unsigned long bp;
 105        char *stack;
 106
 107        info.tr = tr;
 108        info.data = data;
 109        info.pos = 1;
 110
 111        __trace_special(info.tr, info.data, 1, regs->ip, 0);
 112
 113        stack = ((char *)regs + sizeof(struct pt_regs));
 114#ifdef CONFIG_FRAME_POINTER
 115        bp = regs->bp;
 116#else
 117        bp = 0;
 118#endif
 119
 120        dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info);
 121
 122        return info.pos;
 123}
 124
 125static void timer_notify(struct pt_regs *regs, int cpu)
 126{
 127        struct trace_array_cpu *data;
 128        struct stack_frame frame;
 129        struct trace_array *tr;
 130        const void __user *fp;
 131        int is_user;
 132        int i;
 133
 134        if (!regs)
 135                return;
 136
 137        tr = sysprof_trace;
 138        data = tr->data[cpu];
 139        is_user = user_mode(regs);
 140
 141        if (!current || current->pid == 0)
 142                return;
 143
 144        if (is_user && current->state != TASK_RUNNING)
 145                return;
 146
 147        __trace_special(tr, data, 0, 0, current->pid);
 148
 149        if (!is_user)
 150                i = trace_kernel(regs, tr, data);
 151        else
 152                i = 0;
 153
 154        /*
 155         * Trace user stack if we are not a kernel thread
 156         */
 157        if (current->mm && i < sample_max_depth) {
 158                regs = (struct pt_regs *)current->thread.sp0 - 1;
 159
 160                fp = (void __user *)regs->bp;
 161
 162                __trace_special(tr, data, 2, regs->ip, 0);
 163
 164                while (i < sample_max_depth) {
 165                        frame.next_fp = NULL;
 166                        frame.return_address = 0;
 167                        if (!copy_stack_frame(fp, &frame))
 168                                break;
 169                        if ((unsigned long)fp < regs->sp)
 170                                break;
 171
 172                        __trace_special(tr, data, 2, frame.return_address,
 173                                        (unsigned long)fp);
 174                        fp = frame.next_fp;
 175
 176                        i++;
 177                }
 178
 179        }
 180
 181        /*
 182         * Special trace entry if we overflow the max depth:
 183         */
 184        if (i == sample_max_depth)
 185                __trace_special(tr, data, -1, -1, -1);
 186
 187        __trace_special(tr, data, 3, current->pid, i);
 188}
 189
 190static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer)
 191{
 192        /* trace here */
 193        timer_notify(get_irq_regs(), smp_processor_id());
 194
 195        hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
 196
 197        return HRTIMER_RESTART;
 198}
 199
 200static void start_stack_timer(void *unused)
 201{
 202        struct hrtimer *hrtimer = &__get_cpu_var(stack_trace_hrtimer);
 203
 204        hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 205        hrtimer->function = stack_trace_timer_fn;
 206
 207        hrtimer_start(hrtimer, ns_to_ktime(sample_period),
 208                      HRTIMER_MODE_REL_PINNED);
 209}
 210
 211static void start_stack_timers(void)
 212{
 213        on_each_cpu(start_stack_timer, NULL, 1);
 214}
 215
 216static void stop_stack_timer(int cpu)
 217{
 218        struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
 219
 220        hrtimer_cancel(hrtimer);
 221}
 222
 223static void stop_stack_timers(void)
 224{
 225        int cpu;
 226
 227        for_each_online_cpu(cpu)
 228                stop_stack_timer(cpu);
 229}
 230
 231static void stop_stack_trace(struct trace_array *tr)
 232{
 233        mutex_lock(&sample_timer_lock);
 234        stop_stack_timers();
 235        tracer_enabled = 0;
 236        mutex_unlock(&sample_timer_lock);
 237}
 238
 239static int stack_trace_init(struct trace_array *tr)
 240{
 241        sysprof_trace = tr;
 242
 243        tracing_start_cmdline_record();
 244
 245        mutex_lock(&sample_timer_lock);
 246        start_stack_timers();
 247        tracer_enabled = 1;
 248        mutex_unlock(&sample_timer_lock);
 249        return 0;
 250}
 251
 /* Tracer .reset hook: stop cmdline recording, then stop the sampler. */
 static void stack_trace_reset(struct trace_array *tr)
 {
         tracing_stop_cmdline_record();

         stop_stack_trace(tr);
 }
 257
 258static struct tracer stack_trace __read_mostly =
 259{
 260        .name           = "sysprof",
 261        .init           = stack_trace_init,
 262        .reset          = stack_trace_reset,
 263#ifdef CONFIG_FTRACE_SELFTEST
 264        .selftest    = trace_selftest_startup_sysprof,
 265#endif
 266};
 267
 268__init static int init_stack_trace(void)
 269{
 270        return register_tracer(&stack_trace);
 271}
 272device_initcall(init_stack_trace);
 273
 /*
  * Size in bytes of the on-stack text buffers used by the
  * sysprof_sample_period read and write handlers.
  */
 #define MAX_LONG_DIGITS 22
 275
 276static ssize_t
 277sysprof_sample_read(struct file *filp, char __user *ubuf,
 278                    size_t cnt, loff_t *ppos)
 279{
 280        char buf[MAX_LONG_DIGITS];
 281        int r;
 282
 283        r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period));
 284
 285        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
 286}
 287
 288static ssize_t
 289sysprof_sample_write(struct file *filp, const char __user *ubuf,
 290                     size_t cnt, loff_t *ppos)
 291{
 292        char buf[MAX_LONG_DIGITS];
 293        unsigned long val;
 294
 295        if (cnt > MAX_LONG_DIGITS-1)
 296                cnt = MAX_LONG_DIGITS-1;
 297
 298        if (copy_from_user(&buf, ubuf, cnt))
 299                return -EFAULT;
 300
 301        buf[cnt] = 0;
 302
 303        val = simple_strtoul(buf, NULL, 10);
 304        /*
 305         * Enforce a minimum sample period of 100 usecs:
 306         */
 307        if (val < 100)
 308                val = 100;
 309
 310        mutex_lock(&sample_timer_lock);
 311        stop_stack_timers();
 312        sample_period = val * 1000;
 313        start_stack_timers();
 314        mutex_unlock(&sample_timer_lock);
 315
 316        return cnt;
 317}
 318
 319static const struct file_operations sysprof_sample_fops = {
 320        .read           = sysprof_sample_read,
 321        .write          = sysprof_sample_write,
 322};
 323
 324void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
 325{
 326
 327        trace_create_file("sysprof_sample_period", 0644,
 328                        d_tracer, NULL, &sysprof_sample_fops);
 329}
 330
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.