linux/kernel/trace/trace.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * ring buffer based function tracer
   4 *
   5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
   6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
   7 *
   8 * Originally taken from the RT patch by:
   9 *    Arnaldo Carvalho de Melo <acme@redhat.com>
  10 *
  11 * Based on code from the latency_tracer, that is:
  12 *  Copyright (C) 2004-2006 Ingo Molnar
  13 *  Copyright (C) 2004 Nadia Yvette Chambers
  14 */
  15#include <linux/ring_buffer.h>
  16#include <generated/utsrelease.h>
  17#include <linux/stacktrace.h>
  18#include <linux/writeback.h>
  19#include <linux/kallsyms.h>
  20#include <linux/security.h>
  21#include <linux/seq_file.h>
  22#include <linux/notifier.h>
  23#include <linux/irqflags.h>
  24#include <linux/debugfs.h>
  25#include <linux/tracefs.h>
  26#include <linux/pagemap.h>
  27#include <linux/hardirq.h>
  28#include <linux/linkage.h>
  29#include <linux/uaccess.h>
  30#include <linux/vmalloc.h>
  31#include <linux/ftrace.h>
  32#include <linux/module.h>
  33#include <linux/percpu.h>
  34#include <linux/splice.h>
  35#include <linux/kdebug.h>
  36#include <linux/string.h>
  37#include <linux/mount.h>
  38#include <linux/rwsem.h>
  39#include <linux/slab.h>
  40#include <linux/ctype.h>
  41#include <linux/init.h>
  42#include <linux/panic_notifier.h>
  43#include <linux/poll.h>
  44#include <linux/nmi.h>
  45#include <linux/fs.h>
  46#include <linux/trace.h>
  47#include <linux/sched/clock.h>
  48#include <linux/sched/rt.h>
  49#include <linux/fsnotify.h>
  50#include <linux/irq_work.h>
  51#include <linux/workqueue.h>
  52
  53#include "trace.h"
  54#include "trace_output.h"
  55
  56/*
  57 * On boot up, the ring buffer is set to the minimum size, so that
  58 * we do not waste memory on systems that are not using tracing.
  59 */
  60bool ring_buffer_expanded;
  61
  62/*
  63 * We need to change this state when a selftest is running.
   64 * A selftest will look into the ring-buffer to count the
   65 * entries inserted during the selftest, although concurrent
   66 * insertions into the ring-buffer, such as trace_printk, could occur
   67 * at the same time, giving false positive or negative results.
  68 */
  69static bool __read_mostly tracing_selftest_running;
  70
  71/*
   72 * If boot-time tracing (tracers/events set up via the kernel cmdline)
   73 * is running, we do not want to run the selftests.
  74 */
  75bool __read_mostly tracing_selftest_disabled;
  76
  77#ifdef CONFIG_FTRACE_STARTUP_TEST
  78void __init disable_tracing_selftest(const char *reason)
  79{
  80        if (!tracing_selftest_disabled) {
  81                tracing_selftest_disabled = true;
  82                pr_info("Ftrace startup test is disabled due to %s\n", reason);
  83        }
  84}
  85#endif
  86
  87/* Pipe tracepoints to printk */
  88struct trace_iterator *tracepoint_print_iter;
  89int tracepoint_printk;
  90static bool tracepoint_printk_stop_on_boot __initdata;
  91static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
  92
  93/* For tracers that don't implement custom flags */
  94static struct tracer_opt dummy_tracer_opt[] = {
  95        { }
  96};
  97
  98static int
  99dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 100{
 101        return 0;
 102}
 103
 104/*
 105 * To prevent the comm cache from being overwritten when no
 106 * tracing is active, only save the comm when a trace event
  107 * occurs.
 108 */
 109static DEFINE_PER_CPU(bool, trace_taskinfo_save);
 110
 111/*
 112 * Kill all tracing for good (never come back).
  113 * It is initialized to 1 but will be set to zero if the initialization
  114 * of the tracer is successful. That is the only place that sets
  115 * it back to zero.
 116 */
 117static int tracing_disabled = 1;
 118
 119cpumask_var_t __read_mostly     tracing_buffer_mask;
 120
 121/*
 122 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 123 *
 124 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 125 * is set, then ftrace_dump is called. This will output the contents
 126 * of the ftrace buffers to the console.  This is very useful for
  127 * capturing the trace that led to a crash and outputting it to a
  128 * serial console.
  129 *
  130 * It is off by default, but you can enable it either by specifying
  131 * "ftrace_dump_on_oops" on the kernel command line, or by setting
  132 * /proc/sys/kernel/ftrace_dump_on_oops.
  133 * Set it to 1 to dump the buffers of all CPUs.
  134 * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
 135 */
 136
 137enum ftrace_dump_mode ftrace_dump_on_oops;
 138
 139/* When set, tracing will stop when a WARN*() is hit */
 140int __disable_trace_on_warning;
 141
 142#ifdef CONFIG_TRACE_EVAL_MAP_FILE
 143/* Map of enums to their values, for "eval_map" file */
 144struct trace_eval_map_head {
 145        struct module                   *mod;
 146        unsigned long                   length;
 147};
 148
 149union trace_eval_map_item;
 150
 151struct trace_eval_map_tail {
 152        /*
 153         * "end" is first and points to NULL as it must be different
 154         * than "mod" or "eval_string"
 155         */
 156        union trace_eval_map_item       *next;
 157        const char                      *end;   /* points to NULL */
 158};
 159
 160static DEFINE_MUTEX(trace_eval_mutex);
 161
 162/*
 163 * The trace_eval_maps are saved in an array with two extra elements,
 164 * one at the beginning, and one at the end. The beginning item contains
 165 * the count of the saved maps (head.length), and the module they
 166 * belong to if not built in (head.mod). The ending item contains a
 167 * pointer to the next array of saved eval_map items.
 168 */
 169union trace_eval_map_item {
 170        struct trace_eval_map           map;
 171        struct trace_eval_map_head      head;
 172        struct trace_eval_map_tail      tail;
 173};
 174
 175static union trace_eval_map_item *trace_eval_maps;
 176#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
 177
 178int tracing_set_tracer(struct trace_array *tr, const char *buf);
 179static void ftrace_trace_userstack(struct trace_array *tr,
 180                                   struct trace_buffer *buffer,
 181                                   unsigned int trace_ctx);
 182
 183#define MAX_TRACER_SIZE         100
 184static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
 185static char *default_bootup_tracer;
 186
 187static bool allocate_snapshot;
 188
 189static int __init set_cmdline_ftrace(char *str)
 190{
 191        strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
 192        default_bootup_tracer = bootup_tracer_buf;
 193        /* We are using ftrace early, expand it */
 194        ring_buffer_expanded = true;
 195        return 1;
 196}
 197__setup("ftrace=", set_cmdline_ftrace);
 198
 199static int __init set_ftrace_dump_on_oops(char *str)
 200{
 201        if (*str++ != '=' || !*str || !strcmp("1", str)) {
 202                ftrace_dump_on_oops = DUMP_ALL;
 203                return 1;
 204        }
 205
 206        if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
 207                ftrace_dump_on_oops = DUMP_ORIG;
 208                return 1;
 209        }
 210
 211        return 0;
 212}
 213__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
 214
 215static int __init stop_trace_on_warning(char *str)
 216{
 217        if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
 218                __disable_trace_on_warning = 1;
 219        return 1;
 220}
 221__setup("traceoff_on_warning", stop_trace_on_warning);
 222
 223static int __init boot_alloc_snapshot(char *str)
 224{
 225        allocate_snapshot = true;
 226        /* We also need the main ring buffer expanded */
 227        ring_buffer_expanded = true;
 228        return 1;
 229}
 230__setup("alloc_snapshot", boot_alloc_snapshot);
 231
 232
 233static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
 234
 235static int __init set_trace_boot_options(char *str)
 236{
 237        strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
 238        return 0;
 239}
 240__setup("trace_options=", set_trace_boot_options);
 241
 242static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
 243static char *trace_boot_clock __initdata;
 244
 245static int __init set_trace_boot_clock(char *str)
 246{
 247        strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
 248        trace_boot_clock = trace_boot_clock_buf;
 249        return 0;
 250}
 251__setup("trace_clock=", set_trace_boot_clock);
 252
 253static int __init set_tracepoint_printk(char *str)
 254{
 255        if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
 256                tracepoint_printk = 1;
 257        return 1;
 258}
 259__setup("tp_printk", set_tracepoint_printk);
 260
 261static int __init set_tracepoint_printk_stop(char *str)
 262{
 263        tracepoint_printk_stop_on_boot = true;
 264        return 1;
 265}
 266__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
 267
 268unsigned long long ns2usecs(u64 nsec)
 269{
 270        nsec += 500;
 271        do_div(nsec, 1000);
 272        return nsec;
 273}
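/*
 * For example, ns2usecs(1499) == 1 and ns2usecs(1500) == 2: the +500
 * rounds to the nearest microsecond before the divide by 1000.
 */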
 274
 275static void
 276trace_process_export(struct trace_export *export,
 277               struct ring_buffer_event *event, int flag)
 278{
 279        struct trace_entry *entry;
 280        unsigned int size = 0;
 281
 282        if (export->flags & flag) {
 283                entry = ring_buffer_event_data(event);
 284                size = ring_buffer_event_length(event);
 285                export->write(export, entry, size);
 286        }
 287}
 288
 289static DEFINE_MUTEX(ftrace_export_lock);
 290
 291static struct trace_export __rcu *ftrace_exports_list __read_mostly;
 292
 293static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
 294static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
 295static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
 296
 297static inline void ftrace_exports_enable(struct trace_export *export)
 298{
 299        if (export->flags & TRACE_EXPORT_FUNCTION)
 300                static_branch_inc(&trace_function_exports_enabled);
 301
 302        if (export->flags & TRACE_EXPORT_EVENT)
 303                static_branch_inc(&trace_event_exports_enabled);
 304
 305        if (export->flags & TRACE_EXPORT_MARKER)
 306                static_branch_inc(&trace_marker_exports_enabled);
 307}
 308
 309static inline void ftrace_exports_disable(struct trace_export *export)
 310{
 311        if (export->flags & TRACE_EXPORT_FUNCTION)
 312                static_branch_dec(&trace_function_exports_enabled);
 313
 314        if (export->flags & TRACE_EXPORT_EVENT)
 315                static_branch_dec(&trace_event_exports_enabled);
 316
 317        if (export->flags & TRACE_EXPORT_MARKER)
 318                static_branch_dec(&trace_marker_exports_enabled);
 319}
 320
 321static void ftrace_exports(struct ring_buffer_event *event, int flag)
 322{
 323        struct trace_export *export;
 324
 325        preempt_disable_notrace();
 326
 327        export = rcu_dereference_raw_check(ftrace_exports_list);
 328        while (export) {
 329                trace_process_export(export, event, flag);
 330                export = rcu_dereference_raw_check(export->next);
 331        }
 332
 333        preempt_enable_notrace();
 334}
 335
 336static inline void
 337add_trace_export(struct trace_export **list, struct trace_export *export)
 338{
 339        rcu_assign_pointer(export->next, *list);
 340        /*
  341         * We are adding export to the list, but another
  342         * CPU might be walking that list. We need to make sure
  343         * the export->next pointer is valid before another CPU sees
  344         * the export pointer included in the list.
 345         */
 346        rcu_assign_pointer(*list, export);
 347}
 348
 349static inline int
 350rm_trace_export(struct trace_export **list, struct trace_export *export)
 351{
 352        struct trace_export **p;
 353
 354        for (p = list; *p != NULL; p = &(*p)->next)
 355                if (*p == export)
 356                        break;
 357
 358        if (*p != export)
 359                return -1;
 360
 361        rcu_assign_pointer(*p, (*p)->next);
 362
 363        return 0;
 364}
 365
 366static inline void
 367add_ftrace_export(struct trace_export **list, struct trace_export *export)
 368{
 369        ftrace_exports_enable(export);
 370
 371        add_trace_export(list, export);
 372}
 373
 374static inline int
 375rm_ftrace_export(struct trace_export **list, struct trace_export *export)
 376{
 377        int ret;
 378
 379        ret = rm_trace_export(list, export);
 380        ftrace_exports_disable(export);
 381
 382        return ret;
 383}
 384
 385int register_ftrace_export(struct trace_export *export)
 386{
 387        if (WARN_ON_ONCE(!export->write))
 388                return -1;
 389
 390        mutex_lock(&ftrace_export_lock);
 391
 392        add_ftrace_export(&ftrace_exports_list, export);
 393
 394        mutex_unlock(&ftrace_export_lock);
 395
 396        return 0;
 397}
 398EXPORT_SYMBOL_GPL(register_ftrace_export);
 399
 400int unregister_ftrace_export(struct trace_export *export)
 401{
 402        int ret;
 403
 404        mutex_lock(&ftrace_export_lock);
 405
 406        ret = rm_ftrace_export(&ftrace_exports_list, export);
 407
 408        mutex_unlock(&ftrace_export_lock);
 409
 410        return ret;
 411}
 412EXPORT_SYMBOL_GPL(unregister_ftrace_export);
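/*
 * Example (illustrative sketch, not part of this file): how a module might
 * hook into the export mechanism above. The callback body and all "my_*"
 * names are hypothetical; struct trace_export and the register/unregister
 * calls come from <linux/trace.h>.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// forward the raw trace entry somewhere, e.g. to a device
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *	};
 *
 *	// in module init/exit:
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */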
 413
 414/* trace_flags holds trace_options default values */
 415#define TRACE_DEFAULT_FLAGS                                             \
 416        (FUNCTION_DEFAULT_FLAGS |                                       \
 417         TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
 418         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
 419         TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
 420         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
 421         TRACE_ITER_HASH_PTR)
 422
 423/* trace_options that are only supported by global_trace */
 424#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
 425               TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
 426
 427/* trace_flags that are default zero for instances */
 428#define ZEROED_TRACE_FLAGS \
 429        (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
 430
 431/*
 432 * The global_trace is the descriptor that holds the top-level tracing
 433 * buffers for the live tracing.
 434 */
 435static struct trace_array global_trace = {
 436        .trace_flags = TRACE_DEFAULT_FLAGS,
 437};
 438
 439LIST_HEAD(ftrace_trace_arrays);
 440
 441int trace_array_get(struct trace_array *this_tr)
 442{
 443        struct trace_array *tr;
 444        int ret = -ENODEV;
 445
 446        mutex_lock(&trace_types_lock);
 447        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
 448                if (tr == this_tr) {
 449                        tr->ref++;
 450                        ret = 0;
 451                        break;
 452                }
 453        }
 454        mutex_unlock(&trace_types_lock);
 455
 456        return ret;
 457}
 458
 459static void __trace_array_put(struct trace_array *this_tr)
 460{
 461        WARN_ON(!this_tr->ref);
 462        this_tr->ref--;
 463}
 464
 465/**
 466 * trace_array_put - Decrement the reference counter for this trace array.
 467 * @this_tr : pointer to the trace array
 468 *
 469 * NOTE: Use this when we no longer need the trace array returned by
 470 * trace_array_get_by_name(). This ensures the trace array can be later
 471 * destroyed.
 472 *
 473 */
 474void trace_array_put(struct trace_array *this_tr)
 475{
 476        if (!this_tr)
 477                return;
 478
 479        mutex_lock(&trace_types_lock);
 480        __trace_array_put(this_tr);
 481        mutex_unlock(&trace_types_lock);
 482}
 483EXPORT_SYMBOL_GPL(trace_array_put);
 484
 485int tracing_check_open_get_tr(struct trace_array *tr)
 486{
 487        int ret;
 488
 489        ret = security_locked_down(LOCKDOWN_TRACEFS);
 490        if (ret)
 491                return ret;
 492
 493        if (tracing_disabled)
 494                return -ENODEV;
 495
 496        if (tr && trace_array_get(tr) < 0)
 497                return -ENODEV;
 498
 499        return 0;
 500}
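/*
 * Example (sketch): the usual open/release pairing for a tracefs file that
 * takes a reference on its trace_array. The "my_*" names are hypothetical;
 * the check/get/put helpers are the ones defined above.
 *
 *	static int my_trace_open(struct inode *inode, struct file *filp)
 *	{
 *		struct trace_array *tr = inode->i_private;
 *		int ret;
 *
 *		ret = tracing_check_open_get_tr(tr);
 *		if (ret)
 *			return ret;
 *
 *		filp->private_data = tr;
 *		return 0;
 *	}
 *
 *	static int my_trace_release(struct inode *inode, struct file *filp)
 *	{
 *		trace_array_put(inode->i_private);	// drop the reference
 *		return 0;
 *	}
 */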
 501
 502int call_filter_check_discard(struct trace_event_call *call, void *rec,
 503                              struct trace_buffer *buffer,
 504                              struct ring_buffer_event *event)
 505{
 506        if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
 507            !filter_match_preds(call->filter, rec)) {
 508                __trace_event_discard_commit(buffer, event);
 509                return 1;
 510        }
 511
 512        return 0;
 513}
 514
 515void trace_free_pid_list(struct trace_pid_list *pid_list)
 516{
 517        vfree(pid_list->pids);
 518        kfree(pid_list);
 519}
 520
 521/**
 522 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
 523 * @filtered_pids: The list of pids to check
 524 * @search_pid: The PID to find in @filtered_pids
 525 *
 526 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
 527 */
 528bool
 529trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
 530{
 531        /*
 532         * If pid_max changed after filtered_pids was created, we
 533         * by default ignore all pids greater than the previous pid_max.
 534         */
 535        if (search_pid >= filtered_pids->pid_max)
 536                return false;
 537
 538        return test_bit(search_pid, filtered_pids->pids);
 539}
 540
 541/**
 542 * trace_ignore_this_task - should a task be ignored for tracing
 543 * @filtered_pids: The list of pids to check
 544 * @filtered_no_pids: The list of pids not to be traced
 545 * @task: The task that should be ignored if not filtered
 546 *
 547 * Checks if @task should be traced or not from @filtered_pids.
 548 * Returns true if @task should *NOT* be traced.
 549 * Returns false if @task should be traced.
 550 */
 551bool
 552trace_ignore_this_task(struct trace_pid_list *filtered_pids,
 553                       struct trace_pid_list *filtered_no_pids,
 554                       struct task_struct *task)
 555{
 556        /*
 557         * If filtered_no_pids is not empty, and the task's pid is listed
 558         * in filtered_no_pids, then return true.
 559         * Otherwise, if filtered_pids is empty, that means we can
 560         * trace all tasks. If it has content, then only trace pids
 561         * within filtered_pids.
 562         */
 563
 564        return (filtered_pids &&
 565                !trace_find_filtered_pid(filtered_pids, task->pid)) ||
 566                (filtered_no_pids &&
 567                 trace_find_filtered_pid(filtered_no_pids, task->pid));
 568}
 569
 570/**
 571 * trace_filter_add_remove_task - Add or remove a task from a pid_list
 572 * @pid_list: The list to modify
 573 * @self: The current task for fork or NULL for exit
 574 * @task: The task to add or remove
 575 *
  576 * When adding a task, if @self is defined, the task is only added if @self
  577 * is also included in @pid_list. This happens on fork, and tasks should
 578 * only be added when the parent is listed. If @self is NULL, then the
 579 * @task pid will be removed from the list, which would happen on exit
 580 * of a task.
 581 */
 582void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
 583                                  struct task_struct *self,
 584                                  struct task_struct *task)
 585{
 586        if (!pid_list)
 587                return;
 588
 589        /* For forks, we only add if the forking task is listed */
 590        if (self) {
 591                if (!trace_find_filtered_pid(pid_list, self->pid))
 592                        return;
 593        }
 594
 595        /* Sorry, but we don't support pid_max changing after setting */
 596        if (task->pid >= pid_list->pid_max)
 597                return;
 598
 599        /* "self" is set for forks, and NULL for exits */
 600        if (self)
 601                set_bit(task->pid, pid_list->pids);
 602        else
 603                clear_bit(task->pid, pid_list->pids);
 604}
 605
 606/**
 607 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
 608 * @pid_list: The pid list to show
  609 * @v: The last pid that was shown (actual pid + 1, so that zero can be displayed)
 610 * @pos: The position of the file
 611 *
 612 * This is used by the seq_file "next" operation to iterate the pids
 613 * listed in a trace_pid_list structure.
 614 *
 615 * Returns the pid+1 as we want to display pid of zero, but NULL would
 616 * stop the iteration.
 617 */
 618void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
 619{
 620        unsigned long pid = (unsigned long)v;
 621
 622        (*pos)++;
 623
 624        /* pid already is +1 of the actual previous bit */
 625        pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
 626
 627        /* Return pid + 1 to allow zero to be represented */
 628        if (pid < pid_list->pid_max)
 629                return (void *)(pid + 1);
 630
 631        return NULL;
 632}
 633
 634/**
 635 * trace_pid_start - Used for seq_file to start reading pid lists
 636 * @pid_list: The pid list to show
 637 * @pos: The position of the file
 638 *
 639 * This is used by seq_file "start" operation to start the iteration
 640 * of listing pids.
 641 *
 642 * Returns the pid+1 as we want to display pid of zero, but NULL would
 643 * stop the iteration.
 644 */
 645void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
 646{
 647        unsigned long pid;
 648        loff_t l = 0;
 649
 650        pid = find_first_bit(pid_list->pids, pid_list->pid_max);
 651        if (pid >= pid_list->pid_max)
 652                return NULL;
 653
 654        /* Return pid + 1 so that zero can be the exit value */
 655        for (pid++; pid && l < *pos;
 656             pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
 657                ;
 658        return (void *)pid;
 659}
 660
 661/**
 662 * trace_pid_show - show the current pid in seq_file processing
 663 * @m: The seq_file structure to write into
 664 * @v: A void pointer of the pid (+1) value to display
 665 *
 666 * Can be directly used by seq_file operations to display the current
 667 * pid value.
 668 */
 669int trace_pid_show(struct seq_file *m, void *v)
 670{
 671        unsigned long pid = (unsigned long)v - 1;
 672
 673        seq_printf(m, "%lu\n", pid);
 674        return 0;
 675}
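/*
 * Example (sketch): the three pid helpers above are meant to be wired into
 * a seq_file. A user would typically wrap start/next to pick up the right
 * pid list (often under RCU) and supply a matching stop; the "my_*" names
 * below are hypothetical.
 *
 *	static void *my_pid_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *my_pid_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static void my_pid_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= my_pid_start,
 *		.next	= my_pid_next,
 *		.stop	= my_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 */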
 676
 677/* 128 should be much more than enough */
 678#define PID_BUF_SIZE            127
 679
 680int trace_pid_write(struct trace_pid_list *filtered_pids,
 681                    struct trace_pid_list **new_pid_list,
 682                    const char __user *ubuf, size_t cnt)
 683{
 684        struct trace_pid_list *pid_list;
 685        struct trace_parser parser;
 686        unsigned long val;
 687        int nr_pids = 0;
 688        ssize_t read = 0;
 689        ssize_t ret = 0;
 690        loff_t pos;
 691        pid_t pid;
 692
 693        if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
 694                return -ENOMEM;
 695
 696        /*
  697         * Always create a new array. The write is an all-or-nothing
  698         * operation: a new array is created whenever the user adds
  699         * new pids, and if the operation fails, the current list is
  700         * not modified.
 701         */
 702        pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
 703        if (!pid_list) {
 704                trace_parser_put(&parser);
 705                return -ENOMEM;
 706        }
 707
 708        pid_list->pid_max = READ_ONCE(pid_max);
 709
 710        /* Only truncating will shrink pid_max */
 711        if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
 712                pid_list->pid_max = filtered_pids->pid_max;
 713
 714        pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
 715        if (!pid_list->pids) {
 716                trace_parser_put(&parser);
 717                kfree(pid_list);
 718                return -ENOMEM;
 719        }
 720
 721        if (filtered_pids) {
 722                /* copy the current bits to the new max */
 723                for_each_set_bit(pid, filtered_pids->pids,
 724                                 filtered_pids->pid_max) {
 725                        set_bit(pid, pid_list->pids);
 726                        nr_pids++;
 727                }
 728        }
 729
 730        while (cnt > 0) {
 731
 732                pos = 0;
 733
 734                ret = trace_get_user(&parser, ubuf, cnt, &pos);
 735                if (ret < 0 || !trace_parser_loaded(&parser))
 736                        break;
 737
 738                read += ret;
 739                ubuf += ret;
 740                cnt -= ret;
 741
 742                ret = -EINVAL;
 743                if (kstrtoul(parser.buffer, 0, &val))
 744                        break;
 745                if (val >= pid_list->pid_max)
 746                        break;
 747
 748                pid = (pid_t)val;
 749
 750                set_bit(pid, pid_list->pids);
 751                nr_pids++;
 752
 753                trace_parser_clear(&parser);
 754                ret = 0;
 755        }
 756        trace_parser_put(&parser);
 757
 758        if (ret < 0) {
 759                trace_free_pid_list(pid_list);
 760                return ret;
 761        }
 762
 763        if (!nr_pids) {
 764                /* Cleared the list of pids */
 765                trace_free_pid_list(pid_list);
 766                read = ret;
 767                pid_list = NULL;
 768        }
 769
 770        *new_pid_list = pid_list;
 771
 772        return read;
 773}
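/*
 * Example (sketch): a tracefs ->write() handler would typically call
 * trace_pid_write() with the currently installed list, publish the new
 * list with rcu_assign_pointer(), and free the old one after a grace
 * period. "my_filtered_pids" and "my_mutex" are hypothetical and assumed
 * to be held/annotated appropriately.
 *
 *	static ssize_t my_pid_write(struct file *filp, const char __user *ubuf,
 *				    size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_pid_list *filtered, *new_list = NULL;
 *		ssize_t ret;
 *
 *		filtered = rcu_dereference_protected(my_filtered_pids,
 *						lockdep_is_held(&my_mutex));
 *		ret = trace_pid_write(filtered, &new_list, ubuf, cnt);
 *		if (ret < 0)
 *			return ret;
 *
 *		rcu_assign_pointer(my_filtered_pids, new_list);
 *		synchronize_rcu();
 *		if (filtered)
 *			trace_free_pid_list(filtered);
 *
 *		if (ret > 0)
 *			*ppos += ret;
 *		return ret;
 *	}
 */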
 774
 775static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
 776{
 777        u64 ts;
 778
 779        /* Early boot up does not have a buffer yet */
 780        if (!buf->buffer)
 781                return trace_clock_local();
 782
 783        ts = ring_buffer_time_stamp(buf->buffer);
 784        ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
 785
 786        return ts;
 787}
 788
 789u64 ftrace_now(int cpu)
 790{
 791        return buffer_ftrace_now(&global_trace.array_buffer, cpu);
 792}
 793
 794/**
 795 * tracing_is_enabled - Show if global_trace has been enabled
 796 *
  797 * Shows if the global trace has been enabled or not. It uses the
  798 * mirror flag "buffer_disabled" so that fast paths, such as the
  799 * irqsoff tracer, can check it cheaply. But it may be inaccurate due
  800 * to races. If you need to know the accurate state, use tracing_is_on(),
  801 * which is a little slower but accurate.
 802 */
 803int tracing_is_enabled(void)
 804{
 805        /*
 806         * For quick access (irqsoff uses this in fast path), just
 807         * return the mirror variable of the state of the ring buffer.
 808         * It's a little racy, but we don't really care.
 809         */
 810        smp_rmb();
 811        return !global_trace.buffer_disabled;
 812}
 813
 814/*
 815 * trace_buf_size is the size in bytes that is allocated
 816 * for a buffer. Note, the number of bytes is always rounded
 817 * to page size.
 818 *
  819 * This number is purposely set to a low value of 16384.
  820 * If a dump on oops happens, it is much appreciated not to
  821 * have to wait for all that output. In any case, this is
  822 * configurable at both boot time and run time.
 823 */
 824#define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
 825
 826static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
 827
 828/* trace_types holds a link list of available tracers. */
 829static struct tracer            *trace_types __read_mostly;
 830
 831/*
 832 * trace_types_lock is used to protect the trace_types list.
 833 */
 834DEFINE_MUTEX(trace_types_lock);
 835
 836/*
  837 * Serialize access to the ring buffer.
  838 *
  839 * The ring buffer serializes readers, but that is only low-level protection.
  840 * The validity of the events (returned by ring_buffer_peek(), etc.)
  841 * is not protected by the ring buffer.
  842 *
  843 * The content of events may become garbage if we allow other processes to
  844 * consume these events concurrently:
  845 *   A) the page of the consumed events may become a normal page
  846 *      (not a reader page) in the ring buffer, and this page will be
  847 *      rewritten by the events producer.
  848 *   B) the page of the consumed events may become a page for splice_read,
  849 *      and this page will be returned to the system.
  850 *
  851 * These primitives allow multiple processes to access different cpu ring
  852 * buffers concurrently.
  853 *
  854 * These primitives don't distinguish read-only and read-consume access.
  855 * Multiple read-only accesses are also serialized.
 856 */
 857
 858#ifdef CONFIG_SMP
 859static DECLARE_RWSEM(all_cpu_access_lock);
 860static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
 861
 862static inline void trace_access_lock(int cpu)
 863{
 864        if (cpu == RING_BUFFER_ALL_CPUS) {
 865                /* gain it for accessing the whole ring buffer. */
 866                down_write(&all_cpu_access_lock);
 867        } else {
 868                /* gain it for accessing a cpu ring buffer. */
 869
 870                /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
 871                down_read(&all_cpu_access_lock);
 872
 873                /* Secondly block other access to this @cpu ring buffer. */
 874                mutex_lock(&per_cpu(cpu_access_lock, cpu));
 875        }
 876}
 877
 878static inline void trace_access_unlock(int cpu)
 879{
 880        if (cpu == RING_BUFFER_ALL_CPUS) {
 881                up_write(&all_cpu_access_lock);
 882        } else {
 883                mutex_unlock(&per_cpu(cpu_access_lock, cpu));
 884                up_read(&all_cpu_access_lock);
 885        }
 886}
 887
 888static inline void trace_access_lock_init(void)
 889{
 890        int cpu;
 891
 892        for_each_possible_cpu(cpu)
 893                mutex_init(&per_cpu(cpu_access_lock, cpu));
 894}
 895
 896#else
 897
 898static DEFINE_MUTEX(access_lock);
 899
 900static inline void trace_access_lock(int cpu)
 901{
 902        (void)cpu;
 903        mutex_lock(&access_lock);
 904}
 905
 906static inline void trace_access_unlock(int cpu)
 907{
 908        (void)cpu;
 909        mutex_unlock(&access_lock);
 910}
 911
 912static inline void trace_access_lock_init(void)
 913{
 914}
 915
 916#endif
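/*
 * Example (sketch): readers later in this file bracket consuming reads of a
 * per-cpu buffer (or of all buffers with RING_BUFFER_ALL_CPUS) with these
 * helpers, roughly:
 *
 *	trace_access_lock(cpu_file);
 *	// ... ring_buffer_peek()/ring_buffer_consume() on that cpu ...
 *	trace_access_unlock(cpu_file);
 */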
 917
 918#ifdef CONFIG_STACKTRACE
 919static void __ftrace_trace_stack(struct trace_buffer *buffer,
 920                                 unsigned int trace_ctx,
 921                                 int skip, struct pt_regs *regs);
 922static inline void ftrace_trace_stack(struct trace_array *tr,
 923                                      struct trace_buffer *buffer,
 924                                      unsigned int trace_ctx,
 925                                      int skip, struct pt_regs *regs);
 926
 927#else
 928static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
 929                                        unsigned int trace_ctx,
 930                                        int skip, struct pt_regs *regs)
 931{
 932}
 933static inline void ftrace_trace_stack(struct trace_array *tr,
 934                                      struct trace_buffer *buffer,
 935                                      unsigned long trace_ctx,
 936                                      int skip, struct pt_regs *regs)
 937{
 938}
 939
 940#endif
 941
 942static __always_inline void
 943trace_event_setup(struct ring_buffer_event *event,
 944                  int type, unsigned int trace_ctx)
 945{
 946        struct trace_entry *ent = ring_buffer_event_data(event);
 947
 948        tracing_generic_entry_update(ent, type, trace_ctx);
 949}
 950
 951static __always_inline struct ring_buffer_event *
 952__trace_buffer_lock_reserve(struct trace_buffer *buffer,
 953                          int type,
 954                          unsigned long len,
 955                          unsigned int trace_ctx)
 956{
 957        struct ring_buffer_event *event;
 958
 959        event = ring_buffer_lock_reserve(buffer, len);
 960        if (event != NULL)
 961                trace_event_setup(event, type, trace_ctx);
 962
 963        return event;
 964}
 965
 966void tracer_tracing_on(struct trace_array *tr)
 967{
 968        if (tr->array_buffer.buffer)
 969                ring_buffer_record_on(tr->array_buffer.buffer);
 970        /*
 971         * This flag is looked at when buffers haven't been allocated
 972         * yet, or by some tracers (like irqsoff), that just want to
 973         * know if the ring buffer has been disabled, but it can handle
 974         * races of where it gets disabled but we still do a record.
 975         * As the check is in the fast path of the tracers, it is more
 976         * important to be fast than accurate.
 977         */
 978        tr->buffer_disabled = 0;
 979        /* Make the flag seen by readers */
 980        smp_wmb();
 981}
 982
 983/**
 984 * tracing_on - enable tracing buffers
 985 *
 986 * This function enables tracing buffers that may have been
 987 * disabled with tracing_off.
 988 */
 989void tracing_on(void)
 990{
 991        tracer_tracing_on(&global_trace);
 992}
 993EXPORT_SYMBOL_GPL(tracing_on);
 994
 995
 996static __always_inline void
 997__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
 998{
 999        __this_cpu_write(trace_taskinfo_save, true);
1000
1001        /* If this is the temp buffer, we need to commit fully */
1002        if (this_cpu_read(trace_buffered_event) == event) {
1003                /* Length is in event->array[0] */
1004                ring_buffer_write(buffer, event->array[0], &event->array[1]);
1005                /* Release the temp buffer */
1006                this_cpu_dec(trace_buffered_event_cnt);
1007        } else
1008                ring_buffer_unlock_commit(buffer, event);
1009}
1010
1011/**
1012 * __trace_puts - write a constant string into the trace buffer.
1013 * @ip:    The address of the caller
1014 * @str:   The constant string to write
1015 * @size:  The size of the string.
1016 */
1017int __trace_puts(unsigned long ip, const char *str, int size)
1018{
1019        struct ring_buffer_event *event;
1020        struct trace_buffer *buffer;
1021        struct print_entry *entry;
1022        unsigned int trace_ctx;
1023        int alloc;
1024
1025        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1026                return 0;
1027
1028        if (unlikely(tracing_selftest_running || tracing_disabled))
1029                return 0;
1030
1031        alloc = sizeof(*entry) + size + 2; /* possible \n added */
1032
1033        trace_ctx = tracing_gen_ctx();
1034        buffer = global_trace.array_buffer.buffer;
1035        ring_buffer_nest_start(buffer);
1036        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1037                                            trace_ctx);
1038        if (!event) {
1039                size = 0;
1040                goto out;
1041        }
1042
1043        entry = ring_buffer_event_data(event);
1044        entry->ip = ip;
1045
1046        memcpy(&entry->buf, str, size);
1047
1048        /* Add a newline if necessary */
1049        if (entry->buf[size - 1] != '\n') {
1050                entry->buf[size] = '\n';
1051                entry->buf[size + 1] = '\0';
1052        } else
1053                entry->buf[size] = '\0';
1054
1055        __buffer_unlock_commit(buffer, event);
1056        ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1057 out:
1058        ring_buffer_nest_end(buffer);
1059        return size;
1060}
1061EXPORT_SYMBOL_GPL(__trace_puts);
1062
1063/**
1064 * __trace_bputs - write the pointer to a constant string into trace buffer
1065 * @ip:    The address of the caller
1066 * @str:   The constant string to write to the buffer to
1067 */
1068int __trace_bputs(unsigned long ip, const char *str)
1069{
1070        struct ring_buffer_event *event;
1071        struct trace_buffer *buffer;
1072        struct bputs_entry *entry;
1073        unsigned int trace_ctx;
1074        int size = sizeof(struct bputs_entry);
1075        int ret = 0;
1076
1077        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1078                return 0;
1079
1080        if (unlikely(tracing_selftest_running || tracing_disabled))
1081                return 0;
1082
1083        trace_ctx = tracing_gen_ctx();
1084        buffer = global_trace.array_buffer.buffer;
1085
1086        ring_buffer_nest_start(buffer);
1087        event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1088                                            trace_ctx);
1089        if (!event)
1090                goto out;
1091
1092        entry = ring_buffer_event_data(event);
1093        entry->ip                       = ip;
1094        entry->str                      = str;
1095
1096        __buffer_unlock_commit(buffer, event);
1097        ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1098
1099        ret = 1;
1100 out:
1101        ring_buffer_nest_end(buffer);
1102        return ret;
1103}
1104EXPORT_SYMBOL_GPL(__trace_bputs);
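/*
 * Note: callers normally use the trace_puts() macro from <linux/kernel.h>
 * rather than calling these directly; roughly, it picks __trace_bputs() for
 * string literals (only the pointer is recorded) and falls back to
 * __trace_puts() otherwise, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */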
1105
1106#ifdef CONFIG_TRACER_SNAPSHOT
1107static void tracing_snapshot_instance_cond(struct trace_array *tr,
1108                                           void *cond_data)
1109{
1110        struct tracer *tracer = tr->current_trace;
1111        unsigned long flags;
1112
1113        if (in_nmi()) {
1114                internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1115                internal_trace_puts("*** snapshot is being ignored        ***\n");
1116                return;
1117        }
1118
1119        if (!tr->allocated_snapshot) {
1120                internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1121                internal_trace_puts("*** stopping trace here!   ***\n");
1122                tracing_off();
1123                return;
1124        }
1125
 1126        /* Note, the snapshot cannot be used when the tracer uses it */
1127        if (tracer->use_max_tr) {
1128                internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1129                internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1130                return;
1131        }
1132
1133        local_irq_save(flags);
1134        update_max_tr(tr, current, smp_processor_id(), cond_data);
1135        local_irq_restore(flags);
1136}
1137
1138void tracing_snapshot_instance(struct trace_array *tr)
1139{
1140        tracing_snapshot_instance_cond(tr, NULL);
1141}
1142
1143/**
1144 * tracing_snapshot - take a snapshot of the current buffer.
1145 *
1146 * This causes a swap between the snapshot buffer and the current live
1147 * tracing buffer. You can use this to take snapshots of the live
1148 * trace when some condition is triggered, but continue to trace.
1149 *
 1150 * Note, make sure to allocate the snapshot beforehand, either with
 1151 * tracing_snapshot_alloc(), or manually with:
 1152 * echo 1 > /sys/kernel/debug/tracing/snapshot
 1153 *
 1154 * If the snapshot buffer is not allocated, this will stop tracing,
 1155 * basically making a permanent snapshot.
1156 */
1157void tracing_snapshot(void)
1158{
1159        struct trace_array *tr = &global_trace;
1160
1161        tracing_snapshot_instance(tr);
1162}
1163EXPORT_SYMBOL_GPL(tracing_snapshot);
1164
1165/**
1166 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1167 * @tr:         The tracing instance to snapshot
1168 * @cond_data:  The data to be tested conditionally, and possibly saved
1169 *
1170 * This is the same as tracing_snapshot() except that the snapshot is
1171 * conditional - the snapshot will only happen if the
1172 * cond_snapshot.update() implementation receiving the cond_data
1173 * returns true, which means that the trace array's cond_snapshot
1174 * update() operation used the cond_data to determine whether the
1175 * snapshot should be taken, and if it was, presumably saved it along
1176 * with the snapshot.
1177 */
1178void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1179{
1180        tracing_snapshot_instance_cond(tr, cond_data);
1181}
1182EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1183
1184/**
1185 * tracing_snapshot_cond_data - get the user data associated with a snapshot
1186 * @tr:         The tracing instance
1187 *
1188 * When the user enables a conditional snapshot using
1189 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1190 * with the snapshot.  This accessor is used to retrieve it.
1191 *
1192 * Should not be called from cond_snapshot.update(), since it takes
1193 * the tr->max_lock lock, which the code calling
1194 * cond_snapshot.update() has already done.
1195 *
1196 * Returns the cond_data associated with the trace array's snapshot.
1197 */
1198void *tracing_cond_snapshot_data(struct trace_array *tr)
1199{
1200        void *cond_data = NULL;
1201
1202        arch_spin_lock(&tr->max_lock);
1203
1204        if (tr->cond_snapshot)
1205                cond_data = tr->cond_snapshot->cond_data;
1206
1207        arch_spin_unlock(&tr->max_lock);
1208
1209        return cond_data;
1210}
1211EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1212
1213static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1214                                        struct array_buffer *size_buf, int cpu_id);
1215static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1216
1217int tracing_alloc_snapshot_instance(struct trace_array *tr)
1218{
1219        int ret;
1220
1221        if (!tr->allocated_snapshot) {
1222
1223                /* allocate spare buffer */
1224                ret = resize_buffer_duplicate_size(&tr->max_buffer,
1225                                   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1226                if (ret < 0)
1227                        return ret;
1228
1229                tr->allocated_snapshot = true;
1230        }
1231
1232        return 0;
1233}
1234
1235static void free_snapshot(struct trace_array *tr)
1236{
1237        /*
 1238         * We don't free the ring buffer; instead, we resize it because
 1239         * the max_tr ring buffer has some state (e.g. ring->clock) and
 1240         * we want to preserve it.
1241         */
1242        ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1243        set_buffer_entries(&tr->max_buffer, 1);
1244        tracing_reset_online_cpus(&tr->max_buffer);
1245        tr->allocated_snapshot = false;
1246}
1247
1248/**
1249 * tracing_alloc_snapshot - allocate snapshot buffer.
1250 *
1251 * This only allocates the snapshot buffer if it isn't already
1252 * allocated - it doesn't also take a snapshot.
1253 *
1254 * This is meant to be used in cases where the snapshot buffer needs
1255 * to be set up for events that can't sleep but need to be able to
1256 * trigger a snapshot.
1257 */
1258int tracing_alloc_snapshot(void)
1259{
1260        struct trace_array *tr = &global_trace;
1261        int ret;
1262
1263        ret = tracing_alloc_snapshot_instance(tr);
1264        WARN_ON(ret < 0);
1265
1266        return ret;
1267}
1268EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1269
1270/**
1271 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1272 *
1273 * This is similar to tracing_snapshot(), but it will allocate the
1274 * snapshot buffer if it isn't already allocated. Use this only
1275 * where it is safe to sleep, as the allocation may sleep.
1276 *
1277 * This causes a swap between the snapshot buffer and the current live
1278 * tracing buffer. You can use this to take snapshots of the live
1279 * trace when some condition is triggered, but continue to trace.
1280 */
1281void tracing_snapshot_alloc(void)
1282{
1283        int ret;
1284
1285        ret = tracing_alloc_snapshot();
1286        if (ret < 0)
1287                return;
1288
1289        tracing_snapshot();
1290}
1291EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
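/*
 * Example (sketch): the usual pattern is to allocate the snapshot buffer
 * once from a context that may sleep, then take snapshots from a hot path
 * whenever a condition of interest fires. The condition is a placeholder.
 *
 *	// during setup (may sleep):
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *
 *	// later, possibly from atomic context:
 *	if (latency > threshold)
 *		tracing_snapshot();
 */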
1292
1293/**
1294 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1295 * @tr:         The tracing instance
1296 * @cond_data:  User data to associate with the snapshot
1297 * @update:     Implementation of the cond_snapshot update function
1298 *
1299 * Check whether the conditional snapshot for the given instance has
1300 * already been enabled, or if the current tracer is already using a
1301 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1302 * save the cond_data and update function inside.
1303 *
1304 * Returns 0 if successful, error otherwise.
1305 */
1306int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1307                                 cond_update_fn_t update)
1308{
1309        struct cond_snapshot *cond_snapshot;
1310        int ret = 0;
1311
1312        cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1313        if (!cond_snapshot)
1314                return -ENOMEM;
1315
1316        cond_snapshot->cond_data = cond_data;
1317        cond_snapshot->update = update;
1318
1319        mutex_lock(&trace_types_lock);
1320
1321        ret = tracing_alloc_snapshot_instance(tr);
1322        if (ret)
1323                goto fail_unlock;
1324
1325        if (tr->current_trace->use_max_tr) {
1326                ret = -EBUSY;
1327                goto fail_unlock;
1328        }
1329
1330        /*
1331         * The cond_snapshot can only change to NULL without the
1332         * trace_types_lock. We don't care if we race with it going
1333         * to NULL, but we want to make sure that it's not set to
1334         * something other than NULL when we get here, which we can
1335         * do safely with only holding the trace_types_lock and not
1336         * having to take the max_lock.
1337         */
1338        if (tr->cond_snapshot) {
1339                ret = -EBUSY;
1340                goto fail_unlock;
1341        }
1342
1343        arch_spin_lock(&tr->max_lock);
1344        tr->cond_snapshot = cond_snapshot;
1345        arch_spin_unlock(&tr->max_lock);
1346
1347        mutex_unlock(&trace_types_lock);
1348
1349        return ret;
1350
1351 fail_unlock:
1352        mutex_unlock(&trace_types_lock);
1353        kfree(cond_snapshot);
1354        return ret;
1355}
1356EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
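/*
 * Example (sketch): a conditional-snapshot update callback. The signature
 * follows cond_update_fn_t as used above; everything named "my_*" is
 * hypothetical, and the "first hit only" policy is just an illustration.
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *state = cond_data;
 *
 *		return state->hits++ == 0;	// snapshot only the first hit
 *	}
 *
 *	// enable:   tracing_snapshot_cond_enable(tr, &my_state, my_update);
 *	// trigger:  tracing_snapshot_cond(tr, &my_state);
 *	// disable:  tracing_snapshot_cond_disable(tr);
 */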
1357
1358/**
1359 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1360 * @tr:         The tracing instance
1361 *
1362 * Check whether the conditional snapshot for the given instance is
1363 * enabled; if so, free the cond_snapshot associated with it,
1364 * otherwise return -EINVAL.
1365 *
1366 * Returns 0 if successful, error otherwise.
1367 */
1368int tracing_snapshot_cond_disable(struct trace_array *tr)
1369{
1370        int ret = 0;
1371
1372        arch_spin_lock(&tr->max_lock);
1373
1374        if (!tr->cond_snapshot)
1375                ret = -EINVAL;
1376        else {
1377                kfree(tr->cond_snapshot);
1378                tr->cond_snapshot = NULL;
1379        }
1380
1381        arch_spin_unlock(&tr->max_lock);
1382
1383        return ret;
1384}
1385EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1386#else
1387void tracing_snapshot(void)
1388{
1389        WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1390}
1391EXPORT_SYMBOL_GPL(tracing_snapshot);
1392void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1393{
1394        WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1395}
1396EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397int tracing_alloc_snapshot(void)
1398{
1399        WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400        return -ENODEV;
1401}
1402EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1403void tracing_snapshot_alloc(void)
1404{
1405        /* Give warning */
1406        tracing_snapshot();
1407}
1408EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409void *tracing_cond_snapshot_data(struct trace_array *tr)
1410{
1411        return NULL;
1412}
1413EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415{
1416        return -ENODEV;
1417}
1418EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1419int tracing_snapshot_cond_disable(struct trace_array *tr)
1420{
1421        return false;
1422}
1423EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424#endif /* CONFIG_TRACER_SNAPSHOT */
1425
1426void tracer_tracing_off(struct trace_array *tr)
1427{
1428        if (tr->array_buffer.buffer)
1429                ring_buffer_record_off(tr->array_buffer.buffer);
1430        /*
1431         * This flag is looked at when buffers haven't been allocated
1432         * yet, or by some tracers (like irqsoff), that just want to
1433         * know if the ring buffer has been disabled, but it can handle
1434         * races of where it gets disabled but we still do a record.
1435         * As the check is in the fast path of the tracers, it is more
1436         * important to be fast than accurate.
1437         */
1438        tr->buffer_disabled = 1;
1439        /* Make the flag seen by readers */
1440        smp_wmb();
1441}
1442
1443/**
1444 * tracing_off - turn off tracing buffers
1445 *
1446 * This function stops the tracing buffers from recording data.
1447 * It does not disable any overhead the tracers themselves may
1448 * be causing. This function simply causes all recording to
1449 * the ring buffers to fail.
1450 */
1451void tracing_off(void)
1452{
1453        tracer_tracing_off(&global_trace);
1454}
1455EXPORT_SYMBOL_GPL(tracing_off);
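/*
 * Example (sketch): a common debugging pattern is to freeze the ring buffer
 * the moment a suspicious condition is detected, so that the trace leading
 * up to it is preserved for inspection via the "trace" file; tracing_on()
 * re-enables recording afterwards. The condition is a placeholder.
 *
 *	if (unlikely(bad_state)) {
 *		trace_printk("bad_state hit, freezing trace\n");
 *		tracing_off();
 *	}
 */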
1456
1457void disable_trace_on_warning(void)
1458{
1459        if (__disable_trace_on_warning) {
1460                trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461                        "Disabling tracing due to warning\n");
1462                tracing_off();
1463        }
1464}
1465
1466/**
1467 * tracer_tracing_is_on - show real state of ring buffer enabled
 1468 * @tr : the trace array whose ring buffer to check
 1469 *
 1470 * Shows the real state of the ring buffer: whether it is enabled or not.
1471 */
1472bool tracer_tracing_is_on(struct trace_array *tr)
1473{
1474        if (tr->array_buffer.buffer)
1475                return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476        return !tr->buffer_disabled;
1477}
1478
1479/**
1480 * tracing_is_on - show state of ring buffers enabled
1481 */
1482int tracing_is_on(void)
1483{
1484        return tracer_tracing_is_on(&global_trace);
1485}
1486EXPORT_SYMBOL_GPL(tracing_is_on);
1487
1488static int __init set_buf_size(char *str)
1489{
1490        unsigned long buf_size;
1491
1492        if (!str)
1493                return 0;
1494        buf_size = memparse(str, &str);
1495        /* nr_entries can not be zero */
1496        if (buf_size == 0)
1497                return 0;
1498        trace_buf_size = buf_size;
1499        return 1;
1500}
1501__setup("trace_buf_size=", set_buf_size);
1502
1503static int __init set_tracing_thresh(char *str)
1504{
1505        unsigned long threshold;
1506        int ret;
1507
1508        if (!str)
1509                return 0;
1510        ret = kstrtoul(str, 0, &threshold);
1511        if (ret < 0)
1512                return 0;
1513        tracing_thresh = threshold * 1000;
1514        return 1;
1515}
1516__setup("tracing_thresh=", set_tracing_thresh);
1517
1518unsigned long nsecs_to_usecs(unsigned long nsecs)
1519{
1520        return nsecs / 1000;
1521}
1522
1523/*
1524 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1525 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1526 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1527 * of strings in the order that the evals (enum) were defined.
1528 */
1529#undef C
1530#define C(a, b) b
1531
1532/* These must match the bit positions in trace_iterator_flags */
1533static const char *trace_options[] = {
1534        TRACE_FLAGS
1535        NULL
1536};
1537
1538static struct {
1539        u64 (*func)(void);
1540        const char *name;
1541        int in_ns;              /* is this clock in nanoseconds? */
1542} trace_clocks[] = {
1543        { trace_clock_local,            "local",        1 },
1544        { trace_clock_global,           "global",       1 },
1545        { trace_clock_counter,          "counter",      0 },
1546        { trace_clock_jiffies,          "uptime",       0 },
1547        { trace_clock,                  "perf",         1 },
1548        { ktime_get_mono_fast_ns,       "mono",         1 },
1549        { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1550        { ktime_get_boot_fast_ns,       "boot",         1 },
1551        ARCH_TRACE_CLOCKS
1552};
1553
1554bool trace_clock_in_ns(struct trace_array *tr)
1555{
1556        if (trace_clocks[tr->clock_id].in_ns)
1557                return true;
1558
1559        return false;
1560}
1561
1562/*
1563 * trace_parser_get_init - gets the buffer for trace parser
1564 */
1565int trace_parser_get_init(struct trace_parser *parser, int size)
1566{
1567        memset(parser, 0, sizeof(*parser));
1568
1569        parser->buffer = kmalloc(size, GFP_KERNEL);
1570        if (!parser->buffer)
1571                return 1;
1572
1573        parser->size = size;
1574        return 0;
1575}
1576
1577/*
1578 * trace_parser_put - frees the buffer for trace parser
1579 */
1580void trace_parser_put(struct trace_parser *parser)
1581{
1582        kfree(parser->buffer);
1583        parser->buffer = NULL;
1584}
1585
1586/*
 1587 * trace_get_user - reads the user input string separated by space
 1588 * (matched by isspace(ch))
 1589 *
 1590 * For each string found, the 'struct trace_parser' is updated,
 1591 * and the function returns.
1592 *
1593 * Returns number of bytes read.
1594 *
1595 * See kernel/trace/trace.h for 'struct trace_parser' details.
1596 */
1597int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1598        size_t cnt, loff_t *ppos)
1599{
1600        char ch;
1601        size_t read = 0;
1602        ssize_t ret;
1603
1604        if (!*ppos)
1605                trace_parser_clear(parser);
1606
1607        ret = get_user(ch, ubuf++);
1608        if (ret)
1609                goto out;
1610
1611        read++;
1612        cnt--;
1613
1614        /*
 1615         * If the parser is not finished with the last write,
1616         * continue reading the user input without skipping spaces.
1617         */
1618        if (!parser->cont) {
1619                /* skip white space */
1620                while (cnt && isspace(ch)) {
1621                        ret = get_user(ch, ubuf++);
1622                        if (ret)
1623                                goto out;
1624                        read++;
1625                        cnt--;
1626                }
1627
1628                parser->idx = 0;
1629
1630                /* only spaces were written */
1631                if (isspace(ch) || !ch) {
1632                        *ppos += read;
1633                        ret = read;
1634                        goto out;
1635                }
1636        }
1637
1638        /* read the non-space input */
1639        while (cnt && !isspace(ch) && ch) {
1640                if (parser->idx < parser->size - 1)
1641                        parser->buffer[parser->idx++] = ch;
1642                else {
1643                        ret = -EINVAL;
1644                        goto out;
1645                }
1646                ret = get_user(ch, ubuf++);
1647                if (ret)
1648                        goto out;
1649                read++;
1650                cnt--;
1651        }
1652
1653        /* We either got finished input or we have to wait for another call. */
1654        if (isspace(ch) || !ch) {
1655                parser->buffer[parser->idx] = 0;
1656                parser->cont = false;
1657        } else if (parser->idx < parser->size - 1) {
1658                parser->cont = true;
1659                parser->buffer[parser->idx++] = ch;
1660                /* Make sure the parsed string always terminates with '\0'. */
1661                parser->buffer[parser->idx] = 0;
1662        } else {
1663                ret = -EINVAL;
1664                goto out;
1665        }
1666
1667        *ppos += read;
1668        ret = read;
1669
1670out:
1671        return ret;
1672}
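
/*
 * A minimal sketch of how a tracefs ->write() handler can be built on the
 * parser helpers above; one space-separated token is consumed per call.
 * The buffer size and the pr_info() consumer are arbitrary choices for the
 * example, not something this file defines.
 */
static ssize_t example_token_write(struct file *filp, const char __user *ubuf,
                                   size_t cnt, loff_t *ppos)
{
        struct trace_parser parser;
        ssize_t read;

        if (trace_parser_get_init(&parser, TASK_COMM_LEN))
                return -ENOMEM;

        read = trace_get_user(&parser, ubuf, cnt, ppos);
        if (read > 0 && !parser.cont) {
                /* A complete, NUL-terminated token sits in parser.buffer. */
                pr_info("example token: %s\n", parser.buffer);
        }

        trace_parser_put(&parser);

        return read;
}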
1673
1674/* TODO add a seq_buf_to_buffer() */
1675static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1676{
1677        int len;
1678
1679        if (trace_seq_used(s) <= s->seq.readpos)
1680                return -EBUSY;
1681
1682        len = trace_seq_used(s) - s->seq.readpos;
1683        if (cnt > len)
1684                cnt = len;
1685        memcpy(buf, s->buffer + s->seq.readpos, cnt);
1686
1687        s->seq.readpos += cnt;
1688        return cnt;
1689}
1690
1691unsigned long __read_mostly     tracing_thresh;
1692static const struct file_operations tracing_max_lat_fops;
1693
1694#ifdef LATENCY_FS_NOTIFY
1695
1696static struct workqueue_struct *fsnotify_wq;
1697
1698static void latency_fsnotify_workfn(struct work_struct *work)
1699{
1700        struct trace_array *tr = container_of(work, struct trace_array,
1701                                              fsnotify_work);
1702        fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1703}
1704
1705static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1706{
1707        struct trace_array *tr = container_of(iwork, struct trace_array,
1708                                              fsnotify_irqwork);
1709        queue_work(fsnotify_wq, &tr->fsnotify_work);
1710}
1711
1712static void trace_create_maxlat_file(struct trace_array *tr,
1713                                     struct dentry *d_tracer)
1714{
1715        INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1716        init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1717        tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1718                                              d_tracer, &tr->max_latency,
1719                                              &tracing_max_lat_fops);
1720}
1721
1722__init static int latency_fsnotify_init(void)
1723{
1724        fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1725                                      WQ_UNBOUND | WQ_HIGHPRI, 0);
1726        if (!fsnotify_wq) {
1727                pr_err("Unable to allocate tr_max_lat_wq\n");
1728                return -ENOMEM;
1729        }
1730        return 0;
1731}
1732
1733late_initcall_sync(latency_fsnotify_init);
1734
1735void latency_fsnotify(struct trace_array *tr)
1736{
1737        if (!fsnotify_wq)
1738                return;
1739        /*
1740         * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1741         * possible that we are called from __schedule() or do_idle(), which
1742         * could cause a deadlock.
1743         */
1744        irq_work_queue(&tr->fsnotify_irqwork);
1745}
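
/*
 * Note the two-stage deferral above: latency_fsnotify() may be reached from
 * __schedule() or do_idle(), so it only queues an irq_work; the irq_work
 * handler runs in interrupt context, where queue_work() is safe; and the
 * workqueue item finally calls fsnotify_inode() from process context.
 */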
1746
1747/*
1748 * LATENCY_FS_NOTIFY is only defined when
1749 * (CONFIG_TRACER_MAX_TRACE || CONFIG_HWLAT_TRACER) && CONFIG_FSNOTIFY.
1750 */
1751#else /* !LATENCY_FS_NOTIFY */
1752
1753#define trace_create_maxlat_file(tr, d_tracer)                          \
1754        trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1755                          &tr->max_latency, &tracing_max_lat_fops)
1756
1757#endif /* LATENCY_FS_NOTIFY */
1758
1759#ifdef CONFIG_TRACER_MAX_TRACE
1760/*
1761 * Copy the new maximum trace into the separate maximum-trace
1762 * structure. (this way the maximum trace is permanently saved,
1763 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1764 */
1765static void
1766__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1767{
1768        struct array_buffer *trace_buf = &tr->array_buffer;
1769        struct array_buffer *max_buf = &tr->max_buffer;
1770        struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1771        struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1772
1773        max_buf->cpu = cpu;
1774        max_buf->time_start = data->preempt_timestamp;
1775
1776        max_data->saved_latency = tr->max_latency;
1777        max_data->critical_start = data->critical_start;
1778        max_data->critical_end = data->critical_end;
1779
1780        strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1781        max_data->pid = tsk->pid;
1782        /*
1783         * If tsk == current, then use current_uid(), as that does not use
1784         * RCU. The irq tracer can be called out of RCU scope.
1785         */
1786        if (tsk == current)
1787                max_data->uid = current_uid();
1788        else
1789                max_data->uid = task_uid(tsk);
1790
1791        max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1792        max_data->policy = tsk->policy;
1793        max_data->rt_priority = tsk->rt_priority;
1794
1795        /* record this task's comm */
1796        tracing_record_cmdline(tsk);
1797        latency_fsnotify(tr);
1798}
1799
1800/**
1801 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1802 * @tr: tracer
1803 * @tsk: the task with the latency
1804 * @cpu: The cpu that initiated the trace.
1805 * @cond_data: User data associated with a conditional snapshot
1806 *
1807 * Flip the buffers between the @tr and the max_tr and record information
1808 * about which task was the cause of this latency.
1809 */
1810void
1811update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1812              void *cond_data)
1813{
1814        if (tr->stop_count)
1815                return;
1816
1817        WARN_ON_ONCE(!irqs_disabled());
1818
1819        if (!tr->allocated_snapshot) {
1820                /* Only the nop tracer should hit this when disabling */
1821                WARN_ON_ONCE(tr->current_trace != &nop_trace);
1822                return;
1823        }
1824
1825        arch_spin_lock(&tr->max_lock);
1826
1827        /* Inherit the recordable setting from array_buffer */
1828        if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1829                ring_buffer_record_on(tr->max_buffer.buffer);
1830        else
1831                ring_buffer_record_off(tr->max_buffer.buffer);
1832
1833#ifdef CONFIG_TRACER_SNAPSHOT
1834        if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1835                goto out_unlock;
1836#endif
1837        swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1838
1839        __update_max_tr(tr, tsk, cpu);
1840
1841 out_unlock:
1842        arch_spin_unlock(&tr->max_lock);
1843}
1844
1845/**
1846 * update_max_tr_single - only copy one trace over, and reset the rest
1847 * @tr: tracer
1848 * @tsk: task with the latency
1849 * @cpu: the cpu of the buffer to copy.
1850 *
1851 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1852 */
1853void
1854update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1855{
1856        int ret;
1857
1858        if (tr->stop_count)
1859                return;
1860
1861        WARN_ON_ONCE(!irqs_disabled());
1862        if (!tr->allocated_snapshot) {
1863                /* Only the nop tracer should hit this when disabling */
1864                WARN_ON_ONCE(tr->current_trace != &nop_trace);
1865                return;
1866        }
1867
1868        arch_spin_lock(&tr->max_lock);
1869
1870        ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1871
1872        if (ret == -EBUSY) {
1873                /*
1874                 * We failed to swap the buffer due to a commit taking
1875                 * place on this CPU. We fail to record, but we reset
1876                 * the max trace buffer (no one writes directly to it)
1877                 * and flag that it failed.
1878                 */
1879                trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1880                        "Failed to swap buffers due to commit in progress\n");
1881        }
1882
1883        WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1884
1885        __update_max_tr(tr, tsk, cpu);
1886        arch_spin_unlock(&tr->max_lock);
1887}
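
/*
 * A minimal sketch of the call a latency tracer makes when it observes a
 * new worst case. The bookkeeping around "delta" is invented for the
 * example; only the update_max_tr() call mirrors the in-tree tracers,
 * which make it with interrupts already disabled.
 */
static void example_report_new_max(struct trace_array *tr, unsigned long delta,
                                   int cpu)
{
        if (delta <= tr->max_latency)
                return;

        tr->max_latency = delta;
        /* Swap in the snapshot buffer and record which task caused it. */
        update_max_tr(tr, current, cpu, NULL);
}
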
1888#endif /* CONFIG_TRACER_MAX_TRACE */
1889
1890static int wait_on_pipe(struct trace_iterator *iter, int full)
1891{
1892        /* Iterators are static; they should be either filled or empty */
1893        if (trace_buffer_iter(iter, iter->cpu_file))
1894                return 0;
1895
1896        return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1897                                full);
1898}
1899
1900#ifdef CONFIG_FTRACE_STARTUP_TEST
1901static bool selftests_can_run;
1902
1903struct trace_selftests {
1904        struct list_head                list;
1905        struct tracer                   *type;
1906};
1907
1908static LIST_HEAD(postponed_selftests);
1909
1910static int save_selftest(struct tracer *type)
1911{
1912        struct trace_selftests *selftest;
1913
1914        selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1915        if (!selftest)
1916                return -ENOMEM;
1917
1918        selftest->type = type;
1919        list_add(&selftest->list, &postponed_selftests);
1920        return 0;
1921}
1922
1923static int run_tracer_selftest(struct tracer *type)
1924{
1925        struct trace_array *tr = &global_trace;
1926        struct tracer *saved_tracer = tr->current_trace;
1927        int ret;
1928
1929        if (!type->selftest || tracing_selftest_disabled)
1930                return 0;
1931
1932        /*
1933         * If a tracer registers early in boot up (before scheduling is
1934         * initialized and such), then do not run its selftests yet.
1935         * Instead, run them a little later in the boot process.
1936         */
1937        if (!selftests_can_run)
1938                return save_selftest(type);
1939
1940        if (!tracing_is_on()) {
1941                pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1942                        type->name);
1943                return 0;
1944        }
1945
1946        /*
1947         * Run a selftest on this tracer.
1948         * Here we reset the trace buffer, and set the current
1949         * tracer to be this tracer. The tracer can then run some
1950         * internal tracing to verify that everything is in order.
1951         * If we fail, we do not register this tracer.
1952         */
1953        tracing_reset_online_cpus(&tr->array_buffer);
1954
1955        tr->current_trace = type;
1956
1957#ifdef CONFIG_TRACER_MAX_TRACE
1958        if (type->use_max_tr) {
1959                /* If we expanded the buffers, make sure the max is expanded too */
1960                if (ring_buffer_expanded)
1961                        ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1962                                           RING_BUFFER_ALL_CPUS);
1963                tr->allocated_snapshot = true;
1964        }
1965#endif
1966
1967        /* the test is responsible for initializing and enabling */
1968        pr_info("Testing tracer %s: ", type->name);
1969        ret = type->selftest(type, tr);
1970        /* the test is responsible for resetting too */
1971        tr->current_trace = saved_tracer;
1972        if (ret) {
1973                printk(KERN_CONT "FAILED!\n");
1974                /* Add the warning after printing 'FAILED' */
1975                WARN_ON(1);
1976                return -1;
1977        }
1978        /* Only reset on passing, to avoid touching corrupted buffers */
1979        tracing_reset_online_cpus(&tr->array_buffer);
1980
1981#ifdef CONFIG_TRACER_MAX_TRACE
1982        if (type->use_max_tr) {
1983                tr->allocated_snapshot = false;
1984
1985                /* Shrink the max buffer again */
1986                if (ring_buffer_expanded)
1987                        ring_buffer_resize(tr->max_buffer.buffer, 1,
1988                                           RING_BUFFER_ALL_CPUS);
1989        }
1990#endif
1991
1992        printk(KERN_CONT "PASSED\n");
1993        return 0;
1994}
1995
1996static __init int init_trace_selftests(void)
1997{
1998        struct trace_selftests *p, *n;
1999        struct tracer *t, **last;
2000        int ret;
2001
2002        selftests_can_run = true;
2003
2004        mutex_lock(&trace_types_lock);
2005
2006        if (list_empty(&postponed_selftests))
2007                goto out;
2008
2009        pr_info("Running postponed tracer tests:\n");
2010
2011        tracing_selftest_running = true;
2012        list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2013                /* This loop can take minutes when sanitizers are enabled, so
2014                 * let's make sure we allow RCU processing.
2015                 */
2016                cond_resched();
2017                ret = run_tracer_selftest(p->type);
2018                /* If the test fails, then warn and remove from available_tracers */
2019                if (ret < 0) {
2020                        WARN(1, "tracer: %s failed selftest, disabling\n",
2021                             p->type->name);
2022                        last = &trace_types;
2023                        for (t = trace_types; t; t = t->next) {
2024                                if (t == p->type) {
2025                                        *last = t->next;
2026                                        break;
2027                                }
2028                                last = &t->next;
2029                        }
2030                }
2031                list_del(&p->list);
2032                kfree(p);
2033        }
2034        tracing_selftest_running = false;
2035
2036 out:
2037        mutex_unlock(&trace_types_lock);
2038
2039        return 0;
2040}
2041core_initcall(init_trace_selftests);
2042#else
2043static inline int run_tracer_selftest(struct tracer *type)
2044{
2045        return 0;
2046}
2047#endif /* CONFIG_FTRACE_STARTUP_TEST */
2048
2049static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2050
2051static void __init apply_trace_boot_options(void);
2052
2053/**
2054 * register_tracer - register a tracer with the ftrace system.
2055 * @type: the plugin for the tracer
2056 *
2057 * Register a new plugin tracer.
2058 */
2059int __init register_tracer(struct tracer *type)
2060{
2061        struct tracer *t;
2062        int ret = 0;
2063
2064        if (!type->name) {
2065                pr_info("Tracer must have a name\n");
2066                return -1;
2067        }
2068
2069        if (strlen(type->name) >= MAX_TRACER_SIZE) {
2070                pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2071                return -1;
2072        }
2073
2074        if (security_locked_down(LOCKDOWN_TRACEFS)) {
2075                pr_warn("Can not register tracer %s due to lockdown\n",
2076                           type->name);
2077                return -EPERM;
2078        }
2079
2080        mutex_lock(&trace_types_lock);
2081
2082        tracing_selftest_running = true;
2083
2084        for (t = trace_types; t; t = t->next) {
2085                if (strcmp(type->name, t->name) == 0) {
2086                        /* already found */
2087                        pr_info("Tracer %s already registered\n",
2088                                type->name);
2089                        ret = -1;
2090                        goto out;
2091                }
2092        }
2093
2094        if (!type->set_flag)
2095                type->set_flag = &dummy_set_flag;
2096        if (!type->flags) {
2097                /* allocate a dummy tracer_flags */
2098                type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2099                if (!type->flags) {
2100                        ret = -ENOMEM;
2101                        goto out;
2102                }
2103                type->flags->val = 0;
2104                type->flags->opts = dummy_tracer_opt;
2105        } else
2106                if (!type->flags->opts)
2107                        type->flags->opts = dummy_tracer_opt;
2108
2109        /* store the tracer for __set_tracer_option */
2110        type->flags->trace = type;
2111
2112        ret = run_tracer_selftest(type);
2113        if (ret < 0)
2114                goto out;
2115
2116        type->next = trace_types;
2117        trace_types = type;
2118        add_tracer_options(&global_trace, type);
2119
2120 out:
2121        tracing_selftest_running = false;
2122        mutex_unlock(&trace_types_lock);
2123
2124        if (ret || !default_bootup_tracer)
2125                goto out_unlock;
2126
2127        if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2128                goto out_unlock;
2129
2130        printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2131        /* Do we want this tracer to start on bootup? */
2132        tracing_set_tracer(&global_trace, type->name);
2133        default_bootup_tracer = NULL;
2134
2135        apply_trace_boot_options();
2136
2137        /* Disable other selftests, since running this tracer will break them. */
2138        disable_tracing_selftest("running a tracer");
2139
2140 out_unlock:
2141        return ret;
2142}
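
/*
 * A minimal sketch of what a plugin tracer provides before calling
 * register_tracer() from an initcall. The field set follows the simplest
 * in-tree tracers; the "example" names are placeholders, not real tracers.
 */
static int example_tracer_init(struct trace_array *tr)
{
        return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static struct tracer example_tracer __read_mostly = {
        .name   = "example",
        .init   = example_tracer_init,
        .reset  = example_tracer_reset,
};

static __init int example_tracer_register(void)
{
        return register_tracer(&example_tracer);
}
core_initcall(example_tracer_register);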
2143
2144static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2145{
2146        struct trace_buffer *buffer = buf->buffer;
2147
2148        if (!buffer)
2149                return;
2150
2151        ring_buffer_record_disable(buffer);
2152
2153        /* Make sure all commits have finished */
2154        synchronize_rcu();
2155        ring_buffer_reset_cpu(buffer, cpu);
2156
2157        ring_buffer_record_enable(buffer);
2158}
2159
2160void tracing_reset_online_cpus(struct array_buffer *buf)
2161{
2162        struct trace_buffer *buffer = buf->buffer;
2163
2164        if (!buffer)
2165                return;
2166
2167        ring_buffer_record_disable(buffer);
2168
2169        /* Make sure all commits have finished */
2170        synchronize_rcu();
2171
2172        buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2173
2174        ring_buffer_reset_online_cpus(buffer);
2175
2176        ring_buffer_record_enable(buffer);
2177}
2178
2179/* Must have trace_types_lock held */
2180void tracing_reset_all_online_cpus(void)
2181{
2182        struct trace_array *tr;
2183
2184        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2185                if (!tr->clear_trace)
2186                        continue;
2187                tr->clear_trace = false;
2188                tracing_reset_online_cpus(&tr->array_buffer);
2189#ifdef CONFIG_TRACER_MAX_TRACE
2190                tracing_reset_online_cpus(&tr->max_buffer);
2191#endif
2192        }
2193}
2194
2195/*
2196 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2197 * is the tgid last observed corresponding to pid=i.
2198 */
2199static int *tgid_map;
2200
2201/* The maximum valid index into tgid_map. */
2202static size_t tgid_map_max;
2203
2204#define SAVED_CMDLINES_DEFAULT 128
2205#define NO_CMDLINE_MAP UINT_MAX
2206static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2207struct saved_cmdlines_buffer {
2208        unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2209        unsigned *map_cmdline_to_pid;
2210        unsigned cmdline_num;
2211        int cmdline_idx;
2212        char *saved_cmdlines;
2213};
2214static struct saved_cmdlines_buffer *savedcmd;
2215
2216static inline char *get_saved_cmdlines(int idx)
2217{
2218        return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2219}
2220
2221static inline void set_cmdline(int idx, const char *cmdline)
2222{
2223        strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2224}
2225
2226static int allocate_cmdlines_buffer(unsigned int val,
2227                                    struct saved_cmdlines_buffer *s)
2228{
2229        s->map_cmdline_to_pid = kmalloc_array(val,
2230                                              sizeof(*s->map_cmdline_to_pid),
2231                                              GFP_KERNEL);
2232        if (!s->map_cmdline_to_pid)
2233                return -ENOMEM;
2234
2235        s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2236        if (!s->saved_cmdlines) {
2237                kfree(s->map_cmdline_to_pid);
2238                return -ENOMEM;
2239        }
2240
2241        s->cmdline_idx = 0;
2242        s->cmdline_num = val;
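        /*
         * memset() only uses the low byte of NO_CMDLINE_MAP (UINT_MAX), so
         * every byte is written as 0xff, which leaves each unsigned slot
         * equal to NO_CMDLINE_MAP.
         */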
2243        memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2244               sizeof(s->map_pid_to_cmdline));
2245        memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2246               val * sizeof(*s->map_cmdline_to_pid));
2247
2248        return 0;
2249}
2250
2251static int trace_create_savedcmd(void)
2252{
2253        int ret;
2254
2255        savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2256        if (!savedcmd)
2257                return -ENOMEM;
2258
2259        ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2260        if (ret < 0) {
2261                kfree(savedcmd);
2262                savedcmd = NULL;
2263                return -ENOMEM;
2264        }
2265
2266        return 0;
2267}
2268
2269int is_tracing_stopped(void)
2270{
2271        return global_trace.stop_count;
2272}
2273
2274/**
2275 * tracing_start - quick start of the tracer
2276 *
2277 * If tracing is enabled but was stopped by tracing_stop,
2278 * this will start the tracer back up.
2279 */
2280void tracing_start(void)
2281{
2282        struct trace_buffer *buffer;
2283        unsigned long flags;
2284
2285        if (tracing_disabled)
2286                return;
2287
2288        raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2289        if (--global_trace.stop_count) {
2290                if (global_trace.stop_count < 0) {
2291                        /* Someone screwed up their debugging */
2292                        WARN_ON_ONCE(1);
2293                        global_trace.stop_count = 0;
2294                }
2295                goto out;
2296        }
2297
2298        /* Prevent the buffers from switching */
2299        arch_spin_lock(&global_trace.max_lock);
2300
2301        buffer = global_trace.array_buffer.buffer;
2302        if (buffer)
2303                ring_buffer_record_enable(buffer);
2304
2305#ifdef CONFIG_TRACER_MAX_TRACE
2306        buffer = global_trace.max_buffer.buffer;
2307        if (buffer)
2308                ring_buffer_record_enable(buffer);
2309#endif
2310
2311        arch_spin_unlock(&global_trace.max_lock);
2312
2313 out:
2314        raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2315}
2316
2317static void tracing_start_tr(struct trace_array *tr)
2318{
2319        struct trace_buffer *buffer;
2320        unsigned long flags;
2321
2322        if (tracing_disabled)
2323                return;
2324
2325        /* If global, we need to also start the max tracer */
2326        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2327                return tracing_start();
2328
2329        raw_spin_lock_irqsave(&tr->start_lock, flags);
2330
2331        if (--tr->stop_count) {
2332                if (tr->stop_count < 0) {
2333                        /* Someone screwed up their debugging */
2334                        WARN_ON_ONCE(1);
2335                        tr->stop_count = 0;
2336                }
2337                goto out;
2338        }
2339
2340        buffer = tr->array_buffer.buffer;
2341        if (buffer)
2342                ring_buffer_record_enable(buffer);
2343
2344 out:
2345        raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2346}
2347
2348/**
2349 * tracing_stop - quick stop of the tracer
2350 *
2351 * Light weight way to stop tracing. Use in conjunction with
2352 * tracing_start.
2353 */
2354void tracing_stop(void)
2355{
2356        struct trace_buffer *buffer;
2357        unsigned long flags;
2358
2359        raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2360        if (global_trace.stop_count++)
2361                goto out;
2362
2363        /* Prevent the buffers from switching */
2364        arch_spin_lock(&global_trace.max_lock);
2365
2366        buffer = global_trace.array_buffer.buffer;
2367        if (buffer)
2368                ring_buffer_record_disable(buffer);
2369
2370#ifdef CONFIG_TRACER_MAX_TRACE
2371        buffer = global_trace.max_buffer.buffer;
2372        if (buffer)
2373                ring_buffer_record_disable(buffer);
2374#endif
2375
2376        arch_spin_unlock(&global_trace.max_lock);
2377
2378 out:
2379        raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2380}
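
/*
 * A minimal sketch of how debugging code can use the pair above:
 * tracing_stop()/tracing_start() nest via stop_count, so the buffer stays
 * frozen until a matching tracing_start() runs. The error handling shown
 * here is invented for the example.
 */
static void example_freeze_on_error(int err)
{
        if (!err)
                return;

        /* Keep the events that led up to the error for later inspection. */
        tracing_stop();
        pr_warn("example: error %d, tracing stopped\n", err);
}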
2381
2382static void tracing_stop_tr(struct trace_array *tr)
2383{
2384        struct trace_buffer *buffer;
2385        unsigned long flags;
2386
2387        /* If global, we need to also stop the max tracer */
2388        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2389                return tracing_stop();
2390
2391        raw_spin_lock_irqsave(&tr->start_lock, flags);
2392        if (tr->stop_count++)
2393                goto out;
2394
2395        buffer = tr->array_buffer.buffer;
2396        if (buffer)
2397                ring_buffer_record_disable(buffer);
2398
2399 out:
2400        raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2401}
2402
2403static int trace_save_cmdline(struct task_struct *tsk)
2404{
2405        unsigned tpid, idx;
2406
2407        /* treat recording of idle task as a success */
2408        if (!tsk->pid)
2409                return 1;
2410
2411        tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2412
2413        /*
2414         * It's not the end of the world if we don't get
2415         * the lock, but we also don't want to spin
2416         * nor do we want to disable interrupts,
2417         * so if we miss here, then better luck next time.
2418         */
2419        if (!arch_spin_trylock(&trace_cmdline_lock))
2420                return 0;
2421
2422        idx = savedcmd->map_pid_to_cmdline[tpid];
2423        if (idx == NO_CMDLINE_MAP) {
2424                idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2425
2426                savedcmd->map_pid_to_cmdline[tpid] = idx;
2427                savedcmd->cmdline_idx = idx;
2428        }
2429
2430        savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2431        set_cmdline(idx, tsk->comm);
2432
2433        arch_spin_unlock(&trace_cmdline_lock);
2434
2435        return 1;
2436}
2437
2438static void __trace_find_cmdline(int pid, char comm[])
2439{
2440        unsigned map;
2441        int tpid;
2442
2443        if (!pid) {
2444                strcpy(comm, "<idle>");
2445                return;
2446        }
2447
2448        if (WARN_ON_ONCE(pid < 0)) {
2449                strcpy(comm, "<XXX>");
2450                return;
2451        }
2452
2453        tpid = pid & (PID_MAX_DEFAULT - 1);
2454        map = savedcmd->map_pid_to_cmdline[tpid];
2455        if (map != NO_CMDLINE_MAP) {
2456                tpid = savedcmd->map_cmdline_to_pid[map];
2457                if (tpid == pid) {
2458                        strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2459                        return;
2460                }
2461        }
2462        strcpy(comm, "<...>");
2463}
2464
2465void trace_find_cmdline(int pid, char comm[])
2466{
2467        preempt_disable();
2468        arch_spin_lock(&trace_cmdline_lock);
2469
2470        __trace_find_cmdline(pid, comm);
2471
2472        arch_spin_unlock(&trace_cmdline_lock);
2473        preempt_enable();
2474}
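
/*
 * A minimal sketch of how an output path resolves a recorded pid back into
 * a task name via the cmdline cache above; the "%s-%d" format is only an
 * example.
 */
static void example_print_comm(struct trace_seq *s, int pid)
{
        char comm[TASK_COMM_LEN];

        trace_find_cmdline(pid, comm);
        trace_seq_printf(s, "%s-%d", comm, pid);
}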
2475
2476static int *trace_find_tgid_ptr(int pid)
2477{
2478        /*
2479         * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2480         * if we observe a non-NULL tgid_map then we also observe the correct
2481         * tgid_map_max.
2482         */
2483        int *map = smp_load_acquire(&tgid_map);
2484
2485        if (unlikely(!map || pid > tgid_map_max))
2486                return NULL;
2487
2488        return &map[pid];
2489}
2490
2491int trace_find_tgid(int pid)
2492{
2493        int *ptr = trace_find_tgid_ptr(pid);
2494
2495        return ptr ? *ptr : 0;
2496}
2497
2498static int trace_save_tgid(struct task_struct *tsk)
2499{
2500        int *ptr;
2501
2502        /* treat recording of idle task as a success */
2503        if (!tsk->pid)
2504                return 1;
2505
2506        ptr = trace_find_tgid_ptr(tsk->pid);
2507        if (!ptr)
2508                return 0;
2509
2510        *ptr = tsk->tgid;
2511        return 1;
2512}
2513
2514static bool tracing_record_taskinfo_skip(int flags)
2515{
2516        if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2517                return true;
2518        if (!__this_cpu_read(trace_taskinfo_save))
2519                return true;
2520        return false;
2521}
2522
2523/**
2524 * tracing_record_taskinfo - record the task info of a task
2525 *
2526 * @task:  task to record
2527 * @flags: TRACE_RECORD_CMDLINE for recording comm
2528 *         TRACE_RECORD_TGID for recording tgid
2529 */
2530void tracing_record_taskinfo(struct task_struct *task, int flags)
2531{
2532        bool done;
2533
2534        if (tracing_record_taskinfo_skip(flags))
2535                return;
2536
2537        /*
2538         * Record as much task information as possible. If some fail, continue
2539         * to try to record the others.
2540         */
2541        done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2542        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2543
2544        /* If recording any information failed, retry again soon. */
2545        if (!done)
2546                return;
2547
2548        __this_cpu_write(trace_taskinfo_save, false);
2549}
2550
2551/**
2552 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2553 *
2554 * @prev: previous task during sched_switch
2555 * @next: next task during sched_switch
2556 * @flags: TRACE_RECORD_CMDLINE for recording comm
2557 *         TRACE_RECORD_TGID for recording tgid
2558 */
2559void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2560                                          struct task_struct *next, int flags)
2561{
2562        bool done;
2563
2564        if (tracing_record_taskinfo_skip(flags))
2565                return;
2566
2567        /*
2568         * Record as much task information as possible. If some fail, continue
2569         * to try to record the others.
2570         */
2571        done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2572        done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2573        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2574        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2575
2576        /* If recording any information failed, retry again soon. */
2577        if (!done)
2578                return;
2579
2580        __this_cpu_write(trace_taskinfo_save, false);
2581}
2582
2583/* Helpers to record a specific task information */
2584void tracing_record_cmdline(struct task_struct *task)
2585{
2586        tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2587}
2588
2589void tracing_record_tgid(struct task_struct *task)
2590{
2591        tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2592}
2593
2594/*
2595 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2596 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2597 * simplifies those functions and keeps them in sync.
2598 */
2599enum print_line_t trace_handle_return(struct trace_seq *s)
2600{
2601        return trace_seq_has_overflowed(s) ?
2602                TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2603}
2604EXPORT_SYMBOL_GPL(trace_handle_return);
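
/*
 * A minimal sketch of the usual shape of a trace_event ->trace() output
 * callback, which funnels its trace_seq writes through
 * trace_handle_return(); the event text here is a placeholder.
 */
static enum print_line_t example_event_output(struct trace_iterator *iter,
                                              int flags,
                                              struct trace_event *event)
{
        trace_seq_puts(&iter->seq, "example event\n");

        return trace_handle_return(&iter->seq);
}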
2605
2606unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2607{
2608        unsigned int trace_flags = irqs_status;
2609        unsigned int pc;
2610
2611        pc = preempt_count();
2612
2613        if (pc & NMI_MASK)
2614                trace_flags |= TRACE_FLAG_NMI;
2615        if (pc & HARDIRQ_MASK)
2616                trace_flags |= TRACE_FLAG_HARDIRQ;
2617        if (in_serving_softirq())
2618                trace_flags |= TRACE_FLAG_SOFTIRQ;
2619
2620        if (tif_need_resched())
2621                trace_flags |= TRACE_FLAG_NEED_RESCHED;
2622        if (test_preempt_need_resched())
2623                trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2624        return (trace_flags << 16) | (pc & 0xff);
2625}
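
/*
 * The trace_ctx value built above packs the preemption count into the low
 * byte and the TRACE_FLAG_* bits into the upper 16 bits, i.e.:
 *
 *      preempt_count = trace_ctx & 0xff;
 *      flags         = trace_ctx >> 16;
 */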
2626
2627struct ring_buffer_event *
2628trace_buffer_lock_reserve(struct trace_buffer *buffer,
2629                          int type,
2630                          unsigned long len,
2631                          unsigned int trace_ctx)
2632{
2633        return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2634}
2635
2636DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2637DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2638static int trace_buffered_event_ref;
2639
2640/**
2641 * trace_buffered_event_enable - enable buffering events
2642 *
2643 * When events are being filtered, it is quicker to use a temporary
2644 * buffer to write the event data into if there is a good chance
2645 * that it will not be committed. The discard of the ring buffer
2646 * is not as fast as committing, and is much slower than copying
2647 * a commit.
2648 *
2649 * When an event is to be filtered, allocate per cpu buffers to
2650 * write the event data into, and if the event is filtered and discarded
2651 * it is simply dropped, otherwise, the entire data is to be committed
2652 * in one shot.
2653 */
2654void trace_buffered_event_enable(void)
2655{
2656        struct ring_buffer_event *event;
2657        struct page *page;
2658        int cpu;
2659
2660        WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2661
2662        if (trace_buffered_event_ref++)
2663                return;
2664
2665        for_each_tracing_cpu(cpu) {
2666                page = alloc_pages_node(cpu_to_node(cpu),
2667                                        GFP_KERNEL | __GFP_NORETRY, 0);
2668                if (!page)
2669                        goto failed;
2670
2671                event = page_address(page);
2672                memset(event, 0, sizeof(*event));
2673
2674                per_cpu(trace_buffered_event, cpu) = event;
2675
2676                preempt_disable();
2677                if (cpu == smp_processor_id() &&
2678                    __this_cpu_read(trace_buffered_event) !=
2679                    per_cpu(trace_buffered_event, cpu))
2680                        WARN_ON_ONCE(1);
2681                preempt_enable();
2682        }
2683
2684        return;
2685 failed:
2686        trace_buffered_event_disable();
2687}
2688
2689static void enable_trace_buffered_event(void *data)
2690{
2691        /* Probably not needed, but do it anyway */
2692        smp_rmb();
2693        this_cpu_dec(trace_buffered_event_cnt);
2694}
2695
2696static void disable_trace_buffered_event(void *data)
2697{
2698        this_cpu_inc(trace_buffered_event_cnt);
2699}
2700
2701/**
2702 * trace_buffered_event_disable - disable buffering events
2703 *
2704 * When a filter is removed, it is faster to not use the buffered
2705 * events, and to commit directly into the ring buffer. Free up
2706 * the temp buffers when there are no more users. This requires
2707 * special synchronization with current events.
2708 */
2709void trace_buffered_event_disable(void)
2710{
2711        int cpu;
2712
2713        WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2714
2715        if (WARN_ON_ONCE(!trace_buffered_event_ref))
2716                return;
2717
2718        if (--trace_buffered_event_ref)
2719                return;
2720
2721        preempt_disable();
2722        /* For each CPU, set the buffer as used. */
2723        smp_call_function_many(tracing_buffer_mask,
2724                               disable_trace_buffered_event, NULL, 1);
2725        preempt_enable();
2726
2727        /* Wait for all current users to finish */
2728        synchronize_rcu();
2729
2730        for_each_tracing_cpu(cpu) {
2731                free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2732                per_cpu(trace_buffered_event, cpu) = NULL;
2733        }
2734        /*
2735         * Make sure trace_buffered_event is NULL before clearing
2736         * trace_buffered_event_cnt.
2737         */
2738        smp_wmb();
2739
2740        preempt_disable();
2741        /* Do the work on each cpu */
2742        smp_call_function_many(tracing_buffer_mask,
2743                               enable_trace_buffered_event, NULL, 1);
2744        preempt_enable();
2745}
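
/*
 * A minimal sketch of how filter setup code brackets its work with the
 * reference-counted pair above; both helpers expect event_mutex to be held.
 */
static void example_toggle_buffered_events(bool installing_filter)
{
        mutex_lock(&event_mutex);
        if (installing_filter)
                trace_buffered_event_enable();
        else
                trace_buffered_event_disable();
        mutex_unlock(&event_mutex);
}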
2746
2747static struct trace_buffer *temp_buffer;
2748
2749struct ring_buffer_event *
2750trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2751                          struct trace_event_file *trace_file,
2752                          int type, unsigned long len,
2753                          unsigned int trace_ctx)
2754{
2755        struct ring_buffer_event *entry;
2756        struct trace_array *tr = trace_file->tr;
2757        int val;
2758
2759        *current_rb = tr->array_buffer.buffer;
2760
2761        if (!tr->no_filter_buffering_ref &&
2762            (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2763            (entry = this_cpu_read(trace_buffered_event))) {
2764                /*
2765                 * Filtering is on, so try to use the per cpu buffer first.
2766                 * This buffer will simulate a ring_buffer_event,
2767                 * where the type_len is zero and the array[0] will
2768                 * hold the full length.
2769         * (see include/linux/ring_buffer.h for details on
2770                 *  how the ring_buffer_event is structured).
2771                 *
2772                 * Using a temp buffer during filtering and copying it
2773                 * on a matched filter is quicker than writing directly
2774                 * into the ring buffer and then discarding it when
2775                 * it doesn't match. That is because the discard
2776                 * requires several atomic operations to get right.
2777                 * Copying on match and doing nothing on a failed match
2778                 * is still quicker than no copy on match, but having
2779                 * to discard out of the ring buffer on a failed match.
2780                 */
2781                int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2782
2783                val = this_cpu_inc_return(trace_buffered_event_cnt);
2784
2785                /*
2786                 * Preemption is disabled, but interrupts and NMIs
2787                 * can still come in now. If that happens after
2788                 * the above increment, then it will have to go
2789                 * back to the old method of allocating the event
2790                 * on the ring buffer, and if the filter fails, it
2791                 * will have to call ring_buffer_discard_commit()
2792                 * to remove it.
2793                 *
2794                 * Need to also check the unlikely case that the
2795                 * length is bigger than the temp buffer size.
2796                 * If that happens, then the reserve is pretty much
2797                 * guaranteed to fail, as the ring buffer currently
2798                 * only allows events less than a page. But that may
2799                 * change in the future, so let the ring buffer reserve
2800                 * handle the failure in that case.
2801                 */
2802                if (val == 1 && likely(len <= max_len)) {
2803                        trace_event_setup(entry, type, trace_ctx);
2804                        entry->array[0] = len;
2805                        return entry;
2806                }
2807                this_cpu_dec(trace_buffered_event_cnt);
2808        }
2809
2810        entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2811                                            trace_ctx);
2812        /*
2813         * If tracing is off, but we have triggers enabled,
2814         * we still need to look at the event data. Use the temp_buffer
2815         * to store the trace event for the trigger to use. It's recursion
2816         * safe and will not be recorded anywhere.
2817         */
2818        if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2819                *current_rb = temp_buffer;
2820                entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2821                                                    trace_ctx);
2822        }
2823        return entry;
2824}
2825EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2826
2827static DEFINE_SPINLOCK(tracepoint_iter_lock);
2828static DEFINE_MUTEX(tracepoint_printk_mutex);
2829
2830static void output_printk(struct trace_event_buffer *fbuffer)
2831{
2832        struct trace_event_call *event_call;
2833        struct trace_event_file *file;
2834        struct trace_event *event;
2835        unsigned long flags;
2836        struct trace_iterator *iter = tracepoint_print_iter;
2837
2838        /* We should never get here if iter is NULL */
2839        if (WARN_ON_ONCE(!iter))
2840                return;
2841
2842        event_call = fbuffer->trace_file->event_call;
2843        if (!event_call || !event_call->event.funcs ||
2844            !event_call->event.funcs->trace)
2845                return;
2846
2847        file = fbuffer->trace_file;
2848        if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2849            (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2850             !filter_match_preds(file->filter, fbuffer->entry)))
2851                return;
2852
2853        event = &fbuffer->trace_file->event_call->event;
2854
2855        spin_lock_irqsave(&tracepoint_iter_lock, flags);
2856        trace_seq_init(&iter->seq);
2857        iter->ent = fbuffer->entry;
2858        event_call->event.funcs->trace(iter, 0, event);
2859        trace_seq_putc(&iter->seq, 0);
2860        printk("%s", iter->seq.buffer);
2861
2862        spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2863}
2864
2865int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2866                             void *buffer, size_t *lenp,
2867                             loff_t *ppos)
2868{
2869        int save_tracepoint_printk;
2870        int ret;
2871
2872        mutex_lock(&tracepoint_printk_mutex);
2873        save_tracepoint_printk = tracepoint_printk;
2874
2875        ret = proc_dointvec(table, write, buffer, lenp, ppos);
2876
2877        /*
2878         * This will force exiting early, as tracepoint_printk
2879         * is always zero when tracepoint_print_iter is not allocated.
2880         */
2881        if (!tracepoint_print_iter)
2882                tracepoint_printk = 0;
2883
2884        if (save_tracepoint_printk == tracepoint_printk)
2885                goto out;
2886
2887        if (tracepoint_printk)
2888                static_key_enable(&tracepoint_printk_key.key);
2889        else
2890                static_key_disable(&tracepoint_printk_key.key);
2891
2892 out:
2893        mutex_unlock(&tracepoint_printk_mutex);
2894
2895        return ret;
2896}
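
/*
 * A rough sketch of the sysctl table entry that wires "tracepoint_printk"
 * to the handler above. The real registration lives outside this file, so
 * treat the exact table and placement as an assumption.
 */
static struct ctl_table example_trace_sysctl_table[] = {
        {
                .procname       = "tracepoint_printk",
                .data           = &tracepoint_printk,
                .maxlen         = sizeof(tracepoint_printk),
                .mode           = 0644,
                .proc_handler   = tracepoint_printk_sysctl,
        },
        { }
};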
2897
2898void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2899{
2900        enum event_trigger_type tt = ETT_NONE;
2901        struct trace_event_file *file = fbuffer->trace_file;
2902
2903        if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2904                        fbuffer->entry, &tt))
2905                goto discard;
2906
2907        if (static_key_false(&tracepoint_printk_key.key))
2908                output_printk(fbuffer);
2909
2910        if (static_branch_unlikely(&trace_event_exports_enabled))
2911                ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2912
2913        trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2914                        fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2915
2916discard:
2917        if (tt)
2918                event_triggers_post_call(file, tt);
2919
2920}
2921EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2922
2923/*
2924 * Skip 3:
2925 *
2926 *   trace_buffer_unlock_commit_regs()
2927 *   trace_event_buffer_commit()
2928 *   trace_event_raw_event_xxx()
2929 */
2930# define STACK_SKIP 3
2931
2932void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2933                                     struct trace_buffer *buffer,
2934                                     struct ring_buffer_event *event,
2935                                     unsigned int trace_ctx,
2936                                     struct pt_regs *regs)
2937{
2938        __buffer_unlock_commit(buffer, event);
2939
2940        /*
2941         * If regs is not set, then skip the necessary functions.
2942         * Note, we can still get here via blktrace, wakeup tracer
2943         * and mmiotrace, but that's ok if they lose a function or
2944         * two. They are not that meaningful.
2945         */
2946        ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2947        ftrace_trace_userstack(tr, buffer, trace_ctx);
2948}
2949
2950/*
2951 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2952 */
2953void
2954trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2955                                   struct ring_buffer_event *event)
2956{
2957        __buffer_unlock_commit(buffer, event);
2958}
2959
2960void
2961trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2962               parent_ip, unsigned int trace_ctx)
2963{
2964        struct trace_event_call *call = &event_function;
2965        struct trace_buffer *buffer = tr->array_buffer.buffer;
2966        struct ring_buffer_event *event;
2967        struct ftrace_entry *entry;
2968
2969        event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2970                                            trace_ctx);
2971        if (!event)
2972                return;
2973        entry   = ring_buffer_event_data(event);
2974        entry->ip                       = ip;
2975        entry->parent_ip                = parent_ip;
2976
2977        if (!call_filter_check_discard(call, entry, buffer, event)) {
2978                if (static_branch_unlikely(&trace_function_exports_enabled))
2979                        ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2980                __buffer_unlock_commit(buffer, event);
2981        }
2982}
2983
2984#ifdef CONFIG_STACKTRACE
2985
2986/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2987#define FTRACE_KSTACK_NESTING   4
2988
2989#define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2990
2991struct ftrace_stack {
2992        unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2993};
2994
2995
2996struct ftrace_stacks {
2997        struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2998};
2999
3000static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3001static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3002
3003static void __ftrace_trace_stack(struct trace_buffer *buffer,
3004                                 unsigned int trace_ctx,
3005                                 int skip, struct pt_regs *regs)
3006{
3007        struct trace_event_call *call = &event_kernel_stack;
3008        struct ring_buffer_event *event;
3009        unsigned int size, nr_entries;
3010        struct ftrace_stack *fstack;
3011        struct stack_entry *entry;
3012        int stackidx;
3013
3014        /*
3015         * Add one, for this function and the call to stack_trace_save().
3016         * If regs is set, then these functions will not be in the way.
3017         */
3018#ifndef CONFIG_UNWINDER_ORC
3019        if (!regs)
3020                skip++;
3021#endif
3022
3023        preempt_disable_notrace();
3024
3025        stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3026
3027        /* This should never happen. If it does, yell once and skip */
3028        if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3029                goto out;
3030
3031        /*
3032         * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3033         * interrupt will either see the value pre increment or post
3034         * increment. If the interrupt happens pre increment it will have
3035         * restored the counter when it returns.  We just need a barrier to
3036         * keep gcc from moving things around.
3037         */
3038        barrier();
3039
3040        fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3041        size = ARRAY_SIZE(fstack->calls);
3042
3043        if (regs) {
3044                nr_entries = stack_trace_save_regs(regs, fstack->calls,
3045                                                   size, skip);
3046        } else {
3047                nr_entries = stack_trace_save(fstack->calls, size, skip);
3048        }
3049
3050        size = nr_entries * sizeof(unsigned long);
3051        event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3052                                    (sizeof(*entry) - sizeof(entry->caller)) + size,
3053                                    trace_ctx);
3054        if (!event)
3055                goto out;
3056        entry = ring_buffer_event_data(event);
3057
3058        memcpy(&entry->caller, fstack->calls, size);
3059        entry->size = nr_entries;
3060
3061        if (!call_filter_check_discard(call, entry, buffer, event))
3062                __buffer_unlock_commit(buffer, event);
3063
3064 out:
3065        /* Again, don't let gcc optimize things here */
3066        barrier();
3067        __this_cpu_dec(ftrace_stack_reserve);
3068        preempt_enable_notrace();
3069
3070}
3071
3072static inline void ftrace_trace_stack(struct trace_array *tr,
3073                                      struct trace_buffer *buffer,
3074                                      unsigned int trace_ctx,
3075                                      int skip, struct pt_regs *regs)
3076{
3077        if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3078                return;
3079
3080        __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3081}
3082
3083void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3084                   int skip)
3085{
3086        struct trace_buffer *buffer = tr->array_buffer.buffer;
3087
3088        if (rcu_is_watching()) {
3089                __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3090                return;
3091        }
3092
3093        /*
3094         * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3095         * but if the above rcu_is_watching() failed, then the NMI
3096         * triggered someplace critical, and rcu_irq_enter() should
3097         * not be called from NMI.
3098         */
3099        if (unlikely(in_nmi()))
3100                return;
3101
3102        rcu_irq_enter_irqson();
3103        __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3104        rcu_irq_exit_irqson();
3105}
3106
3107/**
3108 * trace_dump_stack - record a stack back trace in the trace buffer
3109 * @skip: Number of functions to skip (helper handlers)
3110 */
3111void trace_dump_stack(int skip)
3112{
3113        if (tracing_disabled || tracing_selftest_running)
3114                return;
3115
3116#ifndef CONFIG_UNWINDER_ORC
3117        /* Skip 1 to skip this function. */
3118        skip++;
3119#endif
3120        __ftrace_trace_stack(global_trace.array_buffer.buffer,
3121                             tracing_gen_ctx(), skip, NULL);
3122}
3123EXPORT_SYMBOL_GPL(trace_dump_stack);
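
/*
 * A minimal sketch of ad-hoc use: trace_dump_stack() records who reached a
 * suspicious branch in the ring buffer instead of spamming dmesg the way
 * dump_stack() would.
 */
static void example_who_called_me(void)
{
        trace_dump_stack(0);    /* record the current backtrace */
}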
3124
3125#ifdef CONFIG_USER_STACKTRACE_SUPPORT
3126static DEFINE_PER_CPU(int, user_stack_count);
3127
3128static void
3129ftrace_trace_userstack(struct trace_array *tr,
3130                       struct trace_buffer *buffer, unsigned int trace_ctx)
3131{
3132        struct trace_event_call *call = &event_user_stack;
3133        struct ring_buffer_event *event;
3134        struct userstack_entry *entry;
3135
3136        if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3137                return;
3138
3139        /*
3140         * NMIs cannot handle page faults, even with fixups.
3141         * Saving the user stack can (and often does) fault.
3142         */
3143        if (unlikely(in_nmi()))
3144                return;
3145
3146        /*
3147         * prevent recursion, since the user stack tracing may
3148         * trigger other kernel events.
3149         */
3150        preempt_disable();
3151        if (__this_cpu_read(user_stack_count))
3152                goto out;
3153
3154        __this_cpu_inc(user_stack_count);
3155
3156        event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3157                                            sizeof(*entry), trace_ctx);
3158        if (!event)
3159                goto out_drop_count;
3160        entry   = ring_buffer_event_data(event);
3161
3162        entry->tgid             = current->tgid;
3163        memset(&entry->caller, 0, sizeof(entry->caller));
3164
3165        stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3166        if (!call_filter_check_discard(call, entry, buffer, event))
3167                __buffer_unlock_commit(buffer, event);
3168
3169 out_drop_count:
3170        __this_cpu_dec(user_stack_count);
3171 out:
3172        preempt_enable();
3173}
3174#else /* CONFIG_USER_STACKTRACE_SUPPORT */
3175static void ftrace_trace_userstack(struct trace_array *tr,
3176                                   struct trace_buffer *buffer,
3177                                   unsigned int trace_ctx)
3178{
3179}
3180#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3181
3182#endif /* CONFIG_STACKTRACE */
3183
3184static inline void
3185func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3186                          unsigned long long delta)
3187{
3188        entry->bottom_delta_ts = delta & U32_MAX;
3189        entry->top_delta_ts = (delta >> 32);
3190}
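
/*
 * The 64-bit delta is split across two 32-bit fields above, so a reader
 * reconstructs it as:
 *
 *      delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */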
3191
3192void trace_last_func_repeats(struct trace_array *tr,
3193                             struct trace_func_repeats *last_info,
3194                             unsigned int trace_ctx)
3195{
3196        struct trace_buffer *buffer = tr->array_buffer.buffer;
3197        struct func_repeats_entry *entry;
3198        struct ring_buffer_event *event;
3199        u64 delta;
3200
3201        event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3202                                            sizeof(*entry), trace_ctx);
3203        if (!event)
3204                return;
3205
3206        delta = ring_buffer_event_time_stamp(buffer, event) -
3207                last_info->ts_last_call;
3208
3209        entry = ring_buffer_event_data(event);
3210        entry->ip = last_info->ip;
3211        entry->parent_ip = last_info->parent_ip;
3212        entry->count = last_info->count;
3213        func_repeats_set_delta_ts(entry, delta);
3214
3215        __buffer_unlock_commit(buffer, event);
3216}
3217
3218/* created for use with alloc_percpu */
3219struct trace_buffer_struct {
3220        int nesting;
3221        char buffer[4][TRACE_BUF_SIZE];
3222};
3223
3224static struct trace_buffer_struct *trace_percpu_buffer;
3225
3226/*
3227 * This allows for lockless recording.  If we're nested too deeply, then
3228 * this returns NULL.
3229 */
3230static char *get_trace_buf(void)
3231{
3232        struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3233
3234        if (!buffer || buffer->nesting >= 4)
3235                return NULL;
3236
3237        buffer->nesting++;
3238
3239        /* Interrupts must see nesting incremented before we use the buffer */
3240        barrier();
3241        return &buffer->buffer[buffer->nesting - 1][0];
3242}
3243
3244static void put_trace_buf(void)
3245{
3246        /* Don't let the decrement of nesting leak before this */
3247        barrier();
3248        this_cpu_dec(trace_percpu_buffer->nesting);
3249}
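
/*
 * A minimal sketch of the get/put bracket every user of the per-cpu
 * trace_printk buffers follows, bailing out when nesting is exhausted;
 * see trace_vbprintk() below for the real thing. The message formatting
 * here is only an example.
 */
static int example_use_trace_buf(const char *msg)
{
        char *tbuffer;
        int len = 0;

        preempt_disable_notrace();
        tbuffer = get_trace_buf();
        if (tbuffer) {
                len = snprintf(tbuffer, TRACE_BUF_SIZE, "%s", msg);
                put_trace_buf();
        }
        preempt_enable_notrace();

        return len;
}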
3250
3251static int alloc_percpu_trace_buffer(void)
3252{
3253        struct trace_buffer_struct *buffers;
3254
3255        if (trace_percpu_buffer)
3256                return 0;
3257
3258        buffers = alloc_percpu(struct trace_buffer_struct);
3259        if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3260                return -ENOMEM;
3261
3262        trace_percpu_buffer = buffers;
3263        return 0;
3264}
3265
3266static int buffers_allocated;
3267
3268void trace_printk_init_buffers(void)
3269{
3270        if (buffers_allocated)
3271                return;
3272
3273        if (alloc_percpu_trace_buffer())
3274                return;
3275
3276        /* trace_printk() is for debug use only. Don't use it in production. */
3277
3278        pr_warn("\n");
3279        pr_warn("**********************************************************\n");
3280        pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3281        pr_warn("**                                                      **\n");
3282        pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3283        pr_warn("**                                                      **\n");
3284        pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3285        pr_warn("** unsafe for production use.                           **\n");
3286        pr_warn("**                                                      **\n");
3287        pr_warn("** If you see this message and you are not debugging    **\n");
3288        pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3289        pr_warn("**                                                      **\n");
3290        pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3291        pr_warn("**********************************************************\n");
3292
3293        /* Expand the buffers to set size */
3294        tracing_update_buffers();
3295
3296        buffers_allocated = 1;
3297
3298        /*
3299         * trace_printk_init_buffers() can be called by modules.
3300         * If that happens, we need to start cmdline recording
3301         * directly here. If global_trace.array_buffer.buffer is
3302         * already allocated, then this was called by module code.
3303         */
3304        if (global_trace.array_buffer.buffer)
3305                tracing_start_cmdline_record();
3306}
3307EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
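
/*
 * Illustrative sketch (debug use only, as the banner above warns): code that
 * calls trace_printk() ends up with these buffers allocated, e.g.:
 *
 *	trace_printk("reached fast path: cpu=%d val=%lu\n", cpu, val);
 *
 * The formatted output lands in the ring buffer and is read back through the
 * tracefs "trace" file. The variable names here are made up for the example.
 */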
3308
3309void trace_printk_start_comm(void)
3310{
3311        /* Start tracing comms if trace printk is set */
3312        if (!buffers_allocated)
3313                return;
3314        tracing_start_cmdline_record();
3315}
3316
3317static void trace_printk_start_stop_comm(int enabled)
3318{
3319        if (!buffers_allocated)
3320                return;
3321
3322        if (enabled)
3323                tracing_start_cmdline_record();
3324        else
3325                tracing_stop_cmdline_record();
3326}
3327
3328/**
3329 * trace_vbprintk - write binary msg to tracing buffer
3330 * @ip:    The address of the caller
3331 * @fmt:   The string format to write to the buffer
3332 * @args:  Arguments for @fmt
3333 */
3334int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3335{
3336        struct trace_event_call *call = &event_bprint;
3337        struct ring_buffer_event *event;
3338        struct trace_buffer *buffer;
3339        struct trace_array *tr = &global_trace;
3340        struct bprint_entry *entry;
3341        unsigned int trace_ctx;
3342        char *tbuffer;
3343        int len = 0, size;
3344
3345        if (unlikely(tracing_selftest_running || tracing_disabled))
3346                return 0;
3347
3348        /* Don't pollute graph traces with trace_vprintk internals */
3349        pause_graph_tracing();
3350
3351        trace_ctx = tracing_gen_ctx();
3352        preempt_disable_notrace();
3353
3354        tbuffer = get_trace_buf();
3355        if (!tbuffer) {
3356                len = 0;
3357                goto out_nobuffer;
3358        }
3359
3360        len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3361
3362        if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3363                goto out_put;
3364
3365        size = sizeof(*entry) + sizeof(u32) * len;
3366        buffer = tr->array_buffer.buffer;
3367        ring_buffer_nest_start(buffer);
3368        event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3369                                            trace_ctx);
3370        if (!event)
3371                goto out;
3372        entry = ring_buffer_event_data(event);
3373        entry->ip                       = ip;
3374        entry->fmt                      = fmt;
3375
3376        memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3377        if (!call_filter_check_discard(call, entry, buffer, event)) {
3378                __buffer_unlock_commit(buffer, event);
3379                ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3380        }
3381
3382out:
3383        ring_buffer_nest_end(buffer);
3384out_put:
3385        put_trace_buf();
3386
3387out_nobuffer:
3388        preempt_enable_notrace();
3389        unpause_graph_tracing();
3390
3391        return len;
3392}
3393EXPORT_SYMBOL_GPL(trace_vbprintk);
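
/*
 * Illustrative sketch (read-side counterpart, normally done in trace_output.c):
 * a bprint entry stores only the format pointer and the vbin_printf()-packed
 * arguments, so the text is only produced when the buffer is read, roughly:
 *
 *	char line[TRACE_BUF_SIZE];
 *
 *	bstr_printf(line, sizeof(line), entry->fmt, entry->buf);
 */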
3394
3395__printf(3, 0)
3396static int
3397__trace_array_vprintk(struct trace_buffer *buffer,
3398                      unsigned long ip, const char *fmt, va_list args)
3399{
3400        struct trace_event_call *call = &event_print;
3401        struct ring_buffer_event *event;
3402        int len = 0, size;
3403        struct print_entry *entry;
3404        unsigned int trace_ctx;
3405        char *tbuffer;
3406
3407        if (tracing_disabled || tracing_selftest_running)
3408                return 0;
3409
3410        /* Don't pollute graph traces with trace_vprintk internals */
3411        pause_graph_tracing();
3412
3413        trace_ctx = tracing_gen_ctx();
3414        preempt_disable_notrace();
3415
3416
3417        tbuffer = get_trace_buf();
3418        if (!tbuffer) {
3419                len = 0;
3420                goto out_nobuffer;
3421        }
3422
3423        len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3424
3425        size = sizeof(*entry) + len + 1;
3426        ring_buffer_nest_start(buffer);
3427        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3428                                            trace_ctx);
3429        if (!event)
3430                goto out;
3431        entry = ring_buffer_event_data(event);
3432        entry->ip = ip;
3433
3434        memcpy(&entry->buf, tbuffer, len + 1);
3435        if (!call_filter_check_discard(call, entry, buffer, event)) {
3436                __buffer_unlock_commit(buffer, event);
3437                ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3438        }
3439
3440out:
3441        ring_buffer_nest_end(buffer);
3442        put_trace_buf();
3443
3444out_nobuffer:
3445        preempt_enable_notrace();
3446        unpause_graph_tracing();
3447
3448        return len;
3449}
3450
3451__printf(3, 0)
3452int trace_array_vprintk(struct trace_array *tr,
3453                        unsigned long ip, const char *fmt, va_list args)
3454{
3455        return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3456}
3457
3458/**
3459 * trace_array_printk - Print a message to a specific instance
3460 * @tr: The instance trace_array descriptor
3461 * @ip: The instruction pointer that this is called from.
3462 * @fmt: The format to print (printf format)
3463 *
3464 * If a subsystem sets up its own instance, it may use this function
3465 * to printk strings into that instance's buffer. Note that this
3466 * function will not write into the top level buffer (use
3467 * trace_printk() for that), as the top level buffer should only
3468 * contain events that can be individually disabled. trace_printk()
3469 * is only for debugging a kernel and should never be incorporated
3470 * into normal use.
3471 *
3472 * trace_array_printk() can be used, as it will not add noise to the
3473 * top level tracing buffer.
3474 *
3475 * Note, trace_array_init_printk() must be called on @tr before this
3476 * can be used.
3477 */
3478__printf(3, 0)
3479int trace_array_printk(struct trace_array *tr,
3480                       unsigned long ip, const char *fmt, ...)
3481{
3482        int ret;
3483        va_list ap;
3484
3485        if (!tr)
3486                return -ENOENT;
3487
3488        /* This is only allowed for created instances */
3489        if (tr == &global_trace)
3490                return 0;
3491
3492        if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3493                return 0;
3494
3495        va_start(ap, fmt);
3496        ret = trace_array_vprintk(tr, ip, fmt, ap);
3497        va_end(ap);
3498        return ret;
3499}
3500EXPORT_SYMBOL_GPL(trace_array_printk);
3501
3502/**
3503 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3504 * @tr: The trace array to initialize the buffers for
3505 *
3506 * As trace_array_printk() only writes into instances, such calls are
3507 * fine to leave in the kernel (unlike trace_printk() calls). This needs
3508 * to be called before trace_array_printk() can be used on a trace_array.
3509 */
3510int trace_array_init_printk(struct trace_array *tr)
3511{
3512        if (!tr)
3513                return -ENOENT;
3514
3515        /* This is only allowed for created instances */
3516        if (tr == &global_trace)
3517                return -EINVAL;
3518
3519        return alloc_percpu_trace_buffer();
3520}
3521EXPORT_SYMBOL_GPL(trace_array_init_printk);
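
/*
 * Illustrative sketch (the instance name "sample" and the call site are made
 * up): a subsystem that owns its own instance can log into it like this:
 *
 *	struct trace_array *tr = trace_array_get_by_name("sample");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "state=%d\n", state);
 */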
3522
3523__printf(3, 4)
3524int trace_array_printk_buf(struct trace_buffer *buffer,
3525                           unsigned long ip, const char *fmt, ...)
3526{
3527        int ret;
3528        va_list ap;
3529
3530        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3531                return 0;
3532
3533        va_start(ap, fmt);
3534        ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3535        va_end(ap);
3536        return ret;
3537}
3538
3539__printf(2, 0)
3540int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3541{
3542        return trace_array_vprintk(&global_trace, ip, fmt, args);
3543}
3544EXPORT_SYMBOL_GPL(trace_vprintk);
3545
3546static void trace_iterator_increment(struct trace_iterator *iter)
3547{
3548        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3549
3550        iter->idx++;
3551        if (buf_iter)
3552                ring_buffer_iter_advance(buf_iter);
3553}
3554
3555static struct trace_entry *
3556peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3557                unsigned long *lost_events)
3558{
3559        struct ring_buffer_event *event;
3560        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3561
3562        if (buf_iter) {
3563                event = ring_buffer_iter_peek(buf_iter, ts);
3564                if (lost_events)
3565                        *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3566                                (unsigned long)-1 : 0;
3567        } else {
3568                event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3569                                         lost_events);
3570        }
3571
3572        if (event) {
3573                iter->ent_size = ring_buffer_event_length(event);
3574                return ring_buffer_event_data(event);
3575        }
3576        iter->ent_size = 0;
3577        return NULL;
3578}
3579
3580static struct trace_entry *
3581__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3582                  unsigned long *missing_events, u64 *ent_ts)
3583{
3584        struct trace_buffer *buffer = iter->array_buffer->buffer;
3585        struct trace_entry *ent, *next = NULL;
3586        unsigned long lost_events = 0, next_lost = 0;
3587        int cpu_file = iter->cpu_file;
3588        u64 next_ts = 0, ts;
3589        int next_cpu = -1;
3590        int next_size = 0;
3591        int cpu;
3592
3593        /*
3594         * If we are in a per_cpu trace file, don't bother iterating
3595         * over all CPUs; just peek at that one CPU directly.
3596         */
3597        if (cpu_file > RING_BUFFER_ALL_CPUS) {
3598                if (ring_buffer_empty_cpu(buffer, cpu_file))
3599                        return NULL;
3600                ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3601                if (ent_cpu)
3602                        *ent_cpu = cpu_file;
3603
3604                return ent;
3605        }
3606
3607        for_each_tracing_cpu(cpu) {
3608
3609                if (ring_buffer_empty_cpu(buffer, cpu))
3610                        continue;
3611
3612                ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3613
3614                /*
3615                 * Pick the entry with the smallest timestamp:
3616                 */
3617                if (ent && (!next || ts < next_ts)) {
3618                        next = ent;
3619                        next_cpu = cpu;
3620                        next_ts = ts;
3621                        next_lost = lost_events;
3622                        next_size = iter->ent_size;
3623                }
3624        }
3625
3626        iter->ent_size = next_size;
3627
3628        if (ent_cpu)
3629                *ent_cpu = next_cpu;
3630
3631        if (ent_ts)
3632                *ent_ts = next_ts;
3633
3634        if (missing_events)
3635                *missing_events = next_lost;
3636
3637        return next;
3638}
3639
3640#define STATIC_FMT_BUF_SIZE     128
3641static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3642
3643static char *trace_iter_expand_format(struct trace_iterator *iter)
3644{
3645        char *tmp;
3646
3647        /*
3648         * iter->tr is NULL when used with tp_printk, in which case this
3649         * may be called from a context where krealloc() is not safe.
3650         */
3651        if (!iter->tr || iter->fmt == static_fmt_buf)
3652                return NULL;
3653
3654        tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3655                       GFP_KERNEL);
3656        if (tmp) {
3657                iter->fmt_size += STATIC_FMT_BUF_SIZE;
3658                iter->fmt = tmp;
3659        }
3660
3661        return tmp;
3662}
3663
3664/* Returns true if the string is safe to dereference from an event */
3665static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3666{
3667        unsigned long addr = (unsigned long)str;
3668        struct trace_event *trace_event;
3669        struct trace_event_call *event;
3670
3671        /* OK if part of the event data */
3672        if ((addr >= (unsigned long)iter->ent) &&
3673            (addr < (unsigned long)iter->ent + iter->ent_size))
3674                return true;
3675
3676        /* OK if part of the temp seq buffer */
3677        if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3678            (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3679                return true;
3680
3681        /* Core rodata cannot be freed */
3682        if (is_kernel_rodata(addr))
3683                return true;
3684
3685        if (trace_is_tracepoint_string(str))
3686                return true;
3687
3688        /*
3689         * Now this could be a module event, referencing core module
3690         * data, which is OK.
3691         */
3692        if (!iter->ent)
3693                return false;
3694
3695        trace_event = ftrace_find_event(iter->ent->type);
3696        if (!trace_event)
3697                return false;
3698
3699        event = container_of(trace_event, struct trace_event_call, event);
3700        if (!event->mod)
3701                return false;
3702
3703        /* Would rather have rodata, but this will suffice */
3704        if (within_module_core(addr, event->mod))
3705                return true;
3706
3707        return false;
3708}
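
/*
 * Illustrative sketch (see samples/trace_events/trace-events-sample.h): a
 * string that may be gone by the time the buffer is read should be copied
 * into the event with the __string()/__assign_str()/__get_str() helpers
 * rather than recorded as a bare pointer, roughly:
 *
 *	TRACE_EVENT(sample_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(__string(name, name)),
 *		TP_fast_assign(__assign_str(name, name);),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 *
 * "sample_event" is a made-up event name for the example.
 */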
3709
3710static const char *show_buffer(struct trace_seq *s)
3711{
3712        struct seq_buf *seq = &s->seq;
3713
3714        seq_buf_terminate(seq);
3715
3716        return seq->buffer;
3717}
3718
3719static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3720
3721static int test_can_verify_check(const char *fmt, ...)
3722{
3723        char buf[16];
3724        va_list ap;
3725        int ret;
3726
3727        /*
3728         * The verifier depends on vsnprintf() modifying the va_list that
3729         * is passed to it, i.e. on the va_list being passed by reference.
3730         * Some architectures (like x86_32) pass it by value, which means
3731         * that vsnprintf() does not modify the caller's va_list, and the
3732         * verifier would then need to understand all the values that
3733         * vsnprintf can consume. If the va_list is passed by value, the
3734         * verifier is simply disabled.
3735         */
3736        va_start(ap, fmt);
3737        vsnprintf(buf, 16, "%d", ap);
3738        ret = va_arg(ap, int);
3739        va_end(ap);
3740
3741        return ret;
3742}
3743
3744static void test_can_verify(void)
3745{
3746        if (!test_can_verify_check("%d %d", 0, 1)) {
3747                pr_info("trace event string verifier disabled\n");
3748                static_branch_inc(&trace_no_verify);
3749        }
3750}
3751
3752/**
3753 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3754 * @iter: The iterator that holds the seq buffer and the event being printed
3755 * @fmt: The format used to print the event
3756 * @ap: The va_list holding the data to print from @fmt.
3757 *
3758 * This writes the data into the @iter->seq buffer using the data from
3759 * @fmt and @ap. If the format has a %s, then the source of the string
3760 * is examined to make sure it is safe to print, otherwise it will
3761 * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3762 * pointer.
3763 */
3764void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3765                         va_list ap)
3766{
3767        const char *p = fmt;
3768        const char *str;
3769        int i, j;
3770
3771        if (WARN_ON_ONCE(!fmt))
3772                return;
3773
3774        if (static_branch_unlikely(&trace_no_verify))
3775                goto print;
3776
3777        /* Don't bother checking when doing a ftrace_dump() */
3778        if (iter->fmt == static_fmt_buf)
3779                goto print;
3780
3781        while (*p) {
3782                bool star = false;
3783                int len = 0;
3784
3785                j = 0;
3786
3787                /* We only care about %s and variants */
3788                for (i = 0; p[i]; i++) {
3789                        if (i + 1 >= iter->fmt_size) {
3790                                /*
3791                                 * If we can't expand the copy buffer,
3792                                 * just print it.
3793                                 */
3794                                if (!trace_iter_expand_format(iter))
3795                                        goto print;
3796                        }
3797
3798                        if (p[i] == '\\' && p[i+1]) {
3799                                i++;
3800                                continue;
3801                        }
3802                        if (p[i] == '%') {
3803                                /* Need to test cases like %08.*s */
3804                                for (j = 1; p[i+j]; j++) {
3805                                        if (isdigit(p[i+j]) ||
3806                                            p[i+j] == '.')
3807                                                continue;
3808                                        if (p[i+j] == '*') {
3809                                                star = true;
3810                                                continue;
3811                                        }
3812                                        break;
3813                                }
3814                                if (p[i+j] == 's')
3815                                        break;
3816                                star = false;
3817                        }
3818                        j = 0;
3819                }
3820                /* If no %s found then just print normally */
3821                if (!p[i])
3822                        break;
3823
3824                /* Copy up to the %s, and print that */
3825                strncpy(iter->fmt, p, i);
3826                iter->fmt[i] = '\0';
3827                trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3828
3829                if (star)
3830                        len = va_arg(ap, int);
3831
3832                /* The ap now points to the string data of the %s */
3833                str = va_arg(ap, const char *);
3834
3835                /*
3836                 * If you hit this warning, it is likely that the
3837                 * trace event in question used %s on a string that
3838                 * was saved at the time of the event, but may not be
3839                 * around when the trace is read. Use __string(),
3840                 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3841                 * instead. See samples/trace_events/trace-events-sample.h
3842                 * for reference.
3843                 */
3844                if (WARN_ONCE(!trace_safe_str(iter, str),
3845                              "fmt: '%s' current_buffer: '%s'",
3846                              fmt, show_buffer(&iter->seq))) {
3847                        int ret;
3848
3849                        /* Try to safely read the string */
3850                        if (star) {
3851                                if (len + 1 > iter->fmt_size)
3852                                        len = iter->fmt_size - 1;
3853                                if (len < 0)
3854                                        len = 0;
3855                                ret = copy_from_kernel_nofault(iter->fmt, str, len);
3856                                iter->fmt[len] = 0;
3857                                star = false;
3858                        } else {
3859                                ret = strncpy_from_kernel_nofault(iter->fmt, str,
3860                                                                  iter->fmt_size);
3861                        }
3862                        if (ret < 0)
3863                                trace_seq_printf(&iter->seq, "(0x%px)", str);
3864                        else
3865                                trace_seq_printf(&iter->seq, "(0x%px:%s)",
3866                                                 str, iter->fmt);
3867                        str = "[UNSAFE-MEMORY]";
3868                        strcpy(iter->fmt, "%s");
3869                } else {
3870                        strncpy(iter->fmt, p + i, j + 1);
3871                        iter->fmt[j+1] = '\0';
3872                }
3873                if (star)
3874                        trace_seq_printf(&iter->seq, iter->fmt, len, str);
3875                else
3876                        trace_seq_printf(&iter->seq, iter->fmt, str);
3877
3878                p += i + j + 1;
3879        }
3880 print:
3881        if (*p)
3882                trace_seq_vprintf(&iter->seq, p, ap);
3883}
3884
3885const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3886{
3887        const char *p, *new_fmt;
3888        char *q;
3889
3890        if (WARN_ON_ONCE(!fmt))
3891                return fmt;
3892
3893        if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3894                return fmt;
3895
3896        p = fmt;
3897        new_fmt = q = iter->fmt;
3898        while (*p) {
3899                if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3900                        if (!trace_iter_expand_format(iter))
3901                                return fmt;
3902
3903                        q += iter->fmt - new_fmt;
3904                        new_fmt = iter->fmt;
3905                }
3906
3907                *q++ = *p++;
3908
3909                /* Replace %p with %px */
3910                if (p[-1] == '%') {
3911                        if (p[0] == '%') {
3912                                *q++ = *p++;
3913                        } else if (p[0] == 'p' && !isalnum(p[1])) {
3914                                *q++ = *p++;
3915                                *q++ = 'x';
3916                        }
3917                }
3918        }
3919        *q = '\0';
3920
3921        return new_fmt;
3922}
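
/*
 * Illustrative sketch (assuming the tracefs option behind TRACE_ITER_HASH_PTR
 * is the "hash-ptr" option): with pointer hashing turned off, an event format
 * such as
 *
 *	"ptr=%p escaped=%%p sym=%pS"
 *
 * is rewritten here to "ptr=%px escaped=%%p sym=%pS"; only a bare %p gains
 * the 'x', while "%%" escapes and extended forms like %pS are left alone.
 */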
3923
3924#define STATIC_TEMP_BUF_SIZE    128
3925static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3926
3927/* Find the next real entry, without updating the iterator itself */
3928struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3929                                          int *ent_cpu, u64 *ent_ts)
3930{
3931        /* __find_next_entry will reset ent_size */
3932        int ent_size = iter->ent_size;
3933        struct trace_entry *entry;
3934
3935        /*
3936         * If called from ftrace_dump(), then the iter->temp buffer
3937         * will be the static_temp_buf and not created from kmalloc.
3938         * If the entry size is greater than the buffer, we cannot
3939         * save it. Just return NULL in that case. This is only
3940         * used to add markers when two consecutive events' time
3941         * stamps have a large delta. See trace_print_lat_context().
3942         */
3943        if (iter->temp == static_temp_buf &&
3944            STATIC_TEMP_BUF_SIZE < ent_size)
3945                return NULL;
3946
3947        /*
3948         * __find_next_entry() may call peek_next_entry(), which may call
3949         * ring_buffer_peek(), which can leave the contents of iter->ent
3950         * undefined. iter->ent needs to be copied now.
3951         */
3952        if (iter->ent && iter->ent != iter->temp) {
3953                if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3954                    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3955                        void *temp;
3956                        temp = kmalloc(iter->ent_size, GFP_KERNEL);
3957                        if (!temp)
3958                                return NULL;
3959                        kfree(iter->temp);
3960                        iter->temp = temp;
3961                        iter->temp_size = iter->ent_size;
3962                }
3963                memcpy(iter->temp, iter->ent, iter->ent_size);
3964                iter->ent = iter->temp;
3965        }
3966        entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3967        /* Put back the original ent_size */
3968        iter->ent_size = ent_size;
3969
3970        return entry;
3971}
3972
3973/* Find the next real entry, and increment the iterator to the next entry */
3974void *trace_find_next_entry_inc(struct trace_iterator *iter)
3975{
3976        iter->ent = __find_next_entry(iter, &iter->cpu,
3977                                      &iter->lost_events, &iter->ts);
3978
3979        if (iter->ent)
3980                trace_iterator_increment(iter);
3981
3982        return iter->ent ? iter : NULL;
3983}
3984
3985static void trace_consume(struct trace_iterator *iter)
3986{
3987        ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3988                            &iter->lost_events);
3989}
3990
3991static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3992{
3993        struct trace_iterator *iter = m->private;
3994        int i = (int)*pos;
3995        void *ent;
3996
3997        WARN_ON_ONCE(iter->leftover);
3998
3999        (*pos)++;
4000
4001        /* can't go backwards */
4002        if (iter->idx > i)
4003                return NULL;
4004
4005        if (iter->idx < 0)
4006                ent = trace_find_next_entry_inc(iter);
4007        else
4008                ent = iter;
4009
4010        while (ent && iter->idx < i)
4011                ent = trace_find_next_entry_inc(iter);
4012
4013        iter->pos = *pos;
4014
4015        return ent;
4016}
4017
4018void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4019{
4020        struct ring_buffer_iter *buf_iter;
4021        unsigned long entries = 0;
4022        u64 ts;
4023
4024        per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4025
4026        buf_iter = trace_buffer_iter(iter, cpu);
4027        if (!buf_iter)
4028                return;
4029
4030        ring_buffer_iter_reset(buf_iter);
4031
4032        /*
4033         * With the max latency tracers, it can happen that a reset
4034         * never took place on a CPU. This shows up as timestamps
4035         * that are earlier than the start of the buffer.
4036         */
4037        while (ring_buffer_iter_peek(buf_iter, &ts)) {
4038                if (ts >= iter->array_buffer->time_start)
4039                        break;
4040                entries++;
4041                ring_buffer_iter_advance(buf_iter);
4042        }
4043
4044        per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4045}
4046
4047/*
4048 * The current tracer is copied to avoid holding a global lock
4049 * around the whole read.
4050 */
4051static void *s_start(struct seq_file *m, loff_t *pos)
4052{
4053        struct trace_iterator *iter = m->private;
4054        struct trace_array *tr = iter->tr;
4055        int cpu_file = iter->cpu_file;
4056        void *p = NULL;
4057        loff_t l = 0;
4058        int cpu;
4059
4060        /*
4061         * Copy the tracer to avoid using a global lock all around.
4062         * iter->trace is a copy of current_trace, so a pointer compare
4063         * of the names may be used instead of strcmp(), as iter->trace->name
4064         * will point to the same string as current_trace->name.
4065         */
4066        mutex_lock(&trace_types_lock);
4067        if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4068                *iter->trace = *tr->current_trace;
4069        mutex_unlock(&trace_types_lock);
4070
4071#ifdef CONFIG_TRACER_MAX_TRACE
4072        if (iter->snapshot && iter->trace->use_max_tr)
4073                return ERR_PTR(-EBUSY);
4074#endif
4075
4076        if (*pos != iter->pos) {
4077                iter->ent = NULL;
4078                iter->cpu = 0;
4079                iter->idx = -1;
4080
4081                if (cpu_file == RING_BUFFER_ALL_CPUS) {
4082                        for_each_tracing_cpu(cpu)
4083                                tracing_iter_reset(iter, cpu);
4084                } else
4085                        tracing_iter_reset(iter, cpu_file);
4086
4087                iter->leftover = 0;
4088                for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4089                        ;
4090
4091        } else {
4092                /*
4093                 * If we overflowed the seq_file before, just reuse
4094                 * the trace_seq buffer again.
4095                 */
4096                if (iter->leftover)
4097                        p = iter;
4098                else {
4099                        l = *pos - 1;
4100                        p = s_next(m, p, &l);
4101                }
4102        }
4103
4104        trace_event_read_lock();
4105        trace_access_lock(cpu_file);
4106        return p;
4107}
4108
4109static void s_stop(struct seq_file *m, void *p)
4110{
4111        struct trace_iterator *iter = m->private;
4112
4113#ifdef CONFIG_TRACER_MAX_TRACE
4114        if (iter->snapshot && iter->trace->use_max_tr)
4115                return;
4116#endif
4117
4118        trace_access_unlock(iter->cpu_file);
4119        trace_event_read_unlock();
4120}
4121
4122static void
4123get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4124                      unsigned long *entries, int cpu)
4125{
4126        unsigned long count;
4127
4128        count = ring_buffer_entries_cpu(buf->buffer, cpu);
4129        /*
4130         * If this buffer has skipped entries, then we hold all
4131         * entries for the trace and we need to ignore the
4132         * ones before the time stamp.
4133         */
4134        if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4135                count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4136                /* total is the same as the entries */
4137                *total = count;
4138        } else
4139                *total = count +
4140                        ring_buffer_overrun_cpu(buf->buffer, cpu);
4141        *entries = count;
4142}
4143
4144static void
4145get_total_entries(struct array_buffer *buf,
4146                  unsigned long *total, unsigned long *entries)
4147{
4148        unsigned long t, e;
4149        int cpu;
4150
4151        *total = 0;
4152        *entries = 0;
4153
4154        for_each_tracing_cpu(cpu) {
4155                get_total_entries_cpu(buf, &t, &e, cpu);
4156                *total += t;
4157                *entries += e;
4158        }
4159}
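
/*
 * Illustrative sketch of the two counters: with 800 events still in a CPU's
 * buffer and 200 older events overwritten, the helpers above yield
 *
 *	entries = 800;		// ring_buffer_entries_cpu()
 *	total   = 800 + 200;	// entries + ring_buffer_overrun_cpu()
 *
 * which print_event_info() reports as
 * "entries-in-buffer/entries-written: 800/1000".
 */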
4160
4161unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4162{
4163        unsigned long total, entries;
4164
4165        if (!tr)
4166                tr = &global_trace;
4167
4168        get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4169
4170        return entries;
4171}
4172
4173unsigned long trace_total_entries(struct trace_array *tr)
4174{
4175        unsigned long total, entries;
4176
4177        if (!tr)
4178                tr = &global_trace;
4179
4180        get_total_entries(&tr->array_buffer, &total, &entries);
4181
4182        return entries;
4183}
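
/*
 * Illustrative sketch (hypothetical caller): other kernel code can use these
 * exported counters to bound how much it is about to dump, e.g.:
 *
 *	unsigned long lines = trace_total_entries(NULL);  // NULL means global_trace
 *
 * before walking the buffer and printing that many entries.
 */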
4184
4185static void print_lat_help_header(struct seq_file *m)
4186{
4187        seq_puts(m, "#                    _------=> CPU#            \n"
4188                    "#                   / _-----=> irqs-off        \n"
4189                    "#                  | / _----=> need-resched    \n"
4190                    "#                  || / _---=> hardirq/softirq \n"
4191                    "#                  ||| / _--=> preempt-depth   \n"
4192                    "#                  |||| /     delay            \n"
4193                    "#  cmd     pid     ||||| time  |   caller      \n"
4194                    "#     \\   /        |||||  \\    |   /         \n");
4195}
4196
4197static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4198{
4199        unsigned long total;
4200        unsigned long entries;
4201
4202        get_total_entries(buf, &total, &entries);
4203        seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4204                   entries, total, num_online_cpus());
4205        seq_puts(m, "#\n");
4206}
4207
4208static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4209                                   unsigned int flags)
4210{
4211        bool tgid = flags & TRACE_ITER_RECORD_TGID;
4212
4213        print_event_info(buf, m);
4214
4215        seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4216        seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4217}
4218
4219static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4220                                       unsigned int flags)
4221{
4222        bool tgid = flags & TRACE_ITER_RECORD_TGID;
4223        const char *space = "            ";
4224        int prec = tgid ? 12 : 2;
4225
4226        print_event_info(buf, m);
4227
4228        seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4229        seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4230        seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4231        seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4232        seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4233        seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4234        seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4235}
4236
4237void
4238print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4239{
4240        unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4241        struct array_buffer *buf = iter->array_buffer;
4242        struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4243        struct tracer *type = iter->trace;
4244        unsigned long entries;
4245        unsigned long total;
4246        const char *name = "preemption";
4247
4248        name = type->name;
4249
4250        get_total_entries(buf, &total, &entries);
4251
4252        seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4253                   name, UTS_RELEASE);
4254        seq_puts(m, "# -----------------------------------"
4255                 "---------------------------------\n");
4256        seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4257                   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4258                   nsecs_to_usecs(data->saved_latency),
4259                   entries,
4260                   total,
4261                   buf->cpu,
4262#if defined(CONFIG_PREEMPT_NONE)
4263                   "server",
4264#elif defined(CONFIG_PREEMPT_VOLUNTARY)
4265                   "desktop",
4266#elif defined(CONFIG_PREEMPT)
4267                   "preempt",
4268#elif defined(CONFIG_PREEMPT_RT)
4269                   "preempt_rt",
4270#else
4271                   "unknown",
4272#endif
4273                   /* These are reserved for later use */
4274                   0, 0, 0, 0);
4275#ifdef CONFIG_SMP
4276        seq_printf(m, " #P:%d)\n", num_online_cpus());
4277#else
4278        seq_puts(m, ")\n");
4279#endif
4280        seq_puts(m, "#    -----------------\n");
4281        seq_printf(m, "#    | task: %.16s-%d "
4282                   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4283                   data->comm, data->pid,
4284                   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4285                   data->policy, data->rt_priority);
4286        seq_puts(m, "#    -----------------\n");
4287
4288        if (data->critical_start) {
4289                seq_puts(m, "#  => started at: ");
4290                seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4291                trace_print_seq(m, &iter->seq);
4292                seq_puts(m, "\n#  => ended at:   ");
4293                seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4294                trace_print_seq(m, &iter->seq);
4295                seq_puts(m, "\n#\n");
4296        }
4297
4298        seq_puts(m, "#\n");
4299}
4300
4301static void test_cpu_buff_start(struct trace_iterator *iter)
4302{
4303        struct trace_seq *s = &iter->seq;
4304        struct trace_array *tr = iter->tr;
4305
4306        if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4307                return;
4308
4309        if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4310                return;
4311
4312        if (cpumask_available(iter->started) &&
4313            cpumask_test_cpu(iter->cpu, iter->started))
4314                return;
4315
4316        if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4317                return;
4318
4319        if (cpumask_available(iter->started))
4320                cpumask_set_cpu(iter->cpu, iter->started);
4321
4322        /* Don't print started cpu buffer for the first entry of the trace */
4323        if (iter->idx > 1)
4324                trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4325                                iter->cpu);
4326}
4327
4328static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4329{
4330        struct trace_array *tr = iter->tr;
4331        struct trace_seq *s = &iter->seq;
4332        unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4333        struct trace_entry *entry;
4334        struct trace_event *event;
4335
4336        entry = iter->ent;
4337
4338        test_cpu_buff_start(iter);
4339
4340        event = ftrace_find_event(entry->type);
4341
4342        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4343                if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4344                        trace_print_lat_context(iter);
4345                else
4346                        trace_print_context(iter);
4347        }
4348
4349        if (trace_seq_has_overflowed(s))
4350                return TRACE_TYPE_PARTIAL_LINE;
4351
4352        if (event)
4353                return event->funcs->trace(iter, sym_flags, event);
4354
4355        trace_seq_printf(s, "Unknown type %d\n", entry->type);
4356
4357        return trace_handle_return(s);
4358}
4359
4360static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4361{
4362        struct trace_array *tr = iter->tr;
4363        struct trace_seq *s = &iter->seq;
4364        struct trace_entry *entry;
4365        struct trace_event *event;
4366
4367        entry = iter->ent;
4368
4369        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4370                trace_seq_printf(s, "%d %d %llu ",
4371                                 entry->pid, iter->cpu, iter->ts);
4372
4373        if (trace_seq_has_overflowed(s))
4374                return TRACE_TYPE_PARTIAL_LINE;
4375
4376        event = ftrace_find_event(entry->type);
4377        if (event)
4378                return event->funcs->raw(iter, 0, event);
4379
4380        trace_seq_printf(s, "%d ?\n", entry->type);
4381
4382        return trace_handle_return(s);
4383}
4384
4385static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4386{
4387        struct trace_array *tr = iter->tr;
4388        struct trace_seq *s = &iter->seq;
4389        unsigned char newline = '\n';
4390        struct trace_entry *entry;
4391        struct trace_event *event;
4392
4393        entry = iter->ent;
4394
4395        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4396                SEQ_PUT_HEX_FIELD(s, entry->pid);
4397                SEQ_PUT_HEX_FIELD(s, iter->cpu);
4398                SEQ_PUT_HEX_FIELD(s, iter->ts);
4399                if (trace_seq_has_overflowed(s))
4400                        return TRACE_TYPE_PARTIAL_LINE;
4401        }
4402
4403        event = ftrace_find_event(entry->type);
4404        if (event) {
4405                enum print_line_t ret = event->funcs->hex(iter, 0, event);
4406                if (ret != TRACE_TYPE_HANDLED)
4407                        return ret;
4408        }
4409
4410        SEQ_PUT_FIELD(s, newline);
4411
4412        return trace_handle_return(s);
4413}
4414
4415static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4416{
4417        struct trace_array *tr = iter->tr;
4418        struct trace_seq *s = &iter->seq;
4419        struct trace_entry *entry;
4420        struct trace_event *event;
4421
4422        entry = iter->ent;
4423
4424        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4425                SEQ_PUT_FIELD(s, entry->pid);
4426                SEQ_PUT_FIELD(s, iter->cpu);
4427                SEQ_PUT_FIELD(s, iter->ts);
4428                if (trace_seq_has_overflowed(s))
4429                        return TRACE_TYPE_PARTIAL_LINE;
4430        }
4431
4432        event = ftrace_find_event(entry->type);
4433        return event ? event->funcs->binary(iter, 0, event) :
4434                TRACE_TYPE_HANDLED;
4435}
4436
4437int trace_empty(struct trace_iterator *iter)
4438{
4439        struct ring_buffer_iter *buf_iter;
4440        int cpu;
4441
4442        /* If we are looking at one CPU buffer, only check that one */
4443        if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4444                cpu = iter->cpu_file;
4445                buf_iter = trace_buffer_iter(iter, cpu);
4446                if (buf_iter) {
4447                        if (!ring_buffer_iter_empty(buf_iter))
4448                                return 0;
4449                } else {
4450                        if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4451                                return 0;
4452                }
4453                return 1;
4454        }
4455
4456        for_each_tracing_cpu(cpu) {
4457                buf_iter = trace_buffer_iter(iter, cpu);
4458                if (buf_iter) {
4459                        if (!ring_buffer_iter_empty(buf_iter))
4460                                return 0;
4461                } else {
4462                        if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4463                                return 0;
4464                }
4465        }
4466
4467        return 1;
4468}
4469
4470/*  Called with trace_event_read_lock() held. */
4471enum print_line_t print_trace_line(struct trace_iterator *iter)
4472{
4473        struct trace_array *tr = iter->tr;
4474        unsigned long trace_flags = tr->trace_flags;
4475        enum print_line_t ret;
4476
4477        if (iter->lost_events) {
4478                if (iter->lost_events == (unsigned long)-1)
4479                        trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4480                                         iter->cpu);
4481                else
4482                        trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4483                                         iter->cpu, iter->lost_events);
4484                if (trace_seq_has_overflowed(&iter->seq))
4485                        return TRACE_TYPE_PARTIAL_LINE;
4486        }
4487
4488        if (iter->trace && iter->trace->print_line) {
4489                ret = iter->trace->print_line(iter);
4490                if (ret != TRACE_TYPE_UNHANDLED)
4491                        return ret;
4492        }
4493
4494        if (iter->ent->type == TRACE_BPUTS &&
4495                        trace_flags & TRACE_ITER_PRINTK &&
4496                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4497                return trace_print_bputs_msg_only(iter);
4498
4499        if (iter->ent->type == TRACE_BPRINT &&
4500                        trace_flags & TRACE_ITER_PRINTK &&
4501                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4502                return trace_print_bprintk_msg_only(iter);
4503
4504        if (iter->ent->type == TRACE_PRINT &&
4505                        trace_flags & TRACE_ITER_PRINTK &&
4506                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4507                return trace_print_printk_msg_only(iter);
4508
4509        if (trace_flags & TRACE_ITER_BIN)
4510                return print_bin_fmt(iter);
4511
4512        if (trace_flags & TRACE_ITER_HEX)
4513                return print_hex_fmt(iter);
4514
4515        if (trace_flags & TRACE_ITER_RAW)
4516                return print_raw_fmt(iter);
4517
4518        return print_trace_fmt(iter);
4519}
4520
4521void trace_latency_header(struct seq_file *m)
4522{
4523        struct trace_iterator *iter = m->private;
4524        struct trace_array *tr = iter->tr;
4525
4526        /* print nothing if the buffers are empty */
4527        if (trace_empty(iter))
4528                return;
4529
4530        if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4531                print_trace_header(m, iter);
4532
4533        if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4534                print_lat_help_header(m);
4535}
4536
4537void trace_default_header(struct seq_file *m)
4538{
4539        struct trace_iterator *iter = m->private;
4540        struct trace_array *tr = iter->tr;
4541        unsigned long trace_flags = tr->trace_flags;
4542
4543        if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4544                return;
4545
4546        if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4547                /* print nothing if the buffers are empty */
4548                if (trace_empty(iter))
4549                        return;
4550                print_trace_header(m, iter);
4551                if (!(trace_flags & TRACE_ITER_VERBOSE))
4552                        print_lat_help_header(m);
4553        } else {
4554                if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4555                        if (trace_flags & TRACE_ITER_IRQ_INFO)
4556                                print_func_help_header_irq(iter->array_buffer,
4557                                                           m, trace_flags);
4558                        else
4559                                print_func_help_header(iter->array_buffer, m,
4560                                                       trace_flags);
4561                }
4562        }
4563}
4564
4565static void test_ftrace_alive(struct seq_file *m)
4566{
4567        if (!ftrace_is_dead())
4568                return;
4569        seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4570                    "#          MAY BE MISSING FUNCTION EVENTS\n");
4571}
4572
4573#ifdef CONFIG_TRACER_MAX_TRACE
4574static void show_snapshot_main_help(struct seq_file *m)
4575{
4576        seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4577                    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4578                    "#                      Takes a snapshot of the main buffer.\n"
4579                    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4580                    "#                      (Doesn't have to be '2'; works with any number that\n"
4581                    "#                       is not a '0' or '1')\n");
4582}
4583
4584static void show_snapshot_percpu_help(struct seq_file *m)
4585{
4586        seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4587#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4588        seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4589                    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4590#else
4591        seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4592                    "#                     Must use main snapshot file to allocate.\n");
4593#endif
4594        seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4595                    "#                      (Doesn't have to be '2'; works with any number that\n"
4596                    "#                       is not a '0' or '1')\n");
4597}
4598
4599static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4600{
4601        if (iter->tr->allocated_snapshot)
4602                seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4603        else
4604                seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4605
4606        seq_puts(m, "# Snapshot commands:\n");
4607        if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4608                show_snapshot_main_help(m);
4609        else
4610                show_snapshot_percpu_help(m);
4611}
4612#else
4613/* Should never be called */
4614static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4615#endif
4616
4617static int s_show(struct seq_file *m, void *v)
4618{
4619        struct trace_iterator *iter = v;
4620        int ret;
4621
4622        if (iter->ent == NULL) {
4623                if (iter->tr) {
4624                        seq_printf(m, "# tracer: %s\n", iter->trace->name);
4625                        seq_puts(m, "#\n");
4626                        test_ftrace_alive(m);
4627                }
4628                if (iter->snapshot && trace_empty(iter))
4629                        print_snapshot_help(m, iter);
4630                else if (iter->trace && iter->trace->print_header)
4631                        iter->trace->print_header(m);
4632                else
4633                        trace_default_header(m);
4634
4635        } else if (iter->leftover) {
4636                /*
4637                 * If we filled the seq_file buffer earlier, we
4638                 * want to just show it now.
4639                 */
4640                ret = trace_print_seq(m, &iter->seq);
4641
4642                /* ret should this time be zero, but you never know */
4643                iter->leftover = ret;
4644
4645        } else {
4646                print_trace_line(iter);
4647                ret = trace_print_seq(m, &iter->seq);
4648                /*
4649                 * If we overflow the seq_file buffer, then it will
4650                 * ask us for this data again at start up.
4651                 * Use that instead.
4652                 *  ret is 0 if seq_file write succeeded.
4653                 *        -1 otherwise.
4654                 */
4655                iter->leftover = ret;
4656        }
4657
4658        return 0;
4659}
4660
4661/*
4662 * Should be used after trace_array_get(); trace_types_lock
4663 * ensures that i_cdev was already initialized.
4664 */
4665static inline int tracing_get_cpu(struct inode *inode)
4666{
4667        if (inode->i_cdev) /* See trace_create_cpu_file() */
4668                return (long)inode->i_cdev - 1;
4669        return RING_BUFFER_ALL_CPUS;
4670}
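
/*
 * Illustrative sketch (assumption inferred from the decoding above): the
 * per-cpu file creation stores "cpu + 1" in i_cdev so that a NULL i_cdev
 * still means "all CPUs":
 *
 *	inode->i_cdev = (void *)(cpu + 1);	// encode, at file creation
 *	cpu = (long)inode->i_cdev - 1;		// decode, this helper
 */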
4671
4672static const struct seq_operations tracer_seq_ops = {
4673        .start          = s_start,
4674        .next           = s_next,
4675        .stop           = s_stop,
4676        .show           = s_show,
4677};
4678
4679static struct trace_iterator *
4680__tracing_open(struct inode *inode, struct file *file, bool snapshot)
4681{
4682        struct trace_array *tr = inode->i_private;
4683        struct trace_iterator *iter;
4684        int cpu;
4685
4686        if (tracing_disabled)
4687                return ERR_PTR(-ENODEV);
4688
4689        iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4690        if (!iter)
4691                return ERR_PTR(-ENOMEM);
4692
4693        iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4694                                    GFP_KERNEL);
4695        if (!iter->buffer_iter)
4696                goto release;
4697
4698        /*
4699         * trace_find_next_entry() may need to save off iter->ent.
4700         * It will place it into the iter->temp buffer. As most
4701         * events are smaller than 128 bytes, allocate a buffer of
4702         * that size. If one is bigger, then trace_find_next_entry()
4703         * will allocate a new buffer to adjust for the bigger iter->ent.
4704         * It's not critical if this allocation fails here.
4705         */
4706        iter->temp = kmalloc(128, GFP_KERNEL);
4707        if (iter->temp)
4708                iter->temp_size = 128;
4709
4710        /*
4711         * trace_event_printf() may need to modify the given format
4712         * string to replace %p with %px so that it shows the real address
4713         * instead of a hashed value. However, that is only for event
4714         * tracing; other tracers may not need it. Defer the allocation
4715         * until it is needed.
4716         */
4717        iter->fmt = NULL;
4718        iter->fmt_size = 0;
4719
4720        /*
4721         * We make a copy of the current tracer to avoid concurrent
4722         * changes on it while we are reading.
4723         */
4724        mutex_lock(&trace_types_lock);
4725        iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4726        if (!iter->trace)
4727                goto fail;
4728
4729        *iter->trace = *tr->current_trace;
4730
4731        if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4732                goto fail;
4733
4734        iter->tr = tr;
4735
4736#ifdef CONFIG_TRACER_MAX_TRACE
4737        /* Currently only the top directory has a snapshot */
4738        if (tr->current_trace->print_max || snapshot)
4739                iter->array_buffer = &tr->max_buffer;
4740        else
4741#endif
4742                iter->array_buffer = &tr->array_buffer;
4743        iter->snapshot = snapshot;
4744        iter->pos = -1;
4745        iter->cpu_file = tracing_get_cpu(inode);
4746        mutex_init(&iter->mutex);
4747
4748        /* Notify the tracer early; before we stop tracing. */
4749        if (iter->trace->open)
4750                iter->trace->open(iter);
4751
4752        /* Annotate start of buffers if we had overruns */
4753        if (ring_buffer_overruns(iter->array_buffer->buffer))
4754                iter->iter_flags |= TRACE_FILE_ANNOTATE;
4755
4756        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4757