/* linux/tools/perf/builtin-record.c */
   1/*
   2 * builtin-record.c
   3 *
   4 * Builtin record command: Record the profile of a workload
   5 * (or a CPU, or a PID) into the perf.data output file - for
   6 * later analysis via perf report.
   7 */
   8#define _FILE_OFFSET_BITS 64
   9
  10#include "builtin.h"
  11
  12#include "perf.h"
  13
  14#include "util/build-id.h"
  15#include "util/util.h"
  16#include "util/parse-options.h"
  17#include "util/parse-events.h"
  18
  19#include "util/header.h"
  20#include "util/event.h"
  21#include "util/evlist.h"
  22#include "util/evsel.h"
  23#include "util/debug.h"
  24#include "util/session.h"
  25#include "util/tool.h"
  26#include "util/symbol.h"
  27#include "util/cpumap.h"
  28#include "util/thread_map.h"
  29
  30#include <unistd.h>
  31#include <sched.h>
  32#include <sys/mman.h>
  33
/*
 * How an existing perf.data output file is treated: WRITE_FORCE
 * overwrites it (after renaming the previous file to <name>.old, see
 * __cmd_record()), WRITE_APPEND appends new records to it.
 */
enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};
  38
/*
 * State for one 'perf record' run.  The perf_tool member is embedded so
 * that event-processing callbacks can recover this struct with
 * container_of() (see process_synthesized_event()).
 */
struct perf_record {
	struct perf_tool	tool;			/* callbacks used while synthesizing events */
	struct perf_record_opts opts;			/* parsed recording options */
	u64			bytes_written;		/* event payload bytes written so far */
	const char		*output_name;		/* output path; NULL until resolved (default "perf.data") */
	struct perf_evlist	*evlist;		/* events being recorded */
	struct perf_session	*session;		/* session backing the output file */
	const char		*progname;		/* argv[0], used when reporting a signaled child */
	int			output;			/* output file descriptor */
	unsigned int		page_size;		/* runtime page size; offset of mmap data area */
	int			realtime_prio;		/* if non-zero, SCHED_FIFO priority to request */
	enum write_mode_t	write_mode;		/* overwrite vs append to an existing file */
	bool			no_buildid;		/* skip build-id post-processing */
	bool			no_buildid_cache;	/* do not add build-ids to the cache */
	bool			force;			/* allow clobbering an existing output file */
	bool			file_new;		/* writing a fresh file (cleared when appending) */
	bool			append_file;		/* appending to an existing perf.data */
	long			samples;		/* number of mmap reads that found new data */
	off_t			post_processing_offset;	/* file offset where event data starts */
};
  59
/*
 * Account for 'size' bytes that were written to the output fd by a
 * helper outside write_output() (e.g. perf_event__synthesize_tracing_data()
 * in __cmd_record()), so the header's data_size stays accurate.
 */
static void advance_output(struct perf_record *rec, size_t size)
{
	rec->bytes_written += size;
}
  64
  65static int write_output(struct perf_record *rec, void *buf, size_t size)
  66{
  67        while (size) {
  68                int ret = write(rec->output, buf, size);
  69
  70                if (ret < 0) {
  71                        pr_err("failed to write\n");
  72                        return -1;
  73                }
  74
  75                size -= ret;
  76                buf += ret;
  77
  78                rec->bytes_written += ret;
  79        }
  80
  81        return 0;
  82}
  83
  84static int process_synthesized_event(struct perf_tool *tool,
  85                                     union perf_event *event,
  86                                     struct perf_sample *sample __maybe_unused,
  87                                     struct machine *machine __maybe_unused)
  88{
  89        struct perf_record *rec = container_of(tool, struct perf_record, tool);
  90        if (write_output(rec, event, event->header.size) < 0)
  91                return -1;
  92
  93        return 0;
  94}
  95
/*
 * Drain all new data from one mmap'ed ring buffer and append it to the
 * output file.  The kernel advances 'head'; 'md->prev' remembers how far
 * we got on the previous read.  Because the buffer is a ring, the new
 * data may wrap past the end and then has to be written in two chunks.
 *
 * Returns 0 on success (including "nothing new"), -1 on write failure.
 */
static int perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	/* the data area starts one page past the control page */
	unsigned char *data = md->base + rec->page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;	/* no new data since last time */

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		/* wrapped: first write from 'old' to the end of the ring */
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (write_output(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	/* write the (remaining) linear chunk up to 'head' */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (write_output(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	/* publish the new tail so the kernel can reuse the space */
	perf_mmap__write_tail(md, old);

out:
	return rc;
}
 139
 140static volatile int done = 0;
 141static volatile int signr = -1;
 142static volatile int child_finished = 0;
 143
 144static void sig_handler(int sig)
 145{
 146        if (sig == SIGCHLD)
 147                child_finished = 1;
 148
 149        done = 1;
 150        signr = sig;
 151}
 152
/*
 * on_exit() hook: reap the forked workload (killing it first if it has
 * not finished) and, when we are exiting because of a fatal signal,
 * re-raise that signal with the default disposition so our exit status
 * reflects it.  SIGUSR1 is treated as a clean stop (see __cmd_record())
 * and is deliberately not re-raised.
 */
static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	if (signr == -1 || signr == SIGUSR1)
		return;

	/* restore the default action and deliver the signal to ourselves */
	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}
 173
 174static bool perf_evlist__equal(struct perf_evlist *evlist,
 175                               struct perf_evlist *other)
 176{
 177        struct perf_evsel *pos, *pair;
 178
 179        if (evlist->nr_entries != other->nr_entries)
 180                return false;
 181
 182        pair = perf_evlist__first(other);
 183
 184        list_for_each_entry(pos, &evlist->entries, node) {
 185                if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
 186                        return false;
 187                pair = perf_evsel__next(pair);
 188        }
 189
 190        return true;
 191}
 192
 193static int perf_record__open(struct perf_record *rec)
 194{
 195        struct perf_evsel *pos;
 196        struct perf_evlist *evlist = rec->evlist;
 197        struct perf_session *session = rec->session;
 198        struct perf_record_opts *opts = &rec->opts;
 199        int rc = 0;
 200
 201        perf_evlist__config_attrs(evlist, opts);
 202
 203        if (opts->group)
 204                perf_evlist__set_leader(evlist);
 205
 206        list_for_each_entry(pos, &evlist->entries, node) {
 207                struct perf_event_attr *attr = &pos->attr;
 208                /*
 209                 * Check if parse_single_tracepoint_event has already asked for
 210                 * PERF_SAMPLE_TIME.
 211                 *
 212                 * XXX this is kludgy but short term fix for problems introduced by
 213                 * eac23d1c that broke 'perf script' by having different sample_types
 214                 * when using multiple tracepoint events when we use a perf binary
 215                 * that tries to use sample_id_all on an older kernel.
 216                 *
 217                 * We need to move counter creation to perf_session, support
 218                 * different sample_types, etc.
 219                 */
 220                bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
 221
 222fallback_missing_features:
 223                if (opts->exclude_guest_missing)
 224                        attr->exclude_guest = attr->exclude_host = 0;
 225retry_sample_id:
 226                attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
 227try_again:
 228                if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
 229                        int err = errno;
 230
 231                        if (err == EPERM || err == EACCES) {
 232                                ui__error_paranoid();
 233                                rc = -err;
 234                                goto out;
 235                        } else if (err ==  ENODEV && opts->target.cpu_list) {
 236                                pr_err("No such device - did you specify"
 237                                       " an out-of-range profile CPU?\n");
 238                                rc = -err;
 239                                goto out;
 240                        } else if (err == EINVAL) {
 241                                if (!opts->exclude_guest_missing &&
 242                                    (attr->exclude_guest || attr->exclude_host)) {
 243                                        pr_debug("Old kernel, cannot exclude "
 244                                                 "guest or host samples.\n");
 245                                        opts->exclude_guest_missing = true;
 246                                        goto fallback_missing_features;
 247                                } else if (!opts->sample_id_all_missing) {
 248                                        /*
 249                                         * Old kernel, no attr->sample_id_type_all field
 250                                         */
 251                                        opts->sample_id_all_missing = true;
 252                                        if (!opts->sample_time && !opts->raw_samples && !time_needed)
 253                                                attr->sample_type &= ~PERF_SAMPLE_TIME;
 254
 255                                        goto retry_sample_id;
 256                                }
 257                        }
 258
 259                        /*
 260                         * If it's cycles then fall back to hrtimer
 261                         * based cpu-clock-tick sw counter, which
 262                         * is always available even if no PMU support.
 263                         *
 264                         * PPC returns ENXIO until 2.6.37 (behavior changed
 265                         * with commit b0a873e).
 266                         */
 267                        if ((err == ENOENT || err == ENXIO)
 268                                        && attr->type == PERF_TYPE_HARDWARE
 269                                        && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
 270
 271                                if (verbose)
 272                                        ui__warning("The cycles event is not supported, "
 273                                                    "trying to fall back to cpu-clock-ticks\n");
 274                                attr->type = PERF_TYPE_SOFTWARE;
 275                                attr->config = PERF_COUNT_SW_CPU_CLOCK;
 276                                if (pos->name) {
 277                                        free(pos->name);
 278                                        pos->name = NULL;
 279                                }
 280                                goto try_again;
 281                        }
 282
 283                        if (err == ENOENT) {
 284                                ui__error("The %s event is not supported.\n",
 285                                          perf_evsel__name(pos));
 286                                rc = -err;
 287                                goto out;
 288                        }
 289
 290                        printf("\n");
 291                        error("sys_perf_event_open() syscall returned with %d "
 292                              "(%s) for event %s. /bin/dmesg may provide "
 293                              "additional information.\n",
 294                              err, strerror(err), perf_evsel__name(pos));
 295
 296#if defined(__i386__) || defined(__x86_64__)
 297                        if (attr->type == PERF_TYPE_HARDWARE &&
 298                            err == EOPNOTSUPP) {
 299                                pr_err("No hardware sampling interrupt available."
 300                                       " No APIC? If so then you can boot the kernel"
 301                                       " with the \"lapic\" boot parameter to"
 302                                       " force-enable it.\n");
 303                                rc = -err;
 304                                goto out;
 305                        }
 306#endif
 307
 308                        pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
 309                        rc = -err;
 310                        goto out;
 311                }
 312        }
 313
 314        if (perf_evlist__apply_filters(evlist)) {
 315                error("failed to set filter with %d (%s)\n", errno,
 316                        strerror(errno));
 317                rc = -1;
 318                goto out;
 319        }
 320
 321        if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
 322                if (errno == EPERM) {
 323                        pr_err("Permission error mapping pages.\n"
 324                               "Consider increasing "
 325                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
 326                               "or try again with a smaller value of -m/--mmap_pages.\n"
 327                               "(current value: %d)\n", opts->mmap_pages);
 328                        rc = -errno;
 329                } else if (!is_power_of_2(opts->mmap_pages)) {
 330                        pr_err("--mmap_pages/-m value must be a power of two.");
 331                        rc = -EINVAL;
 332                } else {
 333                        pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
 334                        rc = -errno;
 335                }
 336                goto out;
 337        }
 338
 339        if (rec->file_new)
 340                session->evlist = evlist;
 341        else {
 342                if (!perf_evlist__equal(session->evlist, evlist)) {
 343                        fprintf(stderr, "incompatible append\n");
 344                        rc = -1;
 345                        goto out;
 346                }
 347        }
 348
 349        perf_session__set_id_hdr_size(session);
 350out:
 351        return rc;
 352}
 353
 354static int process_buildids(struct perf_record *rec)
 355{
 356        u64 size = lseek(rec->output, 0, SEEK_CUR);
 357
 358        if (size == 0)
 359                return 0;
 360
 361        rec->session->fd = rec->output;
 362        return __perf_session__process_events(rec->session, rec->post_processing_offset,
 363                                              size - rec->post_processing_offset,
 364                                              size, &build_id__mark_dso_hit_ops);
 365}
 366
/*
 * on_exit() hook run on process termination.  For a successful
 * (status == 0) non-pipe run it finalizes the output file: accounts the
 * written event bytes in the header, collects build-ids unless
 * disabled, rewrites the header in place and tears down the session and
 * evlist.  There is no header to fix up when piping output.
 */
static void perf_record__exit(int status, void *arg)
{
	struct perf_record *rec = arg;

	/* nothing to finalize after a failed run */
	if (status != 0)
		return;

	if (!rec->opts.pipe_output) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   rec->output, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}
 386
/*
 * perf_session__process_machines() callback: synthesize module and
 * kernel mmap events for a guest machine.  The host machine is skipped
 * here because __cmd_record() synthesizes its maps directly.
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;

	if (machine__is_host(machine))
		return;

	/*
	 * As for guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid symbol missing when the first addr is
	 * in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
 422
/*
 * PERF_RECORD_FINISHED_ROUND marker appended after each full pass over
 * the mmap buffers when tracing data is being recorded (see
 * perf_record__mmap_read_all()).
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
 427
 428static int perf_record__mmap_read_all(struct perf_record *rec)
 429{
 430        int i;
 431        int rc = 0;
 432
 433        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
 434                if (rec->evlist->mmap[i].base) {
 435                        if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
 436                                rc = -1;
 437                                goto out;
 438                        }
 439                }
 440        }
 441
 442        if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
 443                rc = write_output(rec, &finished_round_event,
 444                                  sizeof(finished_round_event));
 445
 446out:
 447        return rc;
 448}
 449
/*
 * The core of 'perf record': resolve the output target, create the
 * session, optionally prepare a forked workload, open and mmap the
 * counters, synthesize the initial metadata events (attrs, kernel and
 * module maps, thread maps), then loop draining the mmap buffers until
 * the workload exits or a signal asks us to stop.
 *
 * Returns 0 on success, negative on error.  Final header fixups and
 * session teardown happen in the perf_record__exit() on_exit() hook,
 * not here.
 */
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err, output, feat;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	const char *output_name = rec->output_name;
	struct perf_session *session;

	rec->progname = argv[0];

	rec->page_size = sysconf(_SC_PAGE_SIZE);

	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	/*
	 * Resolve the output target: default to "perf.data", or pipe
	 * mode when stdout is a fifo or "-" was given explicitly.
	 */
	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			opts->pipe_output = true;
		else
			rec->output_name = output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			opts->pipe_output = true;
		else if (!stat(output_name, &st) && st.st_size) {
			if (rec->write_mode == WRITE_FORCE) {
				/* keep the previous data as <name>.old */
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (rec->write_mode == WRITE_APPEND) {
			/* nothing to append to: fall back to overwriting */
			rec->write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (rec->write_mode == WRITE_APPEND)
		rec->file_new = 0;
	else
		flags |= O_TRUNC;

	if (opts->pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		return -1;
	}

	rec->output = output;

	session = perf_session__new(output_name, O_WRONLY,
				    rec->write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	/* start with all header features and drop those that don't apply */
	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&evsel_list->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->file_new) {
		/* appending: the existing header describes the events */
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, opts, argv);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	if (perf_record__open(rec) != 0) {
		err = -1;
		goto out_delete_session;
	}

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (opts->pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			goto out_delete_session;
	} else if (rec->file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			goto out_delete_session;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_delete_session;
	}

	/* event data is appended right after the header written above */
	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		err = -1;
		goto out_delete_session;
	}

	if (opts->pipe_output) {
		/*
		 * In pipe mode the consumer cannot read attrs and event
		 * types from a header, so synthesize them as events.
		 */
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_delete_session;
		}

		err = perf_event__synthesize_event_types(tool, process_synthesized_event,
							 machine);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			goto out_delete_session;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_delete_session;
			}
			/* tracing data was written directly to the fd */
			advance_output(rec, err);
		}
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		/* some kernels don't export _text; _stext is the fallback */
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session, tool,
					       perf_event__synthesize_guest_os);

	if (!opts->target.system_wide)
		err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
						  process_synthesized_event,
						  machine);
	else
		err = perf_event__synthesize_threads(tool, process_synthesized_event,
					       machine);

	if (err != 0)
		goto out_delete_session;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_delete_session;
		}
	}

	perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

	for (;;) {
		int hits = rec->samples;

		if (perf_record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_delete_session;
		}

		if (hits == rec->samples) {
			/* no new data: stop if asked, else wait for more */
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/*
		 * Once 'done' is set, disable the counters but keep
		 * looping so the remaining data reaches the file.
		 */
		if (done)
			perf_evlist__disable(evsel_list);
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		output_name,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
 709
/* Table entry helpers for the branch filter option parser below. */
#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

/* Maps a user-visible filter name to its PERF_SAMPLE_BRANCH_* bit. */
struct branch_mode {
	const char *name;
	int mode;
};

/* All filters accepted by -b/--branch-filter, BRANCH_END-terminated. */
static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_END
};
 730
 731static int
 732parse_branch_stack(const struct option *opt, const char *str, int unset)
 733{
 734#define ONLY_PLM \
 735        (PERF_SAMPLE_BRANCH_USER        |\
 736         PERF_SAMPLE_BRANCH_KERNEL      |\
 737         PERF_SAMPLE_BRANCH_HV)
 738
 739        uint64_t *mode = (uint64_t *)opt->value;
 740        const struct branch_mode *br;
 741        char *s, *os = NULL, *p;
 742        int ret = -1;
 743
 744        if (unset)
 745                return 0;
 746
 747        /*
 748         * cannot set it twice, -b + --branch-filter for instance
 749         */
 750        if (*mode)
 751                return -1;
 752
 753        /* str may be NULL in case no arg is passed to -b */
 754        if (str) {
 755                /* because str is read-only */
 756                s = os = strdup(str);
 757                if (!s)
 758                        return -1;
 759
 760                for (;;) {
 761                        p = strchr(s, ',');
 762                        if (p)
 763                                *p = '\0';
 764
 765                        for (br = branch_modes; br->name; br++) {
 766                                if (!strcasecmp(s, br->name))
 767                                        break;
 768                        }
 769                        if (!br->name) {
 770                                ui__warning("unknown branch filter %s,"
 771                                            " check man page\n", s);
 772                                goto error;
 773                        }
 774
 775                        *mode |= br->mode;
 776
 777                        if (!p)
 778                                break;
 779
 780                        s = p + 1;
 781                }
 782        }
 783        ret = 0;
 784
 785        /* default to any branch */
 786        if ((*mode & ~ONLY_PLM) == 0) {
 787                *mode = PERF_SAMPLE_BRANCH_ANY;
 788        }
 789error:
 790        free(os);
 791        return ret;
 792}
 793
 794#ifdef LIBUNWIND_SUPPORT
 795static int get_stack_size(char *str, unsigned long *_size)
 796{
 797        char *endptr;
 798        unsigned long size;
 799        unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
 800
 801        size = strtoul(str, &endptr, 0);
 802
 803        do {
 804                if (*endptr)
 805                        break;
 806
 807                size = round_up(size, sizeof(u64));
 808                if (!size || size > max_size)
 809                        break;
 810
 811                *_size = size;
 812                return 0;
 813
 814        } while (0);
 815
 816        pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
 817               max_size, str);
 818        return -1;
 819}
 820#endif /* LIBUNWIND_SUPPORT */
 821
 822static int
 823parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
 824                    int unset)
 825{
 826        struct perf_record *rec = (struct perf_record *)opt->value;
 827        char *tok, *name, *saveptr = NULL;
 828        char *buf;
 829        int ret = -1;
 830
 831        /* --no-call-graph */
 832        if (unset)
 833                return 0;
 834
 835        /* We specified default option if none is provided. */
 836        BUG_ON(!arg);
 837
 838        /* We need buffer that we know we can write to. */
 839        buf = malloc(strlen(arg) + 1);
 840        if (!buf)
 841                return -ENOMEM;
 842
 843        strcpy(buf, arg);
 844
 845        tok = strtok_r((char *)buf, ",", &saveptr);
 846        name = tok ? : (char *)buf;
 847
 848        do {
 849                /* Framepointer style */
 850                if (!strncmp(name, "fp", sizeof("fp"))) {
 851                        if (!strtok_r(NULL, ",", &saveptr)) {
 852                                rec->opts.call_graph = CALLCHAIN_FP;
 853                                ret = 0;
 854                        } else
 855                                pr_err("callchain: No more arguments "
 856                                       "needed for -g fp\n");
 857                        break;
 858
 859#ifdef LIBUNWIND_SUPPORT
 860                /* Dwarf style */
 861                } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
 862                        const unsigned long default_stack_dump_size = 8192;
 863
 864                        ret = 0;
 865                        rec->opts.call_graph = CALLCHAIN_DWARF;
 866                        rec->opts.stack_dump_size = default_stack_dump_size;
 867
 868                        tok = strtok_r(NULL, ",", &saveptr);
 869                        if (tok) {
 870                                unsigned long size = 0;
 871
 872                                ret = get_stack_size(tok, &size);
 873                                rec->opts.stack_dump_size = size;
 874                        }
 875
 876                        if (!ret)
 877                                pr_debug("callchain: stack dump size %d\n",
 878                                         rec->opts.stack_dump_size);
 879#endif /* LIBUNWIND_SUPPORT */
 880                } else {
 881                        pr_err("callchain: Unknown -g option "
 882                               "value: %s\n", arg);
 883                        break;
 884                }
 885
 886        } while (0);
 887
 888        free(buf);
 889
 890        if (!ret)
 891                pr_debug("callchain: type %d\n", rec->opts.call_graph);
 892
 893        return ret;
 894}
 895
/* Usage lines shown by parse_options()/usage_with_options(); NULL-terminated. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
 901
 902/*
 903 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
 904 * because we need to have access to it in perf_record__exit, that is called
 905 * after cmd_record() exits, but since record_options need to be accessible to
 906 * builtin-script, leave it here.
 907 *
 908 * At least we don't ouch it in all the other functions here directly.
 909 *
 910 * Just say no to tons of global variables, sigh.
 911 */
 912static struct perf_record record = {
 913        .opts = {
 914                .mmap_pages          = UINT_MAX,
 915                .user_freq           = UINT_MAX,
 916                .user_interval       = ULLONG_MAX,
 917                .freq                = 4000,
 918                .target              = {
 919                        .uses_mmap   = true,
 920                },
 921        },
 922        .write_mode = WRITE_FORCE,
 923        .file_new   = true,
 924};
 925
/* -g option help text; "dwarf" mode is only offered when built with libunwind. */
#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

#ifdef LIBUNWIND_SUPPORT
static const char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#else
static const char callchain_help[] = CALLCHAIN_HELP "[fp]";
#endif
 933
 934/*
 935 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 936 * with it and switch to use the library functions in perf_evlist that came
 937 * from builtin-record.c, i.e. use perf_record_opts,
 938 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 939 * using pipes, etc.
 940 */
 941const struct option record_options[] = {
 942        OPT_CALLBACK('e', "event", &record.evlist, "event",
 943                     "event selector. use 'perf list' to list available events",
 944                     parse_events_option),
 945        OPT_CALLBACK(0, "filter", &record.evlist, "filter",
 946                     "event filter", parse_filter),
 947        OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
 948                    "record events on existing process id"),
 949        OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
 950                    "record events on existing thread id"),
 951        OPT_INTEGER('r', "realtime", &record.realtime_prio,
 952                    "collect data with this RT SCHED_FIFO priority"),
 953        OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
 954                    "collect data without buffering"),
 955        OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
 956                    "collect raw sample records from all opened counters"),
 957        OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
 958                            "system-wide collection from all CPUs"),
 959        OPT_BOOLEAN('A', "append", &record.append_file,
 960                            "append to the output file to do incremental profiling"),
 961        OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
 962                    "list of cpus to monitor"),
 963        OPT_BOOLEAN('f', "force", &record.force,
 964                        "overwrite existing data file (deprecated)"),
 965        OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
 966        OPT_STRING('o', "output", &record.output_name, "file",
 967                    "output file name"),
 968        OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
 969                    "child tasks do not inherit counters"),
 970        OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
 971        OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
 972                     "number of mmap data pages"),
 973        OPT_BOOLEAN(0, "group", &record.opts.group,
 974                    "put the counters into a counter group"),
 975        OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]",
 976                             callchain_help, &parse_callchain_opt,
 977                             "fp"),
 978        OPT_INCR('v', "verbose", &verbose,
 979                    "be more verbose (show counter open errors, etc)"),
 980        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
 981        OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
 982                    "per thread counts"),
 983        OPT_BOOLEAN('d', "data", &record.opts.sample_address,
 984                    "Sample addresses"),
 985        OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
 986        OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
 987        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
 988                    "don't sample"),
 989        OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
 990                    "do not update the buildid cache"),
 991        OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
 992                    "do not collect buildids in perf.data"),
 993        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
 994                     "monitor event in cgroup name only",
 995                     parse_cgroups),
 996        OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
 997                   "user to profile"),
 998
 999        OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1000                     "branch any", "sample any taken branches",
1001                     parse_branch_stack),
1002
1003        OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1004                     "branch filter mask", "branch stack filter modes",
1005                     parse_branch_stack),
1006        OPT_END()
1007};
1008
/*
 * cmd_record - entry point for 'perf record'.
 *
 * Parses the command line into the global 'record', validates the
 * target/event configuration and hands off to __cmd_record().
 * Returns 0 on success or a negative error code.
 */
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;
	char errbuf[BUFSIZ];

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	/* Nothing to do: no workload command and no target to attach to. */
	if (!argc && perf_target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	/* -f (overwrite) and -A (append) are mutually exclusive. */
	if (rec->force && rec->append_file) {
		ui__error("Can't overwrite and append at the same time."
			  " You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (rec->append_file) {
		rec->write_mode = WRITE_APPEND;
	} else {
		rec->write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	/* No -e given: fall back to the default event. */
	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	/* Target validation problems only warn ... */
	err = perf_target__validate(&rec->opts.target);
	if (err) {
		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	/* ... but a bad -u/--uid is fatal. */
	err = perf_target__parse_uid(&rec->opts.target);
	if (err) {
		/* Save errno before the strerror/ui calls can clobber it. */
		int saved_errno = errno;

		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_free_fd;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
			goto out_free_fd;
	}

	/* Apply user overrides for sample period and frequency. */
	if (rec->opts.user_interval != ULLONG_MAX)
		rec->opts.default_interval = rec->opts.user_interval;
	if (rec->opts.user_freq != UINT_MAX)
		rec->opts.freq = rec->opts.user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (rec->opts.default_interval)
		rec->opts.freq = 0;
	else if (rec->opts.freq) {
		rec->opts.default_interval = rec->opts.freq;
	} else {
		ui__error("frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(&record, argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}
1116