linux/tools/perf/builtin-record.c
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#define _FILE_OFFSET_BITS 64

#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/cpumap.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

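/*
 * Resolve the file descriptor opened for event 'e' on
 * (cpu index x, thread index y) in the evsel's fd xyarray.
 */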
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))

enum write_mode_t {
        WRITE_FORCE,
        WRITE_APPEND
};

static u64                      user_interval                   = ULLONG_MAX;
static u64                      default_interval                =      0;
static u64                      sample_type;

static struct cpu_map           *cpus;
static unsigned int             page_size;
static unsigned int             mmap_pages                      =    128;
static unsigned int             user_freq                       = UINT_MAX;
static int                      freq                            =   1000;
static int                      output;
static int                      pipe_output                     =      0;
static const char               *output_name                    = "perf.data";
static int                      group                           =      0;
static int                      realtime_prio                   =      0;
static bool                     nodelay                         =  false;
static bool                     raw_samples                     =  false;
static bool                     sample_id_all_avail             =   true;
static bool                     system_wide                     =  false;
static pid_t                    target_pid                      =     -1;
static pid_t                    target_tid                      =     -1;
static struct thread_map        *threads;
static pid_t                    child_pid                       =     -1;
static bool                     no_inherit                      =  false;
static enum write_mode_t        write_mode                      = WRITE_FORCE;
static bool                     call_graph                      =  false;
static bool                     inherit_stat                    =  false;
static bool                     no_samples                      =  false;
static bool                     sample_address                  =  false;
static bool                     sample_time                     =  false;
static bool                     no_buildid                      =  false;
static bool                     no_buildid_cache                =  false;

static long                     samples                         =      0;
static u64                      bytes_written                   =      0;

static struct pollfd            *event_array;

static int                      nr_poll                         =      0;
static int                      nr_cpu                          =      0;

static int                      file_new                        =      1;
static off_t                    post_processing_offset;

static struct perf_session      *session;
static const char               *cpu_list;

struct mmap_data {
        void                    *base;
        unsigned int            mask;
        unsigned int            prev;
};

static struct mmap_data         mmap_array[MAX_NR_CPUS];

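/*
 * The kernel publishes new data in the mmap'ed ring buffer by advancing
 * data_head; we consume it and report progress back by advancing
 * data_tail. The rmb() below pairs with the kernel's write barrier, so
 * we never read event data beyond the head value we observed.
 */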
static unsigned long mmap_read_head(struct mmap_data *md)
{
        struct perf_event_mmap_page *pc = md->base;
        long head;

        head = pc->data_head;
        rmb();

        return head;
}

static void mmap_write_tail(struct mmap_data *md, unsigned long tail)
{
        struct perf_event_mmap_page *pc = md->base;

        /*
         * ensure all reads are done before we write the tail out.
         */
        /* mb(); */
        pc->data_tail = tail;
}

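/*
 * Account for bytes that someone else already wrote to the output fd
 * (e.g. synthesized tracing data), so the header's data size stays
 * correct.
 */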
static void advance_output(size_t size)
{
        bytes_written += size;
}

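/* Write the whole buffer to the output fd, looping over short writes. */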
static void write_output(void *buf, size_t size)
{
        while (size) {
                int ret = write(output, buf, size);

                if (ret < 0)
                        die("failed to write");

                size -= ret;
                buf += ret;

                bytes_written += ret;
        }
}

static int process_synthesized_event(event_t *event,
                                     struct sample_data *sample __used,
                                     struct perf_session *self __used)
{
        write_output(event, event->header.size);
        return 0;
}

static void mmap_read(struct mmap_data *md)
{
        unsigned int head = mmap_read_head(md);
        unsigned int old = md->prev;
        unsigned char *data = md->base + page_size;
        unsigned long size;
        void *buf;
        int diff;

        /*
         * If we're further behind than half the buffer, there's a chance
         * the writer will bite our tail and mess up the samples under us.
         *
         * If we somehow ended up ahead of the head, we got messed up.
         *
         * In either case, truncate and restart at head.
         */
        diff = head - old;
        if (diff < 0) {
                fprintf(stderr, "WARNING: failed to keep up with mmap data\n");
                /*
                 * head points to a known good entry, start there.
                 */
                old = head;
        }

        if (old != head)
                samples++;

        size = head - old;

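        /*
         * If the chunk wraps around the end of the ring buffer, write
         * the part up to the end first, then fall through to write the
         * remainder from the start of the buffer.
         */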
        if ((old & md->mask) + size != (head & md->mask)) {
                buf = &data[old & md->mask];
                size = md->mask + 1 - (old & md->mask);
                old += size;

                write_output(buf, size);
        }

        buf = &data[old & md->mask];
        size = head - old;
        old += size;

        write_output(buf, size);

        md->prev = old;
        mmap_write_tail(md, old);
}

static volatile int done = 0;
static volatile int signr = -1;

static void sig_handler(int sig)
{
        done = 1;
        signr = sig;
}

static void sig_atexit(void)
{
        if (child_pid > 0)
                kill(child_pid, SIGTERM);

        if (signr == -1 || signr == SIGUSR1)
                return;

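        /*
         * Re-raise the signal with its default handler so that our exit
         * status reflects the signal that actually terminated us.
         */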
        signal(signr, SIG_DFL);
        kill(getpid(), signr);
}

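/*
 * fd of the group leader for the current open_counters() pass; -1 until
 * the first counter of a group has been created.
 */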
static int group_fd;

static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr)
{
        struct perf_header_attr *h_attr;

        if (nr < session->header.attrs) {
                h_attr = session->header.attr[nr];
        } else {
                h_attr = perf_header_attr__new(a);
                if (h_attr != NULL)
                        if (perf_header__add_attr(&session->header, h_attr) < 0) {
                                perf_header_attr__delete(h_attr);
                                h_attr = NULL;
                        }
        }

        return h_attr;
}

static void create_counter(struct perf_evsel *evsel, int cpu)
{
        char *filter = evsel->filter;
        struct perf_event_attr *attr = &evsel->attr;
        struct perf_header_attr *h_attr;
        int track = !evsel->idx; /* only the first counter needs these */
        int thread_index;
        int ret;
        struct {
                u64 count;
                u64 time_enabled;
                u64 time_running;
                u64 id;
        } read_data;
        /*
         * Check if parse_single_tracepoint_event has already asked for
         * PERF_SAMPLE_TIME.
         *
         * XXX this is kludgy, but it is a short-term fix for problems
         * introduced by commit eac23d1c, which broke 'perf script' by
         * producing different sample_types when multiple tracepoint
         * events are used with a perf binary that tries to use
         * sample_id_all on an older kernel.
         *
         * We need to move counter creation to perf_session, support
         * different sample_types, etc.
         */
        bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

        attr->read_format       = PERF_FORMAT_TOTAL_TIME_ENABLED |
                                  PERF_FORMAT_TOTAL_TIME_RUNNING |
                                  PERF_FORMAT_ID;

        attr->sample_type       |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;

        if (nr_counters > 1)
                attr->sample_type |= PERF_SAMPLE_ID;

        /*
         * We default some events to a default interval of 1. But keep
         * it a weak assumption, overridable by the user.
         */
        if (!attr->sample_period || (user_freq != UINT_MAX &&
                                     user_interval != ULLONG_MAX)) {
                if (freq) {
                        attr->sample_type       |= PERF_SAMPLE_PERIOD;
                        attr->freq              = 1;
                        attr->sample_freq       = freq;
                } else {
                        attr->sample_period = default_interval;
                }
        }

        if (no_samples)
                attr->sample_freq = 0;

        if (inherit_stat)
                attr->inherit_stat = 1;

        if (sample_address) {
                attr->sample_type       |= PERF_SAMPLE_ADDR;
                attr->mmap_data = track;
        }

        if (call_graph)
                attr->sample_type       |= PERF_SAMPLE_CALLCHAIN;

        if (system_wide)
                attr->sample_type       |= PERF_SAMPLE_CPU;

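        /*
         * Timestamp events whenever samples from more than one task or
         * CPU may be interleaved, so they can be ordered at report time
         * (only possible when the kernel can tag non-sample events too,
         * i.e. when sample_id_all is available).
         */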
        if (sample_id_all_avail &&
            (sample_time || system_wide || !no_inherit || cpu_list))
                attr->sample_type       |= PERF_SAMPLE_TIME;

        if (raw_samples) {
                attr->sample_type       |= PERF_SAMPLE_TIME;
                attr->sample_type       |= PERF_SAMPLE_RAW;
                attr->sample_type       |= PERF_SAMPLE_CPU;
        }

        if (nodelay) {
                attr->watermark = 0;
                attr->wakeup_events = 1;
        }

        attr->mmap              = track;
        attr->comm              = track;
        attr->inherit           = !no_inherit;
        if (target_pid == -1 && target_tid == -1 && !system_wide) {
                attr->disabled = 1;
                attr->enable_on_exec = 1;
        }
retry_sample_id:
        attr->sample_id_all = sample_id_all_avail ? 1 : 0;

        for (thread_index = 0; thread_index < threads->nr; thread_index++) {
try_again:
                FD(evsel, nr_cpu, thread_index) = sys_perf_event_open(attr, threads->map[thread_index], cpu, group_fd, 0);

                if (FD(evsel, nr_cpu, thread_index) < 0) {
                        int err = errno;

                        if (err == EPERM || err == EACCES)
                                die("Permission error - are you root?\n"
                                        "\t Consider tweaking"
                                        " /proc/sys/kernel/perf_event_paranoid.\n");
                        else if (err == ENODEV && cpu_list) {
                                die("No such device - did you specify"
                                        " an out-of-range profile CPU?\n");
                        } else if (err == EINVAL && sample_id_all_avail) {
                                /*
                                 * Old kernel, no attr->sample_id_all field
                                 */
                                sample_id_all_avail = false;
                                if (!sample_time && !raw_samples && !time_needed)
                                        attr->sample_type &= ~PERF_SAMPLE_TIME;

                                goto retry_sample_id;
                        }

                        /*
                         * If it's cycles then fall back to hrtimer
                         * based cpu-clock-tick sw counter, which
                         * is always available even if no PMU support:
                         */
                        if (attr->type == PERF_TYPE_HARDWARE
                                        && attr->config == PERF_COUNT_HW_CPU_CYCLES) {

                                if (verbose)
                                        warning(" ... trying to fall back to cpu-clock-ticks\n");
                                attr->type = PERF_TYPE_SOFTWARE;
                                attr->config = PERF_COUNT_SW_CPU_CLOCK;
                                goto try_again;
                        }
                        printf("\n");
                        error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
                              FD(evsel, nr_cpu, thread_index), strerror(err));

#if defined(__i386__) || defined(__x86_64__)
                        if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
                                die("No hardware sampling interrupt available."
                                    " No APIC? If so then you can boot the kernel"
                                    " with the \"lapic\" boot parameter to"
                                    " force-enable it.\n");
#endif

                        die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
                        exit(-1);
                }

                h_attr = get_header_attr(attr, evsel->idx);
                if (h_attr == NULL)
                        die("nomem\n");

                if (!file_new) {
                        if (memcmp(&h_attr->attr, attr, sizeof(*attr))) {
                                fprintf(stderr, "incompatible append\n");
                                exit(-1);
                        }
                }

                if (read(FD(evsel, nr_cpu, thread_index), &read_data, sizeof(read_data)) == -1) {
                        perror("Unable to read perf file descriptor");
                        exit(-1);
                }

                if (perf_header_attr__add_id(h_attr, read_data.id) < 0) {
                        pr_warning("Not enough memory to add id\n");
                        exit(-1);
                }

                assert(FD(evsel, nr_cpu, thread_index) >= 0);
                fcntl(FD(evsel, nr_cpu, thread_index), F_SETFL, O_NONBLOCK);

                /*
                 * First counter acts as the group leader:
                 */
                if (group && group_fd == -1)
                        group_fd = FD(evsel, nr_cpu, thread_index);

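                /*
                 * Only the first counter on each CPU gets an mmap
                 * buffer; every other counter redirects its output into
                 * that buffer via PERF_EVENT_IOC_SET_OUTPUT, giving one
                 * ring buffer per CPU.
                 */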
                if (evsel->idx || thread_index) {
                        struct perf_evsel *first;
                        first = list_entry(evsel_list.next, struct perf_evsel, node);
                        ret = ioctl(FD(evsel, nr_cpu, thread_index),
                                    PERF_EVENT_IOC_SET_OUTPUT,
                                    FD(first, nr_cpu, 0));
                        if (ret) {
                                error("failed to set output: %d (%s)\n", errno,
                                                strerror(errno));
                                exit(-1);
                        }
                } else {
                        mmap_array[nr_cpu].prev = 0;
                        mmap_array[nr_cpu].mask = mmap_pages*page_size - 1;
                        mmap_array[nr_cpu].base = mmap(NULL, (mmap_pages+1)*page_size,
                                PROT_READ | PROT_WRITE, MAP_SHARED, FD(evsel, nr_cpu, thread_index), 0);
                        if (mmap_array[nr_cpu].base == MAP_FAILED) {
                                error("failed to mmap with %d (%s)\n", errno, strerror(errno));
                                exit(-1);
                        }

                        event_array[nr_poll].fd = FD(evsel, nr_cpu, thread_index);
                        event_array[nr_poll].events = POLLIN;
                        nr_poll++;
                }

                if (filter != NULL) {
                        ret = ioctl(FD(evsel, nr_cpu, thread_index),
                                    PERF_EVENT_IOC_SET_FILTER, filter);
                        if (ret) {
                                error("failed to set filter with %d (%s)\n", errno,
                                                strerror(errno));
                                exit(-1);
                        }
                }
        }

        if (!sample_type)
                sample_type = attr->sample_type;
}

static void open_counters(int cpu)
{
        struct perf_evsel *pos;

        group_fd = -1;

        list_for_each_entry(pos, &evsel_list, node)
                create_counter(pos, cpu);

        nr_cpu++;
}

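/*
 * Re-read the events recorded so far and mark the DSOs that got hits,
 * so that only build-ids of DSOs we actually sampled end up in the
 * header.
 */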
static int process_buildids(void)
{
        u64 size = lseek(output, 0, SEEK_CUR);

        if (size == 0)
                return 0;

        session->fd = output;
        return __perf_session__process_events(session, post_processing_offset,
                                              size - post_processing_offset,
                                              size, &build_id__mark_dso_hit_ops);
}

static void atexit_header(void)
{
        if (!pipe_output) {
                session->header.data_size += bytes_written;

                if (!no_buildid)
                        process_buildids();
                perf_header__write(&session->header, output, true);
                perf_session__delete(session);
                perf_evsel_list__delete();
                symbol__exit();
        }
}

static void event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_session *psession = data;

        if (machine__is_host(machine))
                return;

        /*
         * For a guest kernel, when processing the record & report
         * subcommands we synthesize the module mmaps before the guest
         * kernel mmap and trigger a DSO preload, because guest module
         * symbols are loaded from the guest's kallsyms instead of from
         * /lib/modules/XXX/XXX. This avoids missing symbols when the
         * first sampled address falls in a module rather than in the
         * guest kernel itself.
         */
        err = event__synthesize_modules(process_synthesized_event,
                                        psession, machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);

        /*
         * We use _stext for the guest kernel because the guest kernel's
         * /proc/kallsyms sometimes has no _text entry.
         */
        err = event__synthesize_kernel_mmap(process_synthesized_event,
                                            psession, machine, "_text");
        if (err < 0)
                err = event__synthesize_kernel_mmap(process_synthesized_event,
                                                    psession, machine, "_stext");
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}

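/*
 * A PERF_RECORD_FINISHED_ROUND pseudo event marks a point at which
 * every mmap buffer has been drained once; at report time, events only
 * need to be reordered within one round rather than across the whole
 * file.
 */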
static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};

static void mmap_read_all(void)
{
        int i;

        for (i = 0; i < nr_cpu; i++) {
                if (mmap_array[i].base)
                        mmap_read(&mmap_array[i]);
        }

        if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
                write_output(&finished_round_event, sizeof(finished_round_event));
}

static int __cmd_record(int argc, const char **argv)
{
        int i;
        struct stat st;
        int flags;
        int err;
        unsigned long waking = 0;
        int child_ready_pipe[2], go_pipe[2];
        const bool forks = argc > 0;
        char buf;
        struct machine *machine;

        page_size = sysconf(_SC_PAGE_SIZE);

        atexit(sig_atexit);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGUSR1, sig_handler);

        if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
                perror("failed to create pipes");
                exit(-1);
        }

        if (!strcmp(output_name, "-"))
                pipe_output = 1;
        else if (!stat(output_name, &st) && st.st_size) {
                if (write_mode == WRITE_FORCE) {
                        char oldname[PATH_MAX];
                        snprintf(oldname, sizeof(oldname), "%s.old",
                                 output_name);
                        unlink(oldname);
                        rename(output_name, oldname);
                }
        } else if (write_mode == WRITE_APPEND) {
                write_mode = WRITE_FORCE;
        }

        flags = O_CREAT|O_RDWR;
        if (write_mode == WRITE_APPEND)
                file_new = 0;
        else
                flags |= O_TRUNC;

        if (pipe_output)
                output = STDOUT_FILENO;
        else
                output = open(output_name, flags, S_IRUSR | S_IWUSR);
        if (output < 0) {
                perror("failed to create output file");
                exit(-1);
        }

        session = perf_session__new(output_name, O_WRONLY,
                                    write_mode == WRITE_FORCE, false, NULL);
        if (session == NULL) {
                pr_err("Not enough memory for reading perf file header\n");
                return -1;
        }

        if (!no_buildid)
                perf_header__set_feat(&session->header, HEADER_BUILD_ID);

        if (!file_new) {
                err = perf_header__read(session, output);
                if (err < 0)
                        goto out_delete_session;
        }

        if (have_tracepoints(&evsel_list))
                perf_header__set_feat(&session->header, HEADER_TRACE_INFO);

        /*
         * perf_session__delete(session) will be called at atexit_header()
         */
        atexit(atexit_header);

        if (forks) {
                child_pid = fork();
                if (child_pid < 0) {
                        perror("failed to fork");
                        exit(-1);
                }

                if (!child_pid) {
                        if (pipe_output)
                                dup2(2, 1);
                        close(child_ready_pipe[0]);
                        close(go_pipe[1]);
                        fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

                        /*
                         * Do a dummy execvp to get the PLT entry resolved,
                         * so we avoid the resolver overhead on the real
                         * execvp call.
                         */
                        execvp("", (char **)argv);

                        /*
                         * Tell the parent we're ready to go
                         */
                        close(child_ready_pipe[1]);

                        /*
                         * Wait until the parent tells us to go.
                         */
                        if (read(go_pipe[0], &buf, 1) == -1)
                                perror("unable to read pipe");

                        execvp(argv[0], (char **)argv);

                        perror(argv[0]);
                        kill(getppid(), SIGUSR1);
                        exit(-1);
                }

                if (!system_wide && target_tid == -1 && target_pid == -1)
                        threads->map[0] = child_pid;

                close(child_ready_pipe[1]);
                close(go_pipe[0]);
                /*
                 * wait for child to settle
                 */
                if (read(child_ready_pipe[0], &buf, 1) == -1) {
                        perror("unable to read pipe");
                        exit(-1);
                }
                close(child_ready_pipe[0]);
        }

        if (!system_wide && no_inherit && !cpu_list) {
                open_counters(-1);
        } else {
                for (i = 0; i < cpus->nr; i++)
                        open_counters(cpus->map[i]);
        }

        perf_session__set_sample_type(session, sample_type);

        if (pipe_output) {
                err = perf_header__write_pipe(output);
                if (err < 0)
                        return err;
        } else if (file_new) {
                err = perf_header__write(&session->header, output, false);
                if (err < 0)
                        return err;
        }

        post_processing_offset = lseek(output, 0, SEEK_CUR);

        perf_session__set_sample_id_all(session, sample_id_all_avail);

        if (pipe_output) {
                err = event__synthesize_attrs(&session->header,
                                              process_synthesized_event,
                                              session);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        return err;
                }

                err = event__synthesize_event_types(process_synthesized_event,
                                                    session);
                if (err < 0) {
                        pr_err("Couldn't synthesize event_types.\n");
                        return err;
                }

                if (have_tracepoints(&evsel_list)) {
                        /*
                         * FIXME: err <= 0 here actually means that there
                         * were no tracepoints, so it's not really an
                         * error, just that we don't need to synthesize
                         * anything. We really have to return this more
                         * properly and also propagate the errors that
                         * currently end up calling die().
                         */
                        err = event__synthesize_tracing_data(output, &evsel_list,
                                                             process_synthesized_event,
                                                             session);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                return err;
                        }
                        advance_output(err);
                }
        }

        machine = perf_session__find_host_machine(session);
        if (!machine) {
                pr_err("Couldn't find native kernel information.\n");
                return -1;
        }

        err = event__synthesize_kernel_mmap(process_synthesized_event,
                                            session, machine, "_text");
        if (err < 0)
                err = event__synthesize_kernel_mmap(process_synthesized_event,
                                                    session, machine, "_stext");
        if (err < 0)
                pr_err("Couldn't record kernel reference relocation symbol\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/kallsyms permission or run as root.\n");

        err = event__synthesize_modules(process_synthesized_event,
                                        session, machine);
        if (err < 0)
                pr_err("Couldn't record kernel module information.\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/modules permission or run as root.\n");

        if (perf_guest)
                perf_session__process_machines(session, event__synthesize_guest_os);

        if (!system_wide)
                event__synthesize_thread_map(threads, process_synthesized_event,
                                             session);
        else
                event__synthesize_threads(process_synthesized_event, session);

        if (realtime_prio) {
                struct sched_param param;

                param.sched_priority = realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        exit(-1);
                }
        }

        /*
         * Let the child rip
         */
        if (forks)
                close(go_pipe[1]);

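        /*
         * Main loop: drain all mmap buffers, then block in poll() until
         * a counter crosses its wakeup watermark or we are interrupted;
         * once 'done' is set and no new data arrives, exit the loop.
         */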
        for (;;) {
                int hits = samples;
                int thread;

                mmap_read_all();

                if (hits == samples) {
                        if (done)
                                break;
                        err = poll(event_array, nr_poll, -1);
                        waking++;
                }

                if (done) {
                        for (i = 0; i < nr_cpu; i++) {
                                struct perf_evsel *pos;

                                list_for_each_entry(pos, &evsel_list, node) {
                                        for (thread = 0;
                                                thread < threads->nr;
                                                thread++)
                                                ioctl(FD(pos, i, thread),
                                                        PERF_EVENT_IOC_DISABLE);
                                }
                        }
                }
        }

        if (quiet || signr == SIGUSR1)
                return 0;

        fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

        /*
         * Approximate RIP event size: 24 bytes.
         */
        fprintf(stderr,
                "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
                (double)bytes_written / 1024.0 / 1024.0,
                output_name,
                bytes_written / 24);

        return 0;

out_delete_session:
        perf_session__delete(session);
        return err;
}

static const char * const record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
        NULL
};

static bool force, append_file;

const struct option record_options[] = {
        OPT_CALLBACK('e', "event", NULL, "event",
                     "event selector. use 'perf list' to list available events",
                     parse_events),
        OPT_CALLBACK(0, "filter", NULL, "filter",
                     "event filter", parse_filter),
        OPT_INTEGER('p', "pid", &target_pid,
                    "record events on existing process id"),
        OPT_INTEGER('t', "tid", &target_tid,
                    "record events on existing thread id"),
        OPT_INTEGER('r', "realtime", &realtime_prio,
                    "collect data with this RT SCHED_FIFO priority"),
        OPT_BOOLEAN('D', "no-delay", &nodelay,
                    "collect data without buffering"),
        OPT_BOOLEAN('R', "raw-samples", &raw_samples,
                    "collect raw sample records from all opened counters"),
        OPT_BOOLEAN('a', "all-cpus", &system_wide,
                            "system-wide collection from all CPUs"),
        OPT_BOOLEAN('A', "append", &append_file,
                            "append to the output file to do incremental profiling"),
        OPT_STRING('C', "cpu", &cpu_list, "cpu",
                    "list of cpus to monitor"),
        OPT_BOOLEAN('f', "force", &force,
                        "overwrite existing data file (deprecated)"),
        OPT_U64('c', "count", &user_interval, "event period to sample"),
        OPT_STRING('o', "output", &output_name, "file",
                    "output file name"),
        OPT_BOOLEAN('i', "no-inherit", &no_inherit,
                    "child tasks do not inherit counters"),
        OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
        OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
        OPT_BOOLEAN('g', "call-graph", &call_graph,
                    "do call-graph (stack chain/backtrace) recording"),
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show counter open errors, etc)"),
        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
        OPT_BOOLEAN('s', "stat", &inherit_stat,
                    "per thread counts"),
        OPT_BOOLEAN('d', "data", &sample_address,
                    "Sample addresses"),
        OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
        OPT_BOOLEAN('n', "no-samples", &no_samples,
                    "don't sample"),
        OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
                    "do not update the buildid cache"),
        OPT_BOOLEAN('B', "no-buildid", &no_buildid,
                    "do not collect buildids in perf.data"),
        OPT_END()
};

int cmd_record(int argc, const char **argv, const char *prefix __used)
{
        int err = -ENOMEM;
        struct perf_evsel *pos;

        argc = parse_options(argc, argv, record_options, record_usage,
                            PARSE_OPT_STOP_AT_NON_OPTION);
        if (!argc && target_pid == -1 && target_tid == -1 &&
                !system_wide && !cpu_list)
                usage_with_options(record_usage, record_options);

        if (force && append_file) {
                fprintf(stderr, "Can't overwrite and append at the same time."
                                " You need to choose between -f and -A.\n");
                usage_with_options(record_usage, record_options);
        } else if (append_file) {
                write_mode = WRITE_APPEND;
        } else {
                write_mode = WRITE_FORCE;
        }

        symbol__init();

        if (no_buildid_cache || no_buildid)
                disable_buildid_cache();

        if (list_empty(&evsel_list) && perf_evsel_list__create_default() < 0) {
                pr_err("Not enough memory for event selector list\n");
                goto out_symbol_exit;
        }

        if (target_pid != -1)
                target_tid = target_pid;

        threads = thread_map__new(target_pid, target_tid);
        if (threads == NULL) {
                pr_err("Problems finding threads to monitor\n");
                usage_with_options(record_usage, record_options);
        }

        cpus = cpu_map__new(cpu_list);
        if (cpus == NULL) {
                perror("failed to parse CPUs map");
                return -1;
        }

        list_for_each_entry(pos, &evsel_list, node) {
                if (perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
                        goto out_free_fd;
                if (perf_header__push_event(pos->attr.config, event_name(pos)))
                        goto out_free_fd;
        }
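
        /* Worst case: one pollfd per counter, per CPU, per thread. */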
        event_array = malloc((sizeof(struct pollfd) * MAX_NR_CPUS *
                              MAX_COUNTERS * threads->nr));
        if (!event_array)
                goto out_free_fd;

        if (user_interval != ULLONG_MAX)
                default_interval = user_interval;
        if (user_freq != UINT_MAX)
                freq = user_freq;

        /*
         * User specified count overrides default frequency.
         */
        if (default_interval)
                freq = 0;
        else if (freq) {
                default_interval = freq;
        } else {
                fprintf(stderr, "frequency and count are zero, aborting\n");
                err = -EINVAL;
                goto out_free_event_array;
        }

        err = __cmd_record(argc, argv);

out_free_event_array:
        free(event_array);
out_free_fd:
        thread_map__delete(threads);
        threads = NULL;
out_symbol_exit:
        symbol__exit();
        return err;
}
