1 // SPDX-License-Identifier: GPL-2.0
5 * Builtin record command: Record the profile of a workload
6 * (or a CPU, or a PID) into the perf.data output file - for
7 * later analysis via perf report.
13 #include "util/build-id.h"
14 #include "util/util.h"
15 #include <subcmd/parse-options.h>
16 #include "util/parse-events.h"
17 #include "util/config.h"
19 #include "util/callchain.h"
20 #include "util/cgroup.h"
21 #include "util/header.h"
22 #include "util/event.h"
23 #include "util/evlist.h"
24 #include "util/evsel.h"
25 #include "util/debug.h"
26 #include "util/session.h"
27 #include "util/tool.h"
28 #include "util/symbol.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/llvm-utils.h"
38 #include "util/bpf-loader.h"
39 #include "util/trigger.h"
40 #include "util/perf-hooks.h"
41 #include "util/cpu-set-sched.h"
42 #include "util/time-utils.h"
43 #include "util/units.h"
44 #include "util/bpf-event.h"
56 #include <linux/time64.h>
58 struct switch_output {
/*
 * Per-run state of the 'perf record' command.  NOTE(review): the listing
 * is truncated — the 'struct record {' opener and several members are not
 * visible here; the fields below are the surviving subset.
 */
71 struct perf_tool tool;
72 struct record_opts opts;
74 struct perf_data data;
75 struct auxtrace_record *itr;
76 struct perf_evlist *evlist;
77 struct perf_session *session;
81 bool no_buildid_cache;
82 bool no_buildid_cache_set;
84 bool timestamp_filename;
85 bool timestamp_boundary;
86 struct switch_output switch_output;
87 unsigned long long samples;
/* Last affinity mask applied via sched_setaffinity() in record__adjust_affinity(). */
88 cpu_set_t affinity_mask;
/* volatile: set from the SIGUSR2 handler, read in the main loop. */
91 static volatile int auxtrace_record__snapshot_started;
92 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
93 static DEFINE_TRIGGER(switch_output_trigger);
95 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
/* True when --switch-output was requested in signal mode and the trigger is armed. */
99 static bool switch_output_signal(struct record *rec)
101 return rec->switch_output.signal &&
102 trigger_is_ready(&switch_output_trigger);
/*
 * True when the size threshold for --switch-output has been reached:
 * bytes written so far meet or exceed the configured size.
 */
105 static bool switch_output_size(struct record *rec)
107 return rec->switch_output.size &&
108 trigger_is_ready(&switch_output_trigger) &&
109 (rec->bytes_written >= rec->switch_output.size);
/* True when --switch-output is time based and the trigger is armed. */
112 static bool switch_output_time(struct record *rec)
114 return rec->switch_output.time &&
115 trigger_is_ready(&switch_output_trigger);
/*
 * Write @size bytes at @bf to the perf.data output file, accounting the
 * bytes and firing the switch-output trigger once the size threshold is
 * crossed.  @map is unused in this (synchronous) path.
 */
118 static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
119 void *bf, size_t size)
121 struct perf_data_file *file = &rec->session->data->file;
123 if (perf_data_file__write(file, bf, size) < 0) {
124 pr_err("failed to write perf data, error: %m\n");
128 rec->bytes_written += size;
/* Size-based --switch-output: request rotation once enough data was written. */
130 if (switch_output_size(rec))
131 trigger_hit(&switch_output_trigger);
136 #ifdef HAVE_AIO_SUPPORT
/*
 * Queue one POSIX AIO write of @size bytes from @buf at file offset @off.
 * SIGEV_NONE: completion is polled via aio_error()/aio_return(), no signal.
 * On hard failure the control block is released by resetting aio_fildes.
 */
137 static int record__aio_write(struct aiocb *cblock, int trace_fd,
138 void *buf, size_t size, off_t off)
142 cblock->aio_fildes = trace_fd;
143 cblock->aio_buf = buf;
144 cblock->aio_nbytes = size;
145 cblock->aio_offset = off;
146 cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
149 rc = aio_write(cblock);
/* EAGAIN means the queue is full — caller may retry; anything else is fatal. */
152 } else if (errno != EAGAIN) {
153 cblock->aio_fildes = -1;
154 pr_err("failed to queue perf data, error: %m\n");
/*
 * Check one in-flight AIO request.  Returns without completing if still
 * EINPROGRESS; on partial writes it re-queues the remainder.
 */
162 static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
168 ssize_t aio_ret, written;
170 aio_errno = aio_error(cblock);
171 if (aio_errno == EINPROGRESS)
174 written = aio_ret = aio_return(cblock);
176 if (aio_errno != EINTR)
177 pr_err("failed to write perf data, error: %m\n");
/* Bytes the kernel did not write yet (0 on a full write). */
181 rem_size = cblock->aio_nbytes - written;
/* Mark the control block free for reuse. */
184 cblock->aio_fildes = -1;
186 * md->refcount is incremented in perf_mmap__push() for
187 * every enqueued aio write request so decrement it because
188 * the request is now complete.
194 * aio write request may require restart with the
195 * reminder if the kernel didn't write whole
/* Partial write: restart the request for the unwritten tail. */
198 rem_off = cblock->aio_offset + written;
199 rem_buf = (void *)(cblock->aio_buf + written);
200 record__aio_write(cblock, cblock->aio_fildes,
201 rem_buf, rem_size, rem_off);
/*
 * Wait for AIO completions on @md.  With sync_all, wait for every
 * in-flight request; otherwise return the index of the first free
 * control block.  Sleeps in 1ms aio_suspend() slices while busy.
 */
208 static int record__aio_sync(struct perf_mmap *md, bool sync_all)
210 struct aiocb **aiocb = md->aio.aiocb;
211 struct aiocb *cblocks = md->aio.cblocks;
212 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
217 for (i = 0; i < md->aio.nr_cblocks; ++i) {
/* Free slot: either never used (-1) or just completed. */
218 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
225 * Started aio write is not complete yet
226 * so it has to be waited before the
229 aiocb[i] = &cblocks[i];
236 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
237 if (!(errno == EAGAIN || errno == EINTR))
238 pr_err("failed to sync perf data, error: %m\n");
/*
 * perf_mmap__aio_push() callback: queue the buffer for async write and
 * account it, mirroring the bookkeeping done in record__write().
 */
243 static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
245 struct record *rec = to;
246 int ret, trace_fd = rec->session->data->file.fd;
250 ret = record__aio_write(cblock, trace_fd, bf, size, off);
252 rec->bytes_written += size;
253 if (switch_output_size(rec))
254 trigger_hit(&switch_output_trigger);
/* Current output file position (AIO writes use explicit offsets). */
260 static off_t record__aio_get_pos(int trace_fd)
262 return lseek(trace_fd, 0, SEEK_CUR);
/* Restore the file position after a batch of positioned AIO writes. */
265 static void record__aio_set_pos(int trace_fd, off_t pos)
267 lseek(trace_fd, pos, SEEK_SET);
/*
 * Drain all outstanding AIO writes for every mmap of the evlist.
 * No-op when AIO is disabled (nr_cblocks == 0).
 */
270 static void record__aio_mmap_read_sync(struct record *rec)
273 struct perf_evlist *evlist = rec->evlist;
274 struct perf_mmap *maps = evlist->mmap;
276 if (!rec->opts.nr_cblocks)
279 for (i = 0; i < evlist->nr_mmaps; i++) {
280 struct perf_mmap *map = &maps[i];
283 record__aio_sync(map, true);
287 static int nr_cblocks_default = 1;
288 static int nr_cblocks_max = 4;
/*
 * Parse the --aio[=n] option value into opts->nr_cblocks.
 * An absent/zero value falls back to nr_cblocks_default.
 */
290 static int record__aio_parse(const struct option *opt,
294 struct record_opts *opts = (struct record_opts *)opt->value;
297 opts->nr_cblocks = 0;
300 opts->nr_cblocks = strtol(str, NULL, 0);
301 if (!opts->nr_cblocks)
302 opts->nr_cblocks = nr_cblocks_default;
307 #else /* HAVE_AIO_SUPPORT */
/* Without AIO support all AIO helpers collapse to no-op stubs. */
308 static int nr_cblocks_max = 0;
310 static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
315 static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
316 void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
321 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
326 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
330 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
/* AIO is in use when at least one control block was requested. */
335 static int record__aio_enabled(struct record *rec)
337 return rec->opts.nr_cblocks > 0;
340 #define MMAP_FLUSH_DEFAULT 1
/*
 * Parse --mmap-flush: accepts a B/K/M/G suffixed size or a plain number.
 * The result is clamped to [MMAP_FLUSH_DEFAULT, mmap buffer size].
 */
341 static int record__mmap_flush_parse(const struct option *opt,
346 struct record_opts *opts = (struct record_opts *)opt->value;
347 static struct parse_tag tags[] = {
348 { .tag = 'B', .mult = 1 },
349 { .tag = 'K', .mult = 1 << 10 },
350 { .tag = 'M', .mult = 1 << 20 },
351 { .tag = 'G', .mult = 1 << 30 },
/* First try tagged form ("16M"); -1 means no tag, fall back to strtol. */
359 opts->mmap_flush = parse_tag_value(str, tags);
360 if (opts->mmap_flush == (int)-1)
361 opts->mmap_flush = strtol(str, NULL, 0);
364 if (!opts->mmap_flush)
365 opts->mmap_flush = MMAP_FLUSH_DEFAULT;
/* The flush threshold cannot exceed the ring-buffer size itself. */
367 flush_max = perf_evlist__mmap_size(opts->mmap_pages);
369 if (opts->mmap_flush > flush_max)
370 opts->mmap_flush = flush_max;
/* True when trace compression (-z/--compression-level) is active. */
375 static int record__comp_enabled(struct record *rec)
377 return rec->opts.comp_level > 0;
/*
 * perf_tool callback: forward a synthesized event straight to the
 * output file via record__write().
 */
380 static int process_synthesized_event(struct perf_tool *tool,
381 union perf_event *event,
382 struct perf_sample *sample __maybe_unused,
383 struct machine *machine __maybe_unused)
385 struct record *rec = container_of(tool, struct record, tool);
386 return record__write(rec, NULL, event, event->header.size);
/* perf_mmap__push() callback for the synchronous (non-AIO) write path. */
389 static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
391 struct record *rec = to;
394 return record__write(rec, map, bf, size);
/* Main-loop state flipped from signal context — hence volatile. */
397 static volatile int done;
398 static volatile int signr = -1;
399 static volatile int child_finished;
401 static void sig_handler(int sig)
/* SIGSEGV: run the recovery hook, then dump a stack trace for diagnosis. */
411 static void sigsegv_handler(int sig)
413 perf_hooks__recover();
414 sighandler_dump_stack(sig);
/* atexit handler: re-raise the deferred signal with default disposition. */
417 static void record__sig_exit(void)
422 signal(signr, SIG_DFL);
426 #ifdef HAVE_AUXTRACE_SUPPORT
/*
 * Write one AUX area event plus its (possibly two-part, wrap-around)
 * data payload to the output, padding the total to an 8-byte boundary.
 * For seekable single-file output the event's file offset is also
 * recorded in the auxtrace index.
 */
428 static int record__process_auxtrace(struct perf_tool *tool,
429 struct perf_mmap *map,
430 union perf_event *event, void *data1,
431 size_t len1, void *data2, size_t len2)
433 struct record *rec = container_of(tool, struct record, tool);
434 struct perf_data *data = &rec->data;
/* Only seekable, single-file output can be indexed by file offset. */
438 if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) {
440 int fd = perf_data__fd(data);
443 file_offset = lseek(fd, 0, SEEK_CUR);
444 if (file_offset == -1)
446 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
452 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
453 padding = (len1 + len2) & 7;
455 padding = 8 - padding;
457 record__write(rec, map, event, event->header.size);
458 record__write(rec, map, data1, len1);
460 record__write(rec, map, data2, len2);
461 record__write(rec, map, &pad, padding);
/* Continuous-mode read of one AUX area mmap into the output. */
466 static int record__auxtrace_mmap_read(struct record *rec,
467 struct perf_mmap *map)
471 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
472 record__process_auxtrace);
/* Snapshot-mode read: capture up to auxtrace_snapshot_size bytes. */
482 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
483 struct perf_mmap *map)
487 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
488 record__process_auxtrace,
489 rec->opts.auxtrace_snapshot_size)
499 static int record__auxtrace_read_snapshot_all(struct record *rec)
504 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
505 struct perf_mmap *map = &rec->evlist->mmap[i];
/* Skip mmaps that have no AUX area attached. */
507 if (!map->auxtrace_mmap.base)
510 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
/*
 * Take one AUX snapshot across all mmaps; on any failure mark the
 * snapshot trigger as errored, otherwise re-arm it for the next SIGUSR2.
 */
519 static void record__read_auxtrace_snapshot(struct record *rec)
521 pr_debug("Recording AUX area tracing snapshot\n");
522 if (record__auxtrace_read_snapshot_all(rec) < 0) {
523 trigger_error(&auxtrace_snapshot_trigger);
525 if (auxtrace_record__snapshot_finish(rec->itr))
526 trigger_error(&auxtrace_snapshot_trigger);
528 trigger_ready(&auxtrace_snapshot_trigger);
/* Initialize AUX tracing: pick a backend, parse snapshot opts and filters. */
532 static int record__auxtrace_init(struct record *rec)
537 rec->itr = auxtrace_record__init(rec->evlist, &err);
542 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
543 rec->opts.auxtrace_snapshot_opts);
547 return auxtrace_parse_filters(rec->evlist);
/* Stubs for builds without AUX area tracing support. */
553 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
554 struct perf_mmap *map __maybe_unused)
560 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
565 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
570 static int record__auxtrace_init(struct record *rec __maybe_unused)
/*
 * mmap the event ring buffers for @evlist according to the record
 * options (pages, AUX pages, snapshot mode, AIO blocks, affinity,
 * flush threshold).  EPERM gets a dedicated hint about
 * perf_event_mlock_kb; other errors are reported generically.
 */
577 static int record__mmap_evlist(struct record *rec,
578 struct perf_evlist *evlist)
580 struct record_opts *opts = &rec->opts;
/* Non-SYS affinity modes need the cpu -> node map set up first. */
583 if (opts->affinity != PERF_AFFINITY_SYS)
584 cpu__setup_cpunode_map();
586 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
587 opts->auxtrace_mmap_pages,
588 opts->auxtrace_snapshot_mode,
589 opts->nr_cblocks, opts->affinity,
590 opts->mmap_flush) < 0) {
591 if (errno == EPERM) {
592 pr_err("Permission error mapping pages.\n"
593 "Consider increasing "
594 "/proc/sys/kernel/perf_event_mlock_kb,\n"
595 "or try again with a smaller value of -m/--mmap_pages.\n"
596 "(current value: %u,%u)\n",
597 opts->mmap_pages, opts->auxtrace_mmap_pages);
600 pr_err("failed to mmap with %d (%s)\n", errno,
601 str_error_r(errno, msg, sizeof(msg)));
/* Convenience wrapper: mmap the record's own evlist. */
611 static int record__mmap(struct record *rec)
613 return record__mmap_evlist(rec, rec->evlist);
/*
 * Open all events of the evlist, applying fallbacks and weak-group
 * recovery, then apply event filters and mmap the buffers.  On success
 * the session takes ownership of the evlist for header writing.
 */
616 static int record__open(struct record *rec)
619 struct perf_evsel *pos;
620 struct perf_evlist *evlist = rec->evlist;
621 struct perf_session *session = rec->session;
622 struct record_opts *opts = &rec->opts;
626 * For initial_delay we need to add a dummy event so that we can track
627 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
628 * real events, the ones asked by the user.
630 if (opts->initial_delay) {
631 if (perf_evlist__add_dummy(evlist))
634 pos = perf_evlist__first(evlist);
636 pos = perf_evlist__last(evlist);
/* The real events start only when the workload exec()s. */
638 pos->attr.enable_on_exec = 1;
641 perf_evlist__config(evlist, opts, &callchain_param);
643 evlist__for_each_entry(evlist, pos) {
645 if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
/* Try a degraded configuration of the same event first. */
646 if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
648 ui__warning("%s\n", msg);
/* Weak group: break the group up and retry its members alone. */
651 if ((errno == EINVAL || errno == EBADF) &&
652 pos->leader != pos &&
654 pos = perf_evlist__reset_weak_group(evlist, pos);
658 perf_evsel__open_strerror(pos, &opts->target,
659 errno, msg, sizeof(msg));
660 ui__error("%s\n", msg);
664 pos->supported = true;
667 if (perf_evlist__apply_filters(evlist, &pos)) {
668 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
669 pos->filter, perf_evsel__name(pos), errno,
670 str_error_r(errno, msg, sizeof(msg)));
675 rc = record__mmap(rec);
679 session->evlist = evlist;
680 perf_session__set_id_hdr_size(session);
/*
 * Sample callback used while post-processing the recorded file for
 * build-ids: tracks the first/last sample timestamps and marks the DSOs
 * hit by each sample (skipped when --buildid-all marks everything).
 */
685 static int process_sample_event(struct perf_tool *tool,
686 union perf_event *event,
687 struct perf_sample *sample,
688 struct perf_evsel *evsel,
689 struct machine *machine)
691 struct record *rec = container_of(tool, struct record, tool);
693 if (rec->evlist->first_sample_time == 0)
694 rec->evlist->first_sample_time = sample->time;
696 rec->evlist->last_sample_time = sample->time;
698 if (rec->buildid_all)
702 return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
/* Post-process the just-written data file to collect build-ids. */
705 static int process_buildids(struct record *rec)
707 struct perf_session *session = rec->session;
709 if (perf_data__size(&rec->data) == 0)
713 * During this process, it'll load kernel map and replace the
714 * dso->long_name to a real pathname it found. In this case
715 * we prefer the vmlinux path like
716 * /lib/modules/3.16.4/build/vmlinux
718 * rather than build-id path (in debug directory).
719 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
721 symbol_conf.ignore_vmlinux_buildid = true;
724 * If --buildid-all is given, it marks all DSO regardless of hits,
725 * so no need to process samples. But if timestamp_boundary is enabled,
726 * it still needs to walk on all samples to get the timestamps of
727 * first/last samples.
729 if (rec->buildid_all && !rec->timestamp_boundary)
730 rec->tool.sample = NULL;
732 return perf_session__process_events(session);
/*
 * Per-guest-machine callback: synthesize module mmaps and the guest
 * kernel mmap so guest symbols resolve at report time.
 */
735 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
738 struct perf_tool *tool = data;
740 *As for guest kernel when processing subcommand record&report,
741 *we arrange module mmap prior to guest kernel mmap and trigger
742 *a preload dso because default guest module symbols are loaded
743 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
744 *method is used to avoid symbol missing when the first addr is
745 *in module instead of in guest kernel.
747 err = perf_event__synthesize_modules(tool, process_synthesized_event,
750 pr_err("Couldn't record guest kernel [%d]'s reference"
751 " relocation symbol.\n", machine->pid);
754 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
755 * have no _text sometimes.
757 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
760 pr_err("Couldn't record guest kernel [%d]'s reference"
761 " relocation symbol.\n", machine->pid);
/* Marker event emitted after each round of ring-buffer reads. */
764 static struct perf_event_header finished_round_event = {
765 .size = sizeof(struct perf_event_header),
766 .type = PERF_RECORD_FINISHED_ROUND,
/*
 * If per-mmap affinity is enabled and the map's mask differs from the
 * current one, migrate this thread to the map's CPUs before reading it.
 */
769 static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
771 if (rec->opts.affinity != PERF_AFFINITY_SYS &&
772 !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
773 CPU_ZERO(&rec->affinity_mask);
774 CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask);
775 sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask);
/*
 * Read every mmap of @evlist (regular or overwrite buffers per
 * @overwrite) and push the data to the output, via AIO when enabled.
 * Emits a PERF_RECORD_FINISHED_ROUND marker if anything was written.
 */
779 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
780 bool overwrite, bool synch)
782 u64 bytes_written = rec->bytes_written;
785 struct perf_mmap *maps;
786 int trace_fd = rec->data.file.fd;
792 maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
/* Overwrite buffers are read only while data collection is pending. */
796 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
/* AIO writes use explicit offsets; remember where the file is now. */
799 if (record__aio_enabled(rec))
800 off = record__aio_get_pos(trace_fd);
802 for (i = 0; i < evlist->nr_mmaps; i++) {
804 struct perf_mmap *map = &maps[i];
807 record__adjust_affinity(rec, map);
812 if (!record__aio_enabled(rec)) {
813 if (perf_mmap__push(map, rec, record__pushfn) != 0) {
822 * Call record__aio_sync() to wait till map->data buffer
823 * becomes available after previous aio write request.
825 idx = record__aio_sync(map, false);
826 if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
827 record__aio_set_pos(trace_fd, off);
/* Continuous AUX tracing is drained here; snapshot mode reads on SIGUSR2. */
838 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
839 record__auxtrace_mmap_read(rec, map) != 0) {
845 if (record__aio_enabled(rec))
846 record__aio_set_pos(trace_fd, off);
849 * Mark the round finished in case we wrote
850 * at least one event.
852 if (bytes_written != rec->bytes_written)
853 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
/* Overwrite buffers drained: flip state back to empty. */
856 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
/* Read the regular buffers first, then the overwrite buffers. */
861 static int record__mmap_read_all(struct record *rec, bool synch)
865 err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
869 return record__mmap_read_evlist(rec, rec->evlist, true, synch);
/*
 * Start with every header feature enabled, then clear the ones not
 * applicable to this run (no tracepoints, no branch stack, etc.).
 */
872 static void record__init_features(struct record *rec)
874 struct perf_session *session = rec->session;
877 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
878 perf_header__set_feat(&session->header, feat);
881 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
883 if (!have_tracepoints(&rec->evlist->entries))
884 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
886 if (!rec->opts.branch_stack)
887 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
889 if (!rec->opts.full_auxtrace)
890 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
892 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
893 perf_header__clear_feat(&session->header, HEADER_CLOCKID);
895 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
896 if (!record__comp_enabled(rec))
897 perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
899 perf_header__clear_feat(&session->header, HEADER_STAT);
/*
 * Finalize the output file: fix up the data size in the header, run
 * build-id collection (unless disabled) and rewrite the header.
 */
903 record__finish_output(struct record *rec)
905 struct perf_data *data = &rec->data;
906 int fd = perf_data__fd(data);
911 rec->session->header.data_size += rec->bytes_written;
912 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
914 if (!rec->no_buildid) {
915 process_buildids(rec);
917 if (rec->buildid_all)
918 dsos__hit_all(rec->session);
920 perf_session__write_header(rec->session, rec->evlist, fd, true);
/*
 * Synthesize thread-map events for the forked workload's tid, so the
 * new output file knows about the process.  Honors --tail-synthesize
 * via the @tail gate.
 */
925 static int record__synthesize_workload(struct record *rec, bool tail)
928 struct thread_map *thread_map;
930 if (rec->opts.tail_synthesize != tail)
933 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
934 if (thread_map == NULL)
937 err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
938 process_synthesized_event,
939 &rec->session->machines.host,
940 rec->opts.sample_address);
/* Drop our reference; synthesize_thread_map() does not keep it. */
941 thread_map__put(thread_map);
945 static int record__synthesize(struct record *rec, bool tail);
/*
 * Rotate the output (--switch-output): flush pending data, finalize the
 * current file, then reopen output under a timestamped name.  When
 * --switch-output=N files is set, old rotated files are pruned in a
 * ring.  Unless called at exit, also re-synthesizes tracking events
 * into the fresh file.
 */
948 record__switch_output(struct record *rec, bool at_exit)
950 struct perf_data *data = &rec->data;
954 /* Same Size: "2015122520103046"*/
955 char timestamp[] = "InvalidTimestamp";
/* Make sure all AIO writes landed before finalizing the old file. */
957 record__aio_mmap_read_sync(rec);
959 record__synthesize(rec, true);
960 if (target__none(&rec->opts.target))
961 record__synthesize_workload(rec, true);
964 record__finish_output(rec);
965 err = fetch_current_timestamp(timestamp, sizeof(timestamp));
967 pr_err("Failed to get current timestamp\n");
971 fd = perf_data__switch(data, timestamp,
972 rec->session->header.data_offset,
973 at_exit, &new_filename);
/* New file opened mid-run: restart the byte accounting from zero. */
974 if (fd >= 0 && !at_exit) {
975 rec->bytes_written = 0;
976 rec->session->header.data_size = 0;
980 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
981 data->path, timestamp);
/* Ring of at most num_files rotated outputs: drop the one being reused. */
983 if (rec->switch_output.num_files) {
984 int n = rec->switch_output.cur_file + 1;
986 if (n >= rec->switch_output.num_files)
988 rec->switch_output.cur_file = n;
989 if (rec->switch_output.filenames[n]) {
990 remove(rec->switch_output.filenames[n]);
991 free(rec->switch_output.filenames[n]);
993 rec->switch_output.filenames[n] = new_filename;
998 /* Output tracking events */
1000 record__synthesize(rec, false);
1003 * In 'perf record --switch-output' without -a,
1004 * record__synthesize() in record__switch_output() won't
1005 * generate tracking events because there's no thread_map
1006 * in evlist. Which causes newly created perf.data doesn't
1007 * contain map and comm information.
1008 * Create a fake thread_map and directly call
1009 * perf_event__synthesize_thread_map() for those events.
1011 if (target__none(&rec->opts.target))
1012 record__synthesize_workload(rec, false);
/* errno of a failed workload exec(), delivered via SIGUSR1 sigqueue. */
1017 static volatile int workload_exec_errno;
1020 * perf_evlist__prepare_workload will send a SIGUSR1
1021 * if the fork fails, since we asked by setting its
1022 * want_signal to true.
1024 static void workload_exec_failed_signal(int signo __maybe_unused,
1026 void *ucontext __maybe_unused)
1028 workload_exec_errno = info->si_value.sival_int;
1037 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
1038 struct perf_tool *tool __maybe_unused,
1039 perf_event__handler_t process __maybe_unused,
1040 struct machine *machine __maybe_unused)
1045 static const struct perf_event_mmap_page *
1046 perf_evlist__pick_pc(struct perf_evlist *evlist)
1049 if (evlist->mmap && evlist->mmap[0].base)
1050 return evlist->mmap[0].base;
1051 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
1052 return evlist->overwrite_mmap[0].base;
1057 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1059 const struct perf_event_mmap_page *pc;
1061 pc = perf_evlist__pick_pc(rec->evlist);
/*
 * Synthesize all non-sample bookkeeping events into the output: attrs
 * and features (pipe mode), tracing data, time-conv, auxtrace info,
 * kernel/module mmaps, guest machines, thread and cpu maps, BPF events,
 * and finally the existing threads of the target.  Honors the
 * --tail-synthesize gate via @tail.
 */
1067 static int record__synthesize(struct record *rec, bool tail)
1069 struct perf_session *session = rec->session;
1070 struct machine *machine = &session->machines.host;
1071 struct perf_data *data = &rec->data;
1072 struct record_opts *opts = &rec->opts;
1073 struct perf_tool *tool = &rec->tool;
1074 int fd = perf_data__fd(data);
1077 if (rec->opts.tail_synthesize != tail)
1080 if (data->is_pipe) {
1082 * We need to synthesize events first, because some
1083 * features works on top of them (on report side).
1085 err = perf_event__synthesize_attrs(tool, rec->evlist,
1086 process_synthesized_event);
1088 pr_err("Couldn't synthesize attrs.\n");
1092 err = perf_event__synthesize_features(tool, session, rec->evlist,
1093 process_synthesized_event);
1095 pr_err("Couldn't synthesize features.\n");
1099 if (have_tracepoints(&rec->evlist->entries)) {
1101 * FIXME err <= 0 here actually means that
1102 * there were no tracepoints so its not really
1103 * an error, just that we don't need to
1104 * synthesize anything. We really have to
1105 * return this more properly and also
1106 * propagate errors that now are calling die()
1108 err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
1109 process_synthesized_event);
1111 pr_err("Couldn't record tracing data.\n");
/* Tracing data is written directly to fd; account it by hand. */
1114 rec->bytes_written += err;
1118 err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1119 process_synthesized_event, machine);
1123 if (rec->opts.full_auxtrace) {
1124 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1125 session, process_synthesized_event);
1130 if (!perf_evlist__exclude_kernel(rec->evlist)) {
1131 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
/* Non-fatal: warn once and keep recording with possibly skewed symbols. */
1133 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1134 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1135 "Check /proc/kallsyms permission or run as root.\n");
1137 err = perf_event__synthesize_modules(tool, process_synthesized_event,
1139 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1140 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1141 "Check /proc/modules permission or run as root.\n");
1145 machines__process_guests(&session->machines,
1146 perf_event__synthesize_guest_os, tool);
1149 err = perf_event__synthesize_extra_attr(&rec->tool,
1151 process_synthesized_event,
1156 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
1157 process_synthesized_event,
1160 pr_err("Couldn't synthesize thread map.\n");
1164 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
1165 process_synthesized_event, NULL);
1167 pr_err("Couldn't synthesize cpu map.\n");
1171 err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
1174 pr_warning("Couldn't synthesize bpf events.\n");
1176 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
1177 process_synthesized_event, opts->sample_address,
1183 static int __cmd_record(struct record *rec, int argc, const char **argv)
1187 unsigned long waking = 0;
1188 const bool forks = argc > 0;
1189 struct perf_tool *tool = &rec->tool;
1190 struct record_opts *opts = &rec->opts;
1191 struct perf_data *data = &rec->data;
1192 struct perf_session *session;
1193 bool disabled = false, draining = false;
1194 struct perf_evlist *sb_evlist = NULL;
1198 atexit(record__sig_exit);
1199 signal(SIGCHLD, sig_handler);
1200 signal(SIGINT, sig_handler);
1201 signal(SIGTERM, sig_handler);
1202 signal(SIGSEGV, sigsegv_handler);
1204 if (rec->opts.record_namespaces)
1205 tool->namespace_events = true;
1207 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
1208 signal(SIGUSR2, snapshot_sig_handler);
1209 if (rec->opts.auxtrace_snapshot_mode)
1210 trigger_on(&auxtrace_snapshot_trigger);
1211 if (rec->switch_output.enabled)
1212 trigger_on(&switch_output_trigger);
1214 signal(SIGUSR2, SIG_IGN);
1217 session = perf_session__new(data, false, tool);
1218 if (session == NULL) {
1219 pr_err("Perf session creation failed.\n");
1223 fd = perf_data__fd(data);
1224 rec->session = session;
1226 record__init_features(rec);
1228 if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1229 session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
1232 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
1233 argv, data->is_pipe,
1234 workload_exec_failed_signal);
1236 pr_err("Couldn't run the workload!\n");
1238 goto out_delete_session;
1243 * If we have just single event and are sending data
1244 * through pipe, we need to force the ids allocation,
1245 * because we synthesize event name through the pipe
1246 * and need the id for that.
1248 if (data->is_pipe && rec->evlist->nr_entries == 1)
1249 rec->opts.sample_id = true;
1251 if (record__open(rec) != 0) {
1255 session->header.env.comp_mmap_len = session->evlist->mmap_len;
1257 err = bpf__apply_obj_config();
1259 char errbuf[BUFSIZ];
1261 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
1262 pr_err("ERROR: Apply config to BPF failed: %s\n",
1268 * Normally perf_session__new would do this, but it doesn't have the
1271 if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
1272 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
1273 rec->tool.ordered_events = false;
1276 if (!rec->evlist->nr_groups)
1277 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
1279 if (data->is_pipe) {
1280 err = perf_header__write_pipe(fd);
1284 err = perf_session__write_header(session, rec->evlist, fd, false);
1289 if (!rec->no_buildid
1290 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
1291 pr_err("Couldn't generate buildids. "
1292 "Use --no-buildid to profile anyway.\n");
1297 if (!opts->no_bpf_event)
1298 bpf_event__add_sb_event(&sb_evlist, &session->header.env);
1300 if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) {
1301 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1302 opts->no_bpf_event = true;
1305 err = record__synthesize(rec, false);
1309 if (rec->realtime_prio) {
1310 struct sched_param param;
1312 param.sched_priority = rec->realtime_prio;
1313 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) {
1314 pr_err("Could not set realtime priority.\n");
1321 * When perf is starting the traced process, all the events
1322 * (apart from group members) have enable_on_exec=1 set,
1323 * so don't spoil it by prematurely enabling them.
1325 if (!target__none(&opts->target) && !opts->initial_delay)
1326 perf_evlist__enable(rec->evlist);
1332 struct machine *machine = &session->machines.host;
1333 union perf_event *event;
1336 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1337 if (event == NULL) {
1343 * Some H/W events are generated before COMM event
1344 * which is emitted during exec(), so perf script
1345 * cannot see a correct process name for those events.
1346 * Synthesize COMM event to prevent it.
1348 tgid = perf_event__synthesize_comm(tool, event,
1349 rec->evlist->workload.pid,
1350 process_synthesized_event,
1357 event = malloc(sizeof(event->namespaces) +
1358 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1359 machine->id_hdr_size);
1360 if (event == NULL) {
1366 * Synthesize NAMESPACES event for the command specified.
1368 perf_event__synthesize_namespaces(tool, event,
1369 rec->evlist->workload.pid,
1370 tgid, process_synthesized_event,
1374 perf_evlist__start_workload(rec->evlist);
1377 if (opts->initial_delay) {
1378 usleep(opts->initial_delay * USEC_PER_MSEC);
1379 perf_evlist__enable(rec->evlist);
1382 trigger_ready(&auxtrace_snapshot_trigger);
1383 trigger_ready(&switch_output_trigger);
1384 perf_hooks__invoke_record_start();
1386 unsigned long long hits = rec->samples;
1389 * rec->evlist->bkw_mmap_state is possible to be
1390 * BKW_MMAP_EMPTY here: when done == true and
1391 * hits != rec->samples in previous round.
1393 * perf_evlist__toggle_bkw_mmap ensure we never
1394 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1396 if (trigger_is_hit(&switch_output_trigger) || done || draining)
1397 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1399 if (record__mmap_read_all(rec, false) < 0) {
1400 trigger_error(&auxtrace_snapshot_trigger);
1401 trigger_error(&switch_output_trigger);
1406 if (auxtrace_record__snapshot_started) {
1407 auxtrace_record__snapshot_started = 0;
1408 if (!trigger_is_error(&auxtrace_snapshot_trigger))
1409 record__read_auxtrace_snapshot(rec);
1410 if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1411 pr_err("AUX area tracing snapshot failed\n");
1417 if (trigger_is_hit(&switch_output_trigger)) {
1419 * If switch_output_trigger is hit, the data in
1420 * overwritable ring buffer should have been collected,
1421 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1423 * If SIGUSR2 raise after or during record__mmap_read_all(),
1424 * record__mmap_read_all() didn't collect data from
1425 * overwritable ring buffer. Read again.
1427 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1429 trigger_ready(&switch_output_trigger);
1432 * Reenable events in overwrite ring buffer after
1433 * record__mmap_read_all(): we should have collected
1436 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1439 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1442 fd = record__switch_output(rec, false);
1444 pr_err("Failed to switch to new file\n");
1445 trigger_error(&switch_output_trigger);
1450 /* re-arm the alarm */
1451 if (rec->switch_output.time)
1452 alarm(rec->switch_output.time);
1455 if (hits == rec->samples) {
1456 if (done || draining)
1458 err = perf_evlist__poll(rec->evlist, -1);
1460 * Propagate error, only if there's any. Ignore positive
1461 * number of returned events and interrupt error.
1463 if (err > 0 || (err < 0 && errno == EINTR))
1467 if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1472 * When perf is starting the traced process, at the end events
1473 * die with the process and we wait for that. Thus no need to
1474 * disable events in this case.
1476 if (done && !disabled && !target__none(&opts->target)) {
1477 trigger_off(&auxtrace_snapshot_trigger);
1478 perf_evlist__disable(rec->evlist);
1482 trigger_off(&auxtrace_snapshot_trigger);
1483 trigger_off(&switch_output_trigger);
1485 if (forks && workload_exec_errno) {
1486 char msg[STRERR_BUFSIZE];
1487 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1488 pr_err("Workload failed: %s\n", emsg);
1494 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1496 if (target__none(&rec->opts.target))
1497 record__synthesize_workload(rec, true);
1500 record__mmap_read_all(rec, true);
1501 record__aio_mmap_read_sync(rec);
1503 if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
1504 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
1505 session->header.env.comp_ratio = ratio + 0.5;
1511 if (!child_finished)
1512 kill(rec->evlist->workload.pid, SIGTERM);
1518 else if (WIFEXITED(exit_status))
1519 status = WEXITSTATUS(exit_status);
1520 else if (WIFSIGNALED(exit_status))
1521 signr = WTERMSIG(exit_status);
1525 record__synthesize(rec, true);
1526 /* this will be recalculated during process_buildids() */
1530 if (!rec->timestamp_filename) {
1531 record__finish_output(rec);
1533 fd = record__switch_output(rec, true);
1536 goto out_delete_session;
1541 perf_hooks__invoke_record_end();
1543 if (!err && !quiet) {
1545 const char *postfix = rec->timestamp_filename ?
1546 ".<timestamp>" : "";
1548 if (rec->samples && !rec->opts.full_auxtrace)
1549 scnprintf(samples, sizeof(samples),
1550 " (%" PRIu64 " samples)", rec->samples);
1554 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
1555 perf_data__size(data) / 1024.0 / 1024.0,
1556 data->path, postfix, samples);
1558 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
1559 rec->session->bytes_transferred / 1024.0 / 1024.0,
1562 fprintf(stderr, " ]\n");
1566 perf_session__delete(session);
1568 if (!opts->no_bpf_event)
1569 perf_evlist__stop_sb_thread(sb_evlist);
1573 static void callchain_debug(struct callchain_param *callchain)
1575 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1577 pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1579 if (callchain->record_mode == CALLCHAIN_DWARF)
1580 pr_debug("callchain: stack dump size %d\n",
1581 callchain->dump_size);
1584 int record_opts__parse_callchain(struct record_opts *record,
1585 struct callchain_param *callchain,
1586 const char *arg, bool unset)
1589 callchain->enabled = !unset;
1591 /* --no-call-graph */
1593 callchain->record_mode = CALLCHAIN_NONE;
1594 pr_debug("callchain: disabled\n");
1598 ret = parse_callchain_record_opt(arg, callchain);
1600 /* Enable data address sampling for DWARF unwind. */
1601 if (callchain->record_mode == CALLCHAIN_DWARF)
1602 record->sample_address = true;
1603 callchain_debug(callchain);
1609 int record_parse_callchain_opt(const struct option *opt,
1613 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1616 int record_callchain_opt(const struct option *opt,
1617 const char *arg __maybe_unused,
1618 int unset __maybe_unused)
1620 struct callchain_param *callchain = opt->value;
1622 callchain->enabled = true;
1624 if (callchain->record_mode == CALLCHAIN_NONE)
1625 callchain->record_mode = CALLCHAIN_FP;
1627 callchain_debug(callchain);
1631 static int perf_record_config(const char *var, const char *value, void *cb)
1633 struct record *rec = cb;
1635 if (!strcmp(var, "record.build-id")) {
1636 if (!strcmp(value, "cache"))
1637 rec->no_buildid_cache = false;
1638 else if (!strcmp(value, "no-cache"))
1639 rec->no_buildid_cache = true;
1640 else if (!strcmp(value, "skip"))
1641 rec->no_buildid = true;
1646 if (!strcmp(var, "record.call-graph")) {
1647 var = "call-graph.record-mode";
1648 return perf_default_config(var, value, cb);
1650 #ifdef HAVE_AIO_SUPPORT
1651 if (!strcmp(var, "record.aio")) {
1652 rec->opts.nr_cblocks = strtol(value, NULL, 0);
1653 if (!rec->opts.nr_cblocks)
1654 rec->opts.nr_cblocks = nr_cblocks_default;
1661 struct clockid_map {
1666 #define CLOCKID_MAP(n, c) \
1667 { .name = n, .clockid = (c), }
1669 #define CLOCKID_END { .name = NULL, }
1673 * Add the missing ones, we need to build on many distros...
1675 #ifndef CLOCK_MONOTONIC_RAW
1676 #define CLOCK_MONOTONIC_RAW 4
1678 #ifndef CLOCK_BOOTTIME
1679 #define CLOCK_BOOTTIME 7
1682 #define CLOCK_TAI 11
1685 static const struct clockid_map clockids[] = {
1686 /* available for all events, NMI safe */
1687 CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1688 CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1690 /* available for some events */
1691 CLOCKID_MAP("realtime", CLOCK_REALTIME),
1692 CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1693 CLOCKID_MAP("tai", CLOCK_TAI),
1695 /* available for the lazy */
1696 CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1697 CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1698 CLOCKID_MAP("real", CLOCK_REALTIME),
1699 CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1704 static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
1706 struct timespec res;
1709 if (!clock_getres(clk_id, &res))
1710 *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
1712 pr_warning("WARNING: Failed to determine specified clock resolution.\n");
1717 static int parse_clockid(const struct option *opt, const char *str, int unset)
1719 struct record_opts *opts = (struct record_opts *)opt->value;
1720 const struct clockid_map *cm;
1721 const char *ostr = str;
1724 opts->use_clockid = 0;
1732 /* no setting it twice */
1733 if (opts->use_clockid)
1736 opts->use_clockid = true;
1738 /* if its a number, we're done */
1739 if (sscanf(str, "%d", &opts->clockid) == 1)
1740 return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
1742 /* allow a "CLOCK_" prefix to the name */
1743 if (!strncasecmp(str, "CLOCK_", 6))
1746 for (cm = clockids; cm->name; cm++) {
1747 if (!strcasecmp(str, cm->name)) {
1748 opts->clockid = cm->clockid;
1749 return get_clockid_res(opts->clockid,
1750 &opts->clockid_res_ns);
1754 opts->use_clockid = false;
1755 ui__warning("unknown clockid %s, check man page\n", ostr);
1759 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
1761 struct record_opts *opts = (struct record_opts *)opt->value;
1766 if (!strcasecmp(str, "node"))
1767 opts->affinity = PERF_AFFINITY_NODE;
1768 else if (!strcasecmp(str, "cpu"))
1769 opts->affinity = PERF_AFFINITY_CPU;
1774 static int record__parse_mmap_pages(const struct option *opt,
1776 int unset __maybe_unused)
1778 struct record_opts *opts = opt->value;
1780 unsigned int mmap_pages;
1795 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1798 opts->mmap_pages = mmap_pages;
1806 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1810 opts->auxtrace_mmap_pages = mmap_pages;
1817 static void switch_output_size_warn(struct record *rec)
1819 u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1820 struct switch_output *s = &rec->switch_output;
1824 if (s->size < wakeup_size) {
1827 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1828 pr_warning("WARNING: switch-output data size lower than "
1829 "wakeup kernel buffer size (%s) "
1830 "expect bigger perf.data sizes\n", buf);
1834 static int switch_output_setup(struct record *rec)
1836 struct switch_output *s = &rec->switch_output;
1837 static struct parse_tag tags_size[] = {
1838 { .tag = 'B', .mult = 1 },
1839 { .tag = 'K', .mult = 1 << 10 },
1840 { .tag = 'M', .mult = 1 << 20 },
1841 { .tag = 'G', .mult = 1 << 30 },
1844 static struct parse_tag tags_time[] = {
1845 { .tag = 's', .mult = 1 },
1846 { .tag = 'm', .mult = 60 },
1847 { .tag = 'h', .mult = 60*60 },
1848 { .tag = 'd', .mult = 60*60*24 },
1856 if (!strcmp(s->str, "signal")) {
1858 pr_debug("switch-output with SIGUSR2 signal\n");
1862 val = parse_tag_value(s->str, tags_size);
1863 if (val != (unsigned long) -1) {
1865 pr_debug("switch-output with %s size threshold\n", s->str);
1869 val = parse_tag_value(s->str, tags_time);
1870 if (val != (unsigned long) -1) {
1872 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1880 rec->timestamp_filename = true;
1883 if (s->size && !rec->opts.no_buffering)
1884 switch_output_size_warn(rec);
/* Usage strings shown by parse_options(); NULL-terminated. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;
1897 * XXX Ideally would be local to cmd_record() and passed to a record__new
1898 * because we need to have access to it in record__exit, that is called
1899 * after cmd_record() exits, but since record_options need to be accessible to
1900 * builtin-script, leave it here.
1902 * At least we don't ouch it in all the other functions here directly.
1904 * Just say no to tons of global variables, sigh.
1906 static struct record record = {
1908 .sample_time = true,
1909 .mmap_pages = UINT_MAX,
1910 .user_freq = UINT_MAX,
1911 .user_interval = ULLONG_MAX,
1915 .default_per_cpu = true,
1917 .mmap_flush = MMAP_FLUSH_DEFAULT,
1920 .sample = process_sample_event,
1921 .fork = perf_event__process_fork,
1922 .exit = perf_event__process_exit,
1923 .comm = perf_event__process_comm,
1924 .namespaces = perf_event__process_namespaces,
1925 .mmap = perf_event__process_mmap,
1926 .mmap2 = perf_event__process_mmap2,
1927 .ordered_events = true,
1931 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1932 "\n\t\t\t\tDefault: fp";
1934 static bool dry_run;
1937 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1938 * with it and switch to use the library functions in perf_evlist that came
1939 * from builtin-record.c, i.e. use record_opts,
1940 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1943 static struct option __record_options[] = {
1944 OPT_CALLBACK('e', "event", &record.evlist, "event",
1945 "event selector. use 'perf list' to list available events",
1946 parse_events_option),
1947 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1948 "event filter", parse_filter),
1949 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1950 NULL, "don't record events from perf itself",
1952 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1953 "record events on existing process id"),
1954 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1955 "record events on existing thread id"),
1956 OPT_INTEGER('r', "realtime", &record.realtime_prio,
1957 "collect data with this RT SCHED_FIFO priority"),
1958 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1959 "collect data without buffering"),
1960 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1961 "collect raw sample records from all opened counters"),
1962 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1963 "system-wide collection from all CPUs"),
1964 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1965 "list of cpus to monitor"),
1966 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1967 OPT_STRING('o', "output", &record.data.path, "file",
1968 "output file name"),
1969 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1970 &record.opts.no_inherit_set,
1971 "child tasks do not inherit counters"),
1972 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1973 "synthesize non-sample events at the end of output"),
1974 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1975 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"),
1976 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
1977 "Fail if the specified frequency can't be used"),
1978 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
1979 "profile at this frequency",
1980 record__parse_freq),
1981 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1982 "number of mmap data pages and AUX area tracing mmap pages",
1983 record__parse_mmap_pages),
1984 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
1985 "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
1986 record__mmap_flush_parse),
1987 OPT_BOOLEAN(0, "group", &record.opts.group,
1988 "put the counters into a counter group"),
1989 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1990 NULL, "enables call-graph recording" ,
1991 &record_callchain_opt),
1992 OPT_CALLBACK(0, "call-graph", &record.opts,
1993 "record_mode[,record_size]", record_callchain_help,
1994 &record_parse_callchain_opt),
1995 OPT_INCR('v', "verbose", &verbose,
1996 "be more verbose (show counter open errors, etc)"),
1997 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1998 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1999 "per thread counts"),
2000 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
2001 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
2002 "Record the sample physical addresses"),
2003 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
2004 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
2005 &record.opts.sample_time_set,
2006 "Record the sample timestamps"),
2007 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
2008 "Record the sample period"),
2009 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
2011 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
2012 &record.no_buildid_cache_set,
2013 "do not update the buildid cache"),
2014 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
2015 &record.no_buildid_set,
2016 "do not collect buildids in perf.data"),
2017 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
2018 "monitor event in cgroup name only",
2020 OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
2021 "ms to wait before starting measurement after program start"),
2022 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
2025 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
2026 "branch any", "sample any taken branches",
2027 parse_branch_stack),
2029 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
2030 "branch filter mask", "branch stack filter modes",
2031 parse_branch_stack),
2032 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
2033 "sample by weight (on special events only)"),
2034 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
2035 "sample transaction flags (special events only)"),
2036 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
2037 "use per-thread mmaps"),
2038 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
2039 "sample selected machine registers on interrupt,"
2040 " use '-I?' to list register names", parse_regs),
2041 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
2042 "sample selected machine registers on interrupt,"
2043 " use '-I?' to list register names", parse_regs),
2044 OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
2045 "Record running/enabled time of read (:S) events"),
2046 OPT_CALLBACK('k', "clockid", &record.opts,
2047 "clockid", "clockid to use for events, see clock_gettime()",
2049 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
2050 "opts", "AUX area tracing Snapshot Mode", ""),
2051 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
2052 "per thread proc mmap processing timeout in ms"),
2053 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
2054 "Record namespaces events"),
2055 OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
2056 "Record context switch events"),
2057 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
2058 "Configure all used events to run in kernel space.",
2059 PARSE_OPT_EXCLUSIVE),
2060 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
2061 "Configure all used events to run in user space.",
2062 PARSE_OPT_EXCLUSIVE),
2063 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
2064 "clang binary to use for compiling BPF scriptlets"),
2065 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
2066 "options passed to clang when compiling BPF scriptlets"),
2067 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2068 "file", "vmlinux pathname"),
2069 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
2070 "Record build-id of all DSOs regardless of hits"),
2071 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
2072 "append timestamp to output filename"),
2073 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
2074 "Record timestamp boundary (time of first/last samples)"),
2075 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
2076 &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
2077 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
2079 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
2080 "Limit number of switch output generated files"),
2081 OPT_BOOLEAN(0, "dry-run", &dry_run,
2082 "Parse options then exit"),
2083 #ifdef HAVE_AIO_SUPPORT
2084 OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
2085 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
2088 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
2089 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
2090 record__parse_affinity),
2094 struct option *record_options = __record_options;
2096 int cmd_record(int argc, const char **argv)
2099 struct record *rec = &record;
2100 char errbuf[BUFSIZ];
2102 setlocale(LC_ALL, "");
2104 #ifndef HAVE_LIBBPF_SUPPORT
2105 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
2106 set_nobuild('\0', "clang-path", true);
2107 set_nobuild('\0', "clang-opt", true);
2111 #ifndef HAVE_BPF_PROLOGUE
2112 # if !defined (HAVE_DWARF_SUPPORT)
2113 # define REASON "NO_DWARF=1"
2114 # elif !defined (HAVE_LIBBPF_SUPPORT)
2115 # define REASON "NO_LIBBPF=1"
2117 # define REASON "this architecture doesn't support BPF prologue"
2119 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2120 set_nobuild('\0', "vmlinux", true);
2125 CPU_ZERO(&rec->affinity_mask);
2126 rec->opts.affinity = PERF_AFFINITY_SYS;
2128 rec->evlist = perf_evlist__new();
2129 if (rec->evlist == NULL)
2132 err = perf_config(perf_record_config, rec);
2136 argc = parse_options(argc, argv, record_options, record_usage,
2137 PARSE_OPT_STOP_AT_NON_OPTION);
2139 perf_quiet_option();
2141 /* Make system wide (-a) the default target. */
2142 if (!argc && target__none(&rec->opts.target))
2143 rec->opts.target.system_wide = true;
2145 if (nr_cgroups && !rec->opts.target.system_wide) {
2146 usage_with_options_msg(record_usage, record_options,
2147 "cgroup monitoring only available in system-wide mode");
2150 if (rec->opts.record_switch_events &&
2151 !perf_can_record_switch_events()) {
2152 ui__error("kernel does not support recording context switch events\n");
2153 parse_options_usage(record_usage, record_options, "switch-events", 0);
2157 if (switch_output_setup(rec)) {
2158 parse_options_usage(record_usage, record_options, "switch-output", 0);
2162 if (rec->switch_output.time) {
2163 signal(SIGALRM, alarm_sig_handler);
2164 alarm(rec->switch_output.time);
2167 if (rec->switch_output.num_files) {
2168 rec->switch_output.filenames = calloc(sizeof(char *),
2169 rec->switch_output.num_files);
2170 if (!rec->switch_output.filenames)
2175 * Allow aliases to facilitate the lookup of symbols for address
2176 * filters. Refer to auxtrace_parse_filters().
2178 symbol_conf.allow_aliases = true;
2182 err = record__auxtrace_init(rec);
2189 err = bpf__setup_stdout(rec->evlist);
2191 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2192 pr_err("ERROR: Setup BPF stdout failed: %s\n",
2199 if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
2201 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
2202 "check /proc/sys/kernel/kptr_restrict.\n\n"
2203 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
2204 "file is not found in the buildid cache or in the vmlinux path.\n\n"
2205 "Samples in kernel modules won't be resolved at all.\n\n"
2206 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
2207 "even with a suitable vmlinux or kallsyms file.\n\n");
2209 if (rec->no_buildid_cache || rec->no_buildid) {
2210 disable_buildid_cache();
2211 } else if (rec->switch_output.enabled) {
2213 * In 'perf record --switch-output', disable buildid
2214 * generation by default to reduce data file switching
2215 * overhead. Still generate buildid if they are required
2218 * perf record --switch-output --no-no-buildid \
2219 * --no-no-buildid-cache
2221 * Following code equals to:
2223 * if ((rec->no_buildid || !rec->no_buildid_set) &&
2224 * (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2225 * disable_buildid_cache();
2227 bool disable = true;
2229 if (rec->no_buildid_set && !rec->no_buildid)
2231 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2234 rec->no_buildid = true;
2235 rec->no_buildid_cache = true;
2236 disable_buildid_cache();
2240 if (record.opts.overwrite)
2241 record.opts.tail_synthesize = true;
2243 if (rec->evlist->nr_entries == 0 &&
2244 __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
2245 pr_err("Not enough memory for event selector list\n");
2249 if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2250 rec->opts.no_inherit = true;
2252 err = target__validate(&rec->opts.target);
2254 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2255 ui__warning("%s\n", errbuf);
2258 err = target__parse_uid(&rec->opts.target);
2260 int saved_errno = errno;
2262 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2263 ui__error("%s", errbuf);
2269 /* Enable ignoring missing threads when -u/-p option is defined. */
2270 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
2273 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
2274 usage_with_options(record_usage, record_options);
2276 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2281 * We take all buildids when the file contains
2282 * AUX area tracing data because we do not decode the
2283 * trace because it would take too long.
2285 if (rec->opts.full_auxtrace)
2286 rec->buildid_all = true;
2288 if (record_opts__config(&rec->opts)) {
2293 if (rec->opts.nr_cblocks > nr_cblocks_max)
2294 rec->opts.nr_cblocks = nr_cblocks_max;
2296 pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2298 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2299 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
2301 err = __cmd_record(&record, argc, argv);
2303 perf_evlist__delete(rec->evlist);
2305 auxtrace_record__free(rec->itr);
2309 static void snapshot_sig_handler(int sig __maybe_unused)
2311 struct record *rec = &record;
2313 if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2314 trigger_hit(&auxtrace_snapshot_trigger);
2315 auxtrace_record__snapshot_started = 1;
2316 if (auxtrace_record__snapshot_start(record.itr))
2317 trigger_error(&auxtrace_snapshot_trigger);
2320 if (switch_output_signal(rec))
2321 trigger_hit(&switch_output_trigger);
2324 static void alarm_sig_handler(int sig __maybe_unused)
2326 struct record *rec = &record;
2328 if (switch_output_time(rec))
2329 trigger_hit(&switch_output_trigger);