1 // SPDX-License-Identifier: GPL-2.0
5 * Builtin record command: Record the profile of a workload
6 * (or a CPU, or a PID) into the perf.data output file - for
7 * later analysis via perf report.
13 #include "util/build-id.h"
14 #include "util/util.h"
15 #include <subcmd/parse-options.h>
16 #include "util/parse-events.h"
17 #include "util/config.h"
19 #include "util/callchain.h"
20 #include "util/cgroup.h"
21 #include "util/header.h"
22 #include "util/event.h"
23 #include "util/evlist.h"
24 #include "util/evsel.h"
25 #include "util/debug.h"
26 #include "util/session.h"
27 #include "util/tool.h"
28 #include "util/symbol.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/llvm-utils.h"
38 #include "util/bpf-loader.h"
39 #include "util/trigger.h"
40 #include "util/perf-hooks.h"
41 #include "util/cpu-set-sched.h"
42 #include "util/time-utils.h"
43 #include "util/units.h"
44 #include "util/bpf-event.h"
56 #include <linux/time64.h>
58 struct switch_output {
68 struct perf_tool tool;
69 struct record_opts opts;
71 struct perf_data data;
72 struct auxtrace_record *itr;
73 struct perf_evlist *evlist;
74 struct perf_session *session;
78 bool no_buildid_cache;
79 bool no_buildid_cache_set;
81 bool timestamp_filename;
82 bool timestamp_boundary;
83 struct switch_output switch_output;
84 unsigned long long samples;
85 cpu_set_t affinity_mask;
88 static volatile int auxtrace_record__snapshot_started;
89 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
90 static DEFINE_TRIGGER(switch_output_trigger);
92 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
/*
 * True when signal-driven output switching is configured
 * (rec->switch_output.signal) and switch_output_trigger is ready.
 */
96 static bool switch_output_signal(struct record *rec)
98 return rec->switch_output.signal &&
99 trigger_is_ready(&switch_output_trigger);
/*
 * True when a size threshold is configured (rec->switch_output.size is
 * non-zero), switch_output_trigger is ready, and rec->bytes_written has
 * reached or passed that threshold.
 */
102 static bool switch_output_size(struct record *rec)
104 return rec->switch_output.size &&
105 trigger_is_ready(&switch_output_trigger) &&
106 (rec->bytes_written >= rec->switch_output.size);
/*
 * True when time-driven output switching is configured
 * (rec->switch_output.time) and switch_output_trigger is ready.
 */
109 static bool switch_output_time(struct record *rec)
111 return rec->switch_output.time &&
112 trigger_is_ready(&switch_output_trigger);
/*
 * Write size bytes from bf to the session data file and account them in
 * rec->bytes_written.  The map argument is not used here.
 */
115 static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
116 void *bf, size_t size)
118 struct perf_data_file *file = &rec->session->data->file;
/* A negative result from perf_data_file__write() is reported via pr_err(). */
120 if (perf_data_file__write(file, bf, size) < 0) {
121 pr_err("failed to write perf data, error: %m\n");
125 rec->bytes_written += size;
/* Size-based output switching: hit the trigger once the threshold is reached. */
127 if (switch_output_size(rec))
128 trigger_hit(&switch_output_trigger);
133 #ifdef HAVE_AIO_SUPPORT
/*
 * Queue an asynchronous write of size bytes from buf to trace_fd at file
 * offset off, using the caller-provided control block.
 */
134 static int record__aio_write(struct aiocb *cblock, int trace_fd,
135 void *buf, size_t size, off_t off)
/* Describe the request; SIGEV_NONE means no completion notification. */
139 cblock->aio_fildes = trace_fd;
140 cblock->aio_buf = buf;
141 cblock->aio_nbytes = size;
142 cblock->aio_offset = off;
143 cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
146 rc = aio_write(cblock);
/*
 * For an errno other than EAGAIN, aio_fildes is reset to -1 and the
 * failure is logged.  NOTE(review): the surrounding retry/branch
 * structure is not visible in this excerpt.
 */
149 } else if (errno != EAGAIN) {
150 cblock->aio_fildes = -1;
151 pr_err("failed to queue perf data, error: %m\n");
159 static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
165 ssize_t aio_ret, written;
167 aio_errno = aio_error(cblock);
168 if (aio_errno == EINPROGRESS)
171 written = aio_ret = aio_return(cblock);
173 if (aio_errno != EINTR)
174 pr_err("failed to write perf data, error: %m\n");
178 rem_size = cblock->aio_nbytes - written;
181 cblock->aio_fildes = -1;
183 * md->refcount is incremented in perf_mmap__push() for
184 * every enqueued aio write request so decrement it because
185 * the request is now complete.
191 * aio write request may require restart with the
192 * remainder if the kernel didn't write whole
195 rem_off = cblock->aio_offset + written;
196 rem_buf = (void *)(cblock->aio_buf + written);
197 record__aio_write(cblock, cblock->aio_fildes,
198 rem_buf, rem_size, rem_off);
205 static int record__aio_sync(struct perf_mmap *md, bool sync_all)
207 struct aiocb **aiocb = md->aio.aiocb;
208 struct aiocb *cblocks = md->aio.cblocks;
209 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
214 for (i = 0; i < md->aio.nr_cblocks; ++i) {
215 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
222 * Started aio write is not complete yet
223 * so it has to be waited before the
226 aiocb[i] = &cblocks[i];
233 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
234 if (!(errno == EAGAIN || errno == EINTR))
235 pr_err("failed to sync perf data, error: %m\n");
/*
 * AIO push callback: to is the struct record; bf/size/off describe the
 * chunk to be queued on the session data file descriptor.
 */
240 static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
242 struct record *rec = to;
243 int ret, trace_fd = rec->session->data->file.fd;
247 ret = record__aio_write(cblock, trace_fd, bf, size, off);
/*
 * Account the queued bytes and hit the size-based switch trigger when the
 * threshold is reached.  NOTE(review): any guard on ret around these
 * statements is on a line not visible in this excerpt.
 */
249 rec->bytes_written += size;
250 if (switch_output_size(rec))
251 trigger_hit(&switch_output_trigger);
/* Return the current file offset of trace_fd (lseek with SEEK_CUR, offset 0). */
257 static off_t record__aio_get_pos(int trace_fd)
259 return lseek(trace_fd, 0, SEEK_CUR);
/* Move the file offset of trace_fd to pos; the lseek() result is not checked. */
262 static void record__aio_set_pos(int trace_fd, off_t pos)
264 lseek(trace_fd, pos, SEEK_SET);
/*
 * Call record__aio_sync(map, true) on the maps in rec->evlist->mmap.
 * NOTE(review): the action taken when rec->opts.nr_cblocks is zero and any
 * per-map guard are on lines not visible in this excerpt.
 */
267 static void record__aio_mmap_read_sync(struct record *rec)
270 struct perf_evlist *evlist = rec->evlist;
271 struct perf_mmap *maps = evlist->mmap;
273 if (!rec->opts.nr_cblocks)
276 for (i = 0; i < evlist->nr_mmaps; i++) {
277 struct perf_mmap *map = &maps[i];
280 record__aio_sync(map, true);
284 static int nr_cblocks_default = 1;
285 static int nr_cblocks_max = 4;
/*
 * Option callback storing the number of AIO control blocks in the
 * struct record_opts referenced by opt->value.
 * NOTE(review): the conditions selecting between the assignments below are
 * on lines not visible in this excerpt.
 */
287 static int record__aio_parse(const struct option *opt,
291 struct record_opts *opts = (struct record_opts *)opt->value;
294 opts->nr_cblocks = 0;
/* Parse the value with base auto-detection; zero falls back to the default. */
297 opts->nr_cblocks = strtol(str, NULL, 0);
298 if (!opts->nr_cblocks)
299 opts->nr_cblocks = nr_cblocks_default;
304 #else /* HAVE_AIO_SUPPORT */
305 static int nr_cblocks_max = 0;
307 static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
312 static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
313 void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
318 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
323 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
327 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
/* Non-zero when at least one AIO control block is configured (nr_cblocks > 0). */
332 static int record__aio_enabled(struct record *rec)
334 return rec->opts.nr_cblocks > 0;
/*
 * Callback for synthesized events: recover the enclosing struct record
 * from tool and write event->header.size bytes of the event via
 * record__write() with a NULL map.  sample and machine are unused.
 */
337 static int process_synthesized_event(struct perf_tool *tool,
338 union perf_event *event,
339 struct perf_sample *sample __maybe_unused,
340 struct machine *machine __maybe_unused)
342 struct record *rec = container_of(tool, struct record, tool);
343 return record__write(rec, NULL, event, event->header.size);
/*
 * Push callback for the non-AIO path: to is the struct record, and the
 * bf/size chunk is forwarded to record__write().
 */
346 static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
348 struct record *rec = to;
351 return record__write(rec, map, bf, size);
354 static volatile int done;
355 static volatile int signr = -1;
356 static volatile int child_finished;
358 static void sig_handler(int sig)
/* SIGSEGV handler: call perf_hooks__recover(), then sighandler_dump_stack(). */
368 static void sigsegv_handler(int sig)
370 perf_hooks__recover();
371 sighandler_dump_stack(sig);
374 static void record__sig_exit(void)
379 signal(signr, SIG_DFL);
383 #ifdef HAVE_AUXTRACE_SUPPORT
385 static int record__process_auxtrace(struct perf_tool *tool,
386 struct perf_mmap *map,
387 union perf_event *event, void *data1,
388 size_t len1, void *data2, size_t len2)
390 struct record *rec = container_of(tool, struct record, tool);
391 struct perf_data *data = &rec->data;
395 if (!perf_data__is_pipe(data)) {
397 int fd = perf_data__fd(data);
400 file_offset = lseek(fd, 0, SEEK_CUR);
401 if (file_offset == -1)
403 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
409 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
410 padding = (len1 + len2) & 7;
412 padding = 8 - padding;
414 record__write(rec, map, event, event->header.size);
415 record__write(rec, map, data1, len1);
417 record__write(rec, map, data2, len2);
418 record__write(rec, map, &pad, padding);
423 static int record__auxtrace_mmap_read(struct record *rec,
424 struct perf_mmap *map)
428 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
429 record__process_auxtrace);
439 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
440 struct perf_mmap *map)
444 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
445 record__process_auxtrace,
446 rec->opts.auxtrace_snapshot_size);
456 static int record__auxtrace_read_snapshot_all(struct record *rec)
461 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
462 struct perf_mmap *map = &rec->evlist->mmap[i];
464 if (!map->auxtrace_mmap.base)
467 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
476 static void record__read_auxtrace_snapshot(struct record *rec)
478 pr_debug("Recording AUX area tracing snapshot\n");
479 if (record__auxtrace_read_snapshot_all(rec) < 0) {
480 trigger_error(&auxtrace_snapshot_trigger);
482 if (auxtrace_record__snapshot_finish(rec->itr))
483 trigger_error(&auxtrace_snapshot_trigger);
485 trigger_ready(&auxtrace_snapshot_trigger);
489 static int record__auxtrace_init(struct record *rec)
494 rec->itr = auxtrace_record__init(rec->evlist, &err);
499 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
500 rec->opts.auxtrace_snapshot_opts);
504 return auxtrace_parse_filters(rec->evlist);
510 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
511 struct perf_mmap *map __maybe_unused)
517 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
522 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
527 static int record__auxtrace_init(struct record *rec __maybe_unused)
534 static int record__mmap_evlist(struct record *rec,
535 struct perf_evlist *evlist)
537 struct record_opts *opts = &rec->opts;
540 if (opts->affinity != PERF_AFFINITY_SYS)
541 cpu__setup_cpunode_map();
543 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
544 opts->auxtrace_mmap_pages,
545 opts->auxtrace_snapshot_mode,
546 opts->nr_cblocks, opts->affinity) < 0) {
547 if (errno == EPERM) {
548 pr_err("Permission error mapping pages.\n"
549 "Consider increasing "
550 "/proc/sys/kernel/perf_event_mlock_kb,\n"
551 "or try again with a smaller value of -m/--mmap_pages.\n"
552 "(current value: %u,%u)\n",
553 opts->mmap_pages, opts->auxtrace_mmap_pages);
556 pr_err("failed to mmap with %d (%s)\n", errno,
557 str_error_r(errno, msg, sizeof(msg)));
/* Convenience wrapper: mmap the record's own evlist via record__mmap_evlist(). */
567 static int record__mmap(struct record *rec)
569 return record__mmap_evlist(rec, rec->evlist);
572 static int record__open(struct record *rec)
575 struct perf_evsel *pos;
576 struct perf_evlist *evlist = rec->evlist;
577 struct perf_session *session = rec->session;
578 struct record_opts *opts = &rec->opts;
582 * For initial_delay we need to add a dummy event so that we can track
583 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
584 * real events, the ones asked by the user.
586 if (opts->initial_delay) {
587 if (perf_evlist__add_dummy(evlist))
590 pos = perf_evlist__first(evlist);
592 pos = perf_evlist__last(evlist);
594 pos->attr.enable_on_exec = 1;
597 perf_evlist__config(evlist, opts, &callchain_param);
599 evlist__for_each_entry(evlist, pos) {
601 if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
602 if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
604 ui__warning("%s\n", msg);
607 if ((errno == EINVAL || errno == EBADF) &&
608 pos->leader != pos &&
610 pos = perf_evlist__reset_weak_group(evlist, pos);
614 perf_evsel__open_strerror(pos, &opts->target,
615 errno, msg, sizeof(msg));
616 ui__error("%s\n", msg);
620 pos->supported = true;
623 if (perf_evlist__apply_filters(evlist, &pos)) {
624 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
625 pos->filter, perf_evsel__name(pos), errno,
626 str_error_r(errno, msg, sizeof(msg)));
631 rc = record__mmap(rec);
635 session->evlist = evlist;
636 perf_session__set_id_hdr_size(session);
/*
 * Sample callback: track the first and the most recent sample timestamps in
 * rec->evlist and mark the DSO hit by the sample via
 * build_id__mark_dso_hit().
 */
641 static int process_sample_event(struct perf_tool *tool,
642 union perf_event *event,
643 struct perf_sample *sample,
644 struct perf_evsel *evsel,
645 struct machine *machine)
647 struct record *rec = container_of(tool, struct record, tool);
/* first_sample_time is only set while it still holds 0. */
649 if (rec->evlist->first_sample_time == 0)
650 rec->evlist->first_sample_time = sample->time;
652 rec->evlist->last_sample_time = sample->time;
/* NOTE(review): the statement guarded by buildid_all is not visible in this excerpt. */
654 if (rec->buildid_all)
658 return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
661 static int process_buildids(struct record *rec)
663 struct perf_session *session = rec->session;
665 if (perf_data__size(&rec->data) == 0)
669 * During this process, it'll load kernel map and replace the
670 * dso->long_name to a real pathname it found. In this case
671 * we prefer the vmlinux path like
672 * /lib/modules/3.16.4/build/vmlinux
674 * rather than build-id path (in debug directory).
675 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
677 symbol_conf.ignore_vmlinux_buildid = true;
680 * If --buildid-all is given, it marks all DSO regardless of hits,
681 * so no need to process samples. But if timestamp_boundary is enabled,
682 * it still needs to walk on all samples to get the timestamps of
683 * first/last samples.
685 if (rec->buildid_all && !rec->timestamp_boundary)
686 rec->tool.sample = NULL;
688 return perf_session__process_events(session);
691 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
694 struct perf_tool *tool = data;
696 *As for guest kernel when processing subcommand record&report,
697 *we arrange module mmap prior to guest kernel mmap and trigger
698 *a preload dso because default guest module symbols are loaded
699 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
700 *method is used to avoid missing symbols when the first addr is
701 *in module instead of in guest kernel.
703 err = perf_event__synthesize_modules(tool, process_synthesized_event,
706 pr_err("Couldn't record guest kernel [%d]'s reference"
707 " relocation symbol.\n", machine->pid);
710 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
711 * sometimes has no _text.
713 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
716 pr_err("Couldn't record guest kernel [%d]'s reference"
717 " relocation symbol.\n", machine->pid);
720 static struct perf_event_header finished_round_event = {
721 .size = sizeof(struct perf_event_header),
722 .type = PERF_RECORD_FINISHED_ROUND,
/*
 * When an affinity mode other than PERF_AFFINITY_SYS is selected and the
 * record's current mask differs from the map's mask, replace the record's
 * mask with the map's and apply it with sched_setaffinity() (pid 0).
 */
725 static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
727 if (rec->opts.affinity != PERF_AFFINITY_SYS &&
728 !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
/* Zero then OR: the net effect is a copy of map->affinity_mask. */
729 CPU_ZERO(&rec->affinity_mask);
730 CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask);
731 sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask);
735 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
738 u64 bytes_written = rec->bytes_written;
741 struct perf_mmap *maps;
742 int trace_fd = rec->data.file.fd;
748 maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
752 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
755 if (record__aio_enabled(rec))
756 off = record__aio_get_pos(trace_fd);
758 for (i = 0; i < evlist->nr_mmaps; i++) {
759 struct perf_mmap *map = &maps[i];
762 record__adjust_affinity(rec, map);
763 if (!record__aio_enabled(rec)) {
764 if (perf_mmap__push(map, rec, record__pushfn) != 0) {
771 * Call record__aio_sync() to wait till map->data buffer
772 * becomes available after previous aio write request.
774 idx = record__aio_sync(map, false);
775 if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
776 record__aio_set_pos(trace_fd, off);
783 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
784 record__auxtrace_mmap_read(rec, map) != 0) {
790 if (record__aio_enabled(rec))
791 record__aio_set_pos(trace_fd, off);
794 * Mark the round finished in case we wrote
795 * at least one event.
797 if (bytes_written != rec->bytes_written)
798 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
801 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
/*
 * Read rec->evlist twice through record__mmap_read_evlist(): first with the
 * last argument false, then with it true.
 * NOTE(review): the handling of err between the two calls is not visible
 * in this excerpt.
 */
806 static int record__mmap_read_all(struct record *rec)
810 err = record__mmap_read_evlist(rec, rec->evlist, false);
814 return record__mmap_read_evlist(rec, rec->evlist, true);
/*
 * Initialise the session header feature bits: set every feature in
 * [HEADER_FIRST_FEATURE, HEADER_LAST_FEATURE), then clear the ones that do
 * not apply to this recording.
 */
817 static void record__init_features(struct record *rec)
819 struct perf_session *session = rec->session;
822 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
823 perf_header__set_feat(&session->header, feat);
/* NOTE(review): the condition guarding this clear is not visible in this excerpt. */
826 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
/* No tracepoint events in the evlist: no tracing data section. */
828 if (!have_tracepoints(&rec->evlist->entries))
829 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
831 if (!rec->opts.branch_stack)
832 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
834 if (!rec->opts.full_auxtrace)
835 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
/* The clockid feature is kept only when both use_clockid and clockid_res_ns are set. */
837 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
838 perf_header__clear_feat(&session->header, HEADER_CLOCKID);
840 perf_header__clear_feat(&session->header, HEADER_STAT);
/*
 * Finalise the current output file: fold rec->bytes_written into the header
 * data size, record the file size, handle build-ids, and write the session
 * header.
 * NOTE(review): any early-return guard is on lines not visible in this
 * excerpt.
 */
844 record__finish_output(struct record *rec)
846 struct perf_data *data = &rec->data;
847 int fd = perf_data__fd(data);
852 rec->session->header.data_size += rec->bytes_written;
/* The current file offset is taken as the file size. */
853 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
/* Build-id processing is skipped entirely when no_buildid is set. */
855 if (!rec->no_buildid) {
856 process_buildids(rec);
858 if (rec->buildid_all)
859 dsos__hit_all(rec->session);
861 perf_session__write_header(rec->session, rec->evlist, fd, true);
/*
 * Synthesize thread-map events for the forked workload: build a temporary
 * thread_map from rec->evlist->workload.pid, feed it to
 * perf_event__synthesize_thread_map() against the host machine, and drop
 * the map reference afterwards.
 * NOTE(review): the actions taken when tail does not match
 * rec->opts.tail_synthesize, or when the map allocation fails, are on lines
 * not visible in this excerpt.
 */
866 static int record__synthesize_workload(struct record *rec, bool tail)
869 struct thread_map *thread_map;
871 if (rec->opts.tail_synthesize != tail)
874 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
875 if (thread_map == NULL)
878 err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
879 process_synthesized_event,
880 &rec->session->machines.host,
881 rec->opts.sample_address);
882 thread_map__put(thread_map);
886 static int record__synthesize(struct record *rec, bool tail);
889 record__switch_output(struct record *rec, bool at_exit)
891 struct perf_data *data = &rec->data;
894 /* Same Size: "2015122520103046"*/
895 char timestamp[] = "InvalidTimestamp";
897 record__aio_mmap_read_sync(rec);
899 record__synthesize(rec, true);
900 if (target__none(&rec->opts.target))
901 record__synthesize_workload(rec, true);
904 record__finish_output(rec);
905 err = fetch_current_timestamp(timestamp, sizeof(timestamp));
907 pr_err("Failed to get current timestamp\n");
911 fd = perf_data__switch(data, timestamp,
912 rec->session->header.data_offset,
914 if (fd >= 0 && !at_exit) {
915 rec->bytes_written = 0;
916 rec->session->header.data_size = 0;
920 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
921 data->path, timestamp);
923 /* Output tracking events */
925 record__synthesize(rec, false);
928 * In 'perf record --switch-output' without -a,
929 * record__synthesize() in record__switch_output() won't
930 * generate tracking events because there's no thread_map
931 * in evlist, so the newly created perf.data doesn't
932 * contain map and comm information.
933 * Create a fake thread_map and directly call
934 * perf_event__synthesize_thread_map() for those events.
936 if (target__none(&rec->opts.target))
937 record__synthesize_workload(rec, false);
942 static volatile int workload_exec_errno;
945 * perf_evlist__prepare_workload will send a SIGUSR1
946 * if the fork fails, since we asked by setting its
947 * want_signal to true.
/*
 * Signal handler for a failed workload start: store the integer carried in
 * info->si_value into workload_exec_errno.  signo and ucontext are unused.
 */
949 static void workload_exec_failed_signal(int signo __maybe_unused,
951 void *ucontext __maybe_unused)
953 workload_exec_errno = info->si_value.sival_int;
958 static void snapshot_sig_handler(int sig);
959 static void alarm_sig_handler(int sig);
962 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
963 struct perf_tool *tool __maybe_unused,
964 perf_event__handler_t process __maybe_unused,
965 struct machine *machine __maybe_unused)
/*
 * Pick a perf_event_mmap_page from the evlist: the base of the first
 * regular mmap if it exists, otherwise the base of the first overwrite
 * mmap.
 * NOTE(review): the result when neither is mapped is on a line not visible
 * in this excerpt.
 */
970 static const struct perf_event_mmap_page *
971 perf_evlist__pick_pc(struct perf_evlist *evlist)
974 if (evlist->mmap && evlist->mmap[0].base)
975 return evlist->mmap[0].base;
976 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
977 return evlist->overwrite_mmap[0].base;
982 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
984 const struct perf_event_mmap_page *pc;
986 pc = perf_evlist__pick_pc(rec->evlist);
992 static int record__synthesize(struct record *rec, bool tail)
994 struct perf_session *session = rec->session;
995 struct machine *machine = &session->machines.host;
996 struct perf_data *data = &rec->data;
997 struct record_opts *opts = &rec->opts;
998 struct perf_tool *tool = &rec->tool;
999 int fd = perf_data__fd(data);
1002 if (rec->opts.tail_synthesize != tail)
1005 if (data->is_pipe) {
1007 * We need to synthesize events first, because some
1008 * features work on top of them (on report side).
1010 err = perf_event__synthesize_attrs(tool, rec->evlist,
1011 process_synthesized_event);
1013 pr_err("Couldn't synthesize attrs.\n");
1017 err = perf_event__synthesize_features(tool, session, rec->evlist,
1018 process_synthesized_event);
1020 pr_err("Couldn't synthesize features.\n");
1024 if (have_tracepoints(&rec->evlist->entries)) {
1026 * FIXME err <= 0 here actually means that
1027 * there were no tracepoints so it's not really
1028 * an error, just that we don't need to
1029 * synthesize anything. We really have to
1030 * return this more properly and also
1031 * propagate errors that now are calling die()
1033 err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
1034 process_synthesized_event);
1036 pr_err("Couldn't record tracing data.\n");
1039 rec->bytes_written += err;
1043 err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1044 process_synthesized_event, machine);
1048 if (rec->opts.full_auxtrace) {
1049 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1050 session, process_synthesized_event);
1055 if (!perf_evlist__exclude_kernel(rec->evlist)) {
1056 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1058 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1059 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1060 "Check /proc/kallsyms permission or run as root.\n");
1062 err = perf_event__synthesize_modules(tool, process_synthesized_event,
1064 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1065 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1066 "Check /proc/modules permission or run as root.\n");
1070 machines__process_guests(&session->machines,
1071 perf_event__synthesize_guest_os, tool);
1074 err = perf_event__synthesize_extra_attr(&rec->tool,
1076 process_synthesized_event,
1081 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
1082 process_synthesized_event,
1085 pr_err("Couldn't synthesize thread map.\n");
1089 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
1090 process_synthesized_event, NULL);
1092 pr_err("Couldn't synthesize cpu map.\n");
1096 err = perf_event__synthesize_bpf_events(tool, process_synthesized_event,
1099 pr_warning("Couldn't synthesize bpf events.\n");
1101 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
1102 process_synthesized_event, opts->sample_address,
1108 static int __cmd_record(struct record *rec, int argc, const char **argv)
1112 unsigned long waking = 0;
1113 const bool forks = argc > 0;
1114 struct perf_tool *tool = &rec->tool;
1115 struct record_opts *opts = &rec->opts;
1116 struct perf_data *data = &rec->data;
1117 struct perf_session *session;
1118 bool disabled = false, draining = false;
1121 atexit(record__sig_exit);
1122 signal(SIGCHLD, sig_handler);
1123 signal(SIGINT, sig_handler);
1124 signal(SIGTERM, sig_handler);
1125 signal(SIGSEGV, sigsegv_handler);
1127 if (rec->opts.record_namespaces)
1128 tool->namespace_events = true;
1130 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
1131 signal(SIGUSR2, snapshot_sig_handler);
1132 if (rec->opts.auxtrace_snapshot_mode)
1133 trigger_on(&auxtrace_snapshot_trigger);
1134 if (rec->switch_output.enabled)
1135 trigger_on(&switch_output_trigger);
1137 signal(SIGUSR2, SIG_IGN);
1140 session = perf_session__new(data, false, tool);
1141 if (session == NULL) {
1142 pr_err("Perf session creation failed.\n");
1146 fd = perf_data__fd(data);
1147 rec->session = session;
1149 record__init_features(rec);
1151 if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1152 session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
1155 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
1156 argv, data->is_pipe,
1157 workload_exec_failed_signal);
1159 pr_err("Couldn't run the workload!\n");
1161 goto out_delete_session;
1166 * If we have just a single event and are sending data
1167 * through pipe, we need to force the ids allocation,
1168 * because we synthesize event name through the pipe
1169 * and need the id for that.
1171 if (data->is_pipe && rec->evlist->nr_entries == 1)
1172 rec->opts.sample_id = true;
1174 if (record__open(rec) != 0) {
1179 err = bpf__apply_obj_config();
1181 char errbuf[BUFSIZ];
1183 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
1184 pr_err("ERROR: Apply config to BPF failed: %s\n",
1190 * Normally perf_session__new would do this, but it doesn't have the
1193 if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
1194 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
1195 rec->tool.ordered_events = false;
1198 if (!rec->evlist->nr_groups)
1199 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
1201 if (data->is_pipe) {
1202 err = perf_header__write_pipe(fd);
1206 err = perf_session__write_header(session, rec->evlist, fd, false);
1211 if (!rec->no_buildid
1212 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
1213 pr_err("Couldn't generate buildids. "
1214 "Use --no-buildid to profile anyway.\n");
1219 err = record__synthesize(rec, false);
1223 if (rec->realtime_prio) {
1224 struct sched_param param;
1226 param.sched_priority = rec->realtime_prio;
1227 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1228 pr_err("Could not set realtime priority.\n");
1235 * When perf is starting the traced process, all the events
1236 * (apart from group members) have enable_on_exec=1 set,
1237 * so don't spoil it by prematurely enabling them.
1239 if (!target__none(&opts->target) && !opts->initial_delay)
1240 perf_evlist__enable(rec->evlist);
1246 struct machine *machine = &session->machines.host;
1247 union perf_event *event;
1250 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1251 if (event == NULL) {
1257 * Some H/W events are generated before COMM event
1258 * which is emitted during exec(), so perf script
1259 * cannot see a correct process name for those events.
1260 * Synthesize COMM event to prevent it.
1262 tgid = perf_event__synthesize_comm(tool, event,
1263 rec->evlist->workload.pid,
1264 process_synthesized_event,
1271 event = malloc(sizeof(event->namespaces) +
1272 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1273 machine->id_hdr_size);
1274 if (event == NULL) {
1280 * Synthesize NAMESPACES event for the command specified.
1282 perf_event__synthesize_namespaces(tool, event,
1283 rec->evlist->workload.pid,
1284 tgid, process_synthesized_event,
1288 perf_evlist__start_workload(rec->evlist);
1291 if (opts->initial_delay) {
1292 usleep(opts->initial_delay * USEC_PER_MSEC);
1293 perf_evlist__enable(rec->evlist);
1296 trigger_ready(&auxtrace_snapshot_trigger);
1297 trigger_ready(&switch_output_trigger);
1298 perf_hooks__invoke_record_start();
1300 unsigned long long hits = rec->samples;
1303 * rec->evlist->bkw_mmap_state may be
1304 * BKW_MMAP_EMPTY here: when done == true and
1305 * hits != rec->samples in previous round.
1307 * perf_evlist__toggle_bkw_mmap ensures we never
1308 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1310 if (trigger_is_hit(&switch_output_trigger) || done || draining)
1311 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1313 if (record__mmap_read_all(rec) < 0) {
1314 trigger_error(&auxtrace_snapshot_trigger);
1315 trigger_error(&switch_output_trigger);
1320 if (auxtrace_record__snapshot_started) {
1321 auxtrace_record__snapshot_started = 0;
1322 if (!trigger_is_error(&auxtrace_snapshot_trigger))
1323 record__read_auxtrace_snapshot(rec);
1324 if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1325 pr_err("AUX area tracing snapshot failed\n");
1331 if (trigger_is_hit(&switch_output_trigger)) {
1333 * If switch_output_trigger is hit, the data in
1334 * overwritable ring buffer should have been collected,
1335 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1337 * If SIGUSR2 is raised after or during record__mmap_read_all(),
1338 * record__mmap_read_all() didn't collect data from
1339 * overwritable ring buffer. Read again.
1341 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1343 trigger_ready(&switch_output_trigger);
1346 * Reenable events in overwrite ring buffer after
1347 * record__mmap_read_all(): we should have collected
1350 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1353 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1356 fd = record__switch_output(rec, false);
1358 pr_err("Failed to switch to new file\n");
1359 trigger_error(&switch_output_trigger);
1364 /* re-arm the alarm */
1365 if (rec->switch_output.time)
1366 alarm(rec->switch_output.time);
1369 if (hits == rec->samples) {
1370 if (done || draining)
1372 err = perf_evlist__poll(rec->evlist, -1);
1374 * Propagate error, only if there's any. Ignore positive
1375 * number of returned events and interrupt error.
1377 if (err > 0 || (err < 0 && errno == EINTR))
1381 if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1386 * When perf is starting the traced process, at the end events
1387 * die with the process and we wait for that. Thus no need to
1388 * disable events in this case.
1390 if (done && !disabled && !target__none(&opts->target)) {
1391 trigger_off(&auxtrace_snapshot_trigger);
1392 perf_evlist__disable(rec->evlist);
1396 trigger_off(&auxtrace_snapshot_trigger);
1397 trigger_off(&switch_output_trigger);
1399 if (forks && workload_exec_errno) {
1400 char msg[STRERR_BUFSIZE];
1401 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1402 pr_err("Workload failed: %s\n", emsg);
1408 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1410 if (target__none(&rec->opts.target))
1411 record__synthesize_workload(rec, true);
1414 record__aio_mmap_read_sync(rec);
1419 if (!child_finished)
1420 kill(rec->evlist->workload.pid, SIGTERM);
1426 else if (WIFEXITED(exit_status))
1427 status = WEXITSTATUS(exit_status);
1428 else if (WIFSIGNALED(exit_status))
1429 signr = WTERMSIG(exit_status);
1433 record__synthesize(rec, true);
1434 /* this will be recalculated during process_buildids() */
1438 if (!rec->timestamp_filename) {
1439 record__finish_output(rec);
1441 fd = record__switch_output(rec, true);
1444 goto out_delete_session;
1449 perf_hooks__invoke_record_end();
1451 if (!err && !quiet) {
1453 const char *postfix = rec->timestamp_filename ?
1454 ".<timestamp>" : "";
1456 if (rec->samples && !rec->opts.full_auxtrace)
1457 scnprintf(samples, sizeof(samples),
1458 " (%" PRIu64 " samples)", rec->samples);
1462 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1463 perf_data__size(data) / 1024.0 / 1024.0,
1464 data->path, postfix, samples);
1468 perf_session__delete(session);
/*
 * Debug-print the selected callchain record mode by name; for
 * CALLCHAIN_DWARF also print the configured stack dump size.
 */
1472 static void callchain_debug(struct callchain_param *callchain)
/* Indexed by callchain->record_mode. */
1474 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1476 pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1478 if (callchain->record_mode == CALLCHAIN_DWARF)
1479 pr_debug("callchain: stack dump size %d\n",
1480 callchain->dump_size);
/*
 * Apply a call-graph option argument to @callchain.
 *
 * @record:    record options; sample_address is switched on for DWARF mode.
 * @callchain: call-chain parameters to update; enabled becomes !unset.
 * @arg:       mode string handed to parse_callchain_record_opt().
 * @unset:     true when the option was negated (--no-call-graph).
 *
 * NOTE(review): the result of parse_callchain_record_opt() is stored in ret
 * and is presumably what gets returned -- confirm.
 */
1483 int record_opts__parse_callchain(struct record_opts *record,
1484 struct callchain_param *callchain,
1485 const char *arg, bool unset)
1488 callchain->enabled = !unset;
1490 /* --no-call-graph */
1492 callchain->record_mode = CALLCHAIN_NONE;
1493 pr_debug("callchain: disabled\n");
1497 ret = parse_callchain_record_opt(arg, callchain);
1499 /* Enable data address sampling for DWARF unwind. */
1500 if (callchain->record_mode == CALLCHAIN_DWARF)
1501 record->sample_address = true;
/* Log the mode that ended up selected. */
1502 callchain_debug(callchain);
/*
 * Option-parser callback: forwards opt->value (used as the record options)
 * together with the global callchain_param to record_opts__parse_callchain()
 * and returns its result.
 */
1508 int record_parse_callchain_opt(const struct option *opt,
1512 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
/*
 * Option-parser callback that takes no argument (arg and unset are unused):
 * turns call-graph recording on for the callchain_param found in opt->value.
 */
1515 int record_callchain_opt(const struct option *opt,
1516 const char *arg __maybe_unused,
1517 int unset __maybe_unused)
1519 struct callchain_param *callchain = opt->value;
1521 callchain->enabled = true;
/* Only pick frame pointers when no mode has been chosen yet. */
1523 if (callchain->record_mode == CALLCHAIN_NONE)
1524 callchain->record_mode = CALLCHAIN_FP;
1526 callchain_debug(callchain);
/*
 * Config callback for the record.* keys.
 *
 * @var:   configuration key being visited.
 * @value: value attached to the key.
 * @cb:    the struct record to configure.
 *
 * Keys handled here: "record.build-id", "record.call-graph" and, when built
 * with HAVE_AIO_SUPPORT, "record.aio".
 *
 * NOTE(review): value is handed to strcmp()/strtol() without a NULL check;
 * assumes the caller never passes NULL -- confirm.
 */
1530 static int perf_record_config(const char *var, const char *value, void *cb)
1532 struct record *rec = cb;
/* Build-id policy: "cache", "no-cache" or "skip". */
1534 if (!strcmp(var, "record.build-id")) {
1535 if (!strcmp(value, "cache"))
1536 rec->no_buildid_cache = false;
1537 else if (!strcmp(value, "no-cache"))
1538 rec->no_buildid_cache = true;
1539 else if (!strcmp(value, "skip"))
1540 rec->no_buildid = true;
/* Alias: re-dispatch under the generic call-graph key. */
1545 if (!strcmp(var, "record.call-graph")) {
1546 var = "call-graph.record-mode";
1547 return perf_default_config(var, value, cb);
1549 #ifdef HAVE_AIO_SUPPORT
1550 if (!strcmp(var, "record.aio")) {
/* Base 0 lets strtol() accept decimal, 0x-prefixed hex and 0-prefixed octal. */
1551 rec->opts.nr_cblocks = strtol(value, NULL, 0);
/* A zero result (which includes unparsable text) selects the default. */
1552 if (!rec->opts.nr_cblocks)
1553 rec->opts.nr_cblocks = nr_cblocks_default;
/* One entry of the clock name table: a name and the clockid it selects. */
1560 struct clockid_map {
/* Initializer for one table entry mapping name n to clockid c. */
1565 #define CLOCKID_MAP(n, c) \
1566 { .name = n, .clockid = (c), }
/* Sentinel entry: a NULL name ends the table walk in parse_clockid(). */
1568 #define CLOCKID_END { .name = NULL, }
1572 * Add the missing ones, we need to build on many distros...
/* Fallback value used only when the system headers lack CLOCK_MONOTONIC_RAW. */
1574 #ifndef CLOCK_MONOTONIC_RAW
1575 #define CLOCK_MONOTONIC_RAW 4
/* Same fallback for CLOCK_BOOTTIME. */
1577 #ifndef CLOCK_BOOTTIME
1578 #define CLOCK_BOOTTIME 7
/* NOTE(review): presumably guarded by an #ifndef like the two above -- confirm. */
1581 #define CLOCK_TAI 11
/*
 * Clock names accepted by parse_clockid(), which compares them
 * case-insensitively. Several names map to the same clockid: the short
 * spellings at the bottom are aliases of the long ones.
 */
1584 static const struct clockid_map clockids[] = {
1585 /* available for all events, NMI safe */
1586 CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1587 CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1589 /* available for some events */
1590 CLOCKID_MAP("realtime", CLOCK_REALTIME),
1591 CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1592 CLOCKID_MAP("tai", CLOCK_TAI),
1594 /* available for the lazy */
1595 CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1596 CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1597 CLOCKID_MAP("real", CLOCK_REALTIME),
1598 CLOCKID_MAP("boot", CLOCK_BOOTTIME),
/*
 * Query the resolution of clock @clk_id with clock_getres() and store it,
 * converted to nanoseconds, in *@res_ns. A warning is printed when the
 * resolution cannot be determined.
 */
1603 static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
1605 struct timespec res;
/* clock_getres() returns 0 on success. */
1608 if (!clock_getres(clk_id, &res))
1609 *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
1611 pr_warning("WARNING: Failed to determine specified clock resolution.\n");
/*
 * Option-parser callback for the clockid option.
 *
 * @opt:   option descriptor; opt->value is the struct record_opts to fill.
 * @str:   user supplied clock, either a number or a name from clockids[].
 * @unset: negation flag from the option parser.
 *
 * On a match opts->clockid is set and the clock resolution is looked up via
 * get_clockid_res(), whose result is returned. An unknown name clears
 * opts->use_clockid again and prints a warning.
 */
1616 static int parse_clockid(const struct option *opt, const char *str, int unset)
1618 struct record_opts *opts = (struct record_opts *)opt->value;
1619 const struct clockid_map *cm;
/* Keep the untouched input so the warning below can echo it. */
1620 const char *ostr = str;
/* NOTE(review): presumably only executed for the negated option -- confirm. */
1623 opts->use_clockid = 0;
1631 /* no setting it twice */
1632 if (opts->use_clockid)
1635 opts->use_clockid = true;
1637 /* if it's a number, we're done */
1638 if (sscanf(str, "%d", &opts->clockid) == 1)
1639 return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
1641 /* allow a "CLOCK_" prefix to the name */
1642 if (!strncasecmp(str, "CLOCK_", 6))
/* Walk the table until the NULL-name sentinel. */
1645 for (cm = clockids; cm->name; cm++) {
1646 if (!strcasecmp(str, cm->name)) {
1647 opts->clockid = cm->clockid;
1648 return get_clockid_res(opts->clockid,
1649 &opts->clockid_res_ns);
/* No match: undo the flag set above and report the original string. */
1653 opts->use_clockid = false;
1654 ui__warning("unknown clockid %s, check man page\n", ostr);
/*
 * Option-parser callback for the affinity option: maps the strings "node"
 * and "cpu" (compared case-insensitively) to PERF_AFFINITY_NODE and
 * PERF_AFFINITY_CPU in the struct record_opts found in opt->value.
 * No assignment is made here for any other string.
 */
1658 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
1660 struct record_opts *opts = (struct record_opts *)opt->value;
1665 if (!strcasecmp(str, "node"))
1666 opts->affinity = PERF_AFFINITY_NODE;
1667 else if (!strcasecmp(str, "cpu"))
1668 opts->affinity = PERF_AFFINITY_CPU;
/*
 * Option-parser callback for the mmap-pages option. Two page counts can be
 * parsed with __perf_evlist__parse_mmap_pages(): the first is stored in
 * opts->mmap_pages, the second in opts->auxtrace_mmap_pages.
 */
1673 static int record__parse_mmap_pages(const struct option *opt,
1675 int unset __maybe_unused)
1677 struct record_opts *opts = opt->value;
1679 unsigned int mmap_pages;
/* First value: number of data mmap pages. */
1694 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1697 opts->mmap_pages = mmap_pages;
/* Second value, starting one character past p (presumably the ',' separator -- confirm). */
1705 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1709 opts->auxtrace_mmap_pages = mmap_pages;
/*
 * Warn when the switch-output size threshold is smaller than the mmap
 * buffer size computed from rec->opts.mmap_pages; the buffer size is shown
 * in the message.
 */
1716 static void switch_output_size_warn(struct record *rec)
1718 u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1719 struct switch_output *s = &rec->switch_output;
1723 if (s->size < wakeup_size) {
/* Format the buffer size into buf for the warning text. */
1726 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1727 pr_warning("WARNING: switch-output data size lower than "
1728 "wakeup kernel buffer size (%s) "
1729 "expect bigger perf.data sizes\n", buf);
/*
 * Interpret the switch-output option string held in rec->switch_output.str.
 *
 * The string is tried, in this order, as the literal "signal", as a size
 * with a B/K/M/G suffix, and as a time with an s/m/h/d suffix. Timestamped
 * output file names are enabled, and a size threshold triggers
 * switch_output_size_warn() unless no_buffering is set.
 */
1733 static int switch_output_setup(struct record *rec)
1735 struct switch_output *s = &rec->switch_output;
/* Size suffixes and their multipliers in bytes. */
1736 static struct parse_tag tags_size[] = {
1737 { .tag = 'B', .mult = 1 },
1738 { .tag = 'K', .mult = 1 << 10 },
1739 { .tag = 'M', .mult = 1 << 20 },
1740 { .tag = 'G', .mult = 1 << 30 },
/* Time suffixes and their multipliers in seconds. */
1743 static struct parse_tag tags_time[] = {
1744 { .tag = 's', .mult = 1 },
1745 { .tag = 'm', .mult = 60 },
1746 { .tag = 'h', .mult = 60*60 },
1747 { .tag = 'd', .mult = 60*60*24 },
1755 if (!strcmp(s->str, "signal")) {
1757 pr_debug("switch-output with SIGUSR2 signal\n");
/* (unsigned long) -1 is treated as "did not parse". */
1761 val = parse_tag_value(s->str, tags_size);
1762 if (val != (unsigned long) -1) {
1764 pr_debug("switch-output with %s size threshold\n", s->str);
1768 val = parse_tag_value(s->str, tags_time);
1769 if (val != (unsigned long) -1) {
1771 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1779 rec->timestamp_filename = true;
1782 if (s->size && !rec->opts.no_buffering)
1783 switch_output_size_warn(rec);
/* Usage synopsis lines for 'perf record'. */
1788 static const char * const __record_usage[] = {
1789 "perf record [<options>] [<command>]",
1790 "perf record [<options>] -- <command> [<options>]",
/* Non-static alias of the table above. */
1793 const char * const *record_usage = __record_usage;
1796 * XXX Ideally would be local to cmd_record() and passed to a record__new
1797 * because we need to have access to it in record__exit, that is called
1798 * after cmd_record() exits, but since record_options need to be accessible to
1799 * builtin-script, leave it here.
1801 * At least we don't touch it in all the other functions here directly.
1803 * Just say no to tons of global variables, sigh.
/*
 * The single global record instance with its built-in defaults. The option
 * table and cmd_record() operate on this object.
 */
1805 static struct record record = {
/* Sample timestamps are recorded by default. */
1807 .sample_time = true,
/* NOTE(review): UINT_MAX / ULLONG_MAX presumably mean "not set by the user" -- confirm. */
1808 .mmap_pages = UINT_MAX,
1809 .user_freq = UINT_MAX,
1810 .user_interval = ULLONG_MAX,
1814 .default_per_cpu = true,
/* Event handlers and event ordering for the tool. */
1818 .sample = process_sample_event,
1819 .fork = perf_event__process_fork,
1820 .exit = perf_event__process_exit,
1821 .comm = perf_event__process_comm,
1822 .namespaces = perf_event__process_namespaces,
1823 .mmap = perf_event__process_mmap,
1824 .mmap2 = perf_event__process_mmap2,
1825 .ordered_events = true,
/* Help text of the call-graph option: the shared text plus the default mode. */
1829 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1830 "\n\t\t\t\tDefault: fp";
/* Set by the dry-run option ("Parse options then exit"). */
1832 static bool dry_run;
1835 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1836 * with it and switch to use the library functions in perf_evlist that came
1837 * from builtin-record.c, i.e. use record_opts,
1838 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
/*
 * Command line options of 'perf record'. Most entries store straight into
 * the global record instance (record.opts, record.evlist, ...); a few write
 * to other globals such as callchain_param, verbose, quiet, llvm_param and
 * symbol_conf.
 */
1841 static struct option __record_options[] = {
1842 OPT_CALLBACK('e', "event", &record.evlist, "event",
1843 "event selector. use 'perf list' to list available events",
1844 parse_events_option),
1845 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1846 "event filter", parse_filter),
1847 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1848 NULL, "don't record events from perf itself",
1850 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1851 "record events on existing process id"),
1852 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1853 "record events on existing thread id"),
1854 OPT_INTEGER('r', "realtime", &record.realtime_prio,
1855 "collect data with this RT SCHED_FIFO priority"),
1856 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1857 "collect data without buffering"),
1858 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1859 "collect raw sample records from all opened counters"),
1860 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1861 "system-wide collection from all CPUs"),
1862 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1863 "list of cpus to monitor"),
1864 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1865 OPT_STRING('o', "output", &record.data.path, "file",
1866 "output file name"),
/* The *_SET variants also record that the user gave the option explicitly. */
1867 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1868 &record.opts.no_inherit_set,
1869 "child tasks do not inherit counters"),
1870 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1871 "synthesize non-sample events at the end of output"),
1872 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1873 OPT_BOOLEAN(0, "bpf-event", &record.opts.bpf_event, "record bpf events"),
1874 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
1875 "Fail if the specified frequency can't be used"),
1876 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
1877 "profile at this frequency",
1878 record__parse_freq),
1879 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1880 "number of mmap data pages and AUX area tracing mmap pages",
1881 record__parse_mmap_pages),
1882 OPT_BOOLEAN(0, "group", &record.opts.group,
1883 "put the counters into a counter group"),
/* -g works on the global callchain_param; --call-graph is handed &record.opts instead. */
1884 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1885 NULL, "enables call-graph recording" ,
1886 &record_callchain_opt),
1887 OPT_CALLBACK(0, "call-graph", &record.opts,
1888 "record_mode[,record_size]", record_callchain_help,
1889 &record_parse_callchain_opt),
1890 OPT_INCR('v', "verbose", &verbose,
1891 "be more verbose (show counter open errors, etc)"),
1892 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1893 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1894 "per thread counts"),
1895 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1896 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1897 "Record the sample physical addresses"),
1898 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1899 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1900 &record.opts.sample_time_set,
1901 "Record the sample timestamps"),
1902 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
1903 "Record the sample period"),
1904 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1906 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1907 &record.no_buildid_cache_set,
1908 "do not update the buildid cache"),
1909 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1910 &record.no_buildid_set,
1911 "do not collect buildids in perf.data"),
1912 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1913 "monitor event in cgroup name only",
1915 OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1916 "ms to wait before starting measurement after program start"),
1917 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
/* -b and -j share the same callback and destination. */
1920 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1921 "branch any", "sample any taken branches",
1922 parse_branch_stack),
1924 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1925 "branch filter mask", "branch stack filter modes",
1926 parse_branch_stack),
1927 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1928 "sample by weight (on special events only)"),
1929 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1930 "sample transaction flags (special events only)"),
1931 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1932 "use per-thread mmaps"),
1933 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1934 "sample selected machine registers on interrupt,"
1935 " use -I ? to list register names", parse_regs),
/*
 * NOTE(review): the --user-regs help text below is identical to the
 * --intr-regs one ("on interrupt", "-I ?"); looks copy-pasted -- confirm
 * the intended wording.
 */
1936 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
1937 "sample selected machine registers on interrupt,"
1938 " use -I ? to list register names", parse_regs),
1939 OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1940 "Record running/enabled time of read (:S) events"),
1941 OPT_CALLBACK('k', "clockid", &record.opts,
1942 "clockid", "clockid to use for events, see clock_gettime()",
1944 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1945 "opts", "AUX area tracing Snapshot Mode", ""),
1946 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
1947 "per thread proc mmap processing timeout in ms"),
1948 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1949 "Record namespaces events"),
1950 OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1951 "Record context switch events"),
1952 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1953 "Configure all used events to run in kernel space.",
1954 PARSE_OPT_EXCLUSIVE),
1955 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1956 "Configure all used events to run in user space.",
1957 PARSE_OPT_EXCLUSIVE),
1958 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1959 "clang binary to use for compiling BPF scriptlets"),
1960 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1961 "options passed to clang when compiling BPF scriptlets"),
1962 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1963 "file", "vmlinux pathname"),
1964 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1965 "Record build-id of all DSOs regardless of hits"),
1966 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1967 "append timestamp to output filename"),
1968 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
1969 "Record timestamp boundary (time of first/last samples)"),
/* The string stored here is what switch_output_setup() later interprets. */
1970 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
1971 &record.switch_output.set, "signal,size,time",
1972 "Switch output when receive SIGUSR2 or cross size,time threshold",
1974 OPT_BOOLEAN(0, "dry-run", &dry_run,
1975 "Parse options then exit"),
1976 #ifdef HAVE_AIO_SUPPORT
1977 OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
1978 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
1981 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
1982 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
1983 record__parse_affinity),
/* Non-static alias of the option table; this is the pointer cmd_record() passes to parse_options(). */
1987 struct option *record_options = __record_options;
/*
 * Entry point of 'perf record'.
 *
 * @argc/@argv: command line as handed over by the perf front end.
 *
 * Sets up defaults on the global record instance, reads the perf config,
 * parses the command line, validates the target and the option combination,
 * then hands over to __cmd_record(). The event list and the auxtrace record
 * are released before returning.
 */
1989 int cmd_record(int argc, const char **argv)
1992 struct record *rec = &record;
1993 char errbuf[BUFSIZ];
1995 setlocale(LC_ALL, "");
/* Without libbpf support the clang options are flagged as not built in. */
1997 #ifndef HAVE_LIBBPF_SUPPORT
1998 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1999 set_nobuild('\0', "clang-path", true);
2000 set_nobuild('\0', "clang-opt", true);
2004 #ifndef HAVE_BPF_PROLOGUE
2005 # if !defined (HAVE_DWARF_SUPPORT)
2006 # define REASON "NO_DWARF=1"
2007 # elif !defined (HAVE_LIBBPF_SUPPORT)
2008 # define REASON "NO_LIBBPF=1"
2010 # define REASON "this architecture doesn't support BPF prologue"
2012 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2013 set_nobuild('\0', "vmlinux", true);
/* Start from an empty affinity mask and the PERF_AFFINITY_SYS mode. */
2018 CPU_ZERO(&rec->affinity_mask);
2019 rec->opts.affinity = PERF_AFFINITY_SYS;
2021 rec->evlist = perf_evlist__new();
2022 if (rec->evlist == NULL)
/* Config is read first, so command line options parsed below can override it. */
2025 err = perf_config(perf_record_config, rec);
2029 argc = parse_options(argc, argv, record_options, record_usage,
2030 PARSE_OPT_STOP_AT_NON_OPTION);
2032 perf_quiet_option();
2034 /* Make system wide (-a) the default target. */
2035 if (!argc && target__none(&rec->opts.target))
2036 rec->opts.target.system_wide = true;
2038 if (nr_cgroups && !rec->opts.target.system_wide) {
2039 usage_with_options_msg(record_usage, record_options,
2040 "cgroup monitoring only available in system-wide mode");
2043 if (rec->opts.record_switch_events &&
2044 !perf_can_record_switch_events()) {
2045 ui__error("kernel does not support recording context switch events\n");
2046 parse_options_usage(record_usage, record_options, "switch-events", 0);
2050 if (switch_output_setup(rec)) {
2051 parse_options_usage(record_usage, record_options, "switch-output", 0);
/* Time based switch-output: arm SIGALRM with the configured period. */
2055 if (rec->switch_output.time) {
2056 signal(SIGALRM, alarm_sig_handler);
2057 alarm(rec->switch_output.time);
2061 * Allow aliases to facilitate the lookup of symbols for address
2062 * filters. Refer to auxtrace_parse_filters().
2064 symbol_conf.allow_aliases = true;
2068 err = record__auxtrace_init(rec);
2075 err = bpf__setup_stdout(rec->evlist);
2077 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2078 pr_err("ERROR: Setup BPF stdout failed: %s\n",
2085 if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
2087 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
2088 "check /proc/sys/kernel/kptr_restrict.\n\n"
2089 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
2090 "file is not found in the buildid cache or in the vmlinux path.\n\n"
2091 "Samples in kernel modules won't be resolved at all.\n\n"
2092 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
2093 "even with a suitable vmlinux or kallsyms file.\n\n");
2095 if (rec->no_buildid_cache || rec->no_buildid) {
2096 disable_buildid_cache();
2097 } else if (rec->switch_output.enabled) {
2099 * In 'perf record --switch-output', disable buildid
2100 * generation by default to reduce data file switching
2101 * overhead. Still generate buildid if they are required
2104 * perf record --switch-output --no-no-buildid \
2105 * --no-no-buildid-cache
2107 * Following code equals to:
2109 * if ((rec->no_buildid || !rec->no_buildid_set) &&
2110 * (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2111 * disable_buildid_cache();
2113 bool disable = true;
2115 if (rec->no_buildid_set && !rec->no_buildid)
2117 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2120 rec->no_buildid = true;
2121 rec->no_buildid_cache = true;
2122 disable_buildid_cache();
/* Overwrite mode forces tail synthesize on. */
2126 if (record.opts.overwrite)
2127 record.opts.tail_synthesize = true;
/* No event given on the command line: fall back to the default event. */
2129 if (rec->evlist->nr_entries == 0 &&
2130 __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
2131 pr_err("Not enough memory for event selector list\n");
/* A tid target turns inheritance off unless the user set the option explicitly. */
2135 if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2136 rec->opts.no_inherit = true;
2138 err = target__validate(&rec->opts.target);
2140 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2141 ui__warning("%s\n", errbuf);
2144 err = target__parse_uid(&rec->opts.target);
2146 int saved_errno = errno;
2148 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2149 ui__error("%s", errbuf);
2155 /* Enable ignoring missing threads when -u/-p option is defined. */
2156 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
2159 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
2160 usage_with_options(record_usage, record_options);
2162 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2167 * We take all buildids when the file contains
2168 * AUX area tracing data because we do not decode the
2169 * trace because it would take too long.
2171 if (rec->opts.full_auxtrace)
2172 rec->buildid_all = true;
2174 if (record_opts__config(&rec->opts)) {
/* Clamp the number of control blocks to the supported maximum. */
2179 if (rec->opts.nr_cblocks > nr_cblocks_max)
2180 rec->opts.nr_cblocks = nr_cblocks_max;
2182 pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2184 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
/* &record and rec refer to the same global object. */
2186 err = __cmd_record(&record, argc, argv);
2188 perf_evlist__delete(rec->evlist);
2190 auxtrace_record__free(rec->itr);
/*
 * Signal handler serving two features at once: it starts an AUX area
 * snapshot when the snapshot trigger is ready, and fires the switch-output
 * trigger when signal based output switching is active.
 * NOTE(review): presumably installed for SIGUSR2, going by the switch-output
 * messages -- confirm at the signal() call site.
 */
2194 static void snapshot_sig_handler(int sig __maybe_unused)
2196 struct record *rec = &record;
2198 if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2199 trigger_hit(&auxtrace_snapshot_trigger);
2200 auxtrace_record__snapshot_started = 1;
/* A non-zero return from snapshot_start puts the trigger into its error state. */
2201 if (auxtrace_record__snapshot_start(record.itr))
2202 trigger_error(&auxtrace_snapshot_trigger);
2205 if (switch_output_signal(rec))
2206 trigger_hit(&switch_output_trigger);
/*
 * SIGALRM handler (installed in cmd_record() when a switch-output time is
 * configured): fires the switch-output trigger when switch_output_time()
 * reports that time based switching applies.
 */
2209 static void alarm_sig_handler(int sig __maybe_unused)
2211 struct record *rec = &record;
2213 if (switch_output_time(rec))
2214 trigger_hit(&switch_output_trigger);