/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>
#include <asm/bug.h>
#include <linux/time64.h>

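/*
 * State for --switch-output: rotation of the output file can be driven
 * by a SIGUSR2 signal, a size threshold or a time interval, see
 * switch_output_setup().
 */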
struct switch_output {
	bool		 enabled;
	bool		 signal;
	unsigned long	 size;
	unsigned long	 time;
	const char	*str;
	bool		 set;
};

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data_file	file;
	struct auxtrace_record	*itr;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	struct switch_output	switch_output;
	unsigned long long	samples;
};

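/*
 * Both triggers are flipped from signal handlers and polled from the
 * main loop of __cmd_record(): SIGUSR2 drives AUX area snapshots and/or
 * output file switching, depending on what was enabled.
 */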
static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

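/*
 * Append @size bytes to the output file and account them, so that
 * switch_output_size() can fire once the configured threshold is
 * crossed.
 */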
static int record__write(struct record *rec, void *bf, size_t size)
{
	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, event, event->header.size);
}

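/*
 * For a backward (overwritable) ring buffer the kernel moves @head
 * backwards, so the valid data starts at @head. Walk the event headers
 * forward until we either wrap around the whole buffer or reach a
 * zero-sized header, which marks the end of the valid region.
 */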
static int
backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
	struct perf_event_header *pheader;
	u64 evt_head = head;
	int size = mask + 1;

	pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
	pheader = (struct perf_event_header *)(buf + (head & mask));
	*start = head;
	while (true) {
		if (evt_head - head >= (unsigned int)size) {
			pr_debug("Finished reading backward ring buffer: rewind\n");
			if (evt_head - head > (unsigned int)size)
				evt_head -= pheader->size;
			*end = evt_head;
			return 0;
		}

		pheader = (struct perf_event_header *)(buf + (evt_head & mask));

		if (pheader->size == 0) {
			pr_debug("Finished reading backward ring buffer: get start\n");
			*end = evt_head;
			return 0;
		}

		evt_head += pheader->size;
		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
	}
	WARN_ONCE(1, "Shouldn't get here\n");
	return -1;
}

static int
rb_find_range(void *data, int mask, u64 head, u64 old,
	      u64 *start, u64 *end, bool backward)
{
	if (!backward) {
		*start = old;
		*end = head;
		return 0;
	}

	return backward_rb_find_range(data, mask, head, start, end);
}

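/*
 * Copy the [start, end) range out of one mmap'ed ring buffer into the
 * output file. When the range wraps past the end of the buffer it is
 * written in two chunks; an oversized range means we failed to keep up
 * with the kernel and the data is dropped.
 */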
static int
record__mmap_read(struct record *rec, struct perf_mmap *md,
		  bool overwrite, bool backward)
{
	u64 head = perf_mmap__read_head(md);
	u64 old = md->prev;
	u64 end = head, start = old;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (rb_find_range(data, md->mask, head,
			  old, &start, &end, backward))
		return -1;

	if (start == end)
		return 0;

	rec->samples++;

	size = end - start;
	if (size > (unsigned long)(md->mask) + 1) {
		WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

		md->prev = head;
		perf_mmap__consume(md, overwrite || backward);
		return 0;
	}

	if ((start & md->mask) + size != (end & md->mask)) {
		buf = &data[start & md->mask];
		size = md->mask + 1 - (start & md->mask);
		start += size;

		if (record__write(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[start & md->mask];
	size = end - start;
	start += size;

	if (record__write(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = head;
	perf_mmap__consume(md, overwrite || backward);
out:
	return rc;
}

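/*
 * These are written from signal handlers and read by the main loop,
 * hence volatile.
 */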
static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

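/*
 * Write an AUXTRACE event followed by its payload. The payload may
 * arrive in two pieces when the AUX ring buffer wrapped, and is padded
 * to an 8-byte boundary because event.auxtrace.size already includes
 * that padding, see __auxtrace_mmap__read().
 */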
static int record__process_auxtrace(struct perf_tool *tool,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data_file *file = &rec->file;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data_file__is_pipe(file)) {
		off_t file_offset;
		int fd = perf_data_file__fd(file);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, event, event->header.size);
	record__write(rec, data1, len1);
	if (len2)
		record__write(rec, data2, len2);
	record__write(rec, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm =
				&rec->evlist->mmap[i].auxtrace_mmap;

		if (!mm->base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct auxtrace_mmap *mm __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

#endif

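/*
 * mmap the per-CPU ring buffers, plus AUX area buffers if requested.
 * EPERM here typically means the locked-memory budget was exceeded,
 * hence the perf_event_mlock_kb hint below.
 */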
static int record__mmap_evlist(struct record *rec,
			       struct perf_evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	char msg[512];

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

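/*
 * Configure and open all events. perf_evsel__fallback() may adjust an
 * event that failed to open (e.g. falling back from a hardware cycles
 * event to cpu-clock) and we retry; otherwise the error is reported
 * and recording is aborted.
 */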
static int record__open(struct record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	struct perf_evsel_config_term *err_term;
	int rc = 0;

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
		error("failed to set config \"%s\" on event %s with %d (%s)\n",
		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
		      str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	rec->samples++;

	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_data_file *file  = &rec->file;
	struct perf_session *session = rec->session;

	if (file->size == 0)
		return 0;

	/*
	 * During this process it'll load the kernel map and replace the
	 * dso->long_name with the real pathname it found.  In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory):
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples.
	 */
	if (rec->buildid_all)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel, when processing the record & report
	 * subcommands we arrange the module mmaps prior to the guest
	 * kernel mmap and trigger a DSO preload, because by default guest
	 * module symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This avoids missing symbols when the
	 * first address is in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s module"
		       " information.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text symbol.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

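/*
 * PERF_RECORD_FINISHED_ROUND is written after each pass over all ring
 * buffers; on the report side it bounds how far events may need to be
 * reordered by timestamp.
 */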
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
				    bool backward)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct perf_mmap *maps;

	if (!evlist)
		return 0;

	maps = backward ? evlist->backward_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

		if (maps[i].base) {
			if (record__mmap_read(rec, &maps[i],
					      evlist->overwrite, backward) != 0) {
				rc = -1;
				goto out;
			}
		}

		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
		    record__auxtrace_mmap_read(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

	if (backward)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	struct perf_data_file *file = &rec->file;
	int fd = perf_data_file__fd(file);

	if (file->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);
}

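/*
 * Synthesize a COMM/MMAP picture for the forked workload only: build a
 * single-entry thread_map on the stack (thread_map ends in a flexible
 * array, so pair it with room for one entry) and hand it to
 * perf_event__synthesize_thread_map().
 */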
static int record__synthesize_workload(struct record *rec, bool tail)
{
	struct {
		struct thread_map map;
		struct thread_map_data map_data;
	} thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map.map.nr = 1;
	thread_map.map.map[0].pid = rec->evlist->workload.pid;
	thread_map.map.map[0].comm = NULL;
	return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map,
						 process_synthesized_event,
						 &rec->session->machines.host,
						 rec->opts.sample_address,
						 rec->opts.proc_map_timeout);
}

static int record__synthesize(struct record *rec, bool tail);

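/*
 * Finish the current output file and start a new one. The finished
 * file is renamed to <path>.<timestamp>; when not called at exit,
 * tracking events are re-synthesized so the new file is
 * self-contained.
 */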
static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data_file *file = &rec->file;
	int fd, err;

	/* Same size as "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data_file__switch(file, timestamp,
				    rec->session->header.data_offset,
				    at_exit);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			file->path, timestamp);

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist, so the newly created perf.data would
		 * lack map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

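/*
 * Weak default that synthesizes nothing; arch code can override it
 * (x86 does, using the TSC conversion fields of the first mmap'ed
 * page) to emit a PERF_RECORD_TIME_CONV event.
 */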
int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].base)
			return evlist->mmap[0].base;
		if (evlist->backward_mmap && evlist->backward_mmap[0].base)
			return evlist->backward_mmap[0].base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}

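/*
 * Emit the synthetic, non-sample events that consumers need to make
 * sense of the samples: event attributes and tracing data (for pipe
 * output), time conversion info, auxtrace info, kernel and module
 * maps, and finally the existing threads of the target.
 */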
static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data_file *file = &rec->file;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data_file__fd(file);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (file->is_pipe) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really should report
			 * this more properly and also propagate errors
			 * that now are calling die().
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout);
out:
	return err;
}

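/*
 * The main recording driver: set up signals, the session and the
 * workload, synthesize the initial metadata, then loop draining the
 * ring buffers until the workload exits or we are interrupted.
 */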
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;

	rec->progname = argv[0];

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(file, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data_file__fd(file);
	rec->session = session;

	record__init_features(rec);

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, file->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (file->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	machine = &session->machines.host;

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		union perf_event *event;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before the COMM event,
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize a COMM event to prevent it.
		 */
		perf_event__synthesize_comm(tool, event,
					    rec->evlist->workload.pid,
					    process_synthesized_event,
					    machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		perf_evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY here:
		 * when done == true and hits != rec->samples in the
		 * previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap() ensures we never convert
		 * BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in the
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 was raised after or during
			 * record__mmap_read_all(), it didn't collect data
			 * from the overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate the error only if there is one; ignore a
			 * positive number of returned events and interrupt
			 * errors.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
			perf_data_file__size(file) / 1024.0 / 1024.0,
			file->path, postfix, samples);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;

	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}

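/*
 * perfconfig hook: record.build-id selects cache/no-cache/skip, and
 * record.call-graph is forwarded to the generic call-graph.record-mode
 * handling.
 */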
static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph"))
		var = "call-graph.record-mode"; /* fall-through */

	return perf_default_config(var, value, cb);
}

struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }

/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

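/*
 * Used by the --clockid option: accepts a raw numeric clockid, or any
 * name from clockids[] with an optional "CLOCK_" prefix, so e.g.
 * "monotonic", "CLOCK_MONOTONIC" and "mono" are all equivalent.
 */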
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if it's a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return 0;

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return 0;
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}

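/*
 * Parse -m/--mmap-pages: "pages[,pages]", where the optional second
 * value sizes the AUX area tracing mmap, e.g. "-m 512,2048".
 */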
static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}

static void switch_output_size_warn(struct record *rec)
{
	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
	struct switch_output *s = &rec->switch_output;

	wakeup_size /= 2;

	if (s->size < wakeup_size) {
		char buf[100];

		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
		pr_warning("WARNING: switch-output data size lower than "
			   "wakeup kernel buffer size (%s), "
			   "expect bigger perf.data sizes\n", buf);
	}
}

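/*
 * Parse the --switch-output argument. Accepted forms (per the tag
 * tables below): "signal" to rotate on SIGUSR2, a size such as "100M"
 * to rotate after that much data, or a time such as "30s" to rotate
 * periodically. Any of them also enables timestamped output names.
 */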
static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag  = 's', .mult = 1        },
		{ .tag  = 'm', .mult = 60       },
		{ .tag  = 'h', .mult = 60*60    },
		{ .tag  = 'd', .mult = 60*60*24 },
		{ .tag  = 0 },
	};
	unsigned long val;

	if (!s->set)
		return 0;

	if (!strcmp(s->str, "signal")) {
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	rec->timestamp_filename = true;
	s->enabled              = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}

static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally this would be local to cmd_record() and passed to a
 * record__new because we need access to it in record__exit(), which is
 * called after cmd_record() exits, but since record_options needs to be
 * accessible to builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.proc_map_timeout     = 500,
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
		.ordered_events	= true,
	},
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop
 * messing with it and switch to using the library functions in
 * perf_evlist that came from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc., instead of fork+exec'ing
 * 'perf record' using pipes, etc.
 */
static struct option __record_options[] = {
1518         OPT_CALLBACK('e', "event", &record.evlist, "event",
1519                      "event selector. use 'perf list' to list available events",
1520                      parse_events_option),
1521         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1522                      "event filter", parse_filter),
1523         OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1524                            NULL, "don't record events from perf itself",
1525                            exclude_perf),
1526         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1527                     "record events on existing process id"),
1528         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1529                     "record events on existing thread id"),
1530         OPT_INTEGER('r', "realtime", &record.realtime_prio,
1531                     "collect data with this RT SCHED_FIFO priority"),
1532         OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1533                     "collect data without buffering"),
1534         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1535                     "collect raw sample records from all opened counters"),
1536         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1537                             "system-wide collection from all CPUs"),
1538         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1539                     "list of cpus to monitor"),
1540         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1541         OPT_STRING('o', "output", &record.file.path, "file",
1542                     "output file name"),
1543         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1544                         &record.opts.no_inherit_set,
1545                         "child tasks do not inherit counters"),
1546         OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1547                     "synthesize non-sample events at the end of output"),
1548         OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1549         OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1550         OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1551                      "number of mmap data pages and AUX area tracing mmap pages",
1552                      record__parse_mmap_pages),
1553         OPT_BOOLEAN(0, "group", &record.opts.group,
1554                     "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
			   "branch any", "sample any taken branches",
			   parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use -I ? to list register names", parse_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
		     "clockid", "clockid to use for events, see clock_gettime()",
		     parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
		     "per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			  &record.switch_output.set, "signal,size,time",
			  "Switch output when receiving SIGUSR2 or when crossing the size/time threshold",
			  "signal"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
	OPT_END()
};
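
/*
 * Illustrative invocations exercising a few of the options above (a
 * sketch, not an exhaustive reference; see 'perf record --help'):
 *
 *   perf record -F 99 -a -g -- sleep 10     # 99 Hz, all CPUs, callchains
 *   perf record -e cycles:u -p 1234         # user-space cycles of pid 1234
 *   perf record -a -o out.data --switch-output=1G
 */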

struct option *record_options = __record_options;

int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		return -EINVAL;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}
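
	/*
	 * Note: alarm(2) arms a one-shot timer here; for periodic output
	 * rotation the timer presumably gets re-armed once the file is
	 * actually switched (that happens outside this excerpt).
	 */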

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			goto out;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		goto out;

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = auxtrace_parse_filters(rec->evlist);
	if (err)
		goto out;
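
	/*
	 * A hedged example of the address filters parsed above, assuming
	 * an AUX area event such as intel_pt (filter syntax as documented
	 * for perf-record AUX area tracing):
	 *
	 *   perf record -e intel_pt// --filter 'filter main @ /bin/ls' -- ls
	 */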

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setting up BPF stdout failed: %s\n", errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are explicitly
		 * required, using:
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->nr_entries == 0 &&
	    perf_evlist__add_default(rec->evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when the -u option is given. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX;

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains AUX area tracing data,
	 * because we do not decode the trace: decoding would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = __cmd_record(&record, argc, argv);
out:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}
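
/*
 * Illustrative flow for the handler above, assuming an AUX area snapshot
 * session: 'perf record -e intel_pt// -S' arms the snapshot trigger, and
 * 'kill -USR2 <perf-pid>' lands here to capture a snapshot (and/or to
 * rotate the output when --switch-output[=signal] is in effect).
 */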

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}