tools/perf/builtin-record.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10
11 #include "perf.h"
12
13 #include "util/build-id.h"
14 #include "util/util.h"
15 #include <subcmd/parse-options.h>
16 #include "util/parse-events.h"
17 #include "util/config.h"
18
19 #include "util/callchain.h"
20 #include "util/cgroup.h"
21 #include "util/header.h"
22 #include "util/event.h"
23 #include "util/evlist.h"
24 #include "util/evsel.h"
25 #include "util/debug.h"
26 #include "util/session.h"
27 #include "util/tool.h"
28 #include "util/symbol.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/llvm-utils.h"
38 #include "util/bpf-loader.h"
39 #include "util/trigger.h"
40 #include "util/perf-hooks.h"
41 #include "util/cpu-set-sched.h"
42 #include "util/time-utils.h"
43 #include "util/units.h"
44 #include "util/bpf-event.h"
45 #include "asm/bug.h"
46
47 #include <errno.h>
48 #include <inttypes.h>
49 #include <locale.h>
50 #include <poll.h>
51 #include <unistd.h>
52 #include <sched.h>
53 #include <signal.h>
54 #include <sys/mman.h>
55 #include <sys/wait.h>
56 #include <linux/time64.h>
57
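/*
 * State for the --switch-output option: whether output switching is enabled
 * and whether it is driven by SIGUSR2 (signal), a size threshold or a time
 * period, plus the raw option string it was parsed from.
 */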
58 struct switch_output {
59         bool             enabled;
60         bool             signal;
61         unsigned long    size;
62         unsigned long    time;
63         const char      *str;
64         bool             set;
65 };
66
67 struct record {
68         struct perf_tool        tool;
69         struct record_opts      opts;
70         u64                     bytes_written;
71         struct perf_data        data;
72         struct auxtrace_record  *itr;
73         struct perf_evlist      *evlist;
74         struct perf_session     *session;
75         int                     realtime_prio;
76         bool                    no_buildid;
77         bool                    no_buildid_set;
78         bool                    no_buildid_cache;
79         bool                    no_buildid_cache_set;
80         bool                    buildid_all;
81         bool                    timestamp_filename;
82         bool                    timestamp_boundary;
83         struct switch_output    switch_output;
84         unsigned long long      samples;
85         cpu_set_t               affinity_mask;
86 };
87
88 static volatile int auxtrace_record__snapshot_started;
89 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
90 static DEFINE_TRIGGER(switch_output_trigger);
91
92 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
93         "SYS", "NODE", "CPU"
94 };
95
96 static bool switch_output_signal(struct record *rec)
97 {
98         return rec->switch_output.signal &&
99                trigger_is_ready(&switch_output_trigger);
100 }
101
102 static bool switch_output_size(struct record *rec)
103 {
104         return rec->switch_output.size &&
105                trigger_is_ready(&switch_output_trigger) &&
106                (rec->bytes_written >= rec->switch_output.size);
107 }
108
109 static bool switch_output_time(struct record *rec)
110 {
111         return rec->switch_output.time &&
112                trigger_is_ready(&switch_output_trigger);
113 }
114
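/*
 * Write a block of data to the perf.data file and account for it in
 * bytes_written, firing the switch-output trigger once the configured size
 * threshold has been reached.
 */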
115 static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
116                          void *bf, size_t size)
117 {
118         struct perf_data_file *file = &rec->session->data->file;
119
120         if (perf_data_file__write(file, bf, size) < 0) {
121                 pr_err("failed to write perf data, error: %m\n");
122                 return -1;
123         }
124
125         rec->bytes_written += size;
126
127         if (switch_output_size(rec))
128                 trigger_hit(&switch_output_trigger);
129
130         return 0;
131 }
132
133 #ifdef HAVE_AIO_SUPPORT
134 static int record__aio_write(struct aiocb *cblock, int trace_fd,
135                 void *buf, size_t size, off_t off)
136 {
137         int rc;
138
139         cblock->aio_fildes = trace_fd;
140         cblock->aio_buf    = buf;
141         cblock->aio_nbytes = size;
142         cblock->aio_offset = off;
143         cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
144
145         do {
146                 rc = aio_write(cblock);
147                 if (rc == 0) {
148                         break;
149                 } else if (errno != EAGAIN) {
150                         cblock->aio_fildes = -1;
151                         pr_err("failed to queue perf data, error: %m\n");
152                         break;
153                 }
154         } while (1);
155
156         return rc;
157 }
158
159 static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
160 {
161         void *rem_buf;
162         off_t rem_off;
163         size_t rem_size;
164         int rc, aio_errno;
165         ssize_t aio_ret, written;
166
167         aio_errno = aio_error(cblock);
168         if (aio_errno == EINPROGRESS)
169                 return 0;
170
171         written = aio_ret = aio_return(cblock);
172         if (aio_ret < 0) {
173                 if (aio_errno != EINTR)
174                         pr_err("failed to write perf data, error: %m\n");
175                 written = 0;
176         }
177
178         rem_size = cblock->aio_nbytes - written;
179
180         if (rem_size == 0) {
181                 cblock->aio_fildes = -1;
182                 /*
183                  * md->refcount is incremented in perf_mmap__push() for
184                  * every enqueued aio write request, so decrement it because
185                  * the request is now complete.
186                  */
187                 perf_mmap__put(md);
188                 rc = 1;
189         } else {
190                 /*
191                  * An aio write request may require a restart with the
192                  * remainder if the kernel didn't write the whole
193                  * chunk at once.
194                  */
195                 rem_off = cblock->aio_offset + written;
196                 rem_buf = (void *)(cblock->aio_buf + written);
197                 record__aio_write(cblock, cblock->aio_fildes,
198                                 rem_buf, rem_size, rem_off);
199                 rc = 0;
200         }
201
202         return rc;
203 }
204
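/*
 * Wait for in-flight aio write requests on this mmap. Without sync_all,
 * return the index of the first free control block that can be reused for
 * the next request; with sync_all, return -1 only once every control block
 * has completed.
 */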
205 static int record__aio_sync(struct perf_mmap *md, bool sync_all)
206 {
207         struct aiocb **aiocb = md->aio.aiocb;
208         struct aiocb *cblocks = md->aio.cblocks;
209         struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
210         int i, do_suspend;
211
212         do {
213                 do_suspend = 0;
214                 for (i = 0; i < md->aio.nr_cblocks; ++i) {
215                         if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
216                                 if (sync_all)
217                                         aiocb[i] = NULL;
218                                 else
219                                         return i;
220                         } else {
221                                 /*
222                  * The started aio write is not complete yet,
223                  * so it has to be waited for before the
224                  * next allocation.
225                                  */
226                                 aiocb[i] = &cblocks[i];
227                                 do_suspend = 1;
228                         }
229                 }
230                 if (!do_suspend)
231                         return -1;
232
233                 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
234                         if (!(errno == EAGAIN || errno == EINTR))
235                                 pr_err("failed to sync perf data, error: %m\n");
236                 }
237         } while (1);
238 }
239
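/*
 * AIO counterpart of record__pushfn(): count the chunk, queue an
 * asynchronous write of it at the given file offset and, on success,
 * account for the bytes written.
 */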
240 static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
241 {
242         struct record *rec = to;
243         int ret, trace_fd = rec->session->data->file.fd;
244
245         rec->samples++;
246
247         ret = record__aio_write(cblock, trace_fd, bf, size, off);
248         if (!ret) {
249                 rec->bytes_written += size;
250                 if (switch_output_size(rec))
251                         trigger_hit(&switch_output_trigger);
252         }
253
254         return ret;
255 }
256
257 static off_t record__aio_get_pos(int trace_fd)
258 {
259         return lseek(trace_fd, 0, SEEK_CUR);
260 }
261
262 static void record__aio_set_pos(int trace_fd, off_t pos)
263 {
264         lseek(trace_fd, pos, SEEK_SET);
265 }
266
267 static void record__aio_mmap_read_sync(struct record *rec)
268 {
269         int i;
270         struct perf_evlist *evlist = rec->evlist;
271         struct perf_mmap *maps = evlist->mmap;
272
273         if (!rec->opts.nr_cblocks)
274                 return;
275
276         for (i = 0; i < evlist->nr_mmaps; i++) {
277                 struct perf_mmap *map = &maps[i];
278
279                 if (map->base)
280                         record__aio_sync(map, true);
281         }
282 }
283
284 static int nr_cblocks_default = 1;
285 static int nr_cblocks_max = 4;
286
287 static int record__aio_parse(const struct option *opt,
288                              const char *str,
289                              int unset)
290 {
291         struct record_opts *opts = (struct record_opts *)opt->value;
292
293         if (unset) {
294                 opts->nr_cblocks = 0;
295         } else {
296                 if (str)
297                         opts->nr_cblocks = strtol(str, NULL, 0);
298                 if (!opts->nr_cblocks)
299                         opts->nr_cblocks = nr_cblocks_default;
300         }
301
302         return 0;
303 }
304 #else /* HAVE_AIO_SUPPORT */
305 static int nr_cblocks_max = 0;
306
307 static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
308 {
309         return -1;
310 }
311
312 static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
313                 void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
314 {
315         return -1;
316 }
317
318 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
319 {
320         return -1;
321 }
322
323 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
324 {
325 }
326
327 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
328 {
329 }
330 #endif
331
332 static int record__aio_enabled(struct record *rec)
333 {
334         return rec->opts.nr_cblocks > 0;
335 }
336
337 static int process_synthesized_event(struct perf_tool *tool,
338                                      union perf_event *event,
339                                      struct perf_sample *sample __maybe_unused,
340                                      struct machine *machine __maybe_unused)
341 {
342         struct record *rec = container_of(tool, struct record, tool);
343         return record__write(rec, NULL, event, event->header.size);
344 }
345
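/* perf_mmap__push() callback: count the chunk and write it to perf.data. */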
346 static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
347 {
348         struct record *rec = to;
349
350         rec->samples++;
351         return record__write(rec, map, bf, size);
352 }
353
354 static volatile int done;
355 static volatile int signr = -1;
356 static volatile int child_finished;
357
358 static void sig_handler(int sig)
359 {
360         if (sig == SIGCHLD)
361                 child_finished = 1;
362         else
363                 signr = sig;
364
365         done = 1;
366 }
367
368 static void sigsegv_handler(int sig)
369 {
370         perf_hooks__recover();
371         sighandler_dump_stack(sig);
372 }
373
374 static void record__sig_exit(void)
375 {
376         if (signr == -1)
377                 return;
378
379         signal(signr, SIG_DFL);
380         raise(signr);
381 }
382
383 #ifdef HAVE_AUXTRACE_SUPPORT
384
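/*
 * Write an AUX area trace event followed by the trace data itself, padding
 * the payload to an 8-byte boundary; for non-pipe output also record the
 * file offset in the session's auxtrace index.
 */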
385 static int record__process_auxtrace(struct perf_tool *tool,
386                                     struct perf_mmap *map,
387                                     union perf_event *event, void *data1,
388                                     size_t len1, void *data2, size_t len2)
389 {
390         struct record *rec = container_of(tool, struct record, tool);
391         struct perf_data *data = &rec->data;
392         size_t padding;
393         u8 pad[8] = {0};
394
395         if (!perf_data__is_pipe(data)) {
396                 off_t file_offset;
397                 int fd = perf_data__fd(data);
398                 int err;
399
400                 file_offset = lseek(fd, 0, SEEK_CUR);
401                 if (file_offset == -1)
402                         return -1;
403                 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
404                                                      event, file_offset);
405                 if (err)
406                         return err;
407         }
408
409         /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
410         padding = (len1 + len2) & 7;
411         if (padding)
412                 padding = 8 - padding;
413
414         record__write(rec, map, event, event->header.size);
415         record__write(rec, map, data1, len1);
416         if (len2)
417                 record__write(rec, map, data2, len2);
418         record__write(rec, map, &pad, padding);
419
420         return 0;
421 }
422
423 static int record__auxtrace_mmap_read(struct record *rec,
424                                       struct perf_mmap *map)
425 {
426         int ret;
427
428         ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
429                                   record__process_auxtrace);
430         if (ret < 0)
431                 return ret;
432
433         if (ret)
434                 rec->samples++;
435
436         return 0;
437 }
438
439 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
440                                                struct perf_mmap *map)
441 {
442         int ret;
443
444         ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
445                                            record__process_auxtrace,
446                                            rec->opts.auxtrace_snapshot_size);
447         if (ret < 0)
448                 return ret;
449
450         if (ret)
451                 rec->samples++;
452
453         return 0;
454 }
455
456 static int record__auxtrace_read_snapshot_all(struct record *rec)
457 {
458         int i;
459         int rc = 0;
460
461         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
462                 struct perf_mmap *map = &rec->evlist->mmap[i];
463
464                 if (!map->auxtrace_mmap.base)
465                         continue;
466
467                 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
468                         rc = -1;
469                         goto out;
470                 }
471         }
472 out:
473         return rc;
474 }
475
476 static void record__read_auxtrace_snapshot(struct record *rec)
477 {
478         pr_debug("Recording AUX area tracing snapshot\n");
479         if (record__auxtrace_read_snapshot_all(rec) < 0) {
480                 trigger_error(&auxtrace_snapshot_trigger);
481         } else {
482                 if (auxtrace_record__snapshot_finish(rec->itr))
483                         trigger_error(&auxtrace_snapshot_trigger);
484                 else
485                         trigger_ready(&auxtrace_snapshot_trigger);
486         }
487 }
488
489 static int record__auxtrace_init(struct record *rec)
490 {
491         int err;
492
493         if (!rec->itr) {
494                 rec->itr = auxtrace_record__init(rec->evlist, &err);
495                 if (err)
496                         return err;
497         }
498
499         err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
500                                               rec->opts.auxtrace_snapshot_opts);
501         if (err)
502                 return err;
503
504         return auxtrace_parse_filters(rec->evlist);
505 }
506
507 #else
508
509 static inline
510 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
511                                struct perf_mmap *map __maybe_unused)
512 {
513         return 0;
514 }
515
516 static inline
517 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
518 {
519 }
520
521 static inline
522 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
523 {
524         return 0;
525 }
526
527 static int record__auxtrace_init(struct record *rec __maybe_unused)
528 {
529         return 0;
530 }
531
532 #endif
533
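/*
 * Memory map the ring buffers for all events in the evlist, honouring the
 * mmap page, AUX area and affinity settings, and print a hint about
 * perf_event_mlock_kb when the mapping fails with EPERM.
 */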
534 static int record__mmap_evlist(struct record *rec,
535                                struct perf_evlist *evlist)
536 {
537         struct record_opts *opts = &rec->opts;
538         char msg[512];
539
540         if (opts->affinity != PERF_AFFINITY_SYS)
541                 cpu__setup_cpunode_map();
542
543         if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
544                                  opts->auxtrace_mmap_pages,
545                                  opts->auxtrace_snapshot_mode,
546                                  opts->nr_cblocks, opts->affinity) < 0) {
547                 if (errno == EPERM) {
548                         pr_err("Permission error mapping pages.\n"
549                                "Consider increasing "
550                                "/proc/sys/kernel/perf_event_mlock_kb,\n"
551                                "or try again with a smaller value of -m/--mmap_pages.\n"
552                                "(current value: %u,%u)\n",
553                                opts->mmap_pages, opts->auxtrace_mmap_pages);
554                         return -errno;
555                 } else {
556                         pr_err("failed to mmap with %d (%s)\n", errno,
557                                 str_error_r(errno, msg, sizeof(msg)));
558                         if (errno)
559                                 return -errno;
560                         else
561                                 return -EINVAL;
562                 }
563         }
564         return 0;
565 }
566
567 static int record__mmap(struct record *rec)
568 {
569         return record__mmap_evlist(rec, rec->evlist);
570 }
571
572 static int record__open(struct record *rec)
573 {
574         char msg[BUFSIZ];
575         struct perf_evsel *pos;
576         struct perf_evlist *evlist = rec->evlist;
577         struct perf_session *session = rec->session;
578         struct record_opts *opts = &rec->opts;
579         int rc = 0;
580
581         /*
582          * For initial_delay we need to add a dummy event so that we can track
583          * PERF_RECORD_MMAP while we wait for the initial delay to enable the
584          * real events, the ones asked for by the user.
585          */
586         if (opts->initial_delay) {
587                 if (perf_evlist__add_dummy(evlist))
588                         return -ENOMEM;
589
590                 pos = perf_evlist__first(evlist);
591                 pos->tracking = 0;
592                 pos = perf_evlist__last(evlist);
593                 pos->tracking = 1;
594                 pos->attr.enable_on_exec = 1;
595         }
596
597         perf_evlist__config(evlist, opts, &callchain_param);
598
599         evlist__for_each_entry(evlist, pos) {
600 try_again:
601                 if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
602                         if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
603                                 if (verbose > 0)
604                                         ui__warning("%s\n", msg);
605                                 goto try_again;
606                         }
607                         if ((errno == EINVAL || errno == EBADF) &&
608                             pos->leader != pos &&
609                             pos->weak_group) {
610                                 pos = perf_evlist__reset_weak_group(evlist, pos);
611                                 goto try_again;
612                         }
613                         rc = -errno;
614                         perf_evsel__open_strerror(pos, &opts->target,
615                                                   errno, msg, sizeof(msg));
616                         ui__error("%s\n", msg);
617                         goto out;
618                 }
619
620                 pos->supported = true;
621         }
622
623         if (perf_evlist__apply_filters(evlist, &pos)) {
624                 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
625                         pos->filter, perf_evsel__name(pos), errno,
626                         str_error_r(errno, msg, sizeof(msg)));
627                 rc = -1;
628                 goto out;
629         }
630
631         rc = record__mmap(rec);
632         if (rc)
633                 goto out;
634
635         session->evlist = evlist;
636         perf_session__set_id_hdr_size(session);
637 out:
638         return rc;
639 }
640
641 static int process_sample_event(struct perf_tool *tool,
642                                 union perf_event *event,
643                                 struct perf_sample *sample,
644                                 struct perf_evsel *evsel,
645                                 struct machine *machine)
646 {
647         struct record *rec = container_of(tool, struct record, tool);
648
649         if (rec->evlist->first_sample_time == 0)
650                 rec->evlist->first_sample_time = sample->time;
651
652         rec->evlist->last_sample_time = sample->time;
653
654         if (rec->buildid_all)
655                 return 0;
656
657         rec->samples++;
658         return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
659 }
660
661 static int process_buildids(struct record *rec)
662 {
663         struct perf_session *session = rec->session;
664
665         if (perf_data__size(&rec->data) == 0)
666                 return 0;
667
668         /*
669          * During this process, it'll load the kernel map and replace
670          * dso->long_name with the real pathname it found.  In this case
671          * we prefer the vmlinux path like
672          *   /lib/modules/3.16.4/build/vmlinux
673          *
674          * rather than build-id path (in debug directory).
675          *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
676          */
677         symbol_conf.ignore_vmlinux_buildid = true;
678
679         /*
680          * If --buildid-all is given, it marks all DSOs regardless of hits,
681          * so there is no need to process samples. But if timestamp_boundary
682          * is enabled, we still need to walk all samples to get the
683          * timestamps of the first/last samples.
684          */
685         if (rec->buildid_all && !rec->timestamp_boundary)
686                 rec->tool.sample = NULL;
687
688         return perf_session__process_events(session);
689 }
690
691 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
692 {
693         int err;
694         struct perf_tool *tool = data;
695         /*
696          * For the guest kernel, when processing the record & report
697          * subcommands, we arrange module mmaps prior to the guest kernel
698          * mmap and trigger a DSO preload, because by default guest module
699          * symbols are loaded from guest kallsyms instead of
700          * /lib/modules/XXX/XXX. This avoids missing symbols when the
701          * first address is in a module instead of in the guest kernel.
702          */
703         err = perf_event__synthesize_modules(tool, process_synthesized_event,
704                                              machine);
705         if (err < 0)
706                 pr_err("Couldn't record guest kernel [%d]'s reference"
707                        " relocation symbol.\n", machine->pid);
708
709         /*
710          * We use _stext for the guest kernel because the guest kernel's
711          * /proc/kallsyms sometimes has no _text.
712          */
713         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
714                                                  machine);
715         if (err < 0)
716                 pr_err("Couldn't record guest kernel [%d]'s reference"
717                        " relocation symbol.\n", machine->pid);
718 }
719
720 static struct perf_event_header finished_round_event = {
721         .size = sizeof(struct perf_event_header),
722         .type = PERF_RECORD_FINISHED_ROUND,
723 };
724
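/*
 * With --affinity=node or --affinity=cpu, move the recording thread onto
 * the CPU set associated with the map it is about to read.
 */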
725 static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
726 {
727         if (rec->opts.affinity != PERF_AFFINITY_SYS &&
728             !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
729                 CPU_ZERO(&rec->affinity_mask);
730                 CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask);
731                 sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask);
732         }
733 }
734
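/*
 * Drain every mmap'ed ring buffer in the evlist (normal or overwritable),
 * pushing the data to the output file (via aio when enabled), reading AUX
 * area buffers along the way, and write a PERF_RECORD_FINISHED_ROUND marker
 * if anything was written.
 */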
735 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
736                                     bool overwrite)
737 {
738         u64 bytes_written = rec->bytes_written;
739         int i;
740         int rc = 0;
741         struct perf_mmap *maps;
742         int trace_fd = rec->data.file.fd;
743         off_t off;
744
745         if (!evlist)
746                 return 0;
747
748         maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
749         if (!maps)
750                 return 0;
751
752         if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
753                 return 0;
754
755         if (record__aio_enabled(rec))
756                 off = record__aio_get_pos(trace_fd);
757
758         for (i = 0; i < evlist->nr_mmaps; i++) {
759                 struct perf_mmap *map = &maps[i];
760
761                 if (map->base) {
762                         record__adjust_affinity(rec, map);
763                         if (!record__aio_enabled(rec)) {
764                                 if (perf_mmap__push(map, rec, record__pushfn) != 0) {
765                                         rc = -1;
766                                         goto out;
767                                 }
768                         } else {
769                                 int idx;
770                                 /*
771                                  * Call record__aio_sync() to wait until the map->data buffer
772                                  * becomes available after the previous aio write request.
773                                  */
774                                 idx = record__aio_sync(map, false);
775                                 if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
776                                         record__aio_set_pos(trace_fd, off);
777                                         rc = -1;
778                                         goto out;
779                                 }
780                         }
781                 }
782
783                 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
784                     record__auxtrace_mmap_read(rec, map) != 0) {
785                         rc = -1;
786                         goto out;
787                 }
788         }
789
790         if (record__aio_enabled(rec))
791                 record__aio_set_pos(trace_fd, off);
792
793         /*
794          * Mark the round finished in case we wrote
795          * at least one event.
796          */
797         if (bytes_written != rec->bytes_written)
798                 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
799
800         if (overwrite)
801                 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
802 out:
803         return rc;
804 }
805
806 static int record__mmap_read_all(struct record *rec)
807 {
808         int err;
809
810         err = record__mmap_read_evlist(rec, rec->evlist, false);
811         if (err)
812                 return err;
813
814         return record__mmap_read_evlist(rec, rec->evlist, true);
815 }
816
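/*
 * Start from all header features enabled and clear the ones that do not
 * apply to this session (build ids, tracing data, branch stacks, AUX
 * traces, clockid, stat).
 */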
817 static void record__init_features(struct record *rec)
818 {
819         struct perf_session *session = rec->session;
820         int feat;
821
822         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
823                 perf_header__set_feat(&session->header, feat);
824
825         if (rec->no_buildid)
826                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
827
828         if (!have_tracepoints(&rec->evlist->entries))
829                 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
830
831         if (!rec->opts.branch_stack)
832                 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
833
834         if (!rec->opts.full_auxtrace)
835                 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
836
837         if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
838                 perf_header__clear_feat(&session->header, HEADER_CLOCKID);
839
840         perf_header__clear_feat(&session->header, HEADER_STAT);
841 }
842
843 static void
844 record__finish_output(struct record *rec)
845 {
846         struct perf_data *data = &rec->data;
847         int fd = perf_data__fd(data);
848
849         if (data->is_pipe)
850                 return;
851
852         rec->session->header.data_size += rec->bytes_written;
853         data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
854
855         if (!rec->no_buildid) {
856                 process_buildids(rec);
857
858                 if (rec->buildid_all)
859                         dsos__hit_all(rec->session);
860         }
861         perf_session__write_header(rec->session, rec->evlist, fd, true);
862
863         return;
864 }
865
866 static int record__synthesize_workload(struct record *rec, bool tail)
867 {
868         int err;
869         struct thread_map *thread_map;
870
871         if (rec->opts.tail_synthesize != tail)
872                 return 0;
873
874         thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
875         if (thread_map == NULL)
876                 return -1;
877
878         err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
879                                                  process_synthesized_event,
880                                                  &rec->session->machines.host,
881                                                  rec->opts.sample_address);
882         thread_map__put(thread_map);
883         return err;
884 }
885
886 static int record__synthesize(struct record *rec, bool tail);
887
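/*
 * Finish the current output file and switch to a new, timestamped
 * perf.data file, re-synthesizing the tracking events for the new file
 * unless called at exit. Returns the new file descriptor or a negative
 * error code.
 */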
888 static int
889 record__switch_output(struct record *rec, bool at_exit)
890 {
891         struct perf_data *data = &rec->data;
892         int fd, err;
893
894         /* Same size as a timestamp like "2015122520103046" */
895         char timestamp[] = "InvalidTimestamp";
896
897         record__aio_mmap_read_sync(rec);
898
899         record__synthesize(rec, true);
900         if (target__none(&rec->opts.target))
901                 record__synthesize_workload(rec, true);
902
903         rec->samples = 0;
904         record__finish_output(rec);
905         err = fetch_current_timestamp(timestamp, sizeof(timestamp));
906         if (err) {
907                 pr_err("Failed to get current timestamp\n");
908                 return -EINVAL;
909         }
910
911         fd = perf_data__switch(data, timestamp,
912                                     rec->session->header.data_offset,
913                                     at_exit);
914         if (fd >= 0 && !at_exit) {
915                 rec->bytes_written = 0;
916                 rec->session->header.data_size = 0;
917         }
918
919         if (!quiet)
920                 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
921                         data->path, timestamp);
922
923         /* Output tracking events */
924         if (!at_exit) {
925                 record__synthesize(rec, false);
926
927                 /*
928                  * In 'perf record --switch-output' without -a,
929                  * record__synthesize() in record__switch_output() won't
930                  * generate tracking events because there's no thread_map
931                  * in the evlist, which causes the newly created perf.data
932                  * to lack map and comm information.
933                  * Create a fake thread_map and call
934                  * perf_event__synthesize_thread_map() directly for those events.
935                  */
936                 if (target__none(&rec->opts.target))
937                         record__synthesize_workload(rec, false);
938         }
939         return fd;
940 }
941
942 static volatile int workload_exec_errno;
943
944 /*
945  * perf_evlist__prepare_workload will send a SIGUSR1
946  * if the fork fails, since we asked for it by setting its
947  * want_signal to true.
948  */
949 static void workload_exec_failed_signal(int signo __maybe_unused,
950                                         siginfo_t *info,
951                                         void *ucontext __maybe_unused)
952 {
953         workload_exec_errno = info->si_value.sival_int;
954         done = 1;
955         child_finished = 1;
956 }
957
958 static void snapshot_sig_handler(int sig);
959 static void alarm_sig_handler(int sig);
960
961 int __weak
962 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
963                             struct perf_tool *tool __maybe_unused,
964                             perf_event__handler_t process __maybe_unused,
965                             struct machine *machine __maybe_unused)
966 {
967         return 0;
968 }
969
970 static const struct perf_event_mmap_page *
971 perf_evlist__pick_pc(struct perf_evlist *evlist)
972 {
973         if (evlist) {
974                 if (evlist->mmap && evlist->mmap[0].base)
975                         return evlist->mmap[0].base;
976                 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
977                         return evlist->overwrite_mmap[0].base;
978         }
979         return NULL;
980 }
981
982 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
983 {
984         const struct perf_event_mmap_page *pc;
985
986         pc = perf_evlist__pick_pc(rec->evlist);
987         if (pc)
988                 return pc;
989         return NULL;
990 }
991
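/*
 * Synthesize the side-band events (attrs, features, tracing data, kernel
 * and module mmaps, thread and cpu maps, bpf events, existing threads)
 * that perf report needs in addition to the sampled data.
 */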
992 static int record__synthesize(struct record *rec, bool tail)
993 {
994         struct perf_session *session = rec->session;
995         struct machine *machine = &session->machines.host;
996         struct perf_data *data = &rec->data;
997         struct record_opts *opts = &rec->opts;
998         struct perf_tool *tool = &rec->tool;
999         int fd = perf_data__fd(data);
1000         int err = 0;
1001
1002         if (rec->opts.tail_synthesize != tail)
1003                 return 0;
1004
1005         if (data->is_pipe) {
1006                 /*
1007                  * We need to synthesize events first, because some
1008                  * features work on top of them (on the report side).
1009                  */
1010                 err = perf_event__synthesize_attrs(tool, rec->evlist,
1011                                                    process_synthesized_event);
1012                 if (err < 0) {
1013                         pr_err("Couldn't synthesize attrs.\n");
1014                         goto out;
1015                 }
1016
1017                 err = perf_event__synthesize_features(tool, session, rec->evlist,
1018                                                       process_synthesized_event);
1019                 if (err < 0) {
1020                         pr_err("Couldn't synthesize features.\n");
1021                         return err;
1022                 }
1023
1024                 if (have_tracepoints(&rec->evlist->entries)) {
1025                         /*
1026                          * FIXME err <= 0 here actually means that
1027                          * there were no tracepoints so it's not really
1028                          * an error, just that we don't need to
1029                          * synthesize anything.  We really have to
1030                          * return this more properly and also
1031                          * propagate the errors that now call die()
1032                          */
1033                         err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
1034                                                                   process_synthesized_event);
1035                         if (err <= 0) {
1036                                 pr_err("Couldn't record tracing data.\n");
1037                                 goto out;
1038                         }
1039                         rec->bytes_written += err;
1040                 }
1041         }
1042
1043         err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1044                                           process_synthesized_event, machine);
1045         if (err)
1046                 goto out;
1047
1048         if (rec->opts.full_auxtrace) {
1049                 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1050                                         session, process_synthesized_event);
1051                 if (err)
1052                         goto out;
1053         }
1054
1055         if (!perf_evlist__exclude_kernel(rec->evlist)) {
1056                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1057                                                          machine);
1058                 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1059                                    "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1060                                    "Check /proc/kallsyms permission or run as root.\n");
1061
1062                 err = perf_event__synthesize_modules(tool, process_synthesized_event,
1063                                                      machine);
1064                 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1065                                    "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1066                                    "Check /proc/modules permission or run as root.\n");
1067         }
1068
1069         if (perf_guest) {
1070                 machines__process_guests(&session->machines,
1071                                          perf_event__synthesize_guest_os, tool);
1072         }
1073
1074         err = perf_event__synthesize_extra_attr(&rec->tool,
1075                                                 rec->evlist,
1076                                                 process_synthesized_event,
1077                                                 data->is_pipe);
1078         if (err)
1079                 goto out;
1080
1081         err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
1082                                                  process_synthesized_event,
1083                                                 NULL);
1084         if (err < 0) {
1085                 pr_err("Couldn't synthesize thread map.\n");
1086                 return err;
1087         }
1088
1089         err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
1090                                              process_synthesized_event, NULL);
1091         if (err < 0) {
1092                 pr_err("Couldn't synthesize cpu map.\n");
1093                 return err;
1094         }
1095
1096         err = perf_event__synthesize_bpf_events(tool, process_synthesized_event,
1097                                                 machine, opts);
1098         if (err < 0)
1099                 pr_warning("Couldn't synthesize bpf events.\n");
1100
1101         err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
1102                                             process_synthesized_event, opts->sample_address,
1103                                             1);
1104 out:
1105         return err;
1106 }
1107
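/*
 * Main body of 'perf record': set up the session, start the workload if one
 * was given, then loop reading the mmap'ed ring buffers until the workload
 * exits or the user interrupts the record.
 */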
1108 static int __cmd_record(struct record *rec, int argc, const char **argv)
1109 {
1110         int err;
1111         int status = 0;
1112         unsigned long waking = 0;
1113         const bool forks = argc > 0;
1114         struct perf_tool *tool = &rec->tool;
1115         struct record_opts *opts = &rec->opts;
1116         struct perf_data *data = &rec->data;
1117         struct perf_session *session;
1118         bool disabled = false, draining = false;
1119         int fd;
1120
1121         atexit(record__sig_exit);
1122         signal(SIGCHLD, sig_handler);
1123         signal(SIGINT, sig_handler);
1124         signal(SIGTERM, sig_handler);
1125         signal(SIGSEGV, sigsegv_handler);
1126
1127         if (rec->opts.record_namespaces)
1128                 tool->namespace_events = true;
1129
1130         if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
1131                 signal(SIGUSR2, snapshot_sig_handler);
1132                 if (rec->opts.auxtrace_snapshot_mode)
1133                         trigger_on(&auxtrace_snapshot_trigger);
1134                 if (rec->switch_output.enabled)
1135                         trigger_on(&switch_output_trigger);
1136         } else {
1137                 signal(SIGUSR2, SIG_IGN);
1138         }
1139
1140         session = perf_session__new(data, false, tool);
1141         if (session == NULL) {
1142                 pr_err("Perf session creation failed.\n");
1143                 return -1;
1144         }
1145
1146         fd = perf_data__fd(data);
1147         rec->session = session;
1148
1149         record__init_features(rec);
1150
1151         if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1152                 session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
1153
1154         if (forks) {
1155                 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
1156                                                     argv, data->is_pipe,
1157                                                     workload_exec_failed_signal);
1158                 if (err < 0) {
1159                         pr_err("Couldn't run the workload!\n");
1160                         status = err;
1161                         goto out_delete_session;
1162                 }
1163         }
1164
1165         /*
1166          * If we have just a single event and are sending data
1167          * through a pipe, we need to force id allocation,
1168          * because we synthesize the event name through the pipe
1169          * and need the id for that.
1170          */
1171         if (data->is_pipe && rec->evlist->nr_entries == 1)
1172                 rec->opts.sample_id = true;
1173
1174         if (record__open(rec) != 0) {
1175                 err = -1;
1176                 goto out_child;
1177         }
1178
1179         err = bpf__apply_obj_config();
1180         if (err) {
1181                 char errbuf[BUFSIZ];
1182
1183                 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
1184                 pr_err("ERROR: Apply config to BPF failed: %s\n",
1185                          errbuf);
1186                 goto out_child;
1187         }
1188
1189         /*
1190          * Normally perf_session__new would do this, but it doesn't have the
1191          * evlist.
1192          */
1193         if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
1194                 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
1195                 rec->tool.ordered_events = false;
1196         }
1197
1198         if (!rec->evlist->nr_groups)
1199                 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
1200
1201         if (data->is_pipe) {
1202                 err = perf_header__write_pipe(fd);
1203                 if (err < 0)
1204                         goto out_child;
1205         } else {
1206                 err = perf_session__write_header(session, rec->evlist, fd, false);
1207                 if (err < 0)
1208                         goto out_child;
1209         }
1210
1211         if (!rec->no_buildid
1212             && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
1213                 pr_err("Couldn't generate buildids. "
1214                        "Use --no-buildid to profile anyway.\n");
1215                 err = -1;
1216                 goto out_child;
1217         }
1218
1219         err = record__synthesize(rec, false);
1220         if (err < 0)
1221                 goto out_child;
1222
1223         if (rec->realtime_prio) {
1224                 struct sched_param param;
1225
1226                 param.sched_priority = rec->realtime_prio;
1227                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1228                         pr_err("Could not set realtime priority.\n");
1229                         err = -1;
1230                         goto out_child;
1231                 }
1232         }
1233
1234         /*
1235          * When perf is starting the traced process, all the events
1236          * (apart from group members) have enable_on_exec=1 set,
1237          * so don't spoil it by prematurely enabling them.
1238          */
1239         if (!target__none(&opts->target) && !opts->initial_delay)
1240                 perf_evlist__enable(rec->evlist);
1241
1242         /*
1243          * Let the child rip
1244          */
1245         if (forks) {
1246                 struct machine *machine = &session->machines.host;
1247                 union perf_event *event;
1248                 pid_t tgid;
1249
1250                 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1251                 if (event == NULL) {
1252                         err = -ENOMEM;
1253                         goto out_child;
1254                 }
1255
1256                 /*
1257                  * Some H/W events are generated before the COMM event,
1258                  * which is emitted during exec(), so perf script
1259                  * cannot see the correct process name for those events.
1260                  * Synthesize a COMM event to prevent that.
1261                  */
1262                 tgid = perf_event__synthesize_comm(tool, event,
1263                                                    rec->evlist->workload.pid,
1264                                                    process_synthesized_event,
1265                                                    machine);
1266                 free(event);
1267
1268                 if (tgid == -1)
1269                         goto out_child;
1270
1271                 event = malloc(sizeof(event->namespaces) +
1272                                (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1273                                machine->id_hdr_size);
1274                 if (event == NULL) {
1275                         err = -ENOMEM;
1276                         goto out_child;
1277                 }
1278
1279                 /*
1280                  * Synthesize NAMESPACES event for the command specified.
1281                  */
1282                 perf_event__synthesize_namespaces(tool, event,
1283                                                   rec->evlist->workload.pid,
1284                                                   tgid, process_synthesized_event,
1285                                                   machine);
1286                 free(event);
1287
1288                 perf_evlist__start_workload(rec->evlist);
1289         }
1290
1291         if (opts->initial_delay) {
1292                 usleep(opts->initial_delay * USEC_PER_MSEC);
1293                 perf_evlist__enable(rec->evlist);
1294         }
1295
1296         trigger_ready(&auxtrace_snapshot_trigger);
1297         trigger_ready(&switch_output_trigger);
1298         perf_hooks__invoke_record_start();
1299         for (;;) {
1300                 unsigned long long hits = rec->samples;
1301
1302                 /*
1303                  * rec->evlist->bkw_mmap_state may be
1304                  * BKW_MMAP_EMPTY here: when done == true and
1305                  * hits != rec->samples in the previous round.
1306                  *
1307                  * perf_evlist__toggle_bkw_mmap() ensures we never
1308                  * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1309                  */
1310                 if (trigger_is_hit(&switch_output_trigger) || done || draining)
1311                         perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1312
1313                 if (record__mmap_read_all(rec) < 0) {
1314                         trigger_error(&auxtrace_snapshot_trigger);
1315                         trigger_error(&switch_output_trigger);
1316                         err = -1;
1317                         goto out_child;
1318                 }
1319
1320                 if (auxtrace_record__snapshot_started) {
1321                         auxtrace_record__snapshot_started = 0;
1322                         if (!trigger_is_error(&auxtrace_snapshot_trigger))
1323                                 record__read_auxtrace_snapshot(rec);
1324                         if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1325                                 pr_err("AUX area tracing snapshot failed\n");
1326                                 err = -1;
1327                                 goto out_child;
1328                         }
1329                 }
1330
1331                 if (trigger_is_hit(&switch_output_trigger)) {
1332                         /*
1333                          * If switch_output_trigger is hit, the data in the
1334                          * overwritable ring buffer should have been collected,
1335                          * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1336                          *
1337                          * If SIGUSR2 was raised after or during record__mmap_read_all(),
1338                          * record__mmap_read_all() didn't collect data from the
1339                          * overwritable ring buffer. Read again.
1340                          */
1341                         if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1342                                 continue;
1343                         trigger_ready(&switch_output_trigger);
1344
1345                         /*
1346                          * Re-enable events in the overwrite ring buffer after
1347                          * record__mmap_read_all(): we should have collected
1348                          * data from it.
1349                          */
1350                         perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1351
1352                         if (!quiet)
1353                                 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1354                                         waking);
1355                         waking = 0;
1356                         fd = record__switch_output(rec, false);
1357                         if (fd < 0) {
1358                                 pr_err("Failed to switch to new file\n");
1359                                 trigger_error(&switch_output_trigger);
1360                                 err = fd;
1361                                 goto out_child;
1362                         }
1363
1364                         /* re-arm the alarm */
1365                         if (rec->switch_output.time)
1366                                 alarm(rec->switch_output.time);
1367                 }
1368
1369                 if (hits == rec->samples) {
1370                         if (done || draining)
1371                                 break;
1372                         err = perf_evlist__poll(rec->evlist, -1);
1373                         /*
1374                          * Propagate the error only if there is any. Ignore a positive
1375                          * number of returned events and interrupt errors.
1376                          */
1377                         if (err > 0 || (err < 0 && errno == EINTR))
1378                                 err = 0;
1379                         waking++;
1380
1381                         if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1382                                 draining = true;
1383                 }
1384
1385                 /*
1386                  * When perf is starting the traced process, the events die
1387                  * with the process at the end and we wait for that. Thus there
1388                  * is no need to disable events in this case.
1389                  */
1390                 if (done && !disabled && !target__none(&opts->target)) {
1391                         trigger_off(&auxtrace_snapshot_trigger);
1392                         perf_evlist__disable(rec->evlist);
1393                         disabled = true;
1394                 }
1395         }
1396         trigger_off(&auxtrace_snapshot_trigger);
1397         trigger_off(&switch_output_trigger);
1398
1399         if (forks && workload_exec_errno) {
1400                 char msg[STRERR_BUFSIZE];
1401                 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1402                 pr_err("Workload failed: %s\n", emsg);
1403                 err = -1;
1404                 goto out_child;
1405         }
1406
1407         if (!quiet)
1408                 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1409
1410         if (target__none(&rec->opts.target))
1411                 record__synthesize_workload(rec, true);
1412
1413 out_child:
1414         record__aio_mmap_read_sync(rec);
1415
1416         if (forks) {
1417                 int exit_status;
1418
1419                 if (!child_finished)
1420                         kill(rec->evlist->workload.pid, SIGTERM);
1421
1422                 wait(&exit_status);
1423
1424                 if (err < 0)
1425                         status = err;
1426                 else if (WIFEXITED(exit_status))
1427                         status = WEXITSTATUS(exit_status);
1428                 else if (WIFSIGNALED(exit_status))
1429                         signr = WTERMSIG(exit_status);
1430         } else
1431                 status = err;
1432
1433         record__synthesize(rec, true);
1434         /* this will be recalculated during process_buildids() */
1435         rec->samples = 0;
1436
1437         if (!err) {
1438                 if (!rec->timestamp_filename) {
1439                         record__finish_output(rec);
1440                 } else {
1441                         fd = record__switch_output(rec, true);
1442                         if (fd < 0) {
1443                                 status = fd;
1444                                 goto out_delete_session;
1445                         }
1446                 }
1447         }
1448
1449         perf_hooks__invoke_record_end();
1450
1451         if (!err && !quiet) {
1452                 char samples[128];
1453                 const char *postfix = rec->timestamp_filename ?
1454                                         ".<timestamp>" : "";
1455
1456                 if (rec->samples && !rec->opts.full_auxtrace)
1457                         scnprintf(samples, sizeof(samples),
1458                                   " (%" PRIu64 " samples)", rec->samples);
1459                 else
1460                         samples[0] = '\0';
1461
1462                 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1463                         perf_data__size(data) / 1024.0 / 1024.0,
1464                         data->path, postfix, samples);
1465         }
1466
1467 out_delete_session:
1468         perf_session__delete(session);
1469         return status;
1470 }
1471
1472 static void callchain_debug(struct callchain_param *callchain)
1473 {
1474         static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1475
1476         pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1477
1478         if (callchain->record_mode == CALLCHAIN_DWARF)
1479                 pr_debug("callchain: stack dump size %d\n",
1480                          callchain->dump_size);
1481 }
1482
1483 int record_opts__parse_callchain(struct record_opts *record,
1484                                  struct callchain_param *callchain,
1485                                  const char *arg, bool unset)
1486 {
1487         int ret;
1488         callchain->enabled = !unset;
1489
1490         /* --no-call-graph */
1491         if (unset) {
1492                 callchain->record_mode = CALLCHAIN_NONE;
1493                 pr_debug("callchain: disabled\n");
1494                 return 0;
1495         }
1496
1497         ret = parse_callchain_record_opt(arg, callchain);
1498         if (!ret) {
1499                 /* Enable data address sampling for DWARF unwind. */
1500                 if (callchain->record_mode == CALLCHAIN_DWARF)
1501                         record->sample_address = true;
1502                 callchain_debug(callchain);
1503         }
1504
1505         return ret;
1506 }
1507
1508 int record_parse_callchain_opt(const struct option *opt,
1509                                const char *arg,
1510                                int unset)
1511 {
1512         return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1513 }
1514
1515 int record_callchain_opt(const struct option *opt,
1516                          const char *arg __maybe_unused,
1517                          int unset __maybe_unused)
1518 {
1519         struct callchain_param *callchain = opt->value;
1520
1521         callchain->enabled = true;
1522
1523         if (callchain->record_mode == CALLCHAIN_NONE)
1524                 callchain->record_mode = CALLCHAIN_FP;
1525
1526         callchain_debug(callchain);
1527         return 0;
1528 }
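     /*
      * Added examples (sketch, not part of the original source): plain '-g'
      * goes through record_callchain_opt() and defaults to frame-pointer
      * unwinding, while '--call-graph' takes an explicit record mode and,
      * for DWARF, an optional stack dump size:
      *
      *   perf record -g -- ./workload
      *   perf record --call-graph dwarf,8192 -- ./workload
      */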
1529
1530 static int perf_record_config(const char *var, const char *value, void *cb)
1531 {
1532         struct record *rec = cb;
1533
1534         if (!strcmp(var, "record.build-id")) {
1535                 if (!strcmp(value, "cache"))
1536                         rec->no_buildid_cache = false;
1537                 else if (!strcmp(value, "no-cache"))
1538                         rec->no_buildid_cache = true;
1539                 else if (!strcmp(value, "skip"))
1540                         rec->no_buildid = true;
1541                 else
1542                         return -1;
1543                 return 0;
1544         }
1545         if (!strcmp(var, "record.call-graph")) {
1546                 var = "call-graph.record-mode";
1547                 return perf_default_config(var, value, cb);
1548         }
1549 #ifdef HAVE_AIO_SUPPORT
1550         if (!strcmp(var, "record.aio")) {
1551                 rec->opts.nr_cblocks = strtol(value, NULL, 0);
1552                 if (!rec->opts.nr_cblocks)
1553                         rec->opts.nr_cblocks = nr_cblocks_default;
1554         }
1555 #endif
1556
1557         return 0;
1558 }
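     /*
      * Added example (sketch, not part of the original source): a
      * ~/.perfconfig fragment handled by perf_record_config() above; the
      * 'aio' key is only honoured when built with HAVE_AIO_SUPPORT:
      *
      *   [record]
      *           build-id = no-cache
      *           call-graph = dwarf
      *           aio = 2
      */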
1559
1560 struct clockid_map {
1561         const char *name;
1562         int clockid;
1563 };
1564
1565 #define CLOCKID_MAP(n, c)       \
1566         { .name = n, .clockid = (c), }
1567
1568 #define CLOCKID_END     { .name = NULL, }
1569
1570
1571 /*
1572  * Add the missing ones; we need to build on many distros...
1573  */
1574 #ifndef CLOCK_MONOTONIC_RAW
1575 #define CLOCK_MONOTONIC_RAW 4
1576 #endif
1577 #ifndef CLOCK_BOOTTIME
1578 #define CLOCK_BOOTTIME 7
1579 #endif
1580 #ifndef CLOCK_TAI
1581 #define CLOCK_TAI 11
1582 #endif
1583
1584 static const struct clockid_map clockids[] = {
1585         /* available for all events, NMI safe */
1586         CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1587         CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1588
1589         /* available for some events */
1590         CLOCKID_MAP("realtime", CLOCK_REALTIME),
1591         CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1592         CLOCKID_MAP("tai", CLOCK_TAI),
1593
1594         /* available for the lazy */
1595         CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1596         CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1597         CLOCKID_MAP("real", CLOCK_REALTIME),
1598         CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1599
1600         CLOCKID_END,
1601 };
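     /*
      * Added examples (illustrative, not part of the original source):
      * parse_clockid() below also accepts a raw clockid number and an
      * optional "CLOCK_" prefix, so these are all equivalent ways to pick
      * a clock for the event timestamps:
      *
      *   perf record -k mono ...
      *   perf record -k CLOCK_MONOTONIC ...
      *   perf record -k 1 ...
      */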
1602
1603 static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
1604 {
1605         struct timespec res;
1606
1607         *res_ns = 0;
1608         if (!clock_getres(clk_id, &res))
1609                 *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
1610         else
1611                 pr_warning("WARNING: Failed to determine specified clock resolution.\n");
1612
1613         return 0;
1614 }
1615
1616 static int parse_clockid(const struct option *opt, const char *str, int unset)
1617 {
1618         struct record_opts *opts = (struct record_opts *)opt->value;
1619         const struct clockid_map *cm;
1620         const char *ostr = str;
1621
1622         if (unset) {
1623                 opts->use_clockid = 0;
1624                 return 0;
1625         }
1626
1627         /* no arg passed */
1628         if (!str)
1629                 return 0;
1630
1631         /* no setting it twice */
1632         if (opts->use_clockid)
1633                 return -1;
1634
1635         opts->use_clockid = true;
1636
1637         /* if it's a number, we're done */
1638         if (sscanf(str, "%d", &opts->clockid) == 1)
1639                 return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
1640
1641         /* allow a "CLOCK_" prefix to the name */
1642         if (!strncasecmp(str, "CLOCK_", 6))
1643                 str += 6;
1644
1645         for (cm = clockids; cm->name; cm++) {
1646                 if (!strcasecmp(str, cm->name)) {
1647                         opts->clockid = cm->clockid;
1648                         return get_clockid_res(opts->clockid,
1649                                                &opts->clockid_res_ns);
1650                 }
1651         }
1652
1653         opts->use_clockid = false;
1654         ui__warning("unknown clockid %s, check man page\n", ostr);
1655         return -1;
1656 }
1657
1658 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
1659 {
1660         struct record_opts *opts = (struct record_opts *)opt->value;
1661
1662         if (unset || !str)
1663                 return 0;
1664
1665         if (!strcasecmp(str, "node"))
1666                 opts->affinity = PERF_AFFINITY_NODE;
1667         else if (!strcasecmp(str, "cpu"))
1668                 opts->affinity = PERF_AFFINITY_CPU;
1669
1670         return 0;
1671 }
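     /*
      * Added note (not part of the original source): the default affinity
      * stays PERF_AFFINITY_SYS (set in cmd_record()); "node" and "cpu"
      * select the NODE and CPU modes reported via the affinity_tags[]
      * debug output, e.g. 'perf record -v --affinity=node ...'.
      */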
1672
1673 static int record__parse_mmap_pages(const struct option *opt,
1674                                     const char *str,
1675                                     int unset __maybe_unused)
1676 {
1677         struct record_opts *opts = opt->value;
1678         char *s, *p;
1679         unsigned int mmap_pages;
1680         int ret;
1681
1682         if (!str)
1683                 return -EINVAL;
1684
1685         s = strdup(str);
1686         if (!s)
1687                 return -ENOMEM;
1688
1689         p = strchr(s, ',');
1690         if (p)
1691                 *p = '\0';
1692
1693         if (*s) {
1694                 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1695                 if (ret)
1696                         goto out_free;
1697                 opts->mmap_pages = mmap_pages;
1698         }
1699
1700         if (!p) {
1701                 ret = 0;
1702                 goto out_free;
1703         }
1704
1705         ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1706         if (ret)
1707                 goto out_free;
1708
1709         opts->auxtrace_mmap_pages = mmap_pages;
1710
1711 out_free:
1712         free(s);
1713         return ret;
1714 }
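     /*
      * Added examples (illustrative, not part of the original source):
      *
      *   perf record -m 512 ...       (512 data pages)
      *   perf record -m 512,128 ...   (512 data pages, 128 AUX tracing pages)
      *
      * Each part is parsed by __perf_evlist__parse_mmap_pages(), which also
      * accepts sizes with unit suffixes such as '16M'.
      */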
1715
1716 static void switch_output_size_warn(struct record *rec)
1717 {
1718         u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1719         struct switch_output *s = &rec->switch_output;
1720
1721         wakeup_size /= 2;
1722
1723         if (s->size < wakeup_size) {
1724                 char buf[100];
1725
1726                 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1727                 pr_warning("WARNING: switch-output data size is lower than the "
1728                            "wakeup kernel buffer size (%s), "
1729                            "expect bigger perf.data sizes\n", buf);
1730         }
1731 }
1732
1733 static int switch_output_setup(struct record *rec)
1734 {
1735         struct switch_output *s = &rec->switch_output;
1736         static struct parse_tag tags_size[] = {
1737                 { .tag  = 'B', .mult = 1       },
1738                 { .tag  = 'K', .mult = 1 << 10 },
1739                 { .tag  = 'M', .mult = 1 << 20 },
1740                 { .tag  = 'G', .mult = 1 << 30 },
1741                 { .tag  = 0 },
1742         };
1743         static struct parse_tag tags_time[] = {
1744                 { .tag  = 's', .mult = 1        },
1745                 { .tag  = 'm', .mult = 60       },
1746                 { .tag  = 'h', .mult = 60*60    },
1747                 { .tag  = 'd', .mult = 60*60*24 },
1748                 { .tag  = 0 },
1749         };
1750         unsigned long val;
1751
1752         if (!s->set)
1753                 return 0;
1754
1755         if (!strcmp(s->str, "signal")) {
1756                 s->signal = true;
1757                 pr_debug("switch-output with SIGUSR2 signal\n");
1758                 goto enabled;
1759         }
1760
1761         val = parse_tag_value(s->str, tags_size);
1762         if (val != (unsigned long) -1) {
1763                 s->size = val;
1764                 pr_debug("switch-output with %s size threshold\n", s->str);
1765                 goto enabled;
1766         }
1767
1768         val = parse_tag_value(s->str, tags_time);
1769         if (val != (unsigned long) -1) {
1770                 s->time = val;
1771                 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1772                          s->str, s->time);
1773                 goto enabled;
1774         }
1775
1776         return -1;
1777
1778 enabled:
1779         rec->timestamp_filename = true;
1780         s->enabled              = true;
1781
1782         if (s->size && !rec->opts.no_buffering)
1783                 switch_output_size_warn(rec);
1784
1785         return 0;
1786 }
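     /*
      * Added examples of --switch-output values matched above (illustrative,
      * not part of the original source):
      *
      *   perf record --switch-output ...        (rotate on SIGUSR2, "signal")
      *   perf record --switch-output=2G ...     (rotate every 2G of data)
      *   perf record --switch-output=10m ...    (rotate every 10 minutes)
      */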
1787
1788 static const char * const __record_usage[] = {
1789         "perf record [<options>] [<command>]",
1790         "perf record [<options>] -- <command> [<options>]",
1791         NULL
1792 };
1793 const char * const *record_usage = __record_usage;
1794
1795 /*
1796  * XXX Ideally this would be local to cmd_record() and passed to a record__new(),
1797  * because we need access to it in record__exit(), which is called after
1798  * cmd_record() exits; but since record_options needs to be accessible to
1799  * builtin-script, leave it here.
1800  *
1801  * At least we don't touch it directly in all the other functions here.
1802  *
1803  * Just say no to tons of global variables, sigh.
1804  */
1805 static struct record record = {
1806         .opts = {
1807                 .sample_time         = true,
1808                 .mmap_pages          = UINT_MAX,
1809                 .user_freq           = UINT_MAX,
1810                 .user_interval       = ULLONG_MAX,
1811                 .freq                = 4000,
1812                 .target              = {
1813                         .uses_mmap   = true,
1814                         .default_per_cpu = true,
1815                 },
1816         },
1817         .tool = {
1818                 .sample         = process_sample_event,
1819                 .fork           = perf_event__process_fork,
1820                 .exit           = perf_event__process_exit,
1821                 .comm           = perf_event__process_comm,
1822                 .namespaces     = perf_event__process_namespaces,
1823                 .mmap           = perf_event__process_mmap,
1824                 .mmap2          = perf_event__process_mmap2,
1825                 .ordered_events = true,
1826         },
1827 };
1828
1829 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1830         "\n\t\t\t\tDefault: fp";
1831
1832 static bool dry_run;
1833
1834 /*
1835  * XXX This will stay a global variable until we fix builtin-script.c to stop
1836  * messing with it and switch to using the library functions in perf_evlist
1837  * that came from builtin-record.c, i.e. use record_opts,
1838  * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record'
1839  * using pipes, etc.
1840  */
1841 static struct option __record_options[] = {
1842         OPT_CALLBACK('e', "event", &record.evlist, "event",
1843                      "event selector. use 'perf list' to list available events",
1844                      parse_events_option),
1845         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1846                      "event filter", parse_filter),
1847         OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1848                            NULL, "don't record events from perf itself",
1849                            exclude_perf),
1850         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1851                     "record events on existing process id"),
1852         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1853                     "record events on existing thread id"),
1854         OPT_INTEGER('r', "realtime", &record.realtime_prio,
1855                     "collect data with this RT SCHED_FIFO priority"),
1856         OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1857                     "collect data without buffering"),
1858         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1859                     "collect raw sample records from all opened counters"),
1860         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1861                             "system-wide collection from all CPUs"),
1862         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1863                     "list of cpus to monitor"),
1864         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1865         OPT_STRING('o', "output", &record.data.path, "file",
1866                     "output file name"),
1867         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1868                         &record.opts.no_inherit_set,
1869                         "child tasks do not inherit counters"),
1870         OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1871                     "synthesize non-sample events at the end of output"),
1872         OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1873         OPT_BOOLEAN(0, "bpf-event", &record.opts.bpf_event, "record bpf events"),
1874         OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
1875                     "Fail if the specified frequency can't be used"),
1876         OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
1877                      "profile at this frequency",
1878                       record__parse_freq),
1879         OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1880                      "number of mmap data pages and AUX area tracing mmap pages",
1881                      record__parse_mmap_pages),
1882         OPT_BOOLEAN(0, "group", &record.opts.group,
1883                     "put the counters into a counter group"),
1884         OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1885                            NULL, "enables call-graph recording" ,
1886                            &record_callchain_opt),
1887         OPT_CALLBACK(0, "call-graph", &record.opts,
1888                      "record_mode[,record_size]", record_callchain_help,
1889                      &record_parse_callchain_opt),
1890         OPT_INCR('v', "verbose", &verbose,
1891                     "be more verbose (show counter open errors, etc)"),
1892         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any messages"),
1893         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1894                     "per thread counts"),
1895         OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1896         OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1897                     "Record the sample physical addresses"),
1898         OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1899         OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1900                         &record.opts.sample_time_set,
1901                         "Record the sample timestamps"),
1902         OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
1903                         "Record the sample period"),
1904         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1905                     "don't sample"),
1906         OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1907                         &record.no_buildid_cache_set,
1908                         "do not update the buildid cache"),
1909         OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1910                         &record.no_buildid_set,
1911                         "do not collect buildids in perf.data"),
1912         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1913                      "monitor event in cgroup name only",
1914                      parse_cgroups),
1915         OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1916                   "ms to wait before starting measurement after program start"),
1917         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1918                    "user to profile"),
1919
1920         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1921                      "branch any", "sample any taken branches",
1922                      parse_branch_stack),
1923
1924         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1925                      "branch filter mask", "branch stack filter modes",
1926                      parse_branch_stack),
1927         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1928                     "sample by weight (on special events only)"),
1929         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1930                     "sample transaction flags (special events only)"),
1931         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1932                     "use per-thread mmaps"),
1933         OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1934                     "sample selected machine registers on interrupt,"
1935                     " use -I ? to list register names", parse_regs),
1936         OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
1937                     "sample selected machine registers on interrupt,"
1938                     " use -I ? to list register names", parse_regs),
1939         OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1940                     "Record running/enabled time of read (:S) events"),
1941         OPT_CALLBACK('k', "clockid", &record.opts,
1942         "clockid", "clockid to use for events, see clock_gettime()",
1943         parse_clockid),
1944         OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1945                           "opts", "AUX area tracing Snapshot Mode", ""),
1946         OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
1947                         "per thread proc mmap processing timeout in ms"),
1948         OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1949                     "Record namespaces events"),
1950         OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1951                     "Record context switch events"),
1952         OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1953                          "Configure all used events to run in kernel space.",
1954                          PARSE_OPT_EXCLUSIVE),
1955         OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1956                          "Configure all used events to run in user space.",
1957                          PARSE_OPT_EXCLUSIVE),
1958         OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1959                    "clang binary to use for compiling BPF scriptlets"),
1960         OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1961                    "options passed to clang when compiling BPF scriptlets"),
1962         OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1963                    "file", "vmlinux pathname"),
1964         OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1965                     "Record build-id of all DSOs regardless of hits"),
1966         OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1967                     "append timestamp to output filename"),
1968         OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
1969                     "Record timestamp boundary (time of first/last samples)"),
1970         OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
1971                           &record.switch_output.set, "signal,size,time",
1972                           "Switch output when receive SIGUSR2 or cross size,time threshold",
1973                           "signal"),
1974         OPT_BOOLEAN(0, "dry-run", &dry_run,
1975                     "Parse options then exit"),
1976 #ifdef HAVE_AIO_SUPPORT
1977         OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
1978                      &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
1979                      record__aio_parse),
1980 #endif
1981         OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
1982                      "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
1983                      record__parse_affinity),
1984         OPT_END()
1985 };
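     /*
      * Added command-line examples exercising the options above (sketch,
      * not part of the original source):
      *
      *   perf record -F 99 -a -g -- sleep 10
      *   perf record -e cycles -p 1234 -o pid.data
      *   perf record --switch-output=1G -a
      */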
1986
1987 struct option *record_options = __record_options;
1988
1989 int cmd_record(int argc, const char **argv)
1990 {
1991         int err;
1992         struct record *rec = &record;
1993         char errbuf[BUFSIZ];
1994
1995         setlocale(LC_ALL, "");
1996
1997 #ifndef HAVE_LIBBPF_SUPPORT
1998 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1999         set_nobuild('\0', "clang-path", true);
2000         set_nobuild('\0', "clang-opt", true);
2001 # undef set_nobuild
2002 #endif
2003
2004 #ifndef HAVE_BPF_PROLOGUE
2005 # if !defined (HAVE_DWARF_SUPPORT)
2006 #  define REASON  "NO_DWARF=1"
2007 # elif !defined (HAVE_LIBBPF_SUPPORT)
2008 #  define REASON  "NO_LIBBPF=1"
2009 # else
2010 #  define REASON  "this architecture doesn't support BPF prologue"
2011 # endif
2012 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2013         set_nobuild('\0', "vmlinux", true);
2014 # undef set_nobuild
2015 # undef REASON
2016 #endif
2017
2018         CPU_ZERO(&rec->affinity_mask);
2019         rec->opts.affinity = PERF_AFFINITY_SYS;
2020
2021         rec->evlist = perf_evlist__new();
2022         if (rec->evlist == NULL)
2023                 return -ENOMEM;
2024
2025         err = perf_config(perf_record_config, rec);
2026         if (err)
2027                 return err;
2028
2029         argc = parse_options(argc, argv, record_options, record_usage,
2030                             PARSE_OPT_STOP_AT_NON_OPTION);
2031         if (quiet)
2032                 perf_quiet_option();
2033
2034         /* Make system wide (-a) the default target. */
2035         if (!argc && target__none(&rec->opts.target))
2036                 rec->opts.target.system_wide = true;
2037
2038         if (nr_cgroups && !rec->opts.target.system_wide) {
2039                 usage_with_options_msg(record_usage, record_options,
2040                         "cgroup monitoring only available in system-wide mode");
2041
2042         }
2043         if (rec->opts.record_switch_events &&
2044             !perf_can_record_switch_events()) {
2045                 ui__error("kernel does not support recording context switch events\n");
2046                 parse_options_usage(record_usage, record_options, "switch-events", 0);
2047                 return -EINVAL;
2048         }
2049
2050         if (switch_output_setup(rec)) {
2051                 parse_options_usage(record_usage, record_options, "switch-output", 0);
2052                 return -EINVAL;
2053         }
2054
2055         if (rec->switch_output.time) {
2056                 signal(SIGALRM, alarm_sig_handler);
2057                 alarm(rec->switch_output.time);
2058         }
2059
2060         /*
2061          * Allow aliases to facilitate the lookup of symbols for address
2062          * filters. Refer to auxtrace_parse_filters().
2063          */
2064         symbol_conf.allow_aliases = true;
2065
2066         symbol__init(NULL);
2067
2068         err = record__auxtrace_init(rec);
2069         if (err)
2070                 goto out;
2071
2072         if (dry_run)
2073                 goto out;
2074
2075         err = bpf__setup_stdout(rec->evlist);
2076         if (err) {
2077                 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2078                 pr_err("ERROR: Setup BPF stdout failed: %s\n",
2079                          errbuf);
2080                 goto out;
2081         }
2082
2083         err = -ENOMEM;
2084
2085         if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
2086                 pr_warning(
2087 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
2088 "check /proc/sys/kernel/kptr_restrict.\n\n"
2089 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
2090 "file is not found in the buildid cache or in the vmlinux path.\n\n"
2091 "Samples in kernel modules won't be resolved at all.\n\n"
2092 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
2093 "even with a suitable vmlinux or kallsyms file.\n\n");
2094
2095         if (rec->no_buildid_cache || rec->no_buildid) {
2096                 disable_buildid_cache();
2097         } else if (rec->switch_output.enabled) {
2098                 /*
2099                  * In 'perf record --switch-output', disable buildid
2100                  * generation by default to reduce data file switching
2101                  * overhead. Still generate buildids if they are explicitly
2102                  * requested using
2103                  *
2104                  *  perf record --switch-output --no-no-buildid \
2105                  *              --no-no-buildid-cache
2106                  *
2107                  * The following code is equivalent to:
2108                  *
2109                  * if ((rec->no_buildid || !rec->no_buildid_set) &&
2110                  *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2111                  *         disable_buildid_cache();
2112                  */
2113                 bool disable = true;
2114
2115                 if (rec->no_buildid_set && !rec->no_buildid)
2116                         disable = false;
2117                 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2118                         disable = false;
2119                 if (disable) {
2120                         rec->no_buildid = true;
2121                         rec->no_buildid_cache = true;
2122                         disable_buildid_cache();
2123                 }
2124         }
2125
2126         if (record.opts.overwrite)
2127                 record.opts.tail_synthesize = true;
2128
2129         if (rec->evlist->nr_entries == 0 &&
2130             __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
2131                 pr_err("Not enough memory for event selector list\n");
2132                 goto out;
2133         }
2134
2135         if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2136                 rec->opts.no_inherit = true;
2137
2138         err = target__validate(&rec->opts.target);
2139         if (err) {
2140                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2141                 ui__warning("%s\n", errbuf);
2142         }
2143
2144         err = target__parse_uid(&rec->opts.target);
2145         if (err) {
2146                 int saved_errno = errno;
2147
2148                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2149                 ui__error("%s", errbuf);
2150
2151                 err = -saved_errno;
2152                 goto out;
2153         }
2154
2155         /* Enable ignoring missing threads when -u/-p option is defined. */
2156         rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
2157
2158         err = -ENOMEM;
2159         if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
2160                 usage_with_options(record_usage, record_options);
2161
2162         err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2163         if (err)
2164                 goto out;
2165
2166         /*
2167          * We take all buildids when the file contains
2168          * AUX area tracing data, because we do not decode the
2169          * trace, since that would take too long.
2170          */
2171         if (rec->opts.full_auxtrace)
2172                 rec->buildid_all = true;
2173
2174         if (record_opts__config(&rec->opts)) {
2175                 err = -EINVAL;
2176                 goto out;
2177         }
2178
2179         if (rec->opts.nr_cblocks > nr_cblocks_max)
2180                 rec->opts.nr_cblocks = nr_cblocks_max;
2181         if (verbose > 0)
2182                 pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2183
2184         pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2185
2186         err = __cmd_record(&record, argc, argv);
2187 out:
2188         perf_evlist__delete(rec->evlist);
2189         symbol__exit();
2190         auxtrace_record__free(rec->itr);
2191         return err;
2192 }
2193
2194 static void snapshot_sig_handler(int sig __maybe_unused)
2195 {
2196         struct record *rec = &record;
2197
2198         if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2199                 trigger_hit(&auxtrace_snapshot_trigger);
2200                 auxtrace_record__snapshot_started = 1;
2201                 if (auxtrace_record__snapshot_start(record.itr))
2202                         trigger_error(&auxtrace_snapshot_trigger);
2203         }
2204
2205         if (switch_output_signal(rec))
2206                 trigger_hit(&switch_output_trigger);
2207 }
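     /*
      * Added note (not part of the original source): SIGUSR2 is routed to
      * snapshot_sig_handler() above, so a single signal can both take an
      * AUX area snapshot (with -S/--snapshot) and rotate the output file
      * (with --switch-output=signal), e.g.:
      *
      *   kill -USR2 $(pidof perf)
      */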
2208
2209 static void alarm_sig_handler(int sig __maybe_unused)
2210 {
2211         struct record *rec = &record;
2212
2213         if (switch_output_time(rec))
2214                 trigger_hit(&switch_output_trigger);
2215 }