]> asedeno.scripts.mit.edu Git - linux.git/blob - tools/perf/builtin-top.c
libperf: Add 'event_copy' to 'struct perf_mmap'
[linux.git] / tools / perf / builtin-top.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * builtin-top.c
4  *
5  * Builtin top command: Display a continuously updated profile of
6  * any workload, CPU or specific PID.
7  *
8  * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
9  *               2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Improvements and fixes by:
12  *
13  *   Arjan van de Ven <arjan@linux.intel.com>
14  *   Yanmin Zhang <yanmin.zhang@intel.com>
15  *   Wu Fengguang <fengguang.wu@intel.com>
16  *   Mike Galbraith <efault@gmx.de>
17  *   Paul Mackerras <paulus@samba.org>
18  */
19 #include "builtin.h"
20
21 #include "perf.h"
22
23 #include "util/annotate.h"
24 #include "util/bpf-event.h"
25 #include "util/config.h"
26 #include "util/color.h"
27 #include "util/dso.h"
28 #include "util/evlist.h"
29 #include "util/evsel.h"
30 #include "util/event.h"
31 #include "util/machine.h"
32 #include "util/map.h"
33 #include "util/mmap.h"
34 #include "util/session.h"
35 #include "util/symbol.h"
36 #include "util/synthetic-events.h"
37 #include "util/top.h"
38 #include "util/util.h"
39 #include <linux/rbtree.h>
40 #include <subcmd/parse-options.h>
41 #include "util/parse-events.h"
42 #include "util/callchain.h"
43 #include "util/cpumap.h"
44 #include "util/sort.h"
45 #include "util/string2.h"
46 #include "util/term.h"
47 #include "util/intlist.h"
48 #include "util/parse-branch-options.h"
49 #include "arch/common.h"
50 #include "ui/ui.h"
51
52 #include "util/debug.h"
53 #include "util/ordered-events.h"
54
55 #include <assert.h>
56 #include <elf.h>
57 #include <fcntl.h>
58
59 #include <stdio.h>
60 #include <termios.h>
61 #include <unistd.h>
62 #include <inttypes.h>
63
64 #include <errno.h>
65 #include <time.h>
66 #include <sched.h>
67 #include <signal.h>
68
69 #include <sys/syscall.h>
70 #include <sys/ioctl.h>
71 #include <poll.h>
72 #include <sys/prctl.h>
73 #include <sys/wait.h>
74 #include <sys/uio.h>
75 #include <sys/utsname.h>
76 #include <sys/mman.h>
77
78 #include <linux/stringify.h>
79 #include <linux/time64.h>
80 #include <linux/types.h>
81 #include <linux/err.h>
82
83 #include <linux/ctype.h>
84
/* Set to 1 by stop_top(); the stdio display loop runs while this is 0. */
static volatile int done;
/* Set to 1 by the SIGWINCH handler winch_sig(). */
static volatile int resize;

/* Subtracted from the terminal row count to get the number of entry lines. */
#define HEADER_LINE_NR  5
89
90 static void perf_top__update_print_entries(struct perf_top *top)
91 {
92         top->print_entries = top->winsize.ws_row - HEADER_LINE_NR;
93 }
94
95 static void winch_sig(int sig __maybe_unused)
96 {
97         resize = 1;
98 }
99
100 static void perf_top__resize(struct perf_top *top)
101 {
102         get_term_dimensions(&top->winsize);
103         perf_top__update_print_entries(top);
104 }
105
106 static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
107 {
108         struct evsel *evsel;
109         struct symbol *sym;
110         struct annotation *notes;
111         struct map *map;
112         int err = -1;
113
114         if (!he || !he->ms.sym)
115                 return -1;
116
117         evsel = hists_to_evsel(he->hists);
118
119         sym = he->ms.sym;
120         map = he->ms.map;
121
122         /*
123          * We can't annotate with just /proc/kallsyms
124          */
125         if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
126             !dso__is_kcore(map->dso)) {
127                 pr_err("Can't annotate %s: No vmlinux file was found in the "
128                        "path\n", sym->name);
129                 sleep(1);
130                 return -1;
131         }
132
133         notes = symbol__annotation(sym);
134         pthread_mutex_lock(&notes->lock);
135
136         if (!symbol__hists(sym, top->evlist->core.nr_entries)) {
137                 pthread_mutex_unlock(&notes->lock);
138                 pr_err("Not enough memory for annotating '%s' symbol!\n",
139                        sym->name);
140                 sleep(1);
141                 return err;
142         }
143
144         err = symbol__annotate(sym, map, evsel, 0, &top->annotation_opts, NULL);
145         if (err == 0) {
146                 top->sym_filter_entry = he;
147         } else {
148                 char msg[BUFSIZ];
149                 symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
150                 pr_err("Couldn't annotate %s: %s\n", sym->name, msg);
151         }
152
153         pthread_mutex_unlock(&notes->lock);
154         return err;
155 }
156
157 static void __zero_source_counters(struct hist_entry *he)
158 {
159         struct symbol *sym = he->ms.sym;
160         symbol__annotate_zero_histograms(sym);
161 }
162
163 static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip)
164 {
165         struct utsname uts;
166         int err = uname(&uts);
167
168         ui__warning("Out of bounds address found:\n\n"
169                     "Addr:   %" PRIx64 "\n"
170                     "DSO:    %s %c\n"
171                     "Map:    %" PRIx64 "-%" PRIx64 "\n"
172                     "Symbol: %" PRIx64 "-%" PRIx64 " %c %s\n"
173                     "Arch:   %s\n"
174                     "Kernel: %s\n"
175                     "Tools:  %s\n\n"
176                     "Not all samples will be on the annotation output.\n\n"
177                     "Please report to linux-kernel@vger.kernel.org\n",
178                     ip, map->dso->long_name, dso__symtab_origin(map->dso),
179                     map->start, map->end, sym->start, sym->end,
180                     sym->binding == STB_GLOBAL ? 'g' :
181                     sym->binding == STB_LOCAL  ? 'l' : 'w', sym->name,
182                     err ? "[unknown]" : uts.machine,
183                     err ? "[unknown]" : uts.release, perf_version_string);
184         if (use_browser <= 0)
185                 sleep(5);
186
187         map->erange_warned = true;
188 }
189
190 static void perf_top__record_precise_ip(struct perf_top *top,
191                                         struct hist_entry *he,
192                                         struct perf_sample *sample,
193                                         struct evsel *evsel, u64 ip)
194 {
195         struct annotation *notes;
196         struct symbol *sym = he->ms.sym;
197         int err = 0;
198
199         if (sym == NULL || (use_browser == 0 &&
200                             (top->sym_filter_entry == NULL ||
201                              top->sym_filter_entry->ms.sym != sym)))
202                 return;
203
204         notes = symbol__annotation(sym);
205
206         if (pthread_mutex_trylock(&notes->lock))
207                 return;
208
209         err = hist_entry__inc_addr_samples(he, sample, evsel, ip);
210
211         pthread_mutex_unlock(&notes->lock);
212
213         if (unlikely(err)) {
214                 /*
215                  * This function is now called with he->hists->lock held.
216                  * Release it before going to sleep.
217                  */
218                 pthread_mutex_unlock(&he->hists->lock);
219
220                 if (err == -ERANGE && !he->ms.map->erange_warned)
221                         ui__warn_map_erange(he->ms.map, sym, ip);
222                 else if (err == -ENOMEM) {
223                         pr_err("Not enough memory for annotating '%s' symbol!\n",
224                                sym->name);
225                         sleep(1);
226                 }
227
228                 pthread_mutex_lock(&he->hists->lock);
229         }
230 }
231
232 static void perf_top__show_details(struct perf_top *top)
233 {
234         struct hist_entry *he = top->sym_filter_entry;
235         struct evsel *evsel;
236         struct annotation *notes;
237         struct symbol *symbol;
238         int more;
239
240         if (!he)
241                 return;
242
243         evsel = hists_to_evsel(he->hists);
244
245         symbol = he->ms.sym;
246         notes = symbol__annotation(symbol);
247
248         pthread_mutex_lock(&notes->lock);
249
250         symbol__calc_percent(symbol, evsel);
251
252         if (notes->src == NULL)
253                 goto out_unlock;
254
255         printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name);
256         printf("  Events  Pcnt (>=%d%%)\n", top->annotation_opts.min_pcnt);
257
258         more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel, &top->annotation_opts);
259
260         if (top->evlist->enabled) {
261                 if (top->zero)
262                         symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
263                 else
264                         symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
265         }
266         if (more != 0)
267                 printf("%d lines not displayed, maybe increase display entries [e]\n", more);
268 out_unlock:
269         pthread_mutex_unlock(&notes->lock);
270 }
271
272 static void perf_top__resort_hists(struct perf_top *t)
273 {
274         struct evlist *evlist = t->evlist;
275         struct evsel *pos;
276
277         evlist__for_each_entry(evlist, pos) {
278                 struct hists *hists = evsel__hists(pos);
279
280                 /*
281                  * unlink existing entries so that they can be linked
282                  * in a correct order in hists__match() below.
283                  */
284                 hists__unlink(hists);
285
286                 if (evlist->enabled) {
287                         if (t->zero) {
288                                 hists__delete_entries(hists);
289                         } else {
290                                 hists__decay_entries(hists, t->hide_user_symbols,
291                                                      t->hide_kernel_symbols);
292                         }
293                 }
294
295                 hists__collapse_resort(hists, NULL);
296
297                 /* Non-group events are considered as leader */
298                 if (symbol_conf.event_group &&
299                     !perf_evsel__is_group_leader(pos)) {
300                         struct hists *leader_hists = evsel__hists(pos->leader);
301
302                         hists__match(leader_hists, hists);
303                         hists__link(leader_hists, hists);
304                 }
305         }
306
307         evlist__for_each_entry(evlist, pos) {
308                 perf_evsel__output_resort(pos, NULL);
309         }
310 }
311
312 static void perf_top__print_sym_table(struct perf_top *top)
313 {
314         char bf[160];
315         int printed = 0;
316         const int win_width = top->winsize.ws_col - 1;
317         struct evsel *evsel = top->sym_evsel;
318         struct hists *hists = evsel__hists(evsel);
319
320         puts(CONSOLE_CLEAR);
321
322         perf_top__header_snprintf(top, bf, sizeof(bf));
323         printf("%s\n", bf);
324
325         printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
326
327         if (!top->record_opts.overwrite &&
328             (hists->stats.nr_lost_warned !=
329             hists->stats.nr_events[PERF_RECORD_LOST])) {
330                 hists->stats.nr_lost_warned =
331                               hists->stats.nr_events[PERF_RECORD_LOST];
332                 color_fprintf(stdout, PERF_COLOR_RED,
333                               "WARNING: LOST %d chunks, Check IO/CPU overload",
334                               hists->stats.nr_lost_warned);
335                 ++printed;
336         }
337
338         if (top->sym_filter_entry) {
339                 perf_top__show_details(top);
340                 return;
341         }
342
343         perf_top__resort_hists(top);
344
345         hists__output_recalc_col_len(hists, top->print_entries - printed);
346         putchar('\n');
347         hists__fprintf(hists, false, top->print_entries - printed, win_width,
348                        top->min_percent, stdout, !symbol_conf.use_callchain);
349 }
350
/*
 * Print "@msg: " on stdout and read one line from stdin.
 *
 * If the line (without its trailing newline) consists only of decimal
 * digits, it is converted with strtoul() and stored in *@target; an empty
 * line therefore stores 0.  On read failure or when any non-digit is
 * present, *@target is left untouched.
 *
 * The line buffer is owned by this function and released on every path,
 * including getline() failure (getline() may allocate even when it fails).
 */
static void prompt_integer(int *target, const char *msg)
{
	char *buf = NULL, *p;
	size_t bufsz = 0;

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &bufsz, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	for (p = buf; *p; p++) {
		if (!isdigit((unsigned char)*p))
			goto out_free;
	}

	*target = strtoul(buf, NULL, 10);
out_free:
	free(buf);
}
376
/*
 * Prompt for a percentage with @msg and store it in *@target when it lies
 * in [0, 100].
 *
 * The scratch value starts out as the current *@target so that input
 * rejected by prompt_integer() (which then leaves its argument untouched)
 * keeps the previous setting instead of silently resetting it to 0.
 */
static void prompt_percent(int *target, const char *msg)
{
	int tmp = *target;

	prompt_integer(&tmp, msg);
	if (tmp >= 0 && tmp <= 100)
		*target = tmp;
}
385
386 static void perf_top__prompt_symbol(struct perf_top *top, const char *msg)
387 {
388         char *buf = malloc(0), *p;
389         struct hist_entry *syme = top->sym_filter_entry, *n, *found = NULL;
390         struct hists *hists = evsel__hists(top->sym_evsel);
391         struct rb_node *next;
392         size_t dummy = 0;
393
394         /* zero counters of active symbol */
395         if (syme) {
396                 __zero_source_counters(syme);
397                 top->sym_filter_entry = NULL;
398         }
399
400         fprintf(stdout, "\n%s: ", msg);
401         if (getline(&buf, &dummy, stdin) < 0)
402                 goto out_free;
403
404         p = strchr(buf, '\n');
405         if (p)
406                 *p = 0;
407
408         next = rb_first_cached(&hists->entries);
409         while (next) {
410                 n = rb_entry(next, struct hist_entry, rb_node);
411                 if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) {
412                         found = n;
413                         break;
414                 }
415                 next = rb_next(&n->rb_node);
416         }
417
418         if (!found) {
419                 fprintf(stderr, "Sorry, %s is not active.\n", buf);
420                 sleep(1);
421         } else
422                 perf_top__parse_source(top, found);
423
424 out_free:
425         free(buf);
426 }
427
428 static void perf_top__print_mapped_keys(struct perf_top *top)
429 {
430         char *name = NULL;
431
432         if (top->sym_filter_entry) {
433                 struct symbol *sym = top->sym_filter_entry->ms.sym;
434                 name = sym->name;
435         }
436
437         fprintf(stdout, "\nMapped keys:\n");
438         fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", top->delay_secs);
439         fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top->print_entries);
440
441         if (top->evlist->core.nr_entries > 1)
442                 fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", perf_evsel__name(top->sym_evsel));
443
444         fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top->count_filter);
445
446         fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", top->annotation_opts.min_pcnt);
447         fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
448         fprintf(stdout, "\t[S]     stop annotation.\n");
449
450         fprintf(stdout,
451                 "\t[K]     hide kernel symbols.             \t(%s)\n",
452                 top->hide_kernel_symbols ? "yes" : "no");
453         fprintf(stdout,
454                 "\t[U]     hide user symbols.               \t(%s)\n",
455                 top->hide_user_symbols ? "yes" : "no");
456         fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", top->zero ? 1 : 0);
457         fprintf(stdout, "\t[qQ]    quit.\n");
458 }
459
460 static int perf_top__key_mapped(struct perf_top *top, int c)
461 {
462         switch (c) {
463                 case 'd':
464                 case 'e':
465                 case 'f':
466                 case 'z':
467                 case 'q':
468                 case 'Q':
469                 case 'K':
470                 case 'U':
471                 case 'F':
472                 case 's':
473                 case 'S':
474                         return 1;
475                 case 'E':
476                         return top->evlist->core.nr_entries > 1 ? 1 : 0;
477                 default:
478                         break;
479         }
480
481         return 0;
482 }
483
484 static bool perf_top__handle_keypress(struct perf_top *top, int c)
485 {
486         bool ret = true;
487
488         if (!perf_top__key_mapped(top, c)) {
489                 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
490                 struct termios save;
491
492                 perf_top__print_mapped_keys(top);
493                 fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
494                 fflush(stdout);
495
496                 set_term_quiet_input(&save);
497
498                 poll(&stdin_poll, 1, -1);
499                 c = getc(stdin);
500
501                 tcsetattr(0, TCSAFLUSH, &save);
502                 if (!perf_top__key_mapped(top, c))
503                         return ret;
504         }
505
506         switch (c) {
507                 case 'd':
508                         prompt_integer(&top->delay_secs, "Enter display delay");
509                         if (top->delay_secs < 1)
510                                 top->delay_secs = 1;
511                         break;
512                 case 'e':
513                         prompt_integer(&top->print_entries, "Enter display entries (lines)");
514                         if (top->print_entries == 0) {
515                                 perf_top__resize(top);
516                                 signal(SIGWINCH, winch_sig);
517                         } else {
518                                 signal(SIGWINCH, SIG_DFL);
519                         }
520                         break;
521                 case 'E':
522                         if (top->evlist->core.nr_entries > 1) {
523                                 /* Select 0 as the default event: */
524                                 int counter = 0;
525
526                                 fprintf(stderr, "\nAvailable events:");
527
528                                 evlist__for_each_entry(top->evlist, top->sym_evsel)
529                                         fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel));
530
531                                 prompt_integer(&counter, "Enter details event counter");
532
533                                 if (counter >= top->evlist->core.nr_entries) {
534                                         top->sym_evsel = perf_evlist__first(top->evlist);
535                                         fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel));
536                                         sleep(1);
537                                         break;
538                                 }
539                                 evlist__for_each_entry(top->evlist, top->sym_evsel)
540                                         if (top->sym_evsel->idx == counter)
541                                                 break;
542                         } else
543                                 top->sym_evsel = perf_evlist__first(top->evlist);
544                         break;
545                 case 'f':
546                         prompt_integer(&top->count_filter, "Enter display event count filter");
547                         break;
548                 case 'F':
549                         prompt_percent(&top->annotation_opts.min_pcnt,
550                                        "Enter details display event filter (percent)");
551                         break;
552                 case 'K':
553                         top->hide_kernel_symbols = !top->hide_kernel_symbols;
554                         break;
555                 case 'q':
556                 case 'Q':
557                         printf("exiting.\n");
558                         if (top->dump_symtab)
559                                 perf_session__fprintf_dsos(top->session, stderr);
560                         ret = false;
561                         break;
562                 case 's':
563                         perf_top__prompt_symbol(top, "Enter details symbol");
564                         break;
565                 case 'S':
566                         if (!top->sym_filter_entry)
567                                 break;
568                         else {
569                                 struct hist_entry *syme = top->sym_filter_entry;
570
571                                 top->sym_filter_entry = NULL;
572                                 __zero_source_counters(syme);
573                         }
574                         break;
575                 case 'U':
576                         top->hide_user_symbols = !top->hide_user_symbols;
577                         break;
578                 case 'z':
579                         top->zero = !top->zero;
580                         break;
581                 default:
582                         break;
583         }
584
585         return ret;
586 }
587
588 static void perf_top__sort_new_samples(void *arg)
589 {
590         struct perf_top *t = arg;
591
592         if (t->evlist->selected != NULL)
593                 t->sym_evsel = t->evlist->selected;
594
595         perf_top__resort_hists(t);
596
597         if (t->lost || t->drop)
598                 pr_warning("Too slow to read ring buffer (change period (-c/-F) or limit CPUs (-C)\n");
599 }
600
601 static void stop_top(void)
602 {
603         session_done = 1;
604         done = 1;
605 }
606
607 static void *display_thread_tui(void *arg)
608 {
609         struct evsel *pos;
610         struct perf_top *top = arg;
611         const char *help = "For a higher level overview, try: perf top --sort comm,dso";
612         struct hist_browser_timer hbt = {
613                 .timer          = perf_top__sort_new_samples,
614                 .arg            = top,
615                 .refresh        = top->delay_secs,
616         };
617
618         /* In order to read symbols from other namespaces perf to  needs to call
619          * setns(2).  This isn't permitted if the struct_fs has multiple users.
620          * unshare(2) the fs so that we may continue to setns into namespaces
621          * that we're observing.
622          */
623         unshare(CLONE_FS);
624
625         prctl(PR_SET_NAME, "perf-top-UI", 0, 0, 0);
626
627         perf_top__sort_new_samples(top);
628
629         /*
630          * Initialize the uid_filter_str, in the future the TUI will allow
631          * Zooming in/out UIDs. For now just use whatever the user passed
632          * via --uid.
633          */
634         evlist__for_each_entry(top->evlist, pos) {
635                 struct hists *hists = evsel__hists(pos);
636                 hists->uid_filter_str = top->record_opts.target.uid_str;
637         }
638
639         perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
640                                       top->min_percent,
641                                       &top->session->header.env,
642                                       !top->record_opts.overwrite,
643                                       &top->annotation_opts);
644
645         stop_top();
646         return NULL;
647 }
648
649 static void display_sig(int sig __maybe_unused)
650 {
651         stop_top();
652 }
653
654 static void display_setup_sig(void)
655 {
656         signal(SIGSEGV, sighandler_dump_stack);
657         signal(SIGFPE, sighandler_dump_stack);
658         signal(SIGINT,  display_sig);
659         signal(SIGQUIT, display_sig);
660         signal(SIGTERM, display_sig);
661 }
662
663 static void *display_thread(void *arg)
664 {
665         struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
666         struct termios save;
667         struct perf_top *top = arg;
668         int delay_msecs, c;
669
670         /* In order to read symbols from other namespaces perf to  needs to call
671          * setns(2).  This isn't permitted if the struct_fs has multiple users.
672          * unshare(2) the fs so that we may continue to setns into namespaces
673          * that we're observing.
674          */
675         unshare(CLONE_FS);
676
677         prctl(PR_SET_NAME, "perf-top-UI", 0, 0, 0);
678
679         display_setup_sig();
680         pthread__unblock_sigwinch();
681 repeat:
682         delay_msecs = top->delay_secs * MSEC_PER_SEC;
683         set_term_quiet_input(&save);
684         /* trash return*/
685         getc(stdin);
686
687         while (!done) {
688                 perf_top__print_sym_table(top);
689                 /*
690                  * Either timeout expired or we got an EINTR due to SIGWINCH,
691                  * refresh screen in both cases.
692                  */
693                 switch (poll(&stdin_poll, 1, delay_msecs)) {
694                 case 0:
695                         continue;
696                 case -1:
697                         if (errno == EINTR)
698                                 continue;
699                         __fallthrough;
700                 default:
701                         c = getc(stdin);
702                         tcsetattr(0, TCSAFLUSH, &save);
703
704                         if (perf_top__handle_keypress(top, c))
705                                 goto repeat;
706                         stop_top();
707                 }
708         }
709
710         tcsetattr(0, TCSAFLUSH, &save);
711         return NULL;
712 }
713
714 static int hist_iter__top_callback(struct hist_entry_iter *iter,
715                                    struct addr_location *al, bool single,
716                                    void *arg)
717 {
718         struct perf_top *top = arg;
719         struct hist_entry *he = iter->he;
720         struct evsel *evsel = iter->evsel;
721
722         if (perf_hpp_list.sym && single)
723                 perf_top__record_precise_ip(top, he, iter->sample, evsel, al->addr);
724
725         hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
726                      !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY));
727         return 0;
728 }
729
730 static void perf_event__process_sample(struct perf_tool *tool,
731                                        const union perf_event *event,
732                                        struct evsel *evsel,
733                                        struct perf_sample *sample,
734                                        struct machine *machine)
735 {
736         struct perf_top *top = container_of(tool, struct perf_top, tool);
737         struct addr_location al;
738         int err;
739
740         if (!machine && perf_guest) {
741                 static struct intlist *seen;
742
743                 if (!seen)
744                         seen = intlist__new(NULL);
745
746                 if (!intlist__has_entry(seen, sample->pid)) {
747                         pr_err("Can't find guest [%d]'s kernel information\n",
748                                 sample->pid);
749                         intlist__add(seen, sample->pid);
750                 }
751                 return;
752         }
753
754         if (!machine) {
755                 pr_err("%u unprocessable samples recorded.\r",
756                        top->session->evlist->stats.nr_unprocessable_samples++);
757                 return;
758         }
759
760         if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
761                 top->exact_samples++;
762
763         if (machine__resolve(machine, &al, sample) < 0)
764                 return;
765
766         if (!machine->kptr_restrict_warned &&
767             symbol_conf.kptr_restrict &&
768             al.cpumode == PERF_RECORD_MISC_KERNEL) {
769                 if (!perf_evlist__exclude_kernel(top->session->evlist)) {
770                         ui__warning(
771 "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
772 "Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
773 "Kernel%s samples will not be resolved.\n",
774                           al.map && map__has_symbols(al.map) ?
775                           " modules" : "");
776                         if (use_browser <= 0)
777                                 sleep(5);
778                 }
779                 machine->kptr_restrict_warned = true;
780         }
781
782         if (al.sym == NULL && al.map != NULL) {
783                 const char *msg = "Kernel samples will not be resolved.\n";
784                 /*
785                  * As we do lazy loading of symtabs we only will know if the
786                  * specified vmlinux file is invalid when we actually have a
787                  * hit in kernel space and then try to load it. So if we get
788                  * here and there are _no_ symbols in the DSO backing the
789                  * kernel map, bail out.
790                  *
791                  * We may never get here, for instance, if we use -K/
792                  * --hide-kernel-symbols, even if the user specifies an
793                  * invalid --vmlinux ;-)
794                  */
795                 if (!machine->kptr_restrict_warned && !top->vmlinux_warned &&
796                     __map__is_kernel(al.map) && map__has_symbols(al.map)) {
797                         if (symbol_conf.vmlinux_name) {
798                                 char serr[256];
799                                 dso__strerror_load(al.map->dso, serr, sizeof(serr));
800                                 ui__warning("The %s file can't be used: %s\n%s",
801                                             symbol_conf.vmlinux_name, serr, msg);
802                         } else {
803                                 ui__warning("A vmlinux file was not found.\n%s",
804                                             msg);
805                         }
806
807                         if (use_browser <= 0)
808                                 sleep(5);
809                         top->vmlinux_warned = true;
810                 }
811         }
812
813         if (al.sym == NULL || !al.sym->idle) {
814                 struct hists *hists = evsel__hists(evsel);
815                 struct hist_entry_iter iter = {
816                         .evsel          = evsel,
817                         .sample         = sample,
818                         .add_entry_cb   = hist_iter__top_callback,
819                 };
820
821                 if (symbol_conf.cumulate_callchain)
822                         iter.ops = &hist_iter_cumulative;
823                 else
824                         iter.ops = &hist_iter_normal;
825
826                 pthread_mutex_lock(&hists->lock);
827
828                 err = hist_entry_iter__add(&iter, &al, top->max_stack, top);
829                 if (err < 0)
830                         pr_err("Problem incrementing symbol period, skipping event\n");
831
832                 pthread_mutex_unlock(&hists->lock);
833         }
834
835         addr_location__put(&al);
836 }
837
838 static void
839 perf_top__process_lost(struct perf_top *top, union perf_event *event,
840                        struct evsel *evsel)
841 {
842         struct hists *hists = evsel__hists(evsel);
843
844         top->lost += event->lost.lost;
845         top->lost_total += event->lost.lost;
846         hists->stats.total_lost += event->lost.lost;
847 }
848
849 static void
850 perf_top__process_lost_samples(struct perf_top *top,
851                                union perf_event *event,
852                                struct evsel *evsel)
853 {
854         struct hists *hists = evsel__hists(evsel);
855
856         top->lost += event->lost_samples.lost;
857         top->lost_total += event->lost_samples.lost;
858         hists->stats.total_lost_samples += event->lost_samples.lost;
859 }
860
861 static u64 last_timestamp;
862
863 static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
864 {
865         struct record_opts *opts = &top->record_opts;
866         struct evlist *evlist = top->evlist;
867         struct mmap *md;
868         union perf_event *event;
869
870         md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
871         if (perf_mmap__read_init(md) < 0)
872                 return;
873
874         while ((event = perf_mmap__read_event(md)) != NULL) {
875                 int ret;
876
877                 ret = perf_evlist__parse_sample_timestamp(evlist, event, &last_timestamp);
878                 if (ret && ret != -1)
879                         break;
880
881                 ret = ordered_events__queue(top->qe.in, event, last_timestamp, 0);
882                 if (ret)
883                         break;
884
885                 perf_mmap__consume(md);
886
887                 if (top->qe.rotate) {
888                         pthread_mutex_lock(&top->qe.mutex);
889                         top->qe.rotate = false;
890                         pthread_cond_signal(&top->qe.cond);
891                         pthread_mutex_unlock(&top->qe.mutex);
892                 }
893         }
894
895         perf_mmap__read_done(md);
896 }
897
898 static void perf_top__mmap_read(struct perf_top *top)
899 {
900         bool overwrite = top->record_opts.overwrite;
901         struct evlist *evlist = top->evlist;
902         int i;
903
904         if (overwrite)
905                 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);
906
907         for (i = 0; i < top->evlist->nr_mmaps; i++)
908                 perf_top__mmap_read_idx(top, i);
909
910         if (overwrite) {
911                 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
912                 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
913         }
914 }
915
916 /*
917  * Check per-event overwrite term.
918  * perf top should support consistent term for all events.
919  * - All events don't have per-event term
920  *   E.g. "cpu/cpu-cycles/,cpu/instructions/"
921  *   Nothing change, return 0.
922  * - All events have same per-event term
923  *   E.g. "cpu/cpu-cycles,no-overwrite/,cpu/instructions,no-overwrite/
924  *   Using the per-event setting to replace the opts->overwrite if
925  *   they are different, then return 0.
926  * - Events have different per-event term
927  *   E.g. "cpu/cpu-cycles,overwrite/,cpu/instructions,no-overwrite/"
928  *   Return -1
929  * - Some of the event set per-event term, but some not.
930  *   E.g. "cpu/cpu-cycles/,cpu/instructions,no-overwrite/"
931  *   Return -1
932  */
933 static int perf_top__overwrite_check(struct perf_top *top)
934 {
935         struct record_opts *opts = &top->record_opts;
936         struct evlist *evlist = top->evlist;
937         struct perf_evsel_config_term *term;
938         struct list_head *config_terms;
939         struct evsel *evsel;
940         int set, overwrite = -1;
941
942         evlist__for_each_entry(evlist, evsel) {
943                 set = -1;
944                 config_terms = &evsel->config_terms;
945                 list_for_each_entry(term, config_terms, list) {
946                         if (term->type == PERF_EVSEL__CONFIG_TERM_OVERWRITE)
947                                 set = term->val.overwrite ? 1 : 0;
948                 }
949
950                 /* no term for current and previous event (likely) */
951                 if ((overwrite < 0) && (set < 0))
952                         continue;
953
954                 /* has term for both current and previous event, compare */
955                 if ((overwrite >= 0) && (set >= 0) && (overwrite != set))
956                         return -1;
957
958                 /* no term for current event but has term for previous one */
959                 if ((overwrite >= 0) && (set < 0))
960                         return -1;
961
962                 /* has term for current event */
963                 if ((overwrite < 0) && (set >= 0)) {
964                         /* if it's first event, set overwrite */
965                         if (evsel == perf_evlist__first(evlist))
966                                 overwrite = set;
967                         else
968                                 return -1;
969                 }
970         }
971
972         if ((overwrite >= 0) && (opts->overwrite != overwrite))
973                 opts->overwrite = overwrite;
974
975         return 0;
976 }
977
978 static int perf_top_overwrite_fallback(struct perf_top *top,
979                                        struct evsel *evsel)
980 {
981         struct record_opts *opts = &top->record_opts;
982         struct evlist *evlist = top->evlist;
983         struct evsel *counter;
984
985         if (!opts->overwrite)
986                 return 0;
987
988         /* only fall back when first event fails */
989         if (evsel != perf_evlist__first(evlist))
990                 return 0;
991
992         evlist__for_each_entry(evlist, counter)
993                 counter->core.attr.write_backward = false;
994         opts->overwrite = false;
995         pr_debug2("fall back to non-overwrite mode\n");
996         return 1;
997 }
998
999 static int perf_top__start_counters(struct perf_top *top)
1000 {
1001         char msg[BUFSIZ];
1002         struct evsel *counter;
1003         struct evlist *evlist = top->evlist;
1004         struct record_opts *opts = &top->record_opts;
1005
1006         if (perf_top__overwrite_check(top)) {
1007                 ui__error("perf top only support consistent per-event "
1008                           "overwrite setting for all events\n");
1009                 goto out_err;
1010         }
1011
1012         perf_evlist__config(evlist, opts, &callchain_param);
1013
1014         evlist__for_each_entry(evlist, counter) {
1015 try_again:
1016                 if (evsel__open(counter, top->evlist->core.cpus,
1017                                      top->evlist->core.threads) < 0) {
1018
1019                         /*
1020                          * Specially handle overwrite fall back.
1021                          * Because perf top is the only tool which has
1022                          * overwrite mode by default, support
1023                          * both overwrite and non-overwrite mode, and
1024                          * require consistent mode for all events.
1025                          *
1026                          * May move it to generic code with more tools
1027                          * have similar attribute.
1028                          */
1029                         if (perf_missing_features.write_backward &&
1030                             perf_top_overwrite_fallback(top, counter))
1031                                 goto try_again;
1032
1033                         if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
1034                                 if (verbose > 0)
1035                                         ui__warning("%s\n", msg);
1036                                 goto try_again;
1037                         }
1038
1039                         perf_evsel__open_strerror(counter, &opts->target,
1040                                                   errno, msg, sizeof(msg));
1041                         ui__error("%s\n", msg);
1042                         goto out_err;
1043                 }
1044         }
1045
1046         if (evlist__mmap(evlist, opts->mmap_pages) < 0) {
1047                 ui__error("Failed to mmap with %d (%s)\n",
1048                             errno, str_error_r(errno, msg, sizeof(msg)));
1049                 goto out_err;
1050         }
1051
1052         return 0;
1053
1054 out_err:
1055         return -1;
1056 }
1057
1058 static int callchain_param__setup_sample_type(struct callchain_param *callchain)
1059 {
1060         if (callchain->mode != CHAIN_NONE) {
1061                 if (callchain_register_param(callchain) < 0) {
1062                         ui__error("Can't register callchain params.\n");
1063                         return -EINVAL;
1064                 }
1065         }
1066
1067         return 0;
1068 }
1069
1070 static struct ordered_events *rotate_queues(struct perf_top *top)
1071 {
1072         struct ordered_events *in = top->qe.in;
1073
1074         if (top->qe.in == &top->qe.data[1])
1075                 top->qe.in = &top->qe.data[0];
1076         else
1077                 top->qe.in = &top->qe.data[1];
1078
1079         return in;
1080 }
1081
1082 static void *process_thread(void *arg)
1083 {
1084         struct perf_top *top = arg;
1085
1086         while (!done) {
1087                 struct ordered_events *out, *in = top->qe.in;
1088
1089                 if (!in->nr_events) {
1090                         usleep(100);
1091                         continue;
1092                 }
1093
1094                 out = rotate_queues(top);
1095
1096                 pthread_mutex_lock(&top->qe.mutex);
1097                 top->qe.rotate = true;
1098                 pthread_cond_wait(&top->qe.cond, &top->qe.mutex);
1099                 pthread_mutex_unlock(&top->qe.mutex);
1100
1101                 if (ordered_events__flush(out, OE_FLUSH__TOP))
1102                         pr_err("failed to process events\n");
1103         }
1104
1105         return NULL;
1106 }
1107
1108 /*
1109  * Allow only 'top->delay_secs' seconds behind samples.
1110  */
1111 static int should_drop(struct ordered_event *qevent, struct perf_top *top)
1112 {
1113         union perf_event *event = qevent->event;
1114         u64 delay_timestamp;
1115
1116         if (event->header.type != PERF_RECORD_SAMPLE)
1117                 return false;
1118
1119         delay_timestamp = qevent->timestamp + top->delay_secs * NSEC_PER_SEC;
1120         return delay_timestamp < last_timestamp;
1121 }
1122
1123 static int deliver_event(struct ordered_events *qe,
1124                          struct ordered_event *qevent)
1125 {
1126         struct perf_top *top = qe->data;
1127         struct evlist *evlist = top->evlist;
1128         struct perf_session *session = top->session;
1129         union perf_event *event = qevent->event;
1130         struct perf_sample sample;
1131         struct evsel *evsel;
1132         struct machine *machine;
1133         int ret = -1;
1134
1135         if (should_drop(qevent, top)) {
1136                 top->drop++;
1137                 top->drop_total++;
1138                 return 0;
1139         }
1140
1141         ret = perf_evlist__parse_sample(evlist, event, &sample);
1142         if (ret) {
1143                 pr_err("Can't parse sample, err = %d\n", ret);
1144                 goto next_event;
1145         }
1146
1147         evsel = perf_evlist__id2evsel(session->evlist, sample.id);
1148         assert(evsel != NULL);
1149
1150         if (event->header.type == PERF_RECORD_SAMPLE) {
1151                 if (evswitch__discard(&top->evswitch, evsel))
1152                         return 0;
1153                 ++top->samples;
1154         }
1155
1156         switch (sample.cpumode) {
1157         case PERF_RECORD_MISC_USER:
1158                 ++top->us_samples;
1159                 if (top->hide_user_symbols)
1160                         goto next_event;
1161                 machine = &session->machines.host;
1162                 break;
1163         case PERF_RECORD_MISC_KERNEL:
1164                 ++top->kernel_samples;
1165                 if (top->hide_kernel_symbols)
1166                         goto next_event;
1167                 machine = &session->machines.host;
1168                 break;
1169         case PERF_RECORD_MISC_GUEST_KERNEL:
1170                 ++top->guest_kernel_samples;
1171                 machine = perf_session__find_machine(session,
1172                                                      sample.pid);
1173                 break;
1174         case PERF_RECORD_MISC_GUEST_USER:
1175                 ++top->guest_us_samples;
1176                 /*
1177                  * TODO: we don't process guest user from host side
1178                  * except simple counting.
1179                  */
1180                 goto next_event;
1181         default:
1182                 if (event->header.type == PERF_RECORD_SAMPLE)
1183                         goto next_event;
1184                 machine = &session->machines.host;
1185                 break;
1186         }
1187
1188         if (event->header.type == PERF_RECORD_SAMPLE) {
1189                 perf_event__process_sample(&top->tool, event, evsel,
1190                                            &sample, machine);
1191         } else if (event->header.type == PERF_RECORD_LOST) {
1192                 perf_top__process_lost(top, event, evsel);
1193         } else if (event->header.type == PERF_RECORD_LOST_SAMPLES) {
1194                 perf_top__process_lost_samples(top, event, evsel);
1195         } else if (event->header.type < PERF_RECORD_MAX) {
1196                 hists__inc_nr_events(evsel__hists(evsel), event->header.type);
1197                 machine__process_event(machine, event, &sample);
1198         } else
1199                 ++session->evlist->stats.nr_unknown_events;
1200
1201         ret = 0;
1202 next_event:
1203         return ret;
1204 }
1205
1206 static void init_process_thread(struct perf_top *top)
1207 {
1208         ordered_events__init(&top->qe.data[0], deliver_event, top);
1209         ordered_events__init(&top->qe.data[1], deliver_event, top);
1210         ordered_events__set_copy_on_queue(&top->qe.data[0], true);
1211         ordered_events__set_copy_on_queue(&top->qe.data[1], true);
1212         top->qe.in = &top->qe.data[0];
1213         pthread_mutex_init(&top->qe.mutex, NULL);
1214         pthread_cond_init(&top->qe.cond, NULL);
1215 }
1216
1217 static int __cmd_top(struct perf_top *top)
1218 {
1219         struct record_opts *opts = &top->record_opts;
1220         pthread_t thread, thread_process;
1221         int ret;
1222
1223         if (!top->annotation_opts.objdump_path) {
1224                 ret = perf_env__lookup_objdump(&top->session->header.env,
1225                                                &top->annotation_opts.objdump_path);
1226                 if (ret)
1227                         return ret;
1228         }
1229
1230         ret = callchain_param__setup_sample_type(&callchain_param);
1231         if (ret)
1232                 return ret;
1233
1234         if (perf_session__register_idle_thread(top->session) < 0)
1235                 return ret;
1236
1237         if (top->nr_threads_synthesize > 1)
1238                 perf_set_multithreaded();
1239
1240         init_process_thread(top);
1241
1242         if (opts->record_namespaces)
1243                 top->tool.namespace_events = true;
1244
1245         ret = perf_event__synthesize_bpf_events(top->session, perf_event__process,
1246                                                 &top->session->machines.host,
1247                                                 &top->record_opts);
1248         if (ret < 0)
1249                 pr_debug("Couldn't synthesize BPF events: Pre-existing BPF programs won't have symbols resolved.\n");
1250
1251         machine__synthesize_threads(&top->session->machines.host, &opts->target,
1252                                     top->evlist->core.threads, false,
1253                                     top->nr_threads_synthesize);
1254
1255         if (top->nr_threads_synthesize > 1)
1256                 perf_set_singlethreaded();
1257
1258         if (perf_hpp_list.socket) {
1259                 ret = perf_env__read_cpu_topology_map(&perf_env);
1260                 if (ret < 0) {
1261                         char errbuf[BUFSIZ];
1262                         const char *err = str_error_r(-ret, errbuf, sizeof(errbuf));
1263
1264                         ui__error("Could not read the CPU topology map: %s\n", err);
1265                         return ret;
1266                 }
1267         }
1268
1269         ret = perf_top__start_counters(top);
1270         if (ret)
1271                 return ret;
1272
1273         top->session->evlist = top->evlist;
1274         perf_session__set_id_hdr_size(top->session);
1275
1276         /*
1277          * When perf is starting the traced process, all the events (apart from
1278          * group members) have enable_on_exec=1 set, so don't spoil it by
1279          * prematurely enabling them.
1280          *
1281          * XXX 'top' still doesn't start workloads like record, trace, but should,
1282          * so leave the check here.
1283          */
1284         if (!target__none(&opts->target))
1285                 evlist__enable(top->evlist);
1286
1287         ret = -1;
1288         if (pthread_create(&thread_process, NULL, process_thread, top)) {
1289                 ui__error("Could not create process thread.\n");
1290                 return ret;
1291         }
1292
1293         if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
1294                                                             display_thread), top)) {
1295                 ui__error("Could not create display thread.\n");
1296                 goto out_join_thread;
1297         }
1298
1299         if (top->realtime_prio) {
1300                 struct sched_param param;
1301
1302                 param.sched_priority = top->realtime_prio;
1303                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1304                         ui__error("Could not set realtime priority.\n");
1305                         goto out_join;
1306                 }
1307         }
1308
1309         /* Wait for a minimal set of events before starting the snapshot */
1310         perf_evlist__poll(top->evlist, 100);
1311
1312         perf_top__mmap_read(top);
1313
1314         while (!done) {
1315                 u64 hits = top->samples;
1316
1317                 perf_top__mmap_read(top);
1318
1319                 if (opts->overwrite || (hits == top->samples))
1320                         ret = perf_evlist__poll(top->evlist, 100);
1321
1322                 if (resize) {
1323                         perf_top__resize(top);
1324                         resize = 0;
1325                 }
1326         }
1327
1328         ret = 0;
1329 out_join:
1330         pthread_join(thread, NULL);
1331 out_join_thread:
1332         pthread_cond_signal(&top->qe.cond);
1333         pthread_join(thread_process, NULL);
1334         return ret;
1335 }
1336
1337 static int
1338 callchain_opt(const struct option *opt, const char *arg, int unset)
1339 {
1340         symbol_conf.use_callchain = true;
1341         return record_callchain_opt(opt, arg, unset);
1342 }
1343
1344 static int
1345 parse_callchain_opt(const struct option *opt, const char *arg, int unset)
1346 {
1347         struct callchain_param *callchain = opt->value;
1348
1349         callchain->enabled = !unset;
1350         callchain->record_mode = CALLCHAIN_FP;
1351
1352         /*
1353          * --no-call-graph
1354          */
1355         if (unset) {
1356                 symbol_conf.use_callchain = false;
1357                 callchain->record_mode = CALLCHAIN_NONE;
1358                 return 0;
1359         }
1360
1361         return parse_callchain_top_opt(arg);
1362 }
1363
1364 static int perf_top_config(const char *var, const char *value, void *cb __maybe_unused)
1365 {
1366         if (!strcmp(var, "top.call-graph")) {
1367                 var = "call-graph.record-mode";
1368                 return perf_default_config(var, value, cb);
1369         }
1370         if (!strcmp(var, "top.children")) {
1371                 symbol_conf.cumulate_callchain = perf_config_bool(var, value);
1372                 return 0;
1373         }
1374
1375         return 0;
1376 }
1377
1378 static int
1379 parse_percent_limit(const struct option *opt, const char *arg,
1380                     int unset __maybe_unused)
1381 {
1382         struct perf_top *top = opt->value;
1383
1384         top->min_percent = strtof(arg, NULL);
1385         return 0;
1386 }
1387
1388 const char top_callchain_help[] = CALLCHAIN_RECORD_HELP CALLCHAIN_REPORT_HELP
1389         "\n\t\t\t\tDefault: fp,graph,0.5,caller,function";
1390
1391 int cmd_top(int argc, const char **argv)
1392 {
1393         char errbuf[BUFSIZ];
1394         struct perf_top top = {
1395                 .count_filter        = 5,
1396                 .delay_secs          = 2,
1397                 .record_opts = {
1398                         .mmap_pages     = UINT_MAX,
1399                         .user_freq      = UINT_MAX,
1400                         .user_interval  = ULLONG_MAX,
1401                         .freq           = 4000, /* 4 KHz */
1402                         .target         = {
1403                                 .uses_mmap   = true,
1404                         },
1405                         /*
1406                          * FIXME: This will lose PERF_RECORD_MMAP and other metadata
1407                          * when we pause, fix that and reenable. Probably using a
1408                          * separate evlist with a dummy event, i.e. a non-overwrite
1409                          * ring buffer just for metadata events, while PERF_RECORD_SAMPLE
1410                          * stays in overwrite mode. -acme
1411                          * */
1412                         .overwrite      = 0,
1413                         .sample_time    = true,
1414                         .sample_time_set = true,
1415                 },
1416                 .max_stack           = sysctl__max_stack(),
1417                 .annotation_opts     = annotation__default_options,
1418                 .nr_threads_synthesize = UINT_MAX,
1419         };
1420         struct record_opts *opts = &top.record_opts;
1421         struct target *target = &opts->target;
1422         const struct option options[] = {
1423         OPT_CALLBACK('e', "event", &top.evlist, "event",
1424                      "event selector. use 'perf list' to list available events",
1425                      parse_events_option),
1426         OPT_U64('c', "count", &opts->user_interval, "event period to sample"),
1427         OPT_STRING('p', "pid", &target->pid, "pid",
1428                     "profile events on existing process id"),
1429         OPT_STRING('t', "tid", &target->tid, "tid",
1430                     "profile events on existing thread id"),
1431         OPT_BOOLEAN('a', "all-cpus", &target->system_wide,
1432                             "system-wide collection from all CPUs"),
1433         OPT_STRING('C', "cpu", &target->cpu_list, "cpu",
1434                     "list of cpus to monitor"),
1435         OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
1436                    "file", "vmlinux pathname"),
1437         OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
1438                     "don't load vmlinux even if found"),
1439         OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
1440                    "file", "kallsyms pathname"),
1441         OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
1442                     "hide kernel symbols"),
1443         OPT_CALLBACK('m', "mmap-pages", &opts->mmap_pages, "pages",
1444                      "number of mmap data pages",
1445                      perf_evlist__parse_mmap_pages),
1446         OPT_INTEGER('r', "realtime", &top.realtime_prio,
1447                     "collect data with this RT SCHED_FIFO priority"),
1448         OPT_INTEGER('d', "delay", &top.delay_secs,
1449                     "number of seconds to delay between refreshes"),
1450         OPT_BOOLEAN('D', "dump-symtab", &top.dump_symtab,
1451                             "dump the symbol table used for profiling"),
1452         OPT_INTEGER('f', "count-filter", &top.count_filter,
1453                     "only display functions with more events than this"),
1454         OPT_BOOLEAN(0, "group", &opts->group,
1455                             "put the counters into a counter group"),
1456         OPT_BOOLEAN('i', "no-inherit", &opts->no_inherit,
1457                     "child tasks do not inherit counters"),
1458         OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
1459                     "symbol to annotate"),
1460         OPT_BOOLEAN('z', "zero", &top.zero, "zero history across updates"),
1461         OPT_CALLBACK('F', "freq", &top.record_opts, "freq or 'max'",
1462                      "profile at this frequency",
1463                       record__parse_freq),
1464         OPT_INTEGER('E', "entries", &top.print_entries,
1465                     "display this many functions"),
1466         OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
1467                     "hide user symbols"),
1468         OPT_BOOLEAN(0, "tui", &top.use_tui, "Use the TUI interface"),
1469         OPT_BOOLEAN(0, "stdio", &top.use_stdio, "Use the stdio interface"),
1470         OPT_INCR('v', "verbose", &verbose,
1471                     "be more verbose (show counter open errors, etc)"),
1472         OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
1473                    "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
1474                    " Please refer the man page for the complete list."),
1475         OPT_STRING(0, "fields", &field_order, "key[,keys...]",
1476                    "output field(s): overhead, period, sample plus all of sort keys"),
1477         OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
1478                     "Show a column with the number of samples"),
1479         OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1480                            NULL, "enables call-graph recording and display",
1481                            &callchain_opt),
1482         OPT_CALLBACK(0, "call-graph", &callchain_param,
1483                      "record_mode[,record_size],print_type,threshold[,print_limit],order,sort_key[,branch]",
1484                      top_callchain_help, &parse_callchain_opt),
1485         OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
1486                     "Accumulate callchains of children and show total overhead as well"),
1487         OPT_INTEGER(0, "max-stack", &top.max_stack,
1488                     "Set the maximum stack depth when parsing the callchain. "
1489                     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
1490         OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
1491                    "ignore callees of these functions in call graphs",
1492                    report_parse_ignore_callees_opt),
1493         OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
1494                     "Show a column with the sum of periods"),
1495         OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
1496                    "only consider symbols in these dsos"),
1497         OPT_STRING(0, "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
1498                    "only consider symbols in these comms"),
1499         OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
1500                    "only consider these symbols"),
1501         OPT_BOOLEAN(0, "source", &top.annotation_opts.annotate_src,
1502                     "Interleave source code with assembly code (default)"),
1503         OPT_BOOLEAN(0, "asm-raw", &top.annotation_opts.show_asm_raw,
1504                     "Display raw encoding of assembly instructions (default)"),
1505         OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
1506                     "Enable kernel symbol demangling"),
1507         OPT_BOOLEAN(0, "no-bpf-event", &top.record_opts.no_bpf_event, "do not record bpf events"),
1508         OPT_STRING(0, "objdump", &top.annotation_opts.objdump_path, "path",
1509                     "objdump binary to use for disassembly and annotations"),
1510         OPT_STRING('M', "disassembler-style", &top.annotation_opts.disassembler_style, "disassembler style",
1511                    "Specify disassembler style (e.g. -M intel for intel syntax)"),
1512         OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
1513         OPT_CALLBACK(0, "percent-limit", &top, "percent",
1514                      "Don't show entries under that percent", parse_percent_limit),
1515         OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
1516                      "How to display percentage of filtered entries", parse_filter_percentage),
1517         OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str,
1518                    "width[,width...]",
1519                    "don't try to adjust column width, use these fixed values"),
1520         OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
1521                         "per thread proc mmap processing timeout in ms"),
1522         OPT_CALLBACK_NOOPT('b', "branch-any", &opts->branch_stack,
1523                      "branch any", "sample any taken branches",
1524                      parse_branch_stack),
1525         OPT_CALLBACK('j', "branch-filter", &opts->branch_stack,
1526                      "branch filter mask", "branch stack filter modes",
1527                      parse_branch_stack),
1528         OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
1529                     "Show raw trace event output (do not use print fmt or plugins)"),
1530         OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
1531                     "Show entries in a hierarchy"),
1532         OPT_BOOLEAN(0, "overwrite", &top.record_opts.overwrite,
1533                     "Use a backward ring buffer, default: no"),
1534         OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"),
1535         OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize,
1536                         "number of thread to run event synthesize"),
1537         OPT_BOOLEAN(0, "namespaces", &opts->record_namespaces,
1538                     "Record namespaces events"),
1539         OPTS_EVSWITCH(&top.evswitch),
1540         OPT_END()
1541         };
1542         struct evlist *sb_evlist = NULL;
1543         const char * const top_usage[] = {
1544                 "perf top [<options>]",
1545                 NULL
1546         };
1547         int status = hists__init();
1548
1549         if (status < 0)
1550                 return status;
1551
1552         top.annotation_opts.min_pcnt = 5;
1553         top.annotation_opts.context  = 4;
1554
1555         top.evlist = evlist__new();
1556         if (top.evlist == NULL)
1557                 return -ENOMEM;
1558
1559         status = perf_config(perf_top_config, &top);
1560         if (status)
1561                 return status;
1562
1563         argc = parse_options(argc, argv, options, top_usage, 0);
1564         if (argc)
1565                 usage_with_options(top_usage, options);
1566
1567         if (!top.evlist->core.nr_entries &&
1568             perf_evlist__add_default(top.evlist) < 0) {
1569                 pr_err("Not enough memory for event selector list\n");
1570                 goto out_delete_evlist;
1571         }
1572
1573         status = evswitch__init(&top.evswitch, top.evlist, stderr);
1574         if (status)
1575                 goto out_delete_evlist;
1576
1577         if (symbol_conf.report_hierarchy) {
1578                 /* disable incompatible options */
1579                 symbol_conf.event_group = false;
1580                 symbol_conf.cumulate_callchain = false;
1581
1582                 if (field_order) {
1583                         pr_err("Error: --hierarchy and --fields options cannot be used together\n");
1584                         parse_options_usage(top_usage, options, "fields", 0);
1585                         parse_options_usage(NULL, options, "hierarchy", 0);
1586                         goto out_delete_evlist;
1587                 }
1588         }
1589
1590         if (opts->branch_stack && callchain_param.enabled)
1591                 symbol_conf.show_branchflag_count = true;
1592
1593         sort__mode = SORT_MODE__TOP;
1594         /* display thread wants entries to be collapsed in a different tree */
1595         perf_hpp_list.need_collapse = 1;
1596
1597         if (top.use_stdio)
1598                 use_browser = 0;
1599         else if (top.use_tui)
1600                 use_browser = 1;
1601
1602         setup_browser(false);
1603
1604         if (setup_sorting(top.evlist) < 0) {
1605                 if (sort_order)
1606                         parse_options_usage(top_usage, options, "s", 1);
1607                 if (field_order)
1608                         parse_options_usage(sort_order ? NULL : top_usage,
1609                                             options, "fields", 0);
1610                 goto out_delete_evlist;
1611         }
1612
1613         status = target__validate(target);
1614         if (status) {
1615                 target__strerror(target, status, errbuf, BUFSIZ);
1616                 ui__warning("%s\n", errbuf);
1617         }
1618
1619         status = target__parse_uid(target);
1620         if (status) {
1621                 int saved_errno = errno;
1622
1623                 target__strerror(target, status, errbuf, BUFSIZ);
1624                 ui__error("%s\n", errbuf);
1625
1626                 status = -saved_errno;
1627                 goto out_delete_evlist;
1628         }
1629
1630         if (target__none(target))
1631                 target->system_wide = true;
1632
1633         if (perf_evlist__create_maps(top.evlist, target) < 0) {
1634                 ui__error("Couldn't create thread/CPU maps: %s\n",
1635                           errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
1636                 goto out_delete_evlist;
1637         }
1638
1639         if (top.delay_secs < 1)
1640                 top.delay_secs = 1;
1641
1642         if (record_opts__config(opts)) {
1643                 status = -EINVAL;
1644                 goto out_delete_evlist;
1645         }
1646
1647         top.sym_evsel = perf_evlist__first(top.evlist);
1648
1649         if (!callchain_param.enabled) {
1650                 symbol_conf.cumulate_callchain = false;
1651                 perf_hpp__cancel_cumulate();
1652         }
1653
1654         if (symbol_conf.cumulate_callchain && !callchain_param.order_set)
1655                 callchain_param.order = ORDER_CALLER;
1656
1657         status = symbol__annotation_init();
1658         if (status < 0)
1659                 goto out_delete_evlist;
1660
1661         annotation_config__init();
1662
1663         symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
1664         status = symbol__init(NULL);
1665         if (status < 0)
1666                 goto out_delete_evlist;
1667
1668         sort__setup_elide(stdout);
1669
1670         get_term_dimensions(&top.winsize);
1671         if (top.print_entries == 0) {
1672                 perf_top__update_print_entries(&top);
1673                 signal(SIGWINCH, winch_sig);
1674         }
1675
1676         top.session = perf_session__new(NULL, false, NULL);
1677         if (IS_ERR(top.session)) {
1678                 status = PTR_ERR(top.session);
1679                 goto out_delete_evlist;
1680         }
1681
1682         if (!top.record_opts.no_bpf_event)
1683                 bpf_event__add_sb_event(&sb_evlist, &perf_env);
1684
1685         if (perf_evlist__start_sb_thread(sb_evlist, target)) {
1686                 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1687                 opts->no_bpf_event = true;
1688         }
1689
1690         status = __cmd_top(&top);
1691
1692         if (!opts->no_bpf_event)
1693                 perf_evlist__stop_sb_thread(sb_evlist);
1694
1695 out_delete_evlist:
1696         evlist__delete(top.evlist);
1697         perf_session__delete(top.session);
1698
1699         return status;
1700 }