asedeno.scripts.mit.edu Git - linux.git/blob - tools/perf/builtin-kvm.c
perf tools: Rename perf_evlist__mmap() to evlist__mmap()
[linux.git] / tools / perf / builtin-kvm.c
1 // SPDX-License-Identifier: GPL-2.0
2 #include "builtin.h"
3 #include "perf.h"
4
5 #include "util/build-id.h"
6 #include "util/evsel.h"
7 #include "util/evlist.h"
8 #include "util/term.h"
9 #include "util/symbol.h"
10 #include "util/thread.h"
11 #include "util/header.h"
12 #include "util/session.h"
13 #include "util/intlist.h"
14 #include <subcmd/pager.h>
15 #include <subcmd/parse-options.h>
16 #include "util/trace-event.h"
17 #include "util/debug.h"
18 #include "util/tool.h"
19 #include "util/stat.h"
20 #include "util/synthetic-events.h"
21 #include "util/top.h"
22 #include "util/data.h"
23 #include "util/ordered-events.h"
24 #include "util/kvm-stat.h"
25 #include "ui/ui.h"
26
27 #include <sys/prctl.h>
28 #ifdef HAVE_TIMERFD_SUPPORT
29 #include <sys/timerfd.h>
30 #endif
31 #include <sys/time.h>
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <fcntl.h>
35
36 #include <linux/err.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/time64.h>
40 #include <linux/zalloc.h>
41 #include <errno.h>
42 #include <inttypes.h>
43 #include <poll.h>
44 #include <termios.h>
45 #include <semaphore.h>
46 #include <signal.h>
47 #include <math.h>
48
49 static const char *get_filename_for_perf_kvm(void)
50 {
51         const char *filename;
52
53         if (perf_host && !perf_guest)
54                 filename = strdup("perf.data.host");
55         else if (!perf_host && perf_guest)
56                 filename = strdup("perf.data.guest");
57         else
58                 filename = strdup("perf.data.kvm");
59
60         return filename;
61 }
62
63 #ifdef HAVE_KVM_STAT_SUPPORT
64
/* Fill @key with the raw exit reason of a kvm_exit tracepoint sample. */
void exit_event_get_key(struct evsel *evsel,
			struct perf_sample *sample,
			struct event_key *key)
{
	key->info = 0;
	/* kvm_exit_reason is the arch-specific tracepoint field name */
	key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason);
}
72
73 bool kvm_exit_event(struct evsel *evsel)
74 {
75         return !strcmp(evsel->name, kvm_exit_trace);
76 }
77
78 bool exit_event_begin(struct evsel *evsel,
79                       struct perf_sample *sample, struct event_key *key)
80 {
81         if (kvm_exit_event(evsel)) {
82                 exit_event_get_key(evsel, sample, key);
83                 return true;
84         }
85
86         return false;
87 }
88
89 bool kvm_entry_event(struct evsel *evsel)
90 {
91         return !strcmp(evsel->name, kvm_entry_trace);
92 }
93
/* A kvm_entry sample closes the currently open exit event. */
bool exit_event_end(struct evsel *evsel,
		    struct perf_sample *sample __maybe_unused,
		    struct event_key *key __maybe_unused)
{
	return kvm_entry_event(evsel);
}
100
101 static const char *get_exit_reason(struct perf_kvm_stat *kvm,
102                                    struct exit_reasons_table *tbl,
103                                    u64 exit_code)
104 {
105         while (tbl->reason != NULL) {
106                 if (tbl->exit_code == exit_code)
107                         return tbl->reason;
108                 tbl++;
109         }
110
111         pr_err("unknown kvm exit code:%lld on %s\n",
112                 (unsigned long long)exit_code, kvm->exit_reasons_isa);
113         return "UNKNOWN";
114 }
115
/* Render the exit reason behind @key into the fixed-size @decode buffer. */
void exit_event_decode_key(struct perf_kvm_stat *kvm,
			   struct event_key *key,
			   char *decode)
{
	const char *exit_reason = get_exit_reason(kvm, key->exit_reasons,
						  key->key);

	/* decode must be at least decode_str_len bytes */
	scnprintf(decode, decode_str_len, "%s", exit_reason);
}
125
126 static bool register_kvm_events_ops(struct perf_kvm_stat *kvm)
127 {
128         struct kvm_reg_events_ops *events_ops = kvm_reg_events_ops;
129
130         for (events_ops = kvm_reg_events_ops; events_ops->name; events_ops++) {
131                 if (!strcmp(events_ops->name, kvm->report_event)) {
132                         kvm->events_ops = events_ops->ops;
133                         return true;
134                 }
135         }
136
137         return false;
138 }
139
/*
 * Per-thread bookkeeping: which vcpu the thread runs and the event that
 * is currently open (begin seen, end not seen yet) on it.
 */
struct vcpu_event_record {
	int vcpu_id;			/* vcpu id from the kvm_entry sample */
	u64 start_time;			/* timestamp of the open begin event, 0 if none */
	struct kvm_event *last_event;	/* event opened by the begin/child sample */
};
145
146
147 static void init_kvm_event_record(struct perf_kvm_stat *kvm)
148 {
149         unsigned int i;
150
151         for (i = 0; i < EVENTS_CACHE_SIZE; i++)
152                 INIT_LIST_HEAD(&kvm->kvm_events_cache[i]);
153 }
154
155 #ifdef HAVE_TIMERFD_SUPPORT
156 static void clear_events_cache_stats(struct list_head *kvm_events_cache)
157 {
158         struct list_head *head;
159         struct kvm_event *event;
160         unsigned int i;
161         int j;
162
163         for (i = 0; i < EVENTS_CACHE_SIZE; i++) {
164                 head = &kvm_events_cache[i];
165                 list_for_each_entry(event, head, hash_entry) {
166                         /* reset stats for event */
167                         event->total.time = 0;
168                         init_stats(&event->total.stats);
169
170                         for (j = 0; j < event->max_vcpu; ++j) {
171                                 event->vcpu[j].time = 0;
172                                 init_stats(&event->vcpu[j].stats);
173                         }
174                 }
175         }
176 }
177 #endif
178
179 static int kvm_events_hash_fn(u64 key)
180 {
181         return key & (EVENTS_CACHE_SIZE - 1);
182 }
183
184 static bool kvm_event_expand(struct kvm_event *event, int vcpu_id)
185 {
186         int old_max_vcpu = event->max_vcpu;
187         void *prev;
188
189         if (vcpu_id < event->max_vcpu)
190                 return true;
191
192         while (event->max_vcpu <= vcpu_id)
193                 event->max_vcpu += DEFAULT_VCPU_NUM;
194
195         prev = event->vcpu;
196         event->vcpu = realloc(event->vcpu,
197                               event->max_vcpu * sizeof(*event->vcpu));
198         if (!event->vcpu) {
199                 free(prev);
200                 pr_err("Not enough memory\n");
201                 return false;
202         }
203
204         memset(event->vcpu + old_max_vcpu, 0,
205                (event->max_vcpu - old_max_vcpu) * sizeof(*event->vcpu));
206         return true;
207 }
208
209 static struct kvm_event *kvm_alloc_init_event(struct event_key *key)
210 {
211         struct kvm_event *event;
212
213         event = zalloc(sizeof(*event));
214         if (!event) {
215                 pr_err("Not enough memory\n");
216                 return NULL;
217         }
218
219         event->key = *key;
220         init_stats(&event->total.stats);
221         return event;
222 }
223
224 static struct kvm_event *find_create_kvm_event(struct perf_kvm_stat *kvm,
225                                                struct event_key *key)
226 {
227         struct kvm_event *event;
228         struct list_head *head;
229
230         BUG_ON(key->key == INVALID_KEY);
231
232         head = &kvm->kvm_events_cache[kvm_events_hash_fn(key->key)];
233         list_for_each_entry(event, head, hash_entry) {
234                 if (event->key.key == key->key && event->key.info == key->info)
235                         return event;
236         }
237
238         event = kvm_alloc_init_event(key);
239         if (!event)
240                 return NULL;
241
242         list_add(&event->hash_entry, head);
243         return event;
244 }
245
246 static bool handle_begin_event(struct perf_kvm_stat *kvm,
247                                struct vcpu_event_record *vcpu_record,
248                                struct event_key *key, u64 timestamp)
249 {
250         struct kvm_event *event = NULL;
251
252         if (key->key != INVALID_KEY)
253                 event = find_create_kvm_event(kvm, key);
254
255         vcpu_record->last_event = event;
256         vcpu_record->start_time = timestamp;
257         return true;
258 }
259
/* Accumulate one completed event occurrence of @time_diff ns into @kvm_stats. */
static void
kvm_update_event_stats(struct kvm_event_stats *kvm_stats, u64 time_diff)
{
	kvm_stats->time += time_diff;
	update_stats(&kvm_stats->stats, time_diff);
}
266
267 static double kvm_event_rel_stddev(int vcpu_id, struct kvm_event *event)
268 {
269         struct kvm_event_stats *kvm_stats = &event->total;
270
271         if (vcpu_id != -1)
272                 kvm_stats = &event->vcpu[vcpu_id];
273
274         return rel_stddev_stats(stddev_stats(&kvm_stats->stats),
275                                 avg_stats(&kvm_stats->stats));
276 }
277
278 static bool update_kvm_event(struct kvm_event *event, int vcpu_id,
279                              u64 time_diff)
280 {
281         if (vcpu_id == -1) {
282                 kvm_update_event_stats(&event->total, time_diff);
283                 return true;
284         }
285
286         if (!kvm_event_expand(event, vcpu_id))
287                 return false;
288
289         kvm_update_event_stats(&event->vcpu[vcpu_id], time_diff);
290         return true;
291 }
292
293 static bool is_child_event(struct perf_kvm_stat *kvm,
294                            struct evsel *evsel,
295                            struct perf_sample *sample,
296                            struct event_key *key)
297 {
298         struct child_event_ops *child_ops;
299
300         child_ops = kvm->events_ops->child_ops;
301
302         if (!child_ops)
303                 return false;
304
305         for (; child_ops->name; child_ops++) {
306                 if (!strcmp(evsel->name, child_ops->name)) {
307                         child_ops->get_key(evsel, sample, key);
308                         return true;
309                 }
310         }
311
312         return false;
313 }
314
315 static bool handle_child_event(struct perf_kvm_stat *kvm,
316                                struct vcpu_event_record *vcpu_record,
317                                struct event_key *key,
318                                struct perf_sample *sample __maybe_unused)
319 {
320         struct kvm_event *event = NULL;
321
322         if (key->key != INVALID_KEY)
323                 event = find_create_kvm_event(kvm, key);
324
325         vcpu_record->last_event = event;
326
327         return true;
328 }
329
330 static bool skip_event(const char *event)
331 {
332         const char * const *skip_events;
333
334         for (skip_events = kvm_skip_events; *skip_events; skip_events++)
335                 if (!strcmp(event, *skip_events))
336                         return true;
337
338         return false;
339 }
340
/*
 * Close the event opened by the last begin sample on this vcpu and
 * account its duration.  Returns false only on a hard (allocation)
 * error; malformed/unmatched pairs are silently tolerated.
 */
static bool handle_end_event(struct perf_kvm_stat *kvm,
			     struct vcpu_event_record *vcpu_record,
			     struct event_key *key,
			     struct perf_sample *sample)
{
	struct kvm_event *event;
	u64 time_begin, time_diff;
	int vcpu;

	/* aggregate into the total (-1) unless a vcpu filter is active */
	if (kvm->trace_vcpu == -1)
		vcpu = -1;
	else
		vcpu = vcpu_record->vcpu_id;

	event = vcpu_record->last_event;
	time_begin = vcpu_record->start_time;

	/* The begin event is not caught. */
	if (!time_begin)
		return true;

	/*
	 * In some case, the 'begin event' only records the start timestamp,
	 * the actual event is recognized in the 'end event' (e.g. mmio-event).
	 */

	/* Both begin and end events did not get the key. */
	if (!event && key->key == INVALID_KEY)
		return true;

	if (!event)
		event = find_create_kvm_event(kvm, key);

	if (!event)
		return false;

	/* the begin/end pair is consumed; reset for the next begin */
	vcpu_record->last_event = NULL;
	vcpu_record->start_time = 0;

	/* seems to happen once in a while during live mode */
	if (sample->time < time_begin) {
		pr_debug("End time before begin time; skipping event.\n");
		return true;
	}

	time_diff = sample->time - time_begin;

	/* report events exceeding the user-specified duration threshold */
	if (kvm->duration && time_diff > kvm->duration) {
		char decode[decode_str_len];

		kvm->events_ops->decode_key(kvm, &event->key, decode);
		if (!skip_event(decode)) {
			pr_info("%" PRIu64 " VM %d, vcpu %d: %s event took %" PRIu64 "usec\n",
				 sample->time, sample->pid, vcpu_record->vcpu_id,
				 decode, time_diff / NSEC_PER_USEC);
		}
	}

	return update_kvm_event(event, vcpu, time_diff);
}
401
/*
 * Return the vcpu record attached to @thread, creating it lazily from
 * the first kvm_entry sample (the only event carrying the vcpu id).
 * May return NULL, either on allocation failure or because no kvm_entry
 * sample has been seen for this thread yet.
 */
static
struct vcpu_event_record *per_vcpu_record(struct thread *thread,
					  struct evsel *evsel,
					  struct perf_sample *sample)
{
	/* Only kvm_entry records vcpu id. */
	if (!thread__priv(thread) && kvm_entry_event(evsel)) {
		struct vcpu_event_record *vcpu_record;

		vcpu_record = zalloc(sizeof(*vcpu_record));
		if (!vcpu_record) {
			pr_err("%s: Not enough memory\n", __func__);
			return NULL;
		}

		/* vcpu_id_str is the arch-specific tracepoint field name */
		vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample,
							  vcpu_id_str);
		/* ownership of vcpu_record moves to the thread's priv */
		thread__set_priv(thread, vcpu_record);
	}

	return thread__priv(thread);
}
424
/*
 * Dispatch one tracepoint sample to the begin/child/end handler of the
 * registered ops.  Returns false only when a handler hit a hard error.
 */
static bool handle_kvm_event(struct perf_kvm_stat *kvm,
			     struct thread *thread,
			     struct evsel *evsel,
			     struct perf_sample *sample)
{
	struct vcpu_event_record *vcpu_record;
	struct event_key key = { .key = INVALID_KEY,
				 .exit_reasons = kvm->exit_reasons };

	/* NULL means no vcpu id is known for this thread yet: nothing to do */
	vcpu_record = per_vcpu_record(thread, evsel, sample);
	if (!vcpu_record)
		return true;

	/* only process events for vcpus user cares about */
	if ((kvm->trace_vcpu != -1) &&
	    (kvm->trace_vcpu != vcpu_record->vcpu_id))
		return true;

	if (kvm->events_ops->is_begin_event(evsel, sample, &key))
		return handle_begin_event(kvm, vcpu_record, &key, sample->time);

	if (is_child_event(kvm, evsel, sample, &key))
		return handle_child_event(kvm, vcpu_record, &key, sample);

	if (kvm->events_ops->is_end_event(evsel, sample, &key))
		return handle_end_event(kvm, vcpu_record, &key, sample);

	return true;
}
454
/*
 * GET_EVENT_KEY(func, field) defines get_event_<func>(): read the given
 * stats field either for one vcpu or, with vcpu == -1, for the event
 * total.  Vcpus beyond the (demand-grown) array read as 0.
 */
#define GET_EVENT_KEY(func, field)                                      \
static u64 get_event_ ##func(struct kvm_event *event, int vcpu)         \
{                                                                       \
	if (vcpu == -1)                                                 \
		return event->total.field;                              \
									\
	if (vcpu >= event->max_vcpu)                                    \
		return 0;                                               \
									\
	return event->vcpu[vcpu].field;                                 \
}
466
/*
 * COMPARE_EVENT_KEY additionally defines compare_kvm_event_<func>(), a
 * "one bigger than two" predicate used to order the result tree.
 */
#define COMPARE_EVENT_KEY(func, field)                                  \
GET_EVENT_KEY(func, field)                                              \
static int compare_kvm_event_ ## func(struct kvm_event *one,            \
					struct kvm_event *two, int vcpu)\
{                                                                       \
	return get_event_ ##func(one, vcpu) >                           \
				get_event_ ##func(two, vcpu);           \
}
475
/* plain accessors: time/max/min; accessors plus sort predicates: count/mean */
GET_EVENT_KEY(time, time);
COMPARE_EVENT_KEY(count, stats.n);
COMPARE_EVENT_KEY(mean, stats.mean);
GET_EVENT_KEY(max, stats.max);
GET_EVENT_KEY(min, stats.min);
481
/* maps a sort key name (kvm->sort_key) to its compare predicate */
#define DEF_SORT_NAME_KEY(name, compare_key)                            \
	{ #name, compare_kvm_event_ ## compare_key }

static struct kvm_event_key keys[] = {
	DEF_SORT_NAME_KEY(sample, count),
	DEF_SORT_NAME_KEY(time, mean),
	{ NULL, NULL }
};
490
491 static bool select_key(struct perf_kvm_stat *kvm)
492 {
493         int i;
494
495         for (i = 0; keys[i].name; i++) {
496                 if (!strcmp(keys[i].name, kvm->sort_key)) {
497                         kvm->compare = keys[i].key;
498                         return true;
499                 }
500         }
501
502         pr_err("Unknown compare key:%s\n", kvm->sort_key);
503         return false;
504 }
505
/*
 * Insert @event into the rbtree @result ordered by @bigger: bigger
 * events go left, so an in-order walk yields the biggest first.
 */
static void insert_to_result(struct rb_root *result, struct kvm_event *event,
			     key_cmp_fun bigger, int vcpu)
{
	struct rb_node **rb = &result->rb_node;
	struct rb_node *parent = NULL;
	struct kvm_event *p;

	/* standard rbtree descent to the insertion slot */
	while (*rb) {
		p = container_of(*rb, struct kvm_event, rb);
		parent = *rb;

		if (bigger(event, p, vcpu))
			rb = &(*rb)->rb_left;
		else
			rb = &(*rb)->rb_right;
	}

	rb_link_node(&event->rb, parent, rb);
	rb_insert_color(&event->rb, result);
}
526
527 static void
528 update_total_count(struct perf_kvm_stat *kvm, struct kvm_event *event)
529 {
530         int vcpu = kvm->trace_vcpu;
531
532         kvm->total_count += get_event_count(event, vcpu);
533         kvm->total_time += get_event_time(event, vcpu);
534 }
535
536 static bool event_is_valid(struct kvm_event *event, int vcpu)
537 {
538         return !!get_event_count(event, vcpu);
539 }
540
/*
 * Walk the whole hash cache, accumulate the grand totals and move every
 * event that has samples into the sorted rbtree kvm->result.
 */
static void sort_result(struct perf_kvm_stat *kvm)
{
	unsigned int i;
	int vcpu = kvm->trace_vcpu;
	struct kvm_event *event;

	for (i = 0; i < EVENTS_CACHE_SIZE; i++) {
		list_for_each_entry(event, &kvm->kvm_events_cache[i], hash_entry) {
			if (event_is_valid(event, vcpu)) {
				update_total_count(kvm, event);
				insert_to_result(&kvm->result, event,
						 kvm->compare, vcpu);
			}
		}
	}
}
557
558 /* returns left most element of result, and erase it */
559 static struct kvm_event *pop_from_result(struct rb_root *result)
560 {
561         struct rb_node *node = rb_first(result);
562
563         if (!node)
564                 return NULL;
565
566         rb_erase(node, result);
567         return container_of(node, struct kvm_event, rb);
568 }
569
/* Print the "Analyze events for ..." report header (target + vcpu scope). */
static void print_vcpu_info(struct perf_kvm_stat *kvm)
{
	int vcpu = kvm->trace_vcpu;

	pr_info("Analyze events for ");

	if (kvm->opts.target.system_wide)
		pr_info("all VMs, ");
	else if (kvm->opts.target.pid)
		pr_info("pid(s) %s, ", kvm->opts.target.pid);
	else
		pr_info("dazed and confused on what is monitored, ");

	if (vcpu == -1)
		pr_info("all VCPUs:\n\n");
	else
		pr_info("VCPU %d:\n\n", vcpu);
}
588
/*
 * Print the current wall-clock time as HH:MM:SS.usec (live mode display
 * header).  Falls back to a fixed zero stamp if localtime_r() fails.
 */
static void show_timeofday(void)
{
	char date[64];
	struct timeval tv;
	struct tm ltime;

	gettimeofday(&tv, NULL);
	if (localtime_r(&tv.tv_sec, &ltime)) {
		strftime(date, sizeof(date), "%H:%M:%S", &ltime);
		pr_info("%s.%06ld", date, tv.tv_usec);
	} else {
		/* braces balanced with the if branch; dropped redundant return */
		pr_info("00:00:00.000000");
	}
}
604
/*
 * Render the sorted result table: one row per event with sample count,
 * sample/time percentages, min/max/avg duration and relative stddev.
 * Consumes (empties) kvm->result while printing.
 */
static void print_result(struct perf_kvm_stat *kvm)
{
	char decode[decode_str_len];
	struct kvm_event *event;
	int vcpu = kvm->trace_vcpu;

	if (kvm->live) {
		puts(CONSOLE_CLEAR);
		show_timeofday();
	}

	pr_info("\n\n");
	print_vcpu_info(kvm);
	pr_info("%*s ", decode_str_len, kvm->events_ops->name);
	pr_info("%10s ", "Samples");
	pr_info("%9s ", "Samples%");

	pr_info("%9s ", "Time%");
	pr_info("%11s ", "Min Time");
	pr_info("%11s ", "Max Time");
	pr_info("%16s ", "Avg time");
	pr_info("\n\n");

	while ((event = pop_from_result(&kvm->result))) {
		u64 ecount, etime, max, min;

		ecount = get_event_count(event, vcpu);
		etime = get_event_time(event, vcpu);
		max = get_event_max(event, vcpu);
		min = get_event_min(event, vcpu);

		kvm->events_ops->decode_key(kvm, &event->key, decode);
		pr_info("%*s ", decode_str_len, decode);
		pr_info("%10llu ", (unsigned long long)ecount);
		pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100);
		pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100);
		pr_info("%9.2fus ", (double)min / NSEC_PER_USEC);
		pr_info("%9.2fus ", (double)max / NSEC_PER_USEC);
		/* ecount != 0 here: only events with samples were inserted */
		pr_info("%9.2fus ( +-%7.2f%% )", (double)etime / ecount / NSEC_PER_USEC,
			kvm_event_rel_stddev(vcpu, event));
		pr_info("\n");
	}

	pr_info("\nTotal Samples:%" PRIu64 ", Total events handled time:%.2fus.\n\n",
		kvm->total_count, kvm->total_time / (double)NSEC_PER_USEC);

	if (kvm->lost_events)
		pr_info("\nLost events: %" PRIu64 "\n\n", kvm->lost_events);
}
654
655 #ifdef HAVE_TIMERFD_SUPPORT
/* perf_tool callback: count lost-event records for the live report footer. */
static int process_lost_event(struct perf_tool *tool,
			      union perf_event *event __maybe_unused,
			      struct perf_sample *sample __maybe_unused,
			      struct machine *machine __maybe_unused)
{
	struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat, tool);

	kvm->lost_events++;
	return 0;
}
666 #endif
667
668 static bool skip_sample(struct perf_kvm_stat *kvm,
669                         struct perf_sample *sample)
670 {
671         if (kvm->pid_list && intlist__find(kvm->pid_list, sample->pid) == NULL)
672                 return true;
673
674         return false;
675 }
676
/*
 * perf_tool callback: resolve the sample's thread and feed the sample
 * through the kvm event state machine.  Returns -1 on hard errors.
 */
static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	int err = 0;
	struct thread *thread;
	struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat,
						 tool);

	if (skip_sample(kvm, sample))
		return 0;

	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			event->header.type);
		return -1;
	}

	if (!handle_kvm_event(kvm, thread, evsel, sample))
		err = -1;

	/* drop the reference taken by machine__findnew_thread() */
	thread__put(thread);
	return err;
}
704
705 static int cpu_isa_config(struct perf_kvm_stat *kvm)
706 {
707         char buf[64], *cpuid;
708         int err;
709
710         if (kvm->live) {
711                 err = get_cpuid(buf, sizeof(buf));
712                 if (err != 0) {
713                         pr_err("Failed to look up CPU type\n");
714                         return err;
715                 }
716                 cpuid = buf;
717         } else
718                 cpuid = kvm->session->header.env.cpuid;
719
720         if (!cpuid) {
721                 pr_err("Failed to look up CPU type\n");
722                 return -EINVAL;
723         }
724
725         err = cpu_isa_init(kvm, cpuid);
726         if (err == -ENOTSUP)
727                 pr_err("CPU %s is not supported.\n", cpuid);
728
729         return err;
730 }
731
732 static bool verify_vcpu(int vcpu)
733 {
734         if (vcpu != -1 && vcpu < 0) {
735                 pr_err("Invalid vcpu:%d.\n", vcpu);
736                 return false;
737         }
738
739         return true;
740 }
741
742 #ifdef HAVE_TIMERFD_SUPPORT
743 /* keeping the max events to a modest level to keep
744  * the processing of samples per mmap smooth.
745  */
746 #define PERF_KVM__MAX_EVENTS_PER_MMAP  25
747
/*
 * Drain up to PERF_KVM__MAX_EVENTS_PER_MMAP events from ring buffer
 * @idx into the session's ordered-events queue.  Stores the timestamp
 * of the first sample seen in *mmap_time (ULLONG_MAX when none) and
 * returns the number of events consumed, or -1 on error.
 */
static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
				   u64 *mmap_time)
{
	struct evlist *evlist = kvm->evlist;
	union perf_event *event;
	struct mmap *md;
	u64 timestamp;
	s64 n = 0;
	int err;

	*mmap_time = ULLONG_MAX;
	md = &evlist->mmap[idx];
	/* -EAGAIN means the ring is currently empty: not an error */
	err = perf_mmap__read_init(md);
	if (err < 0)
		return (err == -EAGAIN) ? 0 : -1;

	while ((event = perf_mmap__read_event(md)) != NULL) {
		err = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
		if (err) {
			perf_mmap__consume(md);
			pr_err("Failed to parse sample\n");
			return -1;
		}

		err = perf_session__queue_event(kvm->session, event, timestamp, 0);
		/*
		 * FIXME: Here we can't consume the event, as perf_session__queue_event will
		 *        point to it, and it'll get possibly overwritten by the kernel.
		 */
		perf_mmap__consume(md);

		if (err) {
			pr_err("Failed to enqueue sample: %d\n", err);
			return -1;
		}

		/* save time stamp of our first sample for this mmap */
		if (n == 0)
			*mmap_time = timestamp;

		/* limit events per mmap handled all at once */
		n++;
		if (n == PERF_KVM__MAX_EVENTS_PER_MMAP)
			break;
	}

	perf_mmap__read_done(md);
	return n;
}
797
/*
 * One round over all mmaps: drain each ring, then flush the ordered
 * events queue up to the minimum first-sample time seen this round.
 * Returns 1 when at least one ring hit the per-round event cap
 * ("throttled", the caller should poll again soon), 0 otherwise, and a
 * negative value on error.
 */
static int perf_kvm__mmap_read(struct perf_kvm_stat *kvm)
{
	int i, err, throttled = 0;
	s64 n, ntotal = 0;
	u64 flush_time = ULLONG_MAX, mmap_time;

	for (i = 0; i < kvm->evlist->nr_mmaps; i++) {
		n = perf_kvm__mmap_read_idx(kvm, i, &mmap_time);
		if (n < 0)
			return -1;

		/* flush time is going to be the minimum of all the individual
		 * mmap times. Essentially, we flush all the samples queued up
		 * from the last pass under our minimal start time -- that leaves
		 * a very small race for samples to come in with a lower timestamp.
		 * The ioctl to return the perf_clock timestamp should close the
		 * race entirely.
		 */
		if (mmap_time < flush_time)
			flush_time = mmap_time;

		ntotal += n;
		if (n == PERF_KVM__MAX_EVENTS_PER_MMAP)
			throttled = 1;
	}

	/* flush queue after each round in which we processed events */
	if (ntotal) {
		struct ordered_events *oe = &kvm->session->ordered_events;

		oe->next_flush = flush_time;
		err = ordered_events__flush(oe, OE_FLUSH__ROUND);
		if (err) {
			if (kvm->lost_events)
				pr_info("\nLost events: %" PRIu64 "\n\n",
					kvm->lost_events);
			return err;
		}
	}

	return throttled;
}
840
/* set from SIGINT/SIGTERM to terminate the live-mode main loop */
static volatile int done;

static void sig_handler(int sig __maybe_unused)
{
	done = 1;
}
847
848 static int perf_kvm__timerfd_create(struct perf_kvm_stat *kvm)
849 {
850         struct itimerspec new_value;
851         int rc = -1;
852
853         kvm->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
854         if (kvm->timerfd < 0) {
855                 pr_err("timerfd_create failed\n");
856                 goto out;
857         }
858
859         new_value.it_value.tv_sec = kvm->display_time;
860         new_value.it_value.tv_nsec = 0;
861         new_value.it_interval.tv_sec = kvm->display_time;
862         new_value.it_interval.tv_nsec = 0;
863
864         if (timerfd_settime(kvm->timerfd, 0, &new_value, NULL) != 0) {
865                 pr_err("timerfd_settime failed: %d\n", errno);
866                 close(kvm->timerfd);
867                 goto out;
868         }
869
870         rc = 0;
871 out:
872         return rc;
873 }
874
/*
 * Timer expired: redraw the live display and reset all per-interval
 * counters.  Returns 0 on success (including a spurious EAGAIN wakeup),
 * -1 on read errors.
 */
static int perf_kvm__handle_timerfd(struct perf_kvm_stat *kvm)
{
	uint64_t c;
	int rc;

	/* timerfd reads return the number of expirations since last read */
	rc = read(kvm->timerfd, &c, sizeof(uint64_t));
	if (rc < 0) {
		if (errno == EAGAIN)
			return 0;

		pr_err("Failed to read timer fd: %d\n", errno);
		return -1;
	}

	if (rc != sizeof(uint64_t)) {
		pr_err("Error reading timer fd - invalid size returned\n");
		return -1;
	}

	/* c > 1 means we could not keep up with the display interval */
	if (c != 1)
		pr_debug("Missed timer beats: %" PRIu64 "\n", c-1);

	/* update display */
	sort_result(kvm);
	print_result(kvm);

	/* reset counts */
	clear_events_cache_stats(kvm->kvm_events_cache);
	kvm->total_count = 0;
	kvm->total_time = 0;
	kvm->lost_events = 0;

	return 0;
}
909
/* Add O_NONBLOCK to @fd's file status flags; -1 on fcntl failure. */
static int fd_set_nonblock(int fd)
{
	long flags;

	flags = fcntl(fd, F_GETFL);
	if (flags < 0) {
		pr_err("Failed to get current flags for fd %d\n", fd);
		return -1;
	}

	if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0) {
		pr_err("Failed to set non-block option on fd %d\n", fd);
		return -1;
	}

	return 0;
}
927
/* Poll one key from stdin; returns 1 when the user pressed 'q' to quit. */
static int perf_kvm__handle_stdin(void)
{
	return (getc(stdin) == 'q') ? 1 : 0;
}
938
/*
 * Live mode main loop: validate the configuration, hook up the mmap'ed
 * event buffers, a display-refresh timerfd and stdin as poll sources,
 * then aggregate and redraw statistics until the user quits ('q') or a
 * SIGINT/SIGTERM arrives.  Returns 0 on success, negative on error.
 */
static int kvm_events_live_report(struct perf_kvm_stat *kvm)
{
	int nr_stdin, ret, err = -EINVAL;
	struct termios save;

	/* live flag must be set first */
	kvm->live = true;

	ret = cpu_isa_config(kvm);
	if (ret < 0)
		return ret;

	/*
	 * NOTE(review): this early goto reaches the tcsetattr() at 'out'
	 * with 'save' still uninitialized (set_term_quiet_input() has not
	 * run yet) and kvm->timerfd possibly not yet created — confirm
	 * whether these failure paths should bypass the cleanup.
	 */
	if (!verify_vcpu(kvm->trace_vcpu) ||
	    !select_key(kvm) ||
	    !register_kvm_events_ops(kvm)) {
		goto out;
	}

	/* put the terminal into quiet input mode, saving its state in 'save' */
	set_term_quiet_input(&save);
	init_kvm_event_record(kvm);

	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);

	/* add timer fd */
	if (perf_kvm__timerfd_create(kvm) < 0) {
		err = -1;
		goto out;
	}

	if (perf_evlist__add_pollfd(kvm->evlist, kvm->timerfd) < 0)
		goto out;

	/* remember stdin's pollfd slot so its revents can be checked below */
	nr_stdin = perf_evlist__add_pollfd(kvm->evlist, fileno(stdin));
	if (nr_stdin < 0)
		goto out;

	if (fd_set_nonblock(fileno(stdin)) != 0)
		goto out;

	/* everything is good - enable the events and process */
	evlist__enable(kvm->evlist);

	while (!done) {
		struct fdarray *fda = &kvm->evlist->pollfd;
		int rc;

		rc = perf_kvm__mmap_read(kvm);
		if (rc < 0)
			break;

		/* a timer expiry redraws the display and resets the counters */
		err = perf_kvm__handle_timerfd(kvm);
		if (err)
			goto out;

		if (fda->entries[nr_stdin].revents & POLLIN)
			done = perf_kvm__handle_stdin();

		/* nothing was read this pass: block for up to 100ms */
		if (!rc && !done)
			err = fdarray__poll(fda, 100);
	}

	evlist__disable(kvm->evlist);

	/* final snapshot of whatever accumulated since the last refresh */
	if (err == 0) {
		sort_result(kvm);
		print_result(kvm);
	}

out:
	if (kvm->timerfd >= 0)
		close(kvm->timerfd);

	/* restore the terminal state saved by set_term_quiet_input() */
	tcsetattr(0, TCSAFLUSH, &save);
	return err;
}
1015
1016 static int kvm_live_open_events(struct perf_kvm_stat *kvm)
1017 {
1018         int err, rc = -1;
1019         struct evsel *pos;
1020         struct evlist *evlist = kvm->evlist;
1021         char sbuf[STRERR_BUFSIZE];
1022
1023         perf_evlist__config(evlist, &kvm->opts, NULL);
1024
1025         /*
1026          * Note: exclude_{guest,host} do not apply here.
1027          *       This command processes KVM tracepoints from host only
1028          */
1029         evlist__for_each_entry(evlist, pos) {
1030                 struct perf_event_attr *attr = &pos->core.attr;
1031
1032                 /* make sure these *are* set */
1033                 perf_evsel__set_sample_bit(pos, TID);
1034                 perf_evsel__set_sample_bit(pos, TIME);
1035                 perf_evsel__set_sample_bit(pos, CPU);
1036                 perf_evsel__set_sample_bit(pos, RAW);
1037                 /* make sure these are *not*; want as small a sample as possible */
1038                 perf_evsel__reset_sample_bit(pos, PERIOD);
1039                 perf_evsel__reset_sample_bit(pos, IP);
1040                 perf_evsel__reset_sample_bit(pos, CALLCHAIN);
1041                 perf_evsel__reset_sample_bit(pos, ADDR);
1042                 perf_evsel__reset_sample_bit(pos, READ);
1043                 attr->mmap = 0;
1044                 attr->comm = 0;
1045                 attr->task = 0;
1046
1047                 attr->sample_period = 1;
1048
1049                 attr->watermark = 0;
1050                 attr->wakeup_events = 1000;
1051
1052                 /* will enable all once we are ready */
1053                 attr->disabled = 1;
1054         }
1055
1056         err = evlist__open(evlist);
1057         if (err < 0) {
1058                 printf("Couldn't create the events: %s\n",
1059                        str_error_r(errno, sbuf, sizeof(sbuf)));
1060                 goto out;
1061         }
1062
1063         if (evlist__mmap(evlist, kvm->opts.mmap_pages) < 0) {
1064                 ui__error("Failed to mmap the events: %s\n",
1065                           str_error_r(errno, sbuf, sizeof(sbuf)));
1066                 evlist__close(evlist);
1067                 goto out;
1068         }
1069
1070         rc = 0;
1071
1072 out:
1073         return rc;
1074 }
1075 #endif
1076
/*
 * Report mode input: open kvm->file_name as a perf session and run its
 * events through the kvm stat sample handlers.  Returns 0 on success
 * or a negative error code.
 */
static int read_events(struct perf_kvm_stat *kvm)
{
	int ret;

	struct perf_tool eops = {
		.sample			= process_sample_event,
		.comm			= perf_event__process_comm,
		.namespaces		= perf_event__process_namespaces,
		.ordered_events		= true,
	};
	struct perf_data file = {
		.path  = kvm->file_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = kvm->force,
	};

	/*
	 * Copy the tool into kvm so it outlives this scope — presumably
	 * the session keeps the &kvm->tool pointer; verify against
	 * perf_session__new().
	 */
	kvm->tool = eops;
	kvm->session = perf_session__new(&file, false, &kvm->tool);
	if (IS_ERR(kvm->session)) {
		pr_err("Initializing perf session failed\n");
		return PTR_ERR(kvm->session);
	}

	symbol__init(&kvm->session->header.env);

	/* refuse data files that were not recorded with tracepoints */
	if (!perf_session__has_traces(kvm->session, "kvm record")) {
		ret = -EINVAL;
		goto out_delete;
	}

	/*
	 * Do not use 'isa' recorded in kvm_exit tracepoint since it is not
	 * traced in the old kernel.
	 */
	ret = cpu_isa_config(kvm);
	if (ret < 0)
		goto out_delete;

	ret = perf_session__process_events(kvm->session);

out_delete:
	perf_session__delete(kvm->session);
	return ret;
}
1121
1122 static int parse_target_str(struct perf_kvm_stat *kvm)
1123 {
1124         if (kvm->opts.target.pid) {
1125                 kvm->pid_list = intlist__new(kvm->opts.target.pid);
1126                 if (kvm->pid_list == NULL) {
1127                         pr_err("Error parsing process id string\n");
1128                         return -EINVAL;
1129                 }
1130         }
1131
1132         return 0;
1133 }
1134
1135 static int kvm_events_report_vcpu(struct perf_kvm_stat *kvm)
1136 {
1137         int ret = -EINVAL;
1138         int vcpu = kvm->trace_vcpu;
1139
1140         if (parse_target_str(kvm) != 0)
1141                 goto exit;
1142
1143         if (!verify_vcpu(vcpu))
1144                 goto exit;
1145
1146         if (!select_key(kvm))
1147                 goto exit;
1148
1149         if (!register_kvm_events_ops(kvm))
1150                 goto exit;
1151
1152         init_kvm_event_record(kvm);
1153         setup_pager();
1154
1155         ret = read_events(kvm);
1156         if (ret)
1157                 goto exit;
1158
1159         sort_result(kvm);
1160         print_result(kvm);
1161
1162 exit:
1163         return ret;
1164 }
1165
/*
 * strdup() @s, or return -ENOMEM from the *enclosing* function.
 * NOTE(review): the embedded return leaks anything the caller
 * allocated before this point (e.g. rec_argv and earlier duplicated
 * entries in kvm_events_record()) — likely acceptable for a
 * short-lived tool, but worth confirming before reuse.
 */
#define STRDUP_FAIL_EXIT(s)		\
	({	char *_p;		\
	_p = strdup(s);		\
		if (!_p)		\
			return -ENOMEM;	\
		_p;			\
	})
1173
/*
 * Hook for registering extra kvm tracepoints before recording; the
 * __weak default does nothing and is meant to be overridden per-arch.
 * Returns 0 on success.
 */
int __weak setup_kvm_events_tp(struct perf_kvm_stat *kvm __maybe_unused)
{
	return 0;
}
1178
/*
 * 'perf kvm stat record': build an argv for 'perf record' that forces
 * raw samples (-R), a sample period of 1 (-c 1), the kvm tracepoints
 * (-e ...) and the output file (-o), then hides/disables the record
 * options that make no sense for kvm stat before delegating to
 * cmd_record().
 *
 * NOTE(review): STRDUP_FAIL_EXIT() returns straight out of this
 * function on OOM, leaking rec_argv and already-duplicated entries —
 * confirm this is acceptable for a short-lived tool.
 */
static int
kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
{
	unsigned int rec_argc, i, j, events_tp_size;
	const char **rec_argv;
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
	};
	const char * const kvm_stat_record_usage[] = {
		"perf kvm stat record [<options>]",
		NULL
	};
	const char * const *events_tp;
	int ret;

	events_tp_size = 0;
	ret = setup_kvm_events_tp(kvm);
	if (ret < 0) {
		pr_err("Unable to setup the kvm tracepoints\n");
		return ret;
	}

	/* count the NULL-terminated kvm_events_tp[] tracepoint list */
	for (events_tp = kvm_events_tp; *events_tp; events_tp++)
		events_tp_size++;

	/* fixed args + user args + "-o <file>" + one "-e <tp>" pair each */
	rec_argc = ARRAY_SIZE(record_args) + argc + 2 +
		   2 * events_tp_size;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = STRDUP_FAIL_EXIT(record_args[i]);

	for (j = 0; j < events_tp_size; j++) {
		rec_argv[i++] = "-e";
		rec_argv[i++] = STRDUP_FAIL_EXIT(kvm_events_tp[j]);
	}

	rec_argv[i++] = STRDUP_FAIL_EXIT("-o");
	rec_argv[i++] = STRDUP_FAIL_EXIT(kvm->file_name);

	/* user options start at argv[1]; argv[0] is the sub-command name */
	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	/* hide record options this command pins itself */
	set_option_flag(record_options, 'e', "event", PARSE_OPT_HIDDEN);
	set_option_flag(record_options, 0, "filter", PARSE_OPT_HIDDEN);
	set_option_flag(record_options, 'R', "raw-samples", PARSE_OPT_HIDDEN);

	/* disable record options that conflict with kvm stat recording */
	set_option_flag(record_options, 'F', "freq", PARSE_OPT_DISABLED);
	set_option_flag(record_options, 0, "group", PARSE_OPT_DISABLED);
	set_option_flag(record_options, 'g', NULL, PARSE_OPT_DISABLED);
	set_option_flag(record_options, 0, "call-graph", PARSE_OPT_DISABLED);
	set_option_flag(record_options, 'd', "data", PARSE_OPT_DISABLED);
	set_option_flag(record_options, 'T', "timestamp", PARSE_OPT_DISABLED);
	set_option_flag(record_options, 'P', "period", PARSE_OPT_DISABLED);
	set_option_flag(record_options, 'n', "no-samples", PARSE_OPT_DISABLED);
	set_option_flag(record_options, 'N', "no-buildid-cache", PARSE_OPT_DISABLED);
	set_option_flag(record_options, 'B', "no-buildid", PARSE_OPT_DISABLED);
	set_option_flag(record_options, 'G', "cgroup", PARSE_OPT_DISABLED);
	set_option_flag(record_options, 'b', "branch-any", PARSE_OPT_DISABLED);
	set_option_flag(record_options, 'j', "branch-filter", PARSE_OPT_DISABLED);
	set_option_flag(record_options, 'W', "weight", PARSE_OPT_DISABLED);
	set_option_flag(record_options, 0, "transaction", PARSE_OPT_DISABLED);

	record_usage = kvm_stat_record_usage;
	return cmd_record(i, rec_argv);
}
1251
1252 static int
1253 kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv)
1254 {
1255         const struct option kvm_events_report_options[] = {
1256                 OPT_STRING(0, "event", &kvm->report_event, "report event",
1257                            "event for reporting: vmexit, "
1258                            "mmio (x86 only), ioport (x86 only)"),
1259                 OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
1260                             "vcpu id to report"),
1261                 OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
1262                             "key for sorting: sample(sort by samples number)"
1263                             " time (sort by avg time)"),
1264                 OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid",
1265                            "analyze events only for given process id(s)"),
1266                 OPT_BOOLEAN('f', "force", &kvm->force, "don't complain, do it"),
1267                 OPT_END()
1268         };
1269
1270         const char * const kvm_events_report_usage[] = {
1271                 "perf kvm stat report [<options>]",
1272                 NULL
1273         };
1274
1275         if (argc) {
1276                 argc = parse_options(argc, argv,
1277                                      kvm_events_report_options,
1278                                      kvm_events_report_usage, 0);
1279                 if (argc)
1280                         usage_with_options(kvm_events_report_usage,
1281                                            kvm_events_report_options);
1282         }
1283
1284         if (!kvm->opts.target.pid)
1285                 kvm->opts.target.system_wide = true;
1286
1287         return kvm_events_report_vcpu(kvm);
1288 }
1289
1290 #ifdef HAVE_TIMERFD_SUPPORT
1291 static struct evlist *kvm_live_event_list(void)
1292 {
1293         struct evlist *evlist;
1294         char *tp, *name, *sys;
1295         int err = -1;
1296         const char * const *events_tp;
1297
1298         evlist = evlist__new();
1299         if (evlist == NULL)
1300                 return NULL;
1301
1302         for (events_tp = kvm_events_tp; *events_tp; events_tp++) {
1303
1304                 tp = strdup(*events_tp);
1305                 if (tp == NULL)
1306                         goto out;
1307
1308                 /* split tracepoint into subsystem and name */
1309                 sys = tp;
1310                 name = strchr(tp, ':');
1311                 if (name == NULL) {
1312                         pr_err("Error parsing %s tracepoint: subsystem delimiter not found\n",
1313                                *events_tp);
1314                         free(tp);
1315                         goto out;
1316                 }
1317                 *name = '\0';
1318                 name++;
1319
1320                 if (perf_evlist__add_newtp(evlist, sys, name, NULL)) {
1321                         pr_err("Failed to add %s tracepoint to the list\n", *events_tp);
1322                         free(tp);
1323                         goto out;
1324                 }
1325
1326                 free(tp);
1327         }
1328
1329         err = 0;
1330
1331 out:
1332         if (err) {
1333                 evlist__delete(evlist);
1334                 evlist = NULL;
1335         }
1336
1337         return evlist;
1338 }
1339
/*
 * 'perf kvm stat live': record and report in one step, periodically
 * refreshing the aggregated statistics on the terminal.
 */
static int kvm_events_live(struct perf_kvm_stat *kvm,
			   int argc, const char **argv)
{
	char errbuf[BUFSIZ];
	int err;

	const struct option live_options[] = {
		OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid",
			"record events on existing process id"),
		OPT_CALLBACK('m', "mmap-pages", &kvm->opts.mmap_pages, "pages",
			"number of mmap data pages",
			perf_evlist__parse_mmap_pages),
		OPT_INCR('v', "verbose", &verbose,
			"be more verbose (show counter open errors, etc)"),
		OPT_BOOLEAN('a', "all-cpus", &kvm->opts.target.system_wide,
			"system-wide collection from all CPUs"),
		OPT_UINTEGER('d', "display", &kvm->display_time,
			"time in seconds between display updates"),
		OPT_STRING(0, "event", &kvm->report_event, "report event",
			"event for reporting: "
			"vmexit, mmio (x86 only), ioport (x86 only)"),
		OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
			"vcpu id to report"),
		OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
			"key for sorting: sample(sort by samples number)"
			" time (sort by avg time)"),
		OPT_U64(0, "duration", &kvm->duration,
			"show events other than"
			" HLT (x86 only) or Wait state (s390 only)"
			" that take longer than duration usecs"),
		OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
				"per thread proc mmap processing timeout in ms"),
		OPT_END()
	};
	const char * const live_usage[] = {
		"perf kvm stat live [<options>]",
		NULL
	};
	struct perf_data data = {
		.mode = PERF_DATA_MODE_WRITE,
	};


	/* event handling */
	kvm->tool.sample = process_sample_event;
	kvm->tool.comm   = perf_event__process_comm;
	kvm->tool.exit   = perf_event__process_exit;
	kvm->tool.fork   = perf_event__process_fork;
	kvm->tool.lost   = process_lost_event;
	kvm->tool.namespaces  = perf_event__process_namespaces;
	kvm->tool.ordered_events = true;
	perf_tool__fill_defaults(&kvm->tool);

	/* set defaults */
	kvm->display_time = 1;
	kvm->opts.user_interval = 1;
	kvm->opts.mmap_pages = 512;
	kvm->opts.target.uses_mmap = false;
	kvm->opts.target.uid_str = NULL;
	kvm->opts.target.uid = UINT_MAX;

	symbol__init(NULL);
	disable_buildid_cache();

	/* plain stdio output; the TUI browser is not used in live mode */
	use_browser = 0;

	if (argc) {
		argc = parse_options(argc, argv, live_options,
				     live_usage, 0);
		if (argc)
			usage_with_options(live_usage, live_options);
	}

	kvm->duration *= NSEC_PER_USEC;   /* convert usec to nsec */

	/*
	 * target related setups
	 */
	err = target__validate(&kvm->opts.target);
	if (err) {
		target__strerror(&kvm->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	/* no pid and no -a given: default to system-wide collection */
	if (target__none(&kvm->opts.target))
		kvm->opts.target.system_wide = true;


	/*
	 * generate the event list
	 */
	err = setup_kvm_events_tp(kvm);
	if (err < 0) {
		pr_err("Unable to setup the kvm tracepoints\n");
		return err;
	}

	kvm->evlist = kvm_live_event_list();
	if (kvm->evlist == NULL) {
		err = -1;
		goto out;
	}

	if (perf_evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0)
		usage_with_options(live_usage, live_options);

	/*
	 * perf session
	 */
	kvm->session = perf_session__new(&data, false, &kvm->tool);
	if (IS_ERR(kvm->session)) {
		/*
		 * NOTE(review): kvm->session still holds the ERR_PTR here,
		 * so the perf_session__delete() at 'out' is handed a
		 * non-NULL error pointer — confirm it tolerates that.
		 */
		err = PTR_ERR(kvm->session);
		goto out;
	}
	kvm->session->evlist = kvm->evlist;
	perf_session__set_id_hdr_size(kvm->session);
	ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true);
	machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target,
				    kvm->evlist->core.threads, false, 1);
	err = kvm_live_open_events(kvm);
	if (err)
		goto out;

	err = kvm_events_live_report(kvm);

out:
	perf_session__delete(kvm->session);
	kvm->session = NULL;
	evlist__delete(kvm->evlist);

	return err;
}
1472 #endif
1473
/* Print the 'perf kvm stat' sub-command summary to stdout. */
static void print_kvm_stat_usage(void)
{
	static const char usage_text[] =
		"Usage: perf kvm stat <command>\n\n"
		"# Available commands:\n"
		"\trecord: record kvm events\n"
		"\treport: report statistical data of kvm events\n"
		"\tlive:   live reporting of statistical data of kvm events\n"
		"\nOtherwise, it is the alias of 'perf stat':\n";

	fputs(usage_text, stdout);
}
1485
1486 static int kvm_cmd_stat(const char *file_name, int argc, const char **argv)
1487 {
1488         struct perf_kvm_stat kvm = {
1489                 .file_name = file_name,
1490
1491                 .trace_vcpu     = -1,
1492                 .report_event   = "vmexit",
1493                 .sort_key       = "sample",
1494
1495         };
1496
1497         if (argc == 1) {
1498                 print_kvm_stat_usage();
1499                 goto perf_stat;
1500         }
1501
1502         if (!strncmp(argv[1], "rec", 3))
1503                 return kvm_events_record(&kvm, argc - 1, argv + 1);
1504
1505         if (!strncmp(argv[1], "rep", 3))
1506                 return kvm_events_report(&kvm, argc - 1 , argv + 1);
1507
1508 #ifdef HAVE_TIMERFD_SUPPORT
1509         if (!strncmp(argv[1], "live", 4))
1510                 return kvm_events_live(&kvm, argc - 1 , argv + 1);
1511 #endif
1512
1513 perf_stat:
1514         return cmd_stat(argc, argv);
1515 }
1516 #endif /* HAVE_KVM_STAT_SUPPORT */
1517
/*
 * Hook letting an architecture append default events to the
 * 'perf kvm record' command line; the __weak default does nothing.
 * Returns 0 on success.
 */
int __weak kvm_add_default_arch_event(int *argc __maybe_unused,
					const char **argv __maybe_unused)
{
	return 0;
}
1523
/*
 * 'perf kvm record': delegate to 'perf record' with the output file
 * forced to @file_name and any arch-default events appended first.
 * Returns cmd_record()'s status, or a negative error code.
 */
static int __cmd_record(const char *file_name, int argc, const char **argv)
{
	int rec_argc, i = 0, j, ret;
	const char **rec_argv;

	ret = kvm_add_default_arch_event(&argc, argv);
	if (ret)
		return -EINVAL;

	/* "record" + "-o" + file_name, then the user's args (argv[1..]) */
	rec_argc = argc + 2;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;

	rec_argv[i++] = strdup("record");
	rec_argv[i++] = strdup("-o");
	rec_argv[i++] = strdup(file_name);
	for (j = 1; j < argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_record(i, rec_argv);
}
1545
/*
 * 'perf kvm report': delegate to 'perf report' with the input file
 * forced to @file_name.  Returns cmd_report()'s status, or -ENOMEM.
 */
static int __cmd_report(const char *file_name, int argc, const char **argv)
{
	int rec_argc, i = 0, j;
	const char **rec_argv;

	/* "report" + "-i" + file_name, then the user's args (argv[1..]) */
	rec_argc = argc + 2;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;

	rec_argv[i++] = strdup("report");
	rec_argv[i++] = strdup("-i");
	rec_argv[i++] = strdup(file_name);
	for (j = 1; j < argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_report(i, rec_argv);
}
1563
/*
 * 'perf kvm buildid-list': delegate to 'perf buildid-list' with the
 * input file forced to @file_name.  Returns the sub-command's status,
 * or -ENOMEM.
 */
static int
__cmd_buildid_list(const char *file_name, int argc, const char **argv)
{
	int rec_argc, i = 0, j;
	const char **rec_argv;

	/* "buildid-list" + "-i" + file_name, then the user's args */
	rec_argc = argc + 2;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;

	rec_argv[i++] = strdup("buildid-list");
	rec_argv[i++] = strdup("-i");
	rec_argv[i++] = strdup(file_name);
	for (j = 1; j < argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_buildid_list(i, rec_argv);
}
1582
/*
 * 'perf kvm' entry point: parse the common guest/host options, pick a
 * default perf.data file name when none was given, then dispatch to
 * the requested sub-command.
 */
int cmd_kvm(int argc, const char **argv)
{
	const char *file_name = NULL;
	const struct option kvm_options[] = {
		OPT_STRING('i', "input", &file_name, "file",
			   "Input file name"),
		OPT_STRING('o', "output", &file_name, "file",
			   "Output file name"),
		OPT_BOOLEAN(0, "guest", &perf_guest,
			    "Collect guest os data"),
		OPT_BOOLEAN(0, "host", &perf_host,
			    "Collect host os data"),
		OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
			   "guest mount directory under which every guest os"
			   " instance has a subdir"),
		OPT_STRING(0, "guestvmlinux", &symbol_conf.default_guest_vmlinux_name,
			   "file", "file saving guest os vmlinux"),
		OPT_STRING(0, "guestkallsyms", &symbol_conf.default_guest_kallsyms,
			   "file", "file saving guest os /proc/kallsyms"),
		OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules,
			   "file", "file saving guest os /proc/modules"),
		OPT_INCR('v', "verbose", &verbose,
			    "be more verbose (show counter open errors, etc)"),
		OPT_END()
	};

	const char *const kvm_subcommands[] = { "top", "record", "report", "diff",
						"buildid-list", "stat", NULL };
	const char *kvm_usage[] = { NULL, NULL };

	/* default to guest-side collection; --host/--guest may override */
	perf_host  = 0;
	perf_guest = 1;

	argc = parse_options_subcommand(argc, argv, kvm_options, kvm_subcommands, kvm_usage,
					PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(kvm_usage, kvm_options);

	/* --host not given: make sure guest collection stays enabled */
	if (!perf_host)
		perf_guest = 1;

	if (!file_name) {
		/*
		 * Default name depends on host/guest mode; the returned
		 * string is strdup'd (see get_filename_for_perf_kvm()).
		 */
		file_name = get_filename_for_perf_kvm();

		if (!file_name) {
			pr_err("Failed to allocate memory for filename\n");
			return -ENOMEM;
		}
	}

	/* prefix matching: "rec", "record", ... all select 'record' */
	if (!strncmp(argv[0], "rec", 3))
		return __cmd_record(file_name, argc, argv);
	else if (!strncmp(argv[0], "rep", 3))
		return __cmd_report(file_name, argc, argv);
	else if (!strncmp(argv[0], "diff", 4))
		return cmd_diff(argc, argv);
	else if (!strncmp(argv[0], "top", 3))
		return cmd_top(argc, argv);
	else if (!strncmp(argv[0], "buildid-list", 12))
		return __cmd_buildid_list(file_name, argc, argv);
#ifdef HAVE_KVM_STAT_SUPPORT
	else if (!strncmp(argv[0], "stat", 4))
		return kvm_cmd_stat(file_name, argc, argv);
#endif
	else
		usage_with_options(kvm_usage, kvm_options);

	return 0;
}