tools/perf/util/intel-pt.c
1 /*
2  * intel_pt.c: Intel Processor Trace support
3  * Copyright (c) 2013-2015, Intel Corporation.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  *
14  */
15
16 #include <inttypes.h>
17 #include <stdio.h>
18 #include <stdbool.h>
19 #include <errno.h>
20 #include <linux/kernel.h>
21 #include <linux/types.h>
22
23 #include "../perf.h"
24 #include "session.h"
25 #include "machine.h"
26 #include "memswap.h"
27 #include "sort.h"
28 #include "tool.h"
29 #include "event.h"
30 #include "evlist.h"
31 #include "evsel.h"
32 #include "map.h"
33 #include "color.h"
34 #include "util.h"
35 #include "thread.h"
36 #include "thread-stack.h"
37 #include "symbol.h"
38 #include "callchain.h"
39 #include "dso.h"
40 #include "debug.h"
41 #include "auxtrace.h"
42 #include "tsc.h"
43 #include "intel-pt.h"
44 #include "config.h"
45
46 #include "intel-pt-decoder/intel-pt-log.h"
47 #include "intel-pt-decoder/intel-pt-decoder.h"
48 #include "intel-pt-decoder/intel-pt-insn-decoder.h"
49 #include "intel-pt-decoder/intel-pt-pkt-decoder.h"
50
51 #define MAX_TIMESTAMP (~0ULL)
52
53 struct intel_pt {
54         struct auxtrace auxtrace;
55         struct auxtrace_queues queues;
56         struct auxtrace_heap heap;
57         u32 auxtrace_type;
58         struct perf_session *session;
59         struct machine *machine;
60         struct perf_evsel *switch_evsel;
61         struct thread *unknown_thread;
62         bool timeless_decoding;
63         bool sampling_mode;
64         bool snapshot_mode;
65         bool per_cpu_mmaps;
66         bool have_tsc;
67         bool data_queued;
68         bool est_tsc;
69         bool sync_switch;
70         bool mispred_all;
71         int have_sched_switch;
72         u32 pmu_type;
73         u64 kernel_start;
74         u64 switch_ip;
75         u64 ptss_ip;
76
77         struct perf_tsc_conversion tc;
78         bool cap_user_time_zero;
79
80         struct itrace_synth_opts synth_opts;
81
82         bool sample_instructions;
83         u64 instructions_sample_type;
84         u64 instructions_sample_period;
85         u64 instructions_id;
86
87         bool sample_branches;
88         u32 branches_filter;
89         u64 branches_sample_type;
90         u64 branches_id;
91
92         bool sample_transactions;
93         u64 transactions_sample_type;
94         u64 transactions_id;
95
96         bool synth_needs_swap;
97
98         u64 tsc_bit;
99         u64 mtc_bit;
100         u64 mtc_freq_bits;
101         u32 tsc_ctc_ratio_n;
102         u32 tsc_ctc_ratio_d;
103         u64 cyc_bit;
104         u64 noretcomp_bit;
105         unsigned max_non_turbo_ratio;
106
107         unsigned long num_events;
108
109         char *filter;
110         struct addr_filters filts;
111 };
112
113 enum switch_state {
114         INTEL_PT_SS_NOT_TRACING,
115         INTEL_PT_SS_UNKNOWN,
116         INTEL_PT_SS_TRACING,
117         INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
118         INTEL_PT_SS_EXPECTING_SWITCH_IP,
119 };
120
121 struct intel_pt_queue {
122         struct intel_pt *pt;
123         unsigned int queue_nr;
124         struct auxtrace_buffer *buffer;
125         void *decoder;
126         const struct intel_pt_state *state;
127         struct ip_callchain *chain;
128         struct branch_stack *last_branch;
129         struct branch_stack *last_branch_rb;
130         size_t last_branch_pos;
131         union perf_event *event_buf;
132         bool on_heap;
133         bool stop;
134         bool step_through_buffers;
135         bool use_buffer_pid_tid;
136         pid_t pid, tid;
137         int cpu;
138         int switch_state;
139         pid_t next_tid;
140         struct thread *thread;
141         bool exclude_kernel;
142         bool have_sample;
143         u64 time;
144         u64 timestamp;
145         u32 flags;
146         u16 insn_len;
147         u64 last_insn_cnt;
148         char insn[INTEL_PT_INSN_BUF_SZ];
149 };
150
151 static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
152                           unsigned char *buf, size_t len)
153 {
154         struct intel_pt_pkt packet;
155         size_t pos = 0;
156         int ret, pkt_len, i;
157         char desc[INTEL_PT_PKT_DESC_MAX];
158         const char *color = PERF_COLOR_BLUE;
159
160         color_fprintf(stdout, color,
161                       ". ... Intel Processor Trace data: size %zu bytes\n",
162                       len);
163
164         while (len) {
165                 ret = intel_pt_get_packet(buf, len, &packet);
166                 if (ret > 0)
167                         pkt_len = ret;
168                 else
169                         pkt_len = 1;
170                 printf(".");
171                 color_fprintf(stdout, color, "  %08zx: ", pos);
172                 for (i = 0; i < pkt_len; i++)
173                         color_fprintf(stdout, color, " %02x", buf[i]);
174                 for (; i < 16; i++)
175                         color_fprintf(stdout, color, "   ");
176                 if (ret > 0) {
177                         ret = intel_pt_pkt_desc(&packet, desc,
178                                                 INTEL_PT_PKT_DESC_MAX);
179                         if (ret > 0)
180                                 color_fprintf(stdout, color, " %s\n", desc);
181                 } else {
182                         color_fprintf(stdout, color, " Bad packet!\n");
183                 }
184                 pos += pkt_len;
185                 buf += pkt_len;
186                 len -= pkt_len;
187         }
188 }
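/*
 * Note on the dump format above: each output line starts with a leading '.',
 * then shows the byte offset into the AUX data, up to 16 raw packet bytes,
 * and, when the packet decodes cleanly, its textual description from
 * intel_pt_pkt_desc().  Bytes that cannot be decoded are stepped over one at
 * a time and reported as "Bad packet!".
 */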
189
190 static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
191                                 size_t len)
192 {
193         printf(".\n");
194         intel_pt_dump(pt, buf, len);
195 }
196
197 static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
198                                    struct auxtrace_buffer *b)
199 {
200         void *start;
201
202         start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
203                                       pt->have_tsc);
204         if (!start)
205                 return -EINVAL;
206         b->use_size = b->data + b->size - start;
207         b->use_data = start;
208         return 0;
209 }
210
211 static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq,
212                                         struct auxtrace_queue *queue,
213                                         struct auxtrace_buffer *buffer)
214 {
215         if (queue->cpu == -1 && buffer->cpu != -1)
216                 ptq->cpu = buffer->cpu;
217
218         ptq->pid = buffer->pid;
219         ptq->tid = buffer->tid;
220
221         intel_pt_log("queue %u cpu %d pid %d tid %d\n",
222                      ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
223
224         thread__zput(ptq->thread);
225
226         if (ptq->tid != -1) {
227                 if (ptq->pid != -1)
228                         ptq->thread = machine__findnew_thread(ptq->pt->machine,
229                                                               ptq->pid,
230                                                               ptq->tid);
231                 else
232                         ptq->thread = machine__find_thread(ptq->pt->machine, -1,
233                                                            ptq->tid);
234         }
235 }
236
237 /* This function assumes data is processed sequentially only */
238 static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
239 {
240         struct intel_pt_queue *ptq = data;
241         struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer;
242         struct auxtrace_queue *queue;
243
244         if (ptq->stop) {
245                 b->len = 0;
246                 return 0;
247         }
248
249         queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
250 next:
251         buffer = auxtrace_buffer__next(queue, buffer);
252         if (!buffer) {
253                 if (old_buffer)
254                         auxtrace_buffer__drop_data(old_buffer);
255                 b->len = 0;
256                 return 0;
257         }
258
259         ptq->buffer = buffer;
260
261         if (!buffer->data) {
262                 int fd = perf_data_file__fd(ptq->pt->session->file);
263
264                 buffer->data = auxtrace_buffer__get_data(buffer, fd);
265                 if (!buffer->data)
266                         return -ENOMEM;
267         }
268
269         if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer &&
270             intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
271                 return -ENOMEM;
272
273         if (buffer->use_data) {
274                 b->len = buffer->use_size;
275                 b->buf = buffer->use_data;
276         } else {
277                 b->len = buffer->size;
278                 b->buf = buffer->data;
279         }
280         b->ref_timestamp = buffer->reference;
281
282         /*
283          * If in snapshot mode and the buffer has no usable data, get next
284          * buffer and again check overlap against old_buffer.
285          */
286         if (ptq->pt->snapshot_mode && !b->len)
287                 goto next;
288
289         if (old_buffer)
290                 auxtrace_buffer__drop_data(old_buffer);
291
292         if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
293                                                       !buffer->consecutive)) {
294                 b->consecutive = false;
295                 b->trace_nr = buffer->buffer_nr + 1;
296         } else {
297                 b->consecutive = true;
298         }
299
300         if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid ||
301                                         ptq->tid != buffer->tid))
302                 intel_pt_use_buffer_pid_tid(ptq, queue, buffer);
303
304         if (ptq->step_through_buffers)
305                 ptq->stop = true;
306
307         if (!b->len)
308                 return intel_pt_get_trace(b, data);
309
310         return 0;
311 }
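/*
 * Note: intel_pt_get_trace() is the decoder's callback for fetching trace
 * data.  It steps to the next auxtrace buffer for this queue, reads the
 * buffer contents from the perf.data file on first use, trims away any
 * overlap with the previous buffer in snapshot mode, and drops the previous
 * buffer's data once it is no longer needed.  Whenever decoding cannot simply
 * continue from the previous buffer (first buffer, sampling mode, or a
 * non-consecutive snapshot), b->consecutive is cleared and b->trace_nr is
 * bumped so the decoder treats it as a new, discontinuous chunk of trace.
 */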
312
313 struct intel_pt_cache_entry {
314         struct auxtrace_cache_entry     entry;
315         u64                             insn_cnt;
316         u64                             byte_cnt;
317         enum intel_pt_insn_op           op;
318         enum intel_pt_insn_branch       branch;
319         int                             length;
320         int32_t                         rel;
321         char                            insn[INTEL_PT_INSN_BUF_SZ];
322 };
323
324 static int intel_pt_config_div(const char *var, const char *value, void *data)
325 {
326         int *d = data;
327         long val;
328
329         if (!strcmp(var, "intel-pt.cache-divisor")) {
330                 val = strtol(value, NULL, 0);
331                 if (val > 0 && val <= INT_MAX)
332                         *d = val;
333         }
334
335         return 0;
336 }
337
338 static int intel_pt_cache_divisor(void)
339 {
340         static int d;
341
342         if (d)
343                 return d;
344
345         perf_config(intel_pt_config_div, &d);
346
347         if (!d)
348                 d = 64;
349
350         return d;
351 }
352
353 static unsigned int intel_pt_cache_size(struct dso *dso,
354                                         struct machine *machine)
355 {
356         off_t size;
357
358         size = dso__data_size(dso, machine);
359         size /= intel_pt_cache_divisor();
360         if (size < 1000)
361                 return 10;
362         if (size > (1 << 21))
363                 return 21;
364         return 32 - __builtin_clz(size);
365 }
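/*
 * Note: the value computed above is the number of hash bits for the per-dso
 * decoded-instruction cache, roughly log2(file size / intel-pt.cache-divisor),
 * with small and very large DSOs pinned to 10 and 21 bits respectively.
 * Illustrative example: an 8 MiB DSO with the default divisor of 64 gives
 * 8388608 / 64 = 131072 = 2^17, so 32 - __builtin_clz(131072) = 18 bits.
 */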
366
367 static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
368                                              struct machine *machine)
369 {
370         struct auxtrace_cache *c;
371         unsigned int bits;
372
373         if (dso->auxtrace_cache)
374                 return dso->auxtrace_cache;
375
376         bits = intel_pt_cache_size(dso, machine);
377
378         /* Ignoring cache creation failure */
379         c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);
380
381         dso->auxtrace_cache = c;
382
383         return c;
384 }
385
386 static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
387                               u64 offset, u64 insn_cnt, u64 byte_cnt,
388                               struct intel_pt_insn *intel_pt_insn)
389 {
390         struct auxtrace_cache *c = intel_pt_cache(dso, machine);
391         struct intel_pt_cache_entry *e;
392         int err;
393
394         if (!c)
395                 return -ENOMEM;
396
397         e = auxtrace_cache__alloc_entry(c);
398         if (!e)
399                 return -ENOMEM;
400
401         e->insn_cnt = insn_cnt;
402         e->byte_cnt = byte_cnt;
403         e->op = intel_pt_insn->op;
404         e->branch = intel_pt_insn->branch;
405         e->length = intel_pt_insn->length;
406         e->rel = intel_pt_insn->rel;
407         memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);
408
409         err = auxtrace_cache__add(c, offset, &e->entry);
410         if (err)
411                 auxtrace_cache__free_entry(c, e);
412
413         return err;
414 }
415
416 static struct intel_pt_cache_entry *
417 intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
418 {
419         struct auxtrace_cache *c = intel_pt_cache(dso, machine);
420
421         if (!c)
422                 return NULL;
423
424         return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
425 }
426
427 static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
428                                    uint64_t *insn_cnt_ptr, uint64_t *ip,
429                                    uint64_t to_ip, uint64_t max_insn_cnt,
430                                    void *data)
431 {
432         struct intel_pt_queue *ptq = data;
433         struct machine *machine = ptq->pt->machine;
434         struct thread *thread;
435         struct addr_location al;
436         unsigned char buf[INTEL_PT_INSN_BUF_SZ];
437         ssize_t len;
438         int x86_64;
439         u8 cpumode;
440         u64 offset, start_offset, start_ip;
441         u64 insn_cnt = 0;
442         bool one_map = true;
443
444         intel_pt_insn->length = 0;
445
446         if (to_ip && *ip == to_ip)
447                 goto out_no_cache;
448
449         if (*ip >= ptq->pt->kernel_start)
450                 cpumode = PERF_RECORD_MISC_KERNEL;
451         else
452                 cpumode = PERF_RECORD_MISC_USER;
453
454         thread = ptq->thread;
455         if (!thread) {
456                 if (cpumode != PERF_RECORD_MISC_KERNEL)
457                         return -EINVAL;
458                 thread = ptq->pt->unknown_thread;
459         }
460
461         while (1) {
462                 thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al);
463                 if (!al.map || !al.map->dso)
464                         return -EINVAL;
465
466                 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
467                     dso__data_status_seen(al.map->dso,
468                                           DSO_DATA_STATUS_SEEN_ITRACE))
469                         return -ENOENT;
470
471                 offset = al.map->map_ip(al.map, *ip);
472
473                 if (!to_ip && one_map) {
474                         struct intel_pt_cache_entry *e;
475
476                         e = intel_pt_cache_lookup(al.map->dso, machine, offset);
477                         if (e &&
478                             (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
479                                 *insn_cnt_ptr = e->insn_cnt;
480                                 *ip += e->byte_cnt;
481                                 intel_pt_insn->op = e->op;
482                                 intel_pt_insn->branch = e->branch;
483                                 intel_pt_insn->length = e->length;
484                                 intel_pt_insn->rel = e->rel;
485                                 memcpy(intel_pt_insn->buf, e->insn,
486                                        INTEL_PT_INSN_BUF_SZ);
487                                 intel_pt_log_insn_no_data(intel_pt_insn, *ip);
488                                 return 0;
489                         }
490                 }
491
492                 start_offset = offset;
493                 start_ip = *ip;
494
495                 /* Load maps to ensure dso->is_64_bit has been updated */
496                 map__load(al.map);
497
498                 x86_64 = al.map->dso->is_64_bit;
499
500                 while (1) {
501                         len = dso__data_read_offset(al.map->dso, machine,
502                                                     offset, buf,
503                                                     INTEL_PT_INSN_BUF_SZ);
504                         if (len <= 0)
505                                 return -EINVAL;
506
507                         if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
508                                 return -EINVAL;
509
510                         intel_pt_log_insn(intel_pt_insn, *ip);
511
512                         insn_cnt += 1;
513
514                         if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
515                                 goto out;
516
517                         if (max_insn_cnt && insn_cnt >= max_insn_cnt)
518                                 goto out_no_cache;
519
520                         *ip += intel_pt_insn->length;
521
522                         if (to_ip && *ip == to_ip)
523                                 goto out_no_cache;
524
525                         if (*ip >= al.map->end)
526                                 break;
527
528                         offset += intel_pt_insn->length;
529                 }
530                 one_map = false;
531         }
532 out:
533         *insn_cnt_ptr = insn_cnt;
534
535         if (!one_map)
536                 goto out_no_cache;
537
538         /*
539          * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
540          * entries.
541          */
542         if (to_ip) {
543                 struct intel_pt_cache_entry *e;
544
545                 e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
546                 if (e)
547                         return 0;
548         }
549
550         /* Ignore cache errors */
551         intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
552                            *ip - start_ip, intel_pt_insn);
553
554         return 0;
555
556 out_no_cache:
557         *insn_cnt_ptr = insn_cnt;
558         return 0;
559 }
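/*
 * Note: intel_pt_walk_next_insn() is the decoder's walk_insn callback.
 * Starting at *ip it reads instruction bytes from the mapped dso with
 * dso__data_read_offset(), decodes one instruction at a time with
 * intel_pt_get_insn(), and stops at the first branch instruction, when *ip
 * reaches to_ip, or after max_insn_cnt instructions.  Results for walks that
 * end on a branch within a single map are memoized in the per-dso auxtrace
 * cache, keyed by the starting file offset, and the cache is consulted up
 * front for walks with no to_ip, so hot code is not re-decoded on every pass.
 */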
560
561 static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
562                                   uint64_t offset, const char *filename)
563 {
564         struct addr_filter *filt;
565         bool have_filter   = false;
566         bool hit_tracestop = false;
567         bool hit_filter    = false;
568
569         list_for_each_entry(filt, &pt->filts.head, list) {
570                 if (filt->start)
571                         have_filter = true;
572
573                 if ((filename && !filt->filename) ||
574                     (!filename && filt->filename) ||
575                     (filename && strcmp(filename, filt->filename)))
576                         continue;
577
578                 if (!(offset >= filt->addr && offset < filt->addr + filt->size))
579                         continue;
580
581                 intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
582                              ip, offset, filename ? filename : "[kernel]",
583                              filt->start ? "filter" : "stop",
584                              filt->addr, filt->size);
585
586                 if (filt->start)
587                         hit_filter = true;
588                 else
589                         hit_tracestop = true;
590         }
591
592         if (!hit_tracestop && !hit_filter)
593                 intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
594                              ip, offset, filename ? filename : "[kernel]");
595
596         return hit_tracestop || (have_filter && !hit_filter);
597 }
598
599 static int __intel_pt_pgd_ip(uint64_t ip, void *data)
600 {
601         struct intel_pt_queue *ptq = data;
602         struct thread *thread;
603         struct addr_location al;
604         u8 cpumode;
605         u64 offset;
606
607         if (ip >= ptq->pt->kernel_start)
608                 return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
609
610         cpumode = PERF_RECORD_MISC_USER;
611
612         thread = ptq->thread;
613         if (!thread)
614                 return -EINVAL;
615
616         thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
617         if (!al.map || !al.map->dso)
618                 return -EINVAL;
619
620         offset = al.map->map_ip(al.map, ip);
621
622         return intel_pt_match_pgd_ip(ptq->pt, ip, offset,
623                                      al.map->dso->long_name);
624 }
625
626 static bool intel_pt_pgd_ip(uint64_t ip, void *data)
627 {
628         return __intel_pt_pgd_ip(ip, data) > 0;
629 }
630
631 static bool intel_pt_get_config(struct intel_pt *pt,
632                                 struct perf_event_attr *attr, u64 *config)
633 {
634         if (attr->type == pt->pmu_type) {
635                 if (config)
636                         *config = attr->config;
637                 return true;
638         }
639
640         return false;
641 }
642
643 static bool intel_pt_exclude_kernel(struct intel_pt *pt)
644 {
645         struct perf_evsel *evsel;
646
647         evlist__for_each_entry(pt->session->evlist, evsel) {
648                 if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
649                     !evsel->attr.exclude_kernel)
650                         return false;
651         }
652         return true;
653 }
654
655 static bool intel_pt_return_compression(struct intel_pt *pt)
656 {
657         struct perf_evsel *evsel;
658         u64 config;
659
660         if (!pt->noretcomp_bit)
661                 return true;
662
663         evlist__for_each_entry(pt->session->evlist, evsel) {
664                 if (intel_pt_get_config(pt, &evsel->attr, &config) &&
665                     (config & pt->noretcomp_bit))
666                         return false;
667         }
668         return true;
669 }
670
671 static bool intel_pt_branch_enable(struct intel_pt *pt)
672 {
673         struct perf_evsel *evsel;
674         u64 config;
675
676         evlist__for_each_entry(pt->session->evlist, evsel) {
677                 if (intel_pt_get_config(pt, &evsel->attr, &config) &&
678                     (config & 1) && !(config & 0x2000))
679                         return false;
680         }
681         return true;
682 }
683
684 static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
685 {
686         struct perf_evsel *evsel;
687         unsigned int shift;
688         u64 config;
689
690         if (!pt->mtc_freq_bits)
691                 return 0;
692
693         for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
694                 config >>= 1;
695
696         evlist__for_each_entry(pt->session->evlist, evsel) {
697                 if (intel_pt_get_config(pt, &evsel->attr, &config))
698                         return (config & pt->mtc_freq_bits) >> shift;
699         }
700         return 0;
701 }
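/*
 * Note: pt->mtc_freq_bits is the bitmask, within the event config, of the
 * PMU's "mtc_period" format field; the loop above just finds the shift of
 * its lowest set bit so the field value can be extracted from the config.
 * Illustrative example, assuming the usual Intel PT format mtc_period =
 * config:14-17: mtc_freq_bits is 0x3c000, the computed shift is 14, and a
 * config with MTC period 3 yields (config & 0x3c000) >> 14 == 3.
 */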
702
703 static bool intel_pt_timeless_decoding(struct intel_pt *pt)
704 {
705         struct perf_evsel *evsel;
706         bool timeless_decoding = true;
707         u64 config;
708
709         if (!pt->tsc_bit || !pt->cap_user_time_zero)
710                 return true;
711
712         evlist__for_each_entry(pt->session->evlist, evsel) {
713                 if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
714                         return true;
715                 if (intel_pt_get_config(pt, &evsel->attr, &config)) {
716                         if (config & pt->tsc_bit)
717                                 timeless_decoding = false;
718                         else
719                                 return true;
720                 }
721         }
722         return timeless_decoding;
723 }
724
725 static bool intel_pt_tracing_kernel(struct intel_pt *pt)
726 {
727         struct perf_evsel *evsel;
728
729         evlist__for_each_entry(pt->session->evlist, evsel) {
730                 if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
731                     !evsel->attr.exclude_kernel)
732                         return true;
733         }
734         return false;
735 }
736
737 static bool intel_pt_have_tsc(struct intel_pt *pt)
738 {
739         struct perf_evsel *evsel;
740         bool have_tsc = false;
741         u64 config;
742
743         if (!pt->tsc_bit)
744                 return false;
745
746         evlist__for_each_entry(pt->session->evlist, evsel) {
747                 if (intel_pt_get_config(pt, &evsel->attr, &config)) {
748                         if (config & pt->tsc_bit)
749                                 have_tsc = true;
750                         else
751                                 return false;
752                 }
753         }
754         return have_tsc;
755 }
756
757 static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
758 {
759         u64 quot, rem;
760
761         quot = ns / pt->tc.time_mult;
762         rem  = ns % pt->tc.time_mult;
763         return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
764                 pt->tc.time_mult;
765 }
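/*
 * Note: perf's TSC conversion (struct perf_tsc_conversion, filled from the
 * perf_event_mmap_page time fields) is, ignoring time_zero,
 * ns = (ticks * time_mult) >> time_shift, so the inverse used here is
 * ticks = (ns << time_shift) / time_mult.  Splitting ns into the quotient
 * and remainder of time_mult before shifting, as above, avoids overflowing
 * 64 bits for large ns values.
 */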
766
767 static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
768                                                    unsigned int queue_nr)
769 {
770         struct intel_pt_params params = { .get_trace = 0, };
771         struct intel_pt_queue *ptq;
772
773         ptq = zalloc(sizeof(struct intel_pt_queue));
774         if (!ptq)
775                 return NULL;
776
777         if (pt->synth_opts.callchain) {
778                 size_t sz = sizeof(struct ip_callchain);
779
780                 sz += pt->synth_opts.callchain_sz * sizeof(u64);
781                 ptq->chain = zalloc(sz);
782                 if (!ptq->chain)
783                         goto out_free;
784         }
785
786         if (pt->synth_opts.last_branch) {
787                 size_t sz = sizeof(struct branch_stack);
788
789                 sz += pt->synth_opts.last_branch_sz *
790                       sizeof(struct branch_entry);
791                 ptq->last_branch = zalloc(sz);
792                 if (!ptq->last_branch)
793                         goto out_free;
794                 ptq->last_branch_rb = zalloc(sz);
795                 if (!ptq->last_branch_rb)
796                         goto out_free;
797         }
798
799         ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
800         if (!ptq->event_buf)
801                 goto out_free;
802
803         ptq->pt = pt;
804         ptq->queue_nr = queue_nr;
805         ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
806         ptq->pid = -1;
807         ptq->tid = -1;
808         ptq->cpu = -1;
809         ptq->next_tid = -1;
810
811         params.get_trace = intel_pt_get_trace;
812         params.walk_insn = intel_pt_walk_next_insn;
813         params.data = ptq;
814         params.return_compression = intel_pt_return_compression(pt);
815         params.branch_enable = intel_pt_branch_enable(pt);
816         params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
817         params.mtc_period = intel_pt_mtc_period(pt);
818         params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
819         params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
820
821         if (pt->filts.cnt > 0)
822                 params.pgd_ip = intel_pt_pgd_ip;
823
824         if (pt->synth_opts.instructions) {
825                 if (pt->synth_opts.period) {
826                         switch (pt->synth_opts.period_type) {
827                         case PERF_ITRACE_PERIOD_INSTRUCTIONS:
828                                 params.period_type =
829                                                 INTEL_PT_PERIOD_INSTRUCTIONS;
830                                 params.period = pt->synth_opts.period;
831                                 break;
832                         case PERF_ITRACE_PERIOD_TICKS:
833                                 params.period_type = INTEL_PT_PERIOD_TICKS;
834                                 params.period = pt->synth_opts.period;
835                                 break;
836                         case PERF_ITRACE_PERIOD_NANOSECS:
837                                 params.period_type = INTEL_PT_PERIOD_TICKS;
838                                 params.period = intel_pt_ns_to_ticks(pt,
839                                                         pt->synth_opts.period);
840                                 break;
841                         default:
842                                 break;
843                         }
844                 }
845
846                 if (!params.period) {
847                         params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
848                         params.period = 1;
849                 }
850         }
851
852         ptq->decoder = intel_pt_decoder_new(&params);
853         if (!ptq->decoder)
854                 goto out_free;
855
856         return ptq;
857
858 out_free:
859         zfree(&ptq->event_buf);
860         zfree(&ptq->last_branch);
861         zfree(&ptq->last_branch_rb);
862         zfree(&ptq->chain);
863         free(ptq);
864         return NULL;
865 }
866
867 static void intel_pt_free_queue(void *priv)
868 {
869         struct intel_pt_queue *ptq = priv;
870
871         if (!ptq)
872                 return;
873         thread__zput(ptq->thread);
874         intel_pt_decoder_free(ptq->decoder);
875         zfree(&ptq->event_buf);
876         zfree(&ptq->last_branch);
877         zfree(&ptq->last_branch_rb);
878         zfree(&ptq->chain);
879         free(ptq);
880 }
881
882 static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
883                                      struct auxtrace_queue *queue)
884 {
885         struct intel_pt_queue *ptq = queue->priv;
886
887         if (queue->tid == -1 || pt->have_sched_switch) {
888                 ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
889                 thread__zput(ptq->thread);
890         }
891
892         if (!ptq->thread && ptq->tid != -1)
893                 ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);
894
895         if (ptq->thread) {
896                 ptq->pid = ptq->thread->pid_;
897                 if (queue->cpu == -1)
898                         ptq->cpu = ptq->thread->cpu;
899         }
900 }
901
902 static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
903 {
904         if (ptq->state->flags & INTEL_PT_ABORT_TX) {
905                 ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
906         } else if (ptq->state->flags & INTEL_PT_ASYNC) {
907                 if (ptq->state->to_ip)
908                         ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
909                                      PERF_IP_FLAG_ASYNC |
910                                      PERF_IP_FLAG_INTERRUPT;
911                 else
912                         ptq->flags = PERF_IP_FLAG_BRANCH |
913                                      PERF_IP_FLAG_TRACE_END;
914                 ptq->insn_len = 0;
915         } else {
916                 if (ptq->state->from_ip)
917                         ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
918                 else
919                         ptq->flags = PERF_IP_FLAG_BRANCH |
920                                      PERF_IP_FLAG_TRACE_BEGIN;
921                 if (ptq->state->flags & INTEL_PT_IN_TX)
922                         ptq->flags |= PERF_IP_FLAG_IN_TX;
923                 ptq->insn_len = ptq->state->insn_len;
924                 memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ);
925         }
926 }
927
928 static int intel_pt_setup_queue(struct intel_pt *pt,
929                                 struct auxtrace_queue *queue,
930                                 unsigned int queue_nr)
931 {
932         struct intel_pt_queue *ptq = queue->priv;
933
934         if (list_empty(&queue->head))
935                 return 0;
936
937         if (!ptq) {
938                 ptq = intel_pt_alloc_queue(pt, queue_nr);
939                 if (!ptq)
940                         return -ENOMEM;
941                 queue->priv = ptq;
942
943                 if (queue->cpu != -1)
944                         ptq->cpu = queue->cpu;
945                 ptq->tid = queue->tid;
946
947                 if (pt->sampling_mode) {
948                         if (pt->timeless_decoding)
949                                 ptq->step_through_buffers = true;
950                         if (pt->timeless_decoding || !pt->have_sched_switch)
951                                 ptq->use_buffer_pid_tid = true;
952                 }
953         }
954
955         if (!ptq->on_heap &&
956             (!pt->sync_switch ||
957              ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
958                 const struct intel_pt_state *state;
959                 int ret;
960
961                 if (pt->timeless_decoding)
962                         return 0;
963
964                 intel_pt_log("queue %u getting timestamp\n", queue_nr);
965                 intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
966                              queue_nr, ptq->cpu, ptq->pid, ptq->tid);
967                 while (1) {
968                         state = intel_pt_decode(ptq->decoder);
969                         if (state->err) {
970                                 if (state->err == INTEL_PT_ERR_NODATA) {
971                                         intel_pt_log("queue %u has no timestamp\n",
972                                                      queue_nr);
973                                         return 0;
974                                 }
975                                 continue;
976                         }
977                         if (state->timestamp)
978                                 break;
979                 }
980
981                 ptq->timestamp = state->timestamp;
982                 intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
983                              queue_nr, ptq->timestamp);
984                 ptq->state = state;
985                 ptq->have_sample = true;
986                 intel_pt_sample_flags(ptq);
987                 ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
988                 if (ret)
989                         return ret;
990                 ptq->on_heap = true;
991         }
992
993         return 0;
994 }
995
996 static int intel_pt_setup_queues(struct intel_pt *pt)
997 {
998         unsigned int i;
999         int ret;
1000
1001         for (i = 0; i < pt->queues.nr_queues; i++) {
1002                 ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
1003                 if (ret)
1004                         return ret;
1005         }
1006         return 0;
1007 }
1008
1009 static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
1010 {
1011         struct branch_stack *bs_src = ptq->last_branch_rb;
1012         struct branch_stack *bs_dst = ptq->last_branch;
1013         size_t nr = 0;
1014
1015         bs_dst->nr = bs_src->nr;
1016
1017         if (!bs_src->nr)
1018                 return;
1019
1020         nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
1021         memcpy(&bs_dst->entries[0],
1022                &bs_src->entries[ptq->last_branch_pos],
1023                sizeof(struct branch_entry) * nr);
1024
1025         if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
1026                 memcpy(&bs_dst->entries[nr],
1027                        &bs_src->entries[0],
1028                        sizeof(struct branch_entry) * ptq->last_branch_pos);
1029         }
1030 }
1031
1032 static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
1033 {
1034         ptq->last_branch_pos = 0;
1035         ptq->last_branch_rb->nr = 0;
1036 }
1037
1038 static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
1039 {
1040         const struct intel_pt_state *state = ptq->state;
1041         struct branch_stack *bs = ptq->last_branch_rb;
1042         struct branch_entry *be;
1043
1044         if (!ptq->last_branch_pos)
1045                 ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;
1046
1047         ptq->last_branch_pos -= 1;
1048
1049         be              = &bs->entries[ptq->last_branch_pos];
1050         be->from        = state->from_ip;
1051         be->to          = state->to_ip;
1052         be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
1053         be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
1054         /* No support for mispredict */
1055         be->flags.mispred = ptq->pt->mispred_all;
1056
1057         if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
1058                 bs->nr += 1;
1059 }
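/*
 * Note on the last-branch ring buffer: entries are written backwards, so
 * ptq->last_branch_pos always indexes the most recent branch and older
 * branches follow it with wrap-around; bs->nr saturates at
 * synth_opts.last_branch_sz.  intel_pt_copy_last_branch_rb() linearizes the
 * ring into ptq->last_branch by copying the slice [pos, sz) first and then,
 * once the ring has filled, [0, pos), giving newest-to-oldest order as
 * expected for PERF_SAMPLE_BRANCH_STACK.  Illustrative example with
 * last_branch_sz = 4 and branches b1..b5 recorded in order: the ring holds
 * {b4, b3, b2, b5} at indexes 0..3 with pos = 3, and the copy produces
 * {b5, b4, b3, b2}.
 */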
1060
1061 static int intel_pt_inject_event(union perf_event *event,
1062                                  struct perf_sample *sample, u64 type,
1063                                  bool swapped)
1064 {
1065         event->header.size = perf_event__sample_event_size(sample, type, 0);
1066         return perf_event__synthesize_sample(event, type, 0, sample, swapped);
1067 }
1068
1069 static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
1070 {
1071         int ret;
1072         struct intel_pt *pt = ptq->pt;
1073         union perf_event *event = ptq->event_buf;
1074         struct perf_sample sample = { .ip = 0, };
1075         struct dummy_branch_stack {
1076                 u64                     nr;
1077                 struct branch_entry     entries;
1078         } dummy_bs;
1079
1080         if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
1081                 return 0;
1082
1083         if (pt->synth_opts.initial_skip &&
1084             pt->num_events++ < pt->synth_opts.initial_skip)
1085                 return 0;
1086
1087         event->sample.header.type = PERF_RECORD_SAMPLE;
1088         event->sample.header.misc = PERF_RECORD_MISC_USER;
1089         event->sample.header.size = sizeof(struct perf_event_header);
1090
1091         if (!pt->timeless_decoding)
1092                 sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
1093
1094         sample.cpumode = PERF_RECORD_MISC_USER;
1095         sample.ip = ptq->state->from_ip;
1096         sample.pid = ptq->pid;
1097         sample.tid = ptq->tid;
1098         sample.addr = ptq->state->to_ip;
1099         sample.id = ptq->pt->branches_id;
1100         sample.stream_id = ptq->pt->branches_id;
1101         sample.period = 1;
1102         sample.cpu = ptq->cpu;
1103         sample.flags = ptq->flags;
1104         sample.insn_len = ptq->insn_len;
1105         memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
1106
1107         /*
1108          * perf report cannot handle events without a branch stack when using
1109          * SORT_MODE__BRANCH so make a dummy one.
1110          */
1111         if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
1112                 dummy_bs = (struct dummy_branch_stack){
1113                         .nr = 1,
1114                         .entries = {
1115                                 .from = sample.ip,
1116                                 .to = sample.addr,
1117                         },
1118                 };
1119                 sample.branch_stack = (struct branch_stack *)&dummy_bs;
1120         }
1121
1122         if (pt->synth_opts.inject) {
1123                 ret = intel_pt_inject_event(event, &sample,
1124                                             pt->branches_sample_type,
1125                                             pt->synth_needs_swap);
1126                 if (ret)
1127                         return ret;
1128         }
1129
1130         ret = perf_session__deliver_synth_event(pt->session, event, &sample);
1131         if (ret)
1132                 pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n",
1133                        ret);
1134
1135         return ret;
1136 }
1137
1138 static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
1139 {
1140         int ret;
1141         struct intel_pt *pt = ptq->pt;
1142         union perf_event *event = ptq->event_buf;
1143         struct perf_sample sample = { .ip = 0, };
1144
1145         if (pt->synth_opts.initial_skip &&
1146             pt->num_events++ < pt->synth_opts.initial_skip)
1147                 return 0;
1148
1149         event->sample.header.type = PERF_RECORD_SAMPLE;
1150         event->sample.header.misc = PERF_RECORD_MISC_USER;
1151         event->sample.header.size = sizeof(struct perf_event_header);
1152
1153         if (!pt->timeless_decoding)
1154                 sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
1155
1156         sample.cpumode = PERF_RECORD_MISC_USER;
1157         sample.ip = ptq->state->from_ip;
1158         sample.pid = ptq->pid;
1159         sample.tid = ptq->tid;
1160         sample.addr = ptq->state->to_ip;
1161         sample.id = ptq->pt->instructions_id;
1162         sample.stream_id = ptq->pt->instructions_id;
1163         sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
1164         sample.cpu = ptq->cpu;
1165         sample.flags = ptq->flags;
1166         sample.insn_len = ptq->insn_len;
1167         memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
1168
1169         ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
1170
1171         if (pt->synth_opts.callchain) {
1172                 thread_stack__sample(ptq->thread, ptq->chain,
1173                                      pt->synth_opts.callchain_sz, sample.ip);
1174                 sample.callchain = ptq->chain;
1175         }
1176
1177         if (pt->synth_opts.last_branch) {
1178                 intel_pt_copy_last_branch_rb(ptq);
1179                 sample.branch_stack = ptq->last_branch;
1180         }
1181
1182         if (pt->synth_opts.inject) {
1183                 ret = intel_pt_inject_event(event, &sample,
1184                                             pt->instructions_sample_type,
1185                                             pt->synth_needs_swap);
1186                 if (ret)
1187                         return ret;
1188         }
1189
1190         ret = perf_session__deliver_synth_event(pt->session, event, &sample);
1191         if (ret)
1192                 pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
1193                        ret);
1194
1195         if (pt->synth_opts.last_branch)
1196                 intel_pt_reset_last_branch_rb(ptq);
1197
1198         return ret;
1199 }
1200
1201 static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
1202 {
1203         int ret;
1204         struct intel_pt *pt = ptq->pt;
1205         union perf_event *event = ptq->event_buf;
1206         struct perf_sample sample = { .ip = 0, };
1207
1208         if (pt->synth_opts.initial_skip &&
1209             pt->num_events++ < pt->synth_opts.initial_skip)
1210                 return 0;
1211
1212         event->sample.header.type = PERF_RECORD_SAMPLE;
1213         event->sample.header.misc = PERF_RECORD_MISC_USER;
1214         event->sample.header.size = sizeof(struct perf_event_header);
1215
1216         if (!pt->timeless_decoding)
1217                 sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
1218
1219         sample.cpumode = PERF_RECORD_MISC_USER;
1220         sample.ip = ptq->state->from_ip;
1221         sample.pid = ptq->pid;
1222         sample.tid = ptq->tid;
1223         sample.addr = ptq->state->to_ip;
1224         sample.id = ptq->pt->transactions_id;
1225         sample.stream_id = ptq->pt->transactions_id;
1226         sample.period = 1;
1227         sample.cpu = ptq->cpu;
1228         sample.flags = ptq->flags;
1229         sample.insn_len = ptq->insn_len;
1230         memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
1231
1232         if (pt->synth_opts.callchain) {
1233                 thread_stack__sample(ptq->thread, ptq->chain,
1234                                      pt->synth_opts.callchain_sz, sample.ip);
1235                 sample.callchain = ptq->chain;
1236         }
1237
1238         if (pt->synth_opts.last_branch) {
1239                 intel_pt_copy_last_branch_rb(ptq);
1240                 sample.branch_stack = ptq->last_branch;
1241         }
1242
1243         if (pt->synth_opts.inject) {
1244                 ret = intel_pt_inject_event(event, &sample,
1245                                             pt->transactions_sample_type,
1246                                             pt->synth_needs_swap);
1247                 if (ret)
1248                         return ret;
1249         }
1250
1251         ret = perf_session__deliver_synth_event(pt->session, event, &sample);
1252         if (ret)
1253                 pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
1254                        ret);
1255
1256         if (pt->synth_opts.last_branch)
1257                 intel_pt_reset_last_branch_rb(ptq);
1258
1259         return ret;
1260 }
1261
1262 static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
1263                                 pid_t pid, pid_t tid, u64 ip)
1264 {
1265         union perf_event event;
1266         char msg[MAX_AUXTRACE_ERROR_MSG];
1267         int err;
1268
1269         intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
1270
1271         auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
1272                              code, cpu, pid, tid, ip, msg);
1273
1274         err = perf_session__deliver_synth_event(pt->session, &event, NULL);
1275         if (err)
1276                 pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
1277                        err);
1278
1279         return err;
1280 }
1281
1282 static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
1283 {
1284         struct auxtrace_queue *queue;
1285         pid_t tid = ptq->next_tid;
1286         int err;
1287
1288         if (tid == -1)
1289                 return 0;
1290
1291         intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);
1292
1293         err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);
1294
1295         queue = &pt->queues.queue_array[ptq->queue_nr];
1296         intel_pt_set_pid_tid_cpu(pt, queue);
1297
1298         ptq->next_tid = -1;
1299
1300         return err;
1301 }
1302
1303 static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
1304 {
1305         struct intel_pt *pt = ptq->pt;
1306
1307         return ip == pt->switch_ip &&
1308                (ptq->flags & PERF_IP_FLAG_BRANCH) &&
1309                !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
1310                                PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
1311 }
1312
1313 static int intel_pt_sample(struct intel_pt_queue *ptq)
1314 {
1315         const struct intel_pt_state *state = ptq->state;
1316         struct intel_pt *pt = ptq->pt;
1317         int err;
1318
1319         if (!ptq->have_sample)
1320                 return 0;
1321
1322         ptq->have_sample = false;
1323
1324         if (pt->sample_instructions &&
1325             (state->type & INTEL_PT_INSTRUCTION)) {
1326                 err = intel_pt_synth_instruction_sample(ptq);
1327                 if (err)
1328                         return err;
1329         }
1330
1331         if (pt->sample_transactions &&
1332             (state->type & INTEL_PT_TRANSACTION)) {
1333                 err = intel_pt_synth_transaction_sample(ptq);
1334                 if (err)
1335                         return err;
1336         }
1337
1338         if (!(state->type & INTEL_PT_BRANCH))
1339                 return 0;
1340
1341         if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
1342                 thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
1343                                     state->to_ip, ptq->insn_len,
1344                                     state->trace_nr);
1345         else
1346                 thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
1347
1348         if (pt->sample_branches) {
1349                 err = intel_pt_synth_branch_sample(ptq);
1350                 if (err)
1351                         return err;
1352         }
1353
1354         if (pt->synth_opts.last_branch)
1355                 intel_pt_update_last_branch_rb(ptq);
1356
1357         if (!pt->sync_switch)
1358                 return 0;
1359
1360         if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
1361                 switch (ptq->switch_state) {
1362                 case INTEL_PT_SS_UNKNOWN:
1363                 case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1364                         err = intel_pt_next_tid(pt, ptq);
1365                         if (err)
1366                                 return err;
1367                         ptq->switch_state = INTEL_PT_SS_TRACING;
1368                         break;
1369                 default:
1370                         ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
1371                         return 1;
1372                 }
1373         } else if (!state->to_ip) {
1374                 ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
1375         } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
1376                 ptq->switch_state = INTEL_PT_SS_UNKNOWN;
1377         } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
1378                    state->to_ip == pt->ptss_ip &&
1379                    (ptq->flags & PERF_IP_FLAG_CALL)) {
1380                 ptq->switch_state = INTEL_PT_SS_TRACING;
1381         }
1382
1383         return 0;
1384 }
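/*
 * Note on the sync_switch state machine above: when a decoded branch lands
 * on pt->switch_ip (the address of __switch_to), a deferred tid change is
 * either applied immediately (INTEL_PT_SS_UNKNOWN or
 * INTEL_PT_SS_EXPECTING_SWITCH_IP) or the queue moves to
 * INTEL_PT_SS_EXPECTING_SWITCH_EVENT and intel_pt_sample() returns 1, which
 * the callers use to take the queue off the heap until the corresponding
 * context-switch event arrives.  A branch with no to_ip (tracing disabled)
 * moves the queue to INTEL_PT_SS_NOT_TRACING.
 */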
1385
1386 static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
1387 {
1388         struct machine *machine = pt->machine;
1389         struct map *map;
1390         struct symbol *sym, *start;
1391         u64 ip, switch_ip = 0;
1392         const char *ptss;
1393
1394         if (ptss_ip)
1395                 *ptss_ip = 0;
1396
1397         map = machine__kernel_map(machine);
1398         if (!map)
1399                 return 0;
1400
1401         if (map__load(map))
1402                 return 0;
1403
1404         start = dso__first_symbol(map->dso, MAP__FUNCTION);
1405
1406         for (sym = start; sym; sym = dso__next_symbol(sym)) {
1407                 if (sym->binding == STB_GLOBAL &&
1408                     !strcmp(sym->name, "__switch_to")) {
1409                         ip = map->unmap_ip(map, sym->start);
1410                         if (ip >= map->start && ip < map->end) {
1411                                 switch_ip = ip;
1412                                 break;
1413                         }
1414                 }
1415         }
1416
1417         if (!switch_ip || !ptss_ip)
1418                 return 0;
1419
1420         if (pt->have_sched_switch == 1)
1421                 ptss = "perf_trace_sched_switch";
1422         else
1423                 ptss = "__perf_event_task_sched_out";
1424
1425         for (sym = start; sym; sym = dso__next_symbol(sym)) {
1426                 if (!strcmp(sym->name, ptss)) {
1427                         ip = map->unmap_ip(map, sym->start);
1428                         if (ip >= map->start && ip < map->end) {
1429                                 *ptss_ip = ip;
1430                                 break;
1431                         }
1432                 }
1433         }
1434
1435         return switch_ip;
1436 }
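/*
 * Note: intel_pt_switch_ip() resolves pt->switch_ip to the address of the
 * kernel's __switch_to (global symbol only), and pt->ptss_ip to
 * perf_trace_sched_switch when pt->have_sched_switch == 1, or
 * __perf_event_task_sched_out otherwise.  Both addresses are later compared
 * against decoded branch targets to keep the decoder's notion of the current
 * task in sync with context switches.
 */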
1437
1438 static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
1439 {
1440         const struct intel_pt_state *state = ptq->state;
1441         struct intel_pt *pt = ptq->pt;
1442         int err;
1443
1444         if (!pt->kernel_start) {
1445                 pt->kernel_start = machine__kernel_start(pt->machine);
1446                 if (pt->per_cpu_mmaps &&
1447                     (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
1448                     !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
1449                     !pt->sampling_mode) {
1450                         pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
1451                         if (pt->switch_ip) {
1452                                 intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
1453                                              pt->switch_ip, pt->ptss_ip);
1454                                 pt->sync_switch = true;
1455                         }
1456                 }
1457         }
1458
1459         intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
1460                      ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
1461         while (1) {
1462                 err = intel_pt_sample(ptq);
1463                 if (err)
1464                         return err;
1465
1466                 state = intel_pt_decode(ptq->decoder);
1467                 if (state->err) {
1468                         if (state->err == INTEL_PT_ERR_NODATA)
1469                                 return 1;
1470                         if (pt->sync_switch &&
1471                             state->from_ip >= pt->kernel_start) {
1472                                 pt->sync_switch = false;
1473                                 intel_pt_next_tid(pt, ptq);
1474                         }
1475                         if (pt->synth_opts.errors) {
1476                                 err = intel_pt_synth_error(pt, state->err,
1477                                                            ptq->cpu, ptq->pid,
1478                                                            ptq->tid,
1479                                                            state->from_ip);
1480                                 if (err)
1481                                         return err;
1482                         }
1483                         continue;
1484                 }
1485
1486                 ptq->state = state;
1487                 ptq->have_sample = true;
1488                 intel_pt_sample_flags(ptq);
1489
1490                 /* Use estimated TSC upon return to user space */
1491                 if (pt->est_tsc &&
1492                     (state->from_ip >= pt->kernel_start || !state->from_ip) &&
1493                     state->to_ip && state->to_ip < pt->kernel_start) {
1494                         intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
1495                                      state->timestamp, state->est_timestamp);
1496                         ptq->timestamp = state->est_timestamp;
1497                 /* Use estimated TSC in unknown switch state */
1498                 } else if (pt->sync_switch &&
1499                            ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
1500                            intel_pt_is_switch_ip(ptq, state->to_ip) &&
1501                            ptq->next_tid == -1) {
1502                         intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
1503                                      state->timestamp, state->est_timestamp);
1504                         ptq->timestamp = state->est_timestamp;
1505                 } else if (state->timestamp > ptq->timestamp) {
1506                         ptq->timestamp = state->timestamp;
1507                 }
1508
1509                 if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
1510                         *timestamp = ptq->timestamp;
1511                         return 0;
1512                 }
1513         }
1514         return 0;
1515 }
1516
1517 static inline int intel_pt_update_queues(struct intel_pt *pt)
1518 {
1519         if (pt->queues.new_data) {
1520                 pt->queues.new_data = false;
1521                 return intel_pt_setup_queues(pt);
1522         }
1523         return 0;
1524 }
1525
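/*
 * Decode in timestamp order: repeatedly take the queue with the oldest
 * pending data off the heap and run its decoder until it reaches the next
 * queue's timestamp (or 'timestamp'), re-adding it to the heap afterwards.
 */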
1526 static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
1527 {
1528         unsigned int queue_nr;
1529         u64 ts;
1530         int ret;
1531
1532         while (1) {
1533                 struct auxtrace_queue *queue;
1534                 struct intel_pt_queue *ptq;
1535
1536                 if (!pt->heap.heap_cnt)
1537                         return 0;
1538
1539                 if (pt->heap.heap_array[0].ordinal >= timestamp)
1540                         return 0;
1541
1542                 queue_nr = pt->heap.heap_array[0].queue_nr;
1543                 queue = &pt->queues.queue_array[queue_nr];
1544                 ptq = queue->priv;
1545
1546                 intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
1547                              queue_nr, pt->heap.heap_array[0].ordinal,
1548                              timestamp);
1549
1550                 auxtrace_heap__pop(&pt->heap);
1551
1552                 if (pt->heap.heap_cnt) {
1553                         ts = pt->heap.heap_array[0].ordinal + 1;
1554                         if (ts > timestamp)
1555                                 ts = timestamp;
1556                 } else {
1557                         ts = timestamp;
1558                 }
1559
1560                 intel_pt_set_pid_tid_cpu(pt, queue);
1561
1562                 ret = intel_pt_run_decoder(ptq, &ts);
1563
1564                 if (ret < 0) {
1565                         auxtrace_heap__add(&pt->heap, queue_nr, ts);
1566                         return ret;
1567                 }
1568
1569                 if (!ret) {
1570                         ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
1571                         if (ret < 0)
1572                                 return ret;
1573                 } else {
1574                         ptq->on_heap = false;
1575                 }
1576         }
1577
1578         return 0;
1579 }
1580
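/*
 * With no usable timestamps, run each matching queue's decoder to
 * completion, stamping its samples with the given sample time.
 */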
1581 static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
1582                                             u64 time_)
1583 {
1584         struct auxtrace_queues *queues = &pt->queues;
1585         unsigned int i;
1586         u64 ts = 0;
1587
1588         for (i = 0; i < queues->nr_queues; i++) {
1589                 struct auxtrace_queue *queue = &pt->queues.queue_array[i];
1590                 struct intel_pt_queue *ptq = queue->priv;
1591
1592                 if (ptq && (tid == -1 || ptq->tid == tid)) {
1593                         ptq->time = time_;
1594                         intel_pt_set_pid_tid_cpu(pt, queue);
1595                         intel_pt_run_decoder(ptq, &ts);
1596                 }
1597         }
1598         return 0;
1599 }
1600
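/* Report lost trace data (a truncated AUX buffer) as a synthesized error event. */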
1601 static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
1602 {
1603         return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
1604                                     sample->pid, sample->tid, 0);
1605 }
1606
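/*
 * Find the decode queue for a CPU. Queues are normally indexed by CPU
 * number, so try that slot first, then search downwards and finally
 * upwards through the queue array.
 */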
1607 static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
1608 {
1609         unsigned i, j;
1610
1611         if (cpu < 0 || !pt->queues.nr_queues)
1612                 return NULL;
1613
1614         if ((unsigned)cpu >= pt->queues.nr_queues)
1615                 i = pt->queues.nr_queues - 1;
1616         else
1617                 i = cpu;
1618
1619         if (pt->queues.queue_array[i].cpu == cpu)
1620                 return pt->queues.queue_array[i].priv;
1621
1622         for (j = 0; i > 0; j++) {
1623                 if (pt->queues.queue_array[--i].cpu == cpu)
1624                         return pt->queues.queue_array[i].priv;
1625         }
1626
1627         for (; j < pt->queues.nr_queues; j++) {
1628                 if (pt->queues.queue_array[j].cpu == cpu)
1629                         return pt->queues.queue_array[j].priv;
1630         }
1631
1632         return NULL;
1633 }
1634
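/*
 * Feed a context-switch notification into the per-CPU switch state machine.
 * Returns 0 if the tid change is deferred until the switch IP is seen in the
 * trace, 1 if the caller should update the tid immediately, or a negative
 * error code.
 */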
1635 static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
1636                                 u64 timestamp)
1637 {
1638         struct intel_pt_queue *ptq;
1639         int err;
1640
1641         if (!pt->sync_switch)
1642                 return 1;
1643
1644         ptq = intel_pt_cpu_to_ptq(pt, cpu);
1645         if (!ptq)
1646                 return 1;
1647
1648         switch (ptq->switch_state) {
1649         case INTEL_PT_SS_NOT_TRACING:
1650                 ptq->next_tid = -1;
1651                 break;
1652         case INTEL_PT_SS_UNKNOWN:
1653         case INTEL_PT_SS_TRACING:
1654                 ptq->next_tid = tid;
1655                 ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
1656                 return 0;
1657         case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
1658                 if (!ptq->on_heap) {
1659                         ptq->timestamp = perf_time_to_tsc(timestamp,
1660                                                           &pt->tc);
1661                         err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
1662                                                  ptq->timestamp);
1663                         if (err)
1664                                 return err;
1665                         ptq->on_heap = true;
1666                 }
1667                 ptq->switch_state = INTEL_PT_SS_TRACING;
1668                 break;
1669         case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1670                 ptq->next_tid = tid;
1671                 intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
1672                 break;
1673         default:
1674                 break;
1675         }
1676
1677         return 1;
1678 }
1679
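/*
 * Handle a sched:sched_switch tracepoint sample (have_sched_switch == 1):
 * extract next_pid and hand it to the switch state machine, updating the
 * machine's current tid unless the change is deferred.
 */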
1680 static int intel_pt_process_switch(struct intel_pt *pt,
1681                                    struct perf_sample *sample)
1682 {
1683         struct perf_evsel *evsel;
1684         pid_t tid;
1685         int cpu, ret;
1686
1687         evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
1688         if (evsel != pt->switch_evsel)
1689                 return 0;
1690
1691         tid = perf_evsel__intval(evsel, sample, "next_pid");
1692         cpu = sample->cpu;
1693
1694         intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1695                      cpu, tid, sample->time, perf_time_to_tsc(sample->time,
1696                      &pt->tc));
1697
1698         ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
1699         if (ret <= 0)
1700                 return ret;
1701
1702         return machine__set_current_tid(pt->machine, cpu, -1, tid);
1703 }
1704
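/*
 * Handle PERF_RECORD_SWITCH and PERF_RECORD_SWITCH_CPU_WIDE events. In
 * CPU-wide mode (have_sched_switch == 3) only switch-out events are used
 * because they carry the next pid/tid; otherwise the switch-in sample
 * itself identifies the incoming task.
 */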
1705 static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
1706                                    struct perf_sample *sample)
1707 {
1708         bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
1709         pid_t pid, tid;
1710         int cpu, ret;
1711
1712         cpu = sample->cpu;
1713
1714         if (pt->have_sched_switch == 3) {
1715                 if (!out)
1716                         return 0;
1717                 if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
1718                         pr_err("Expecting CPU-wide context switch event\n");
1719                         return -EINVAL;
1720                 }
1721                 pid = event->context_switch.next_prev_pid;
1722                 tid = event->context_switch.next_prev_tid;
1723         } else {
1724                 if (out)
1725                         return 0;
1726                 pid = sample->pid;
1727                 tid = sample->tid;
1728         }
1729
1730         if (tid == -1) {
1731                 pr_err("context_switch event has no tid\n");
1732                 return -EINVAL;
1733         }
1734
1735         intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1736                      cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
1737                      &pt->tc));
1738
1739         ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
1740         if (ret <= 0)
1741                 return ret;
1742
1743         return machine__set_current_tid(pt->machine, cpu, pid, tid);
1744 }
1745
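/*
 * An itrace_start event identifies the task that is running when tracing
 * starts on a CPU, so record it as that CPU's current tid.
 */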
1746 static int intel_pt_process_itrace_start(struct intel_pt *pt,
1747                                          union perf_event *event,
1748                                          struct perf_sample *sample)
1749 {
1750         if (!pt->per_cpu_mmaps)
1751                 return 0;
1752
1753         intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1754                      sample->cpu, event->itrace_start.pid,
1755                      event->itrace_start.tid, sample->time,
1756                      perf_time_to_tsc(sample->time, &pt->tc));
1757
1758         return machine__set_current_tid(pt->machine, sample->cpu,
1759                                         event->itrace_start.pid,
1760                                         event->itrace_start.tid);
1761 }
1762
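/*
 * Per-event entry point: convert the sample time to TSC, decode any queued
 * trace up to that point, then handle the sideband events (exit, truncated
 * AUX, context switches, itrace start) that keep decoding in sync.
 */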
1763 static int intel_pt_process_event(struct perf_session *session,
1764                                   union perf_event *event,
1765                                   struct perf_sample *sample,
1766                                   struct perf_tool *tool)
1767 {
1768         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1769                                            auxtrace);
1770         u64 timestamp;
1771         int err = 0;
1772
1773         if (dump_trace)
1774                 return 0;
1775
1776         if (!tool->ordered_events) {
1777                 pr_err("Intel Processor Trace requires ordered events\n");
1778                 return -EINVAL;
1779         }
1780
1781         if (sample->time && sample->time != (u64)-1)
1782                 timestamp = perf_time_to_tsc(sample->time, &pt->tc);
1783         else
1784                 timestamp = 0;
1785
1786         if (timestamp || pt->timeless_decoding) {
1787                 err = intel_pt_update_queues(pt);
1788                 if (err)
1789                         return err;
1790         }
1791
1792         if (pt->timeless_decoding) {
1793                 if (event->header.type == PERF_RECORD_EXIT) {
1794                         err = intel_pt_process_timeless_queues(pt,
1795                                                                event->fork.tid,
1796                                                                sample->time);
1797                 }
1798         } else if (timestamp) {
1799                 err = intel_pt_process_queues(pt, timestamp);
1800         }
1801         if (err)
1802                 return err;
1803
1804         if (event->header.type == PERF_RECORD_AUX &&
1805             (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
1806             pt->synth_opts.errors) {
1807                 err = intel_pt_lost(pt, sample);
1808                 if (err)
1809                         return err;
1810         }
1811
1812         if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
1813                 err = intel_pt_process_switch(pt, sample);
1814         else if (event->header.type == PERF_RECORD_ITRACE_START)
1815                 err = intel_pt_process_itrace_start(pt, event, sample);
1816         else if (event->header.type == PERF_RECORD_SWITCH ||
1817                  event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
1818                 err = intel_pt_context_switch(pt, event, sample);
1819
1820         intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
1821                      perf_event__name(event->header.type), event->header.type,
1822                      sample->cpu, sample->time, timestamp);
1823
1824         return err;
1825 }
1826
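/* Flush callback: decode whatever trace data remains once all events have been processed. */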
1827 static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
1828 {
1829         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1830                                            auxtrace);
1831         int ret;
1832
1833         if (dump_trace)
1834                 return 0;
1835
1836         if (!tool->ordered_events)
1837                 return -EINVAL;
1838
1839         ret = intel_pt_update_queues(pt);
1840         if (ret < 0)
1841                 return ret;
1842
1843         if (pt->timeless_decoding)
1844                 return intel_pt_process_timeless_queues(pt, -1,
1845                                                         MAX_TIMESTAMP - 1);
1846
1847         return intel_pt_process_queues(pt, MAX_TIMESTAMP);
1848 }
1849
1850 static void intel_pt_free_events(struct perf_session *session)
1851 {
1852         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1853                                            auxtrace);
1854         struct auxtrace_queues *queues = &pt->queues;
1855         unsigned int i;
1856
1857         for (i = 0; i < queues->nr_queues; i++) {
1858                 intel_pt_free_queue(queues->queue_array[i].priv);
1859                 queues->queue_array[i].priv = NULL;
1860         }
1861         intel_pt_log_disable();
1862         auxtrace_queues__free(queues);
1863 }
1864
1865 static void intel_pt_free(struct perf_session *session)
1866 {
1867         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1868                                            auxtrace);
1869
1870         auxtrace_heap__free(&pt->heap);
1871         intel_pt_free_events(session);
1872         session->auxtrace = NULL;
1873         thread__put(pt->unknown_thread);
1874         addr_filters__exit(&pt->filts);
1875         zfree(&pt->filter);
1876         free(pt);
1877 }
1878
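/*
 * Queue AUXTRACE data for decoding unless it was already queued from the
 * file's auxtrace index; for --dump, the buffer is dumped immediately.
 */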
1879 static int intel_pt_process_auxtrace_event(struct perf_session *session,
1880                                            union perf_event *event,
1881                                            struct perf_tool *tool __maybe_unused)
1882 {
1883         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1884                                            auxtrace);
1885
1886         if (pt->sampling_mode)
1887                 return 0;
1888
1889         if (!pt->data_queued) {
1890                 struct auxtrace_buffer *buffer;
1891                 off_t data_offset;
1892                 int fd = perf_data_file__fd(session->file);
1893                 int err;
1894
1895                 if (perf_data_file__is_pipe(session->file)) {
1896                         data_offset = 0;
1897                 } else {
1898                         data_offset = lseek(fd, 0, SEEK_CUR);
1899                         if (data_offset == -1)
1900                                 return -errno;
1901                 }
1902
1903                 err = auxtrace_queues__add_event(&pt->queues, session, event,
1904                                                  data_offset, &buffer);
1905                 if (err)
1906                         return err;
1907
1908                 /* Dump here, now that we have copied a piped trace out of the pipe */
1909                 if (dump_trace) {
1910                         if (auxtrace_buffer__get_data(buffer, fd)) {
1911                                 intel_pt_dump_event(pt, buffer->data,
1912                                                     buffer->size);
1913                                 auxtrace_buffer__put_data(buffer);
1914                         }
1915                 }
1916         }
1917
1918         return 0;
1919 }
1920
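/* Minimal tool used to deliver synthesized event attributes into the session. */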
1921 struct intel_pt_synth {
1922         struct perf_tool dummy_tool;
1923         struct perf_session *session;
1924 };
1925
1926 static int intel_pt_event_synth(struct perf_tool *tool,
1927                                 union perf_event *event,
1928                                 struct perf_sample *sample __maybe_unused,
1929                                 struct machine *machine __maybe_unused)
1930 {
1931         struct intel_pt_synth *intel_pt_synth =
1932                         container_of(tool, struct intel_pt_synth, dummy_tool);
1933
1934         return perf_session__deliver_synth_event(intel_pt_synth->session, event,
1935                                                  NULL);
1936 }
1937
1938 static int intel_pt_synth_event(struct perf_session *session,
1939                                 struct perf_event_attr *attr, u64 id)
1940 {
1941         struct intel_pt_synth intel_pt_synth;
1942
1943         memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
1944         intel_pt_synth.session = session;
1945
1946         return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
1947                                            &id, intel_pt_event_synth);
1948 }
1949
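/*
 * Synthesize the 'instructions', 'transactions' and 'branches' event
 * attributes that decoded samples will be attributed to, deriving
 * sample_type and exclude_* settings from the Intel PT evsel.
 */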
1950 static int intel_pt_synth_events(struct intel_pt *pt,
1951                                  struct perf_session *session)
1952 {
1953         struct perf_evlist *evlist = session->evlist;
1954         struct perf_evsel *evsel;
1955         struct perf_event_attr attr;
1956         bool found = false;
1957         u64 id;
1958         int err;
1959
1960         evlist__for_each_entry(evlist, evsel) {
1961                 if (evsel->attr.type == pt->pmu_type && evsel->ids) {
1962                         found = true;
1963                         break;
1964                 }
1965         }
1966
1967         if (!found) {
1968                 pr_debug("There are no selected events with Intel Processor Trace data\n");
1969                 return 0;
1970         }
1971
1972         memset(&attr, 0, sizeof(struct perf_event_attr));
1973         attr.size = sizeof(struct perf_event_attr);
1974         attr.type = PERF_TYPE_HARDWARE;
1975         attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
1976         attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1977                             PERF_SAMPLE_PERIOD;
1978         if (pt->timeless_decoding)
1979                 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1980         else
1981                 attr.sample_type |= PERF_SAMPLE_TIME;
1982         if (!pt->per_cpu_mmaps)
1983                 attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
1984         attr.exclude_user = evsel->attr.exclude_user;
1985         attr.exclude_kernel = evsel->attr.exclude_kernel;
1986         attr.exclude_hv = evsel->attr.exclude_hv;
1987         attr.exclude_host = evsel->attr.exclude_host;
1988         attr.exclude_guest = evsel->attr.exclude_guest;
1989         attr.sample_id_all = evsel->attr.sample_id_all;
1990         attr.read_format = evsel->attr.read_format;
1991
1992         id = evsel->id[0] + 1000000000;
1993         if (!id)
1994                 id = 1;
1995
1996         if (pt->synth_opts.instructions) {
1997                 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1998                 if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
1999                         attr.sample_period =
2000                                 intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
2001                 else
2002                         attr.sample_period = pt->synth_opts.period;
2003                 pt->instructions_sample_period = attr.sample_period;
2004                 if (pt->synth_opts.callchain)
2005                         attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
2006                 if (pt->synth_opts.last_branch)
2007                         attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
2008                 pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
2009                          id, (u64)attr.sample_type);
2010                 err = intel_pt_synth_event(session, &attr, id);
2011                 if (err) {
2012                         pr_err("%s: failed to synthesize 'instructions' event type\n",
2013                                __func__);
2014                         return err;
2015                 }
2016                 pt->sample_instructions = true;
2017                 pt->instructions_sample_type = attr.sample_type;
2018                 pt->instructions_id = id;
2019                 id += 1;
2020         }
2021
2022         if (pt->synth_opts.transactions) {
2023                 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
2024                 attr.sample_period = 1;
2025                 if (pt->synth_opts.callchain)
2026                         attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
2027                 if (pt->synth_opts.last_branch)
2028                         attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
2029                 pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
2030                          id, (u64)attr.sample_type);
2031                 err = intel_pt_synth_event(session, &attr, id);
2032                 if (err) {
2033                         pr_err("%s: failed to synthesize 'transactions' event type\n",
2034                                __func__);
2035                         return err;
2036                 }
2037                 pt->sample_transactions = true;
2038                 pt->transactions_sample_type = attr.sample_type;
2039                 pt->transactions_id = id;
2040                 id += 1;
2041                 evlist__for_each_entry(evlist, evsel) {
2042                         if (evsel->id && evsel->id[0] == pt->transactions_id) {
2043                                 if (evsel->name)
2044                                         zfree(&evsel->name);
2045                                 evsel->name = strdup("transactions");
2046                                 break;
2047                         }
2048                 }
2049         }
2050
2051         if (pt->synth_opts.branches) {
2052                 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
2053                 attr.sample_period = 1;
2054                 attr.sample_type |= PERF_SAMPLE_ADDR;
2055                 attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
2056                 attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK;
2057                 pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
2058                          id, (u64)attr.sample_type);
2059                 err = intel_pt_synth_event(session, &attr, id);
2060                 if (err) {
2061                         pr_err("%s: failed to synthesize 'branches' event type\n",
2062                                __func__);
2063                         return err;
2064                 }
2065                 pt->sample_branches = true;
2066                 pt->branches_sample_type = attr.sample_type;
2067                 pt->branches_id = id;
2068         }
2069
2070         pt->synth_needs_swap = evsel->needs_swap;
2071
2072         return 0;
2073 }
2074
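/* Find the sched:sched_switch evsel, if one was recorded. */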
2075 static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
2076 {
2077         struct perf_evsel *evsel;
2078
2079         evlist__for_each_entry_reverse(evlist, evsel) {
2080                 const char *name = perf_evsel__name(evsel);
2081
2082                 if (!strcmp(name, "sched:sched_switch"))
2083                         return evsel;
2084         }
2085
2086         return NULL;
2087 }
2088
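/* Return true if any evsel requested context-switch sideband events. */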
2089 static bool intel_pt_find_switch(struct perf_evlist *evlist)
2090 {
2091         struct perf_evsel *evsel;
2092
2093         evlist__for_each_entry(evlist, evsel) {
2094                 if (evsel->attr.context_switch)
2095                         return true;
2096         }
2097
2098         return false;
2099 }
2100
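/* perf config callback: only the intel-pt.mispred-all variable is recognized. */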
2101 static int intel_pt_perf_config(const char *var, const char *value, void *data)
2102 {
2103         struct intel_pt *pt = data;
2104
2105         if (!strcmp(var, "intel-pt.mispred-all"))
2106                 pt->mispred_all = perf_config_bool(var, value);
2107
2108         return 0;
2109 }
2110
2111 static const char * const intel_pt_info_fmts[] = {
2112         [INTEL_PT_PMU_TYPE]             = "  PMU Type            %"PRId64"\n",
2113         [INTEL_PT_TIME_SHIFT]           = "  Time Shift          %"PRIu64"\n",
2114         [INTEL_PT_TIME_MULT]            = "  Time Multiplier     %"PRIu64"\n",
2115         [INTEL_PT_TIME_ZERO]            = "  Time Zero           %"PRIu64"\n",
2116         [INTEL_PT_CAP_USER_TIME_ZERO]   = "  Cap Time Zero       %"PRId64"\n",
2117         [INTEL_PT_TSC_BIT]              = "  TSC bit             %#"PRIx64"\n",
2118         [INTEL_PT_NORETCOMP_BIT]        = "  NoRETComp bit       %#"PRIx64"\n",
2119         [INTEL_PT_HAVE_SCHED_SWITCH]    = "  Have sched_switch   %"PRId64"\n",
2120         [INTEL_PT_SNAPSHOT_MODE]        = "  Snapshot mode       %"PRId64"\n",
2121         [INTEL_PT_PER_CPU_MMAPS]        = "  Per-cpu maps        %"PRId64"\n",
2122         [INTEL_PT_MTC_BIT]              = "  MTC bit             %#"PRIx64"\n",
2123         [INTEL_PT_TSC_CTC_N]            = "  TSC:CTC numerator   %"PRIu64"\n",
2124         [INTEL_PT_TSC_CTC_D]            = "  TSC:CTC denominator %"PRIu64"\n",
2125         [INTEL_PT_CYC_BIT]              = "  CYC bit             %#"PRIx64"\n",
2126         [INTEL_PT_MAX_NONTURBO_RATIO]   = "  Max non-turbo ratio %"PRIu64"\n",
2127         [INTEL_PT_FILTER_STR_LEN]       = "  Filter string len.  %"PRIu64"\n",
2128 };
2129
2130 static void intel_pt_print_info(u64 *arr, int start, int finish)
2131 {
2132         int i;
2133
2134         if (!dump_trace)
2135                 return;
2136
2137         for (i = start; i <= finish; i++)
2138                 fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
2139 }
2140
2141 static void intel_pt_print_info_str(const char *name, const char *str)
2142 {
2143         if (!dump_trace)
2144                 return;
2145
2146         fprintf(stdout, "  %-20s%s\n", name, str ? str : "");
2147 }
2148
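/*
 * Check that the AUXTRACE_INFO event is large enough to contain priv[pos],
 * so that newer fields remain optional for older perf.data files.
 */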
2149 static bool intel_pt_has(struct auxtrace_info_event *auxtrace_info, int pos)
2150 {
2151         return auxtrace_info->header.size >=
2152                 sizeof(struct auxtrace_info_event) + (sizeof(u64) * (pos + 1));
2153 }
2154
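/*
 * Main setup entry point: parse the AUXTRACE_INFO private data, create and
 * initialize the struct intel_pt, synthesize events and register the
 * auxtrace callbacks on the session.
 */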
2155 int intel_pt_process_auxtrace_info(union perf_event *event,
2156                                    struct perf_session *session)
2157 {
2158         struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
2159         size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
2160         struct intel_pt *pt;
2161         void *info_end;
2162         u64 *info;
2163         int err;
2164
2165         if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
2166                                         min_sz)
2167                 return -EINVAL;
2168
2169         pt = zalloc(sizeof(struct intel_pt));
2170         if (!pt)
2171                 return -ENOMEM;
2172
2173         addr_filters__init(&pt->filts);
2174
2175         err = perf_config(intel_pt_perf_config, pt);
2176         if (err)
2177                 goto err_free;
2178
2179         err = auxtrace_queues__init(&pt->queues);
2180         if (err)
2181                 goto err_free;
2182
2183         intel_pt_log_set_name(INTEL_PT_PMU_NAME);
2184
2185         pt->session = session;
2186         pt->machine = &session->machines.host; /* No kvm support */
2187         pt->auxtrace_type = auxtrace_info->type;
2188         pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
2189         pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
2190         pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
2191         pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
2192         pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
2193         pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
2194         pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
2195         pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
2196         pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
2197         pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
2198         intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
2199                             INTEL_PT_PER_CPU_MMAPS);
2200
2201         if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) {
2202                 pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
2203                 pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
2204                 pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
2205                 pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
2206                 pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
2207                 intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
2208                                     INTEL_PT_CYC_BIT);
2209         }
2210
2211         if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) {
2212                 pt->max_non_turbo_ratio =
2213                         auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
2214                 intel_pt_print_info(&auxtrace_info->priv[0],
2215                                     INTEL_PT_MAX_NONTURBO_RATIO,
2216                                     INTEL_PT_MAX_NONTURBO_RATIO);
2217         }
2218
2219         info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
2220         info_end = (void *)info + auxtrace_info->header.size;
2221
2222         if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) {
2223                 size_t len;
2224
2225                 len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN];
2226                 intel_pt_print_info(&auxtrace_info->priv[0],
2227                                     INTEL_PT_FILTER_STR_LEN,
2228                                     INTEL_PT_FILTER_STR_LEN);
2229                 if (len) {
2230                         const char *filter = (const char *)info;
2231
2232                         len = roundup(len + 1, 8);
2233                         info += len >> 3;
2234                         if ((void *)info > info_end) {
2235                                 pr_err("%s: bad filter string length\n", __func__);
2236                                 err = -EINVAL;
2237                                 goto err_free_queues;
2238                         }
2239                         pt->filter = memdup(filter, len);
2240                         if (!pt->filter) {
2241                                 err = -ENOMEM;
2242                                 goto err_free_queues;
2243                         }
2244                         if (session->header.needs_swap)
2245                                 mem_bswap_64(pt->filter, len);
2246                         if (pt->filter[len - 1]) {
2247                                 pr_err("%s: filter string not null terminated\n", __func__);
2248                                 err = -EINVAL;
2249                                 goto err_free_queues;
2250                         }
2251                         err = addr_filters__parse_bare_filter(&pt->filts,
2252                                                               filter);
2253                         if (err)
2254                                 goto err_free_queues;
2255                 }
2256                 intel_pt_print_info_str("Filter string", pt->filter);
2257         }
2258
2259         pt->timeless_decoding = intel_pt_timeless_decoding(pt);
2260         pt->have_tsc = intel_pt_have_tsc(pt);
2261         pt->sampling_mode = false;
2262         pt->est_tsc = !pt->timeless_decoding;
2263
2264         pt->unknown_thread = thread__new(999999999, 999999999);
2265         if (!pt->unknown_thread) {
2266                 err = -ENOMEM;
2267                 goto err_free_queues;
2268         }
2269
2270         /*
2271          * Since this thread will not be kept in any rbtree nor in a
2272          * list, initialize its list node so that at thread__put() the
2273          * current thread lifetime assumption is kept and we don't segfault
2274          * at list_del_init().
2275          */
2276         INIT_LIST_HEAD(&pt->unknown_thread->node);
2277
2278         err = thread__set_comm(pt->unknown_thread, "unknown", 0);
2279         if (err)
2280                 goto err_delete_thread;
2281         if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
2282                 err = -ENOMEM;
2283                 goto err_delete_thread;
2284         }
2285
2286         pt->auxtrace.process_event = intel_pt_process_event;
2287         pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
2288         pt->auxtrace.flush_events = intel_pt_flush;
2289         pt->auxtrace.free_events = intel_pt_free_events;
2290         pt->auxtrace.free = intel_pt_free;
2291         session->auxtrace = &pt->auxtrace;
2292
2293         if (dump_trace)
2294                 return 0;
2295
2296         if (pt->have_sched_switch == 1) {
2297                 pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
2298                 if (!pt->switch_evsel) {
2299                         pr_err("%s: missing sched_switch event\n", __func__);
2300                         err = -EINVAL;
2301                         goto err_delete_thread;
2302                 }
2303         } else if (pt->have_sched_switch == 2 &&
2304                    !intel_pt_find_switch(session->evlist)) {
2305                 pr_err("%s: missing context_switch attribute flag\n", __func__);
2306                 err = -EINVAL;
2307                 goto err_delete_thread;
2308         }
2309
2310         if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
2311                 pt->synth_opts = *session->itrace_synth_opts;
2312         } else {
2313                 itrace_synth_opts__set_default(&pt->synth_opts);
2314                 if (use_browser != -1) {
2315                         pt->synth_opts.branches = false;
2316                         pt->synth_opts.callchain = true;
2317                 }
2318                 if (session->itrace_synth_opts)
2319                         pt->synth_opts.thread_stack =
2320                                 session->itrace_synth_opts->thread_stack;
2321         }
2322
2323         if (pt->synth_opts.log)
2324                 intel_pt_log_enable();
2325
2326         /* Maximum non-turbo ratio is TSC freq / 100 MHz */
2327         if (pt->tc.time_mult) {
2328                 u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
2329
2330                 if (!pt->max_non_turbo_ratio)
2331                         pt->max_non_turbo_ratio =
2332                                         (tsc_freq + 50000000) / 100000000;
2333                 intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
2334                 intel_pt_log("Maximum non-turbo ratio %u\n",
2335                              pt->max_non_turbo_ratio);
2336         }
2337
2338         if (pt->synth_opts.calls)
2339                 pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
2340                                        PERF_IP_FLAG_TRACE_END;
2341         if (pt->synth_opts.returns)
2342                 pt->branches_filter |= PERF_IP_FLAG_RETURN |
2343                                        PERF_IP_FLAG_TRACE_BEGIN;
2344
2345         if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
2346                 symbol_conf.use_callchain = true;
2347                 if (callchain_register_param(&callchain_param) < 0) {
2348                         symbol_conf.use_callchain = false;
2349                         pt->synth_opts.callchain = false;
2350                 }
2351         }
2352
2353         err = intel_pt_synth_events(pt, session);
2354         if (err)
2355                 goto err_delete_thread;
2356
2357         err = auxtrace_queues__process_index(&pt->queues, session);
2358         if (err)
2359                 goto err_delete_thread;
2360
2361         if (pt->queues.populated)
2362                 pt->data_queued = true;
2363
2364         if (pt->timeless_decoding)
2365                 pr_debug2("Intel PT decoding without timestamps\n");
2366
2367         return 0;
2368
2369 err_delete_thread:
2370         thread__zput(pt->unknown_thread);
2371 err_free_queues:
2372         intel_pt_log_disable();
2373         auxtrace_queues__free(&pt->queues);
2374         session->auxtrace = NULL;
2375 err_free:
2376         addr_filters__exit(&pt->filts);
2377         zfree(&pt->filter);
2378         free(pt);
2379         return err;
2380 }