]> asedeno.scripts.mit.edu Git - linux.git/blobdiff - tools/perf/builtin-trace.c
tools headers uapi: Update tools's copy of asm-generic/unistd.h
[linux.git] / tools / perf / builtin-trace.c
index 6a748eca2edb4231a2517ae3d12e2023ca6bd076..22ab8e67c7600865d7fc7a884feba24f15bbe66b 100644 (file)
@@ -77,7 +77,8 @@ struct trace {
                struct syscall  *table;
                struct {
                        struct perf_evsel *sys_enter,
-                                         *sys_exit;
+                                         *sys_exit,
+                                         *augmented;
                }               events;
        } syscalls;
        struct record_opts      opts;
@@ -121,7 +122,6 @@ struct trace {
        bool                    force;
        bool                    vfs_getname;
        int                     trace_pgfaults;
-       int                     open_id;
 };
 
 struct tp_field {
@@ -157,13 +157,11 @@ TP_UINT_FIELD__SWAPPED(16);
 TP_UINT_FIELD__SWAPPED(32);
 TP_UINT_FIELD__SWAPPED(64);
 
-static int tp_field__init_uint(struct tp_field *field,
-                              struct format_field *format_field,
-                              bool needs_swap)
+static int __tp_field__init_uint(struct tp_field *field, int size, int offset, bool needs_swap)
 {
-       field->offset = format_field->offset;
+       field->offset = offset;
 
-       switch (format_field->size) {
+       switch (size) {
        case 1:
                field->integer = tp_field__u8;
                break;
@@ -183,18 +181,28 @@ static int tp_field__init_uint(struct tp_field *field,
        return 0;
 }
 
+static int tp_field__init_uint(struct tp_field *field, struct format_field *format_field, bool needs_swap)
+{
+       return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap);
+}
+
 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
 {
        return sample->raw_data + field->offset;
 }
 
-static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
+static int __tp_field__init_ptr(struct tp_field *field, int offset)
 {
-       field->offset = format_field->offset;
+       field->offset = offset;
        field->pointer = tp_field__ptr;
        return 0;
 }
 
+static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
+{
+       return __tp_field__init_ptr(field, format_field->offset);
+}
+
 struct syscall_tp {
        struct tp_field id;
        union {
@@ -240,7 +248,47 @@ static void perf_evsel__delete_priv(struct perf_evsel *evsel)
        perf_evsel__delete(evsel);
 }
 
-static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
+static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel)
+{
+       struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
+
+       if (evsel->priv != NULL) {
+               if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr"))
+                       goto out_delete;
+               return 0;
+       }
+
+       return -ENOMEM;
+out_delete:
+       zfree(&evsel->priv);
+       return -ENOENT;
+}
+
+static int perf_evsel__init_augmented_syscall_tp(struct perf_evsel *evsel)
+{
+       struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
+
+       if (evsel->priv != NULL) {       /* field, sizeof_field, offsetof_field */
+               if (__tp_field__init_uint(&sc->id, sizeof(long), sizeof(long long), evsel->needs_swap))
+                       goto out_delete;
+
+               return 0;
+       }
+
+       return -ENOMEM;
+out_delete:
+       zfree(&evsel->priv);
+       return -EINVAL;
+}
+
+static int perf_evsel__init_augmented_syscall_tp_args(struct perf_evsel *evsel)
+{
+       struct syscall_tp *sc = evsel->priv;
+
+       return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
+}
+
+static int perf_evsel__init_raw_syscall_tp(struct perf_evsel *evsel, void *handler)
 {
        evsel->priv = malloc(sizeof(struct syscall_tp));
        if (evsel->priv != NULL) {
@@ -258,7 +306,7 @@ static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
        return -ENOENT;
 }
 
-static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
+static struct perf_evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
 {
        struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
 
@@ -269,7 +317,7 @@ static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void
        if (IS_ERR(evsel))
                return NULL;
 
-       if (perf_evsel__init_syscall_tp(evsel, handler))
+       if (perf_evsel__init_raw_syscall_tp(evsel, handler))
                goto out_delete;
 
        return evsel;
@@ -291,7 +339,7 @@ size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const cha
 {
        int idx = val - sa->offset;
 
-       if (idx < 0 || idx >= sa->nr_entries)
+       if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL)
                return scnprintf(bf, size, intfmt, val);
 
        return scnprintf(bf, size, "%s", sa->entries[idx]);
@@ -761,10 +809,12 @@ static struct syscall_fmt {
          .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
        { .name     = "socket",
          .arg = { [0] = STRARRAY(family, socket_families),
-                  [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
+                  [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
+                  [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
        { .name     = "socketpair",
          .arg = { [0] = STRARRAY(family, socket_families),
-                  [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
+                  [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
+                  [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
        { .name     = "stat", .alias = "newstat", },
        { .name     = "statx",
          .arg = { [0] = { .scnprintf = SCA_FDAT,        /* fdat */ },
@@ -803,12 +853,17 @@ static struct syscall_fmt *syscall_fmt__find(const char *name)
        return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
 }
 
+/*
+ * is_exit: is this "exit" or "exit_group"?
+ * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
+ */
 struct syscall {
        struct event_format *tp_format;
        int                 nr_args;
+       bool                is_exit;
+       bool                is_open;
        struct format_field *args;
        const char          *name;
-       bool                is_exit;
        struct syscall_fmt  *fmt;
        struct syscall_arg_fmt *arg_fmt;
 };
@@ -1297,6 +1352,7 @@ static int trace__read_syscall_info(struct trace *trace, int id)
        }
 
        sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
+       sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");
 
        return syscall__set_arg_fmts(sc);
 }
@@ -1659,6 +1715,37 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
        return err;
 }
 
+static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evsel,
+                                   struct perf_sample *sample)
+{
+       struct thread_trace *ttrace;
+       struct thread *thread;
+       int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
+       struct syscall *sc = trace__syscall_info(trace, evsel, id);
+       char msg[1024];
+       void *args;
+
+       if (sc == NULL)
+               return -1;
+
+       thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+       ttrace = thread__trace(thread, trace->output);
+       /*
+        * We need to get ttrace just to make sure it is there when syscall__scnprintf_args()
+        * and the rest of the beautifiers accessing it via struct syscall_arg touches it.
+        */
+       if (ttrace == NULL)
+               goto out_put;
+
+       args = perf_evsel__sc_tp_ptr(evsel, args, sample);
+       syscall__scnprintf_args(sc, msg, sizeof(msg), args, trace, thread);
+       fprintf(trace->output, "%s", msg);
+       err = 0;
+out_put:
+       thread__put(thread);
+       return err;
+}
+
 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
                                    struct perf_sample *sample,
                                    struct callchain_cursor *cursor)
@@ -1720,7 +1807,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 
        ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
 
-       if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
+       if (sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
                trace__set_fd_pathname(thread, ret, ttrace->filename.name);
                ttrace->filename.pending_open = false;
                ++trace->stats.vfs_getname;
@@ -1955,11 +2042,17 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
        fprintf(trace->output, "%s:", evsel->name);
 
        if (perf_evsel__is_bpf_output(evsel)) {
-               bpf_output__fprintf(trace, sample);
+               if (evsel == trace->syscalls.events.augmented)
+                       trace__fprintf_sys_enter(trace, evsel, sample);
+               else
+                       bpf_output__fprintf(trace, sample);
        } else if (evsel->tp_format) {
-               event_format__fprintf(evsel->tp_format, sample->cpu,
-                                     sample->raw_data, sample->raw_size,
-                                     trace->output);
+               if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
+                   trace__fprintf_sys_enter(trace, evsel, sample)) {
+                       event_format__fprintf(evsel->tp_format, sample->cpu,
+                                             sample->raw_data, sample->raw_size,
+                                             trace->output);
+               }
        }
 
        fprintf(trace->output, "\n");
@@ -2240,14 +2333,14 @@ static int trace__add_syscall_newtp(struct trace *trace)
        struct perf_evlist *evlist = trace->evlist;
        struct perf_evsel *sys_enter, *sys_exit;
 
-       sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
+       sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter);
        if (sys_enter == NULL)
                goto out;
 
        if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
                goto out_delete_sys_enter;
 
-       sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
+       sys_exit = perf_evsel__raw_syscall_newtp("sys_exit", trace__sys_exit);
        if (sys_exit == NULL)
                goto out_delete_sys_enter;
 
@@ -2669,7 +2762,7 @@ static int trace__replay(struct trace *trace)
                                                             "syscalls:sys_enter");
 
        if (evsel &&
-           (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
+           (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
            perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
                pr_err("Error during initialize raw_syscalls:sys_enter event\n");
                goto out;
@@ -2681,7 +2774,7 @@ static int trace__replay(struct trace *trace)
                evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
                                                             "syscalls:sys_exit");
        if (evsel &&
-           (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
+           (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
            perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
                pr_err("Error during initialize raw_syscalls:sys_exit event\n");
                goto out;
@@ -2921,6 +3014,36 @@ static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
                evsel->handler = handler;
 }
 
+static int evlist__set_syscall_tp_fields(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel;
+
+       evlist__for_each_entry(evlist, evsel) {
+               if (evsel->priv || !evsel->tp_format)
+                       continue;
+
+               if (strcmp(evsel->tp_format->system, "syscalls"))
+                       continue;
+
+               if (perf_evsel__init_syscall_tp(evsel))
+                       return -1;
+
+               if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
+                       struct syscall_tp *sc = evsel->priv;
+
+                       if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)))
+                               return -1;
+               } else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) {
+                       struct syscall_tp *sc = evsel->priv;
+
+                       if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap))
+                               return -1;
+               }
+       }
+
+       return 0;
+}
+
 /*
  * XXX: Hackish, just splitting the combined -e+--event (syscalls
  * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
@@ -2990,6 +3113,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
 
                if (trace__validate_ev_qualifier(trace))
                        goto out;
+               trace->trace_syscalls = true;
        }
 
        err = 0;
@@ -3045,7 +3169,7 @@ int cmd_trace(int argc, const char **argv)
                },
                .output = stderr,
                .show_comm = true,
-               .trace_syscalls = true,
+               .trace_syscalls = false,
                .kernel_syscallchains = false,
                .max_stack = UINT_MAX,
        };
@@ -3120,8 +3244,9 @@ int cmd_trace(int argc, const char **argv)
        };
        bool __maybe_unused max_stack_user_set = true;
        bool mmap_pages_user_set = true;
+       struct perf_evsel *evsel;
        const char * const trace_subcommands[] = { "record", NULL };
-       int err;
+       int err = -1;
        char bf[BUFSIZ];
 
        signal(SIGSEGV, sighandler_dump_stack);
@@ -3144,6 +3269,20 @@ int cmd_trace(int argc, const char **argv)
                                       "cgroup monitoring only available in system-wide mode");
        }
 
+       evsel = bpf__setup_output_event(trace.evlist, "__augmented_syscalls__");
+       if (IS_ERR(evsel)) {
+               bpf__strerror_setup_output_event(trace.evlist, PTR_ERR(evsel), bf, sizeof(bf));
+               pr_err("ERROR: Setup trace syscalls enter failed: %s\n", bf);
+               goto out;
+       }
+
+       if (evsel) {
+               if (perf_evsel__init_augmented_syscall_tp(evsel) ||
+                   perf_evsel__init_augmented_syscall_tp_args(evsel))
+                       goto out;
+               trace.syscalls.events.augmented = evsel;
+       }
+
        err = bpf__setup_stdout(trace.evlist);
        if (err) {
                bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
@@ -3179,8 +3318,13 @@ int cmd_trace(int argc, const char **argv)
                symbol_conf.use_callchain = true;
        }
 
-       if (trace.evlist->nr_entries > 0)
+       if (trace.evlist->nr_entries > 0) {
                evlist__set_evsel_handler(trace.evlist, trace__event_handler);
+               if (evlist__set_syscall_tp_fields(trace.evlist)) {
+                       perror("failed to set syscalls:* tracepoint fields");
+                       goto out;
+               }
+       }
 
        if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
                return trace__record(&trace, argc-1, &argv[1]);
@@ -3191,13 +3335,7 @@ int cmd_trace(int argc, const char **argv)
 
        if (!trace.trace_syscalls && !trace.trace_pgfaults &&
            trace.evlist->nr_entries == 0 /* Was --events used? */) {
-               pr_err("Please specify something to trace.\n");
-               return -1;
-       }
-
-       if (!trace.trace_syscalls && trace.ev_qualifier) {
-               pr_err("The -e option can't be used with --no-syscalls.\n");
-               goto out;
+               trace.trace_syscalls = true;
        }
 
        if (output_name != NULL) {
@@ -3208,8 +3346,6 @@ int cmd_trace(int argc, const char **argv)
                }
        }
 
-       trace.open_id = syscalltbl__id(trace.sctbl, "open");
-
        err = target__validate(&trace.opts.target);
        if (err) {
                target__strerror(&trace.opts.target, err, bf, sizeof(bf));