summaryrefslogtreecommitdiff
path: root/tools/perf/builtin-trace.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-10-23 13:32:18 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2018-10-23 13:32:18 +0100
commitc05f3642f4304dd081876e77a68555b6aba4483f (patch)
tree915baf10c3518d162c00e62b5d855cf92282ed79 /tools/perf/builtin-trace.c
parent0200fbdd431519d730b5d399a12840ec832b27cc (diff)
parentdda93b45389f025fd3422d22cc31cc1ea6040305 (diff)
downloadlinux-c05f3642f4304dd081876e77a68555b6aba4483f.tar.gz
linux-c05f3642f4304dd081876e77a68555b6aba4483f.tar.bz2
linux-c05f3642f4304dd081876e77a68555b6aba4483f.zip
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "The main updates in this cycle were: - Lots of perf tooling changes too voluminous to list (big perf trace and perf stat improvements, lots of libtraceevent reorganization, etc.), so I'll list the authors and refer to the changelog for details: Benjamin Peterson, Jérémie Galarneau, Kim Phillips, Peter Zijlstra, Ravi Bangoria, Sangwon Hong, Sean V Kelley, Steven Rostedt, Thomas Gleixner, Ding Xiang, Eduardo Habkost, Thomas Richter, Andi Kleen, Sanskriti Sharma, Adrian Hunter, Tzvetomir Stoyanov, Arnaldo Carvalho de Melo, Jiri Olsa. ... with the bulk of the changes written by Jiri Olsa, Tzvetomir Stoyanov and Arnaldo Carvalho de Melo. - Continued intel_rdt work with a focus on playing well with perf events. This also imported some non-perf RDT work due to dependencies. (Reinette Chatre) - Implement counter freezing for Arch Perfmon v4 (Skylake and newer). This allows to speed up the PMI handler by avoiding unnecessary MSR writes and make it more accurate. (Andi Kleen) - kprobes cleanups and simplification (Masami Hiramatsu) - Intel Goldmont PMU updates (Kan Liang) - ... plus misc other fixes and updates" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (155 commits) kprobes/x86: Use preempt_enable() in optimized_callback() x86/intel_rdt: Prevent pseudo-locking from using stale pointers kprobes, x86/ptrace.h: Make regs_get_kernel_stack_nth() not fault on bad stack perf/x86/intel: Export mem events only if there's PEBS support x86/cpu: Drop pointless static qualifier in punit_dev_state_show() x86/intel_rdt: Fix initial allocation to consider CDP x86/intel_rdt: CBM overlap should also check for overlap with CDP peer x86/intel_rdt: Introduce utility to obtain CDP peer tools lib traceevent, perf tools: Move struct tep_handler definition in a local header file tools lib traceevent: Separate out tep_strerror() for strerror_r() issues perf python: More portable way to make CFLAGS work with clang perf python: Make clang_has_option() work on Python 3 perf tools: Free temporary 'sys' string in read_event_files() perf tools: Avoid double free in read_event_file() perf tools: Free 'printk' string in parse_ftrace_printk() perf tools: Cleanup trace-event-info 'tdata' leak perf strbuf: Match va_{add,copy} with va_end perf test: S390 does not support watchpoints in test 22 perf auxtrace: Include missing asm/bitsperlong.h to get BITS_PER_LONG tools include: Adopt linux/bits.h ...
Diffstat (limited to 'tools/perf/builtin-trace.c')
-rw-r--r--tools/perf/builtin-trace.c179
1 files changed, 140 insertions, 39 deletions
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 22ab8e67c760..90289f31dd87 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -181,7 +181,7 @@ static int __tp_field__init_uint(struct tp_field *field, int size, int offset, b
return 0;
}
-static int tp_field__init_uint(struct tp_field *field, struct format_field *format_field, bool needs_swap)
+static int tp_field__init_uint(struct tp_field *field, struct tep_format_field *format_field, bool needs_swap)
{
return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap);
}
@@ -198,7 +198,7 @@ static int __tp_field__init_ptr(struct tp_field *field, int offset)
return 0;
}
-static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
+static int tp_field__init_ptr(struct tp_field *field, struct tep_format_field *format_field)
{
return __tp_field__init_ptr(field, format_field->offset);
}
@@ -214,7 +214,7 @@ static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
struct tp_field *field,
const char *name)
{
- struct format_field *format_field = perf_evsel__field(evsel, name);
+ struct tep_format_field *format_field = perf_evsel__field(evsel, name);
if (format_field == NULL)
return -1;
@@ -230,7 +230,7 @@ static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
struct tp_field *field,
const char *name)
{
- struct format_field *format_field = perf_evsel__field(evsel, name);
+ struct tep_format_field *format_field = perf_evsel__field(evsel, name);
if (format_field == NULL)
return -1;
@@ -288,6 +288,13 @@ static int perf_evsel__init_augmented_syscall_tp_args(struct perf_evsel *evsel)
return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
}
+static int perf_evsel__init_augmented_syscall_tp_ret(struct perf_evsel *evsel)
+{
+ struct syscall_tp *sc = evsel->priv;
+
+ return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap);
+}
+
static int perf_evsel__init_raw_syscall_tp(struct perf_evsel *evsel, void *handler)
{
evsel->priv = malloc(sizeof(struct syscall_tp));
@@ -498,16 +505,6 @@ static const char *clockid[] = {
};
static DEFINE_STRARRAY(clockid);
-static const char *socket_families[] = {
- "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
- "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
- "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
- "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
- "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
- "ALG", "NFC", "VSOCK",
-};
-static DEFINE_STRARRAY(socket_families);
-
static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
struct syscall_arg *arg)
{
@@ -631,6 +628,8 @@ static struct syscall_fmt {
} syscall_fmts[] = {
{ .name = "access",
.arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
+ { .name = "bind",
+ .arg = { [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ }, }, },
{ .name = "bpf",
.arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
{ .name = "brk", .hexret = true,
@@ -645,6 +644,8 @@ static struct syscall_fmt {
[4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, },
{ .name = "close",
.arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
+ { .name = "connect",
+ .arg = { [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ }, }, },
{ .name = "epoll_ctl",
.arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
{ .name = "eventfd2",
@@ -801,7 +802,8 @@ static struct syscall_fmt {
{ .name = "sendmsg",
.arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
{ .name = "sendto",
- .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
+ .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ },
+ [4] = { .scnprintf = SCA_SOCKADDR, /* addr */ }, }, },
{ .name = "set_tid_address", .errpid = true, },
{ .name = "setitimer",
.arg = { [0] = STRARRAY(which, itimers), }, },
@@ -830,6 +832,7 @@ static struct syscall_fmt {
.arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
{ .name = "tkill",
.arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
+ { .name = "umount2", .alias = "umount", },
{ .name = "uname", .alias = "newuname", },
{ .name = "unlinkat",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
@@ -856,13 +859,15 @@ static struct syscall_fmt *syscall_fmt__find(const char *name)
/*
* is_exit: is this "exit" or "exit_group"?
* is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
+ * args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc.
*/
struct syscall {
- struct event_format *tp_format;
+ struct tep_event_format *tp_format;
int nr_args;
+ int args_size;
bool is_exit;
bool is_open;
- struct format_field *args;
+ struct tep_format_field *args;
const char *name;
struct syscall_fmt *fmt;
struct syscall_arg_fmt *arg_fmt;
@@ -1095,11 +1100,21 @@ static void thread__set_filename_pos(struct thread *thread, const char *bf,
ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
}
+static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size)
+{
+ struct augmented_arg *augmented_arg = arg->augmented.args;
+
+ return scnprintf(bf, size, "%.*s", augmented_arg->size, augmented_arg->value);
+}
+
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
struct syscall_arg *arg)
{
unsigned long ptr = arg->val;
+ if (arg->augmented.args)
+ return syscall_arg__scnprintf_augmented_string(arg, bf, size);
+
if (!arg->trace->vfs_getname)
return scnprintf(bf, size, "%#x", ptr);
@@ -1142,11 +1157,9 @@ static void sig_handler(int sig)
interrupted = sig == SIGINT;
}
-static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
- u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
+static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp)
{
- size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
- printed += fprintf_duration(duration, duration_calculated, fp);
+ size_t printed = 0;
if (trace->multiple_threads) {
if (trace->show_comm)
@@ -1157,6 +1170,14 @@ static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thre
return printed;
}
+static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
+ u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
+{
+ size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
+ printed += fprintf_duration(duration, duration_calculated, fp);
+ return printed + trace__fprintf_comm_tid(trace, thread, fp);
+}
+
static int trace__process_event(struct trace *trace, struct machine *machine,
union perf_event *event, struct perf_sample *sample)
{
@@ -1258,10 +1279,12 @@ static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
static int syscall__set_arg_fmts(struct syscall *sc)
{
- struct format_field *field;
+ struct tep_format_field *field, *last_field = NULL;
int idx = 0, len;
for (field = sc->args; field; field = field->next, ++idx) {
+ last_field = field;
+
if (sc->fmt && sc->fmt->arg[idx].scnprintf)
continue;
@@ -1270,7 +1293,7 @@ static int syscall__set_arg_fmts(struct syscall *sc)
strcmp(field->name, "path") == 0 ||
strcmp(field->name, "pathname") == 0))
sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
- else if (field->flags & FIELD_IS_POINTER)
+ else if (field->flags & TEP_FIELD_IS_POINTER)
sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
else if (strcmp(field->type, "pid_t") == 0)
sc->arg_fmt[idx].scnprintf = SCA_PID;
@@ -1292,6 +1315,9 @@ static int syscall__set_arg_fmts(struct syscall *sc)
}
}
+ if (last_field)
+ sc->args_size = last_field->offset + last_field->size;
+
return 0;
}
@@ -1472,14 +1498,18 @@ static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
}
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
- unsigned char *args, struct trace *trace,
- struct thread *thread)
+ unsigned char *args, void *augmented_args, int augmented_args_size,
+ struct trace *trace, struct thread *thread)
{
size_t printed = 0;
unsigned long val;
u8 bit = 1;
struct syscall_arg arg = {
.args = args,
+ .augmented = {
+ .size = augmented_args_size,
+ .args = augmented_args,
+ },
.idx = 0,
.mask = 0,
.trace = trace,
@@ -1495,7 +1525,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
ttrace->ret_scnprintf = NULL;
if (sc->args != NULL) {
- struct format_field *field;
+ struct tep_format_field *field;
for (field = sc->args; field;
field = field->next, ++arg.idx, bit <<= 1) {
@@ -1654,6 +1684,17 @@ static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
return printed;
}
+static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size)
+{
+ void *augmented_args = NULL;
+
+ *augmented_args_size = sample->raw_size - sc->args_size;
+ if (*augmented_args_size > 0)
+ augmented_args = sample->raw_data + sc->args_size;
+
+ return augmented_args;
+}
+
static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
union perf_event *event __maybe_unused,
struct perf_sample *sample)
@@ -1663,6 +1704,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
size_t printed = 0;
struct thread *thread;
int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
+ int augmented_args_size = 0;
+ void *augmented_args = NULL;
struct syscall *sc = trace__syscall_info(trace, evsel, id);
struct thread_trace *ttrace;
@@ -1686,13 +1729,24 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
trace__printf_interrupted_entry(trace);
-
+ /*
+ * If this is raw_syscalls.sys_enter, then it always comes with the 6 possible
+ * arguments, even if the syscall being handled, say "openat", uses only 4 arguments
+ * this breaks syscall__augmented_args() check for augmented args, as we calculate
+ * syscall->args_size using each syscalls:sys_enter_NAME tracefs format file,
+ * so when handling, say the openat syscall, we end up getting 6 args for the
+ * raw_syscalls:sys_enter event, when we expected just 4, we end up mistakenly
+ * thinking that the extra 2 u64 args are the augmented filename, so just check
+ * here and avoid using augmented syscalls when the evsel is the raw_syscalls one.
+ */
+ if (evsel != trace->syscalls.events.sys_enter)
+ augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size);
ttrace->entry_time = sample->time;
msg = ttrace->entry_str;
printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
- args, trace, thread);
+ args, augmented_args, augmented_args_size, trace, thread);
if (sc->is_exit) {
if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
@@ -1723,7 +1777,8 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evse
int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
struct syscall *sc = trace__syscall_info(trace, evsel, id);
char msg[1024];
- void *args;
+ void *args, *augmented_args = NULL;
+ int augmented_args_size;
if (sc == NULL)
return -1;
@@ -1738,7 +1793,8 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evse
goto out_put;
args = perf_evsel__sc_tp_ptr(evsel, args, sample);
- syscall__scnprintf_args(sc, msg, sizeof(msg), args, trace, thread);
+ augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size);
+ syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
fprintf(trace->output, "%s", msg);
err = 0;
out_put:
@@ -2022,6 +2078,7 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
union perf_event *event __maybe_unused,
struct perf_sample *sample)
{
+ struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
int callchain_ret = 0;
if (sample->callchain) {
@@ -2039,13 +2096,31 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
if (trace->trace_syscalls)
fprintf(trace->output, "( ): ");
+ if (thread)
+ trace__fprintf_comm_tid(trace, thread, trace->output);
+
+ if (evsel == trace->syscalls.events.augmented) {
+ int id = perf_evsel__sc_tp_uint(evsel, id, sample);
+ struct syscall *sc = trace__syscall_info(trace, evsel, id);
+
+ if (sc) {
+ fprintf(trace->output, "%s(", sc->name);
+ trace__fprintf_sys_enter(trace, evsel, sample);
+ fputc(')', trace->output);
+ goto newline;
+ }
+
+ /*
+ * XXX: Not having the associated syscall info or not finding/adding
+ * the thread should never happen, but if it does...
+ * fall thru and print it as a bpf_output event.
+ */
+ }
+
fprintf(trace->output, "%s:", evsel->name);
if (perf_evsel__is_bpf_output(evsel)) {
- if (evsel == trace->syscalls.events.augmented)
- trace__fprintf_sys_enter(trace, evsel, sample);
- else
- bpf_output__fprintf(trace, sample);
+ bpf_output__fprintf(trace, sample);
} else if (evsel->tp_format) {
if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
trace__fprintf_sys_enter(trace, evsel, sample)) {
@@ -2055,12 +2130,14 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
}
}
+newline:
fprintf(trace->output, "\n");
if (callchain_ret > 0)
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+ thread__put(thread);
out:
return 0;
}
@@ -3276,12 +3353,8 @@ int cmd_trace(int argc, const char **argv)
goto out;
}
- if (evsel) {
- if (perf_evsel__init_augmented_syscall_tp(evsel) ||
- perf_evsel__init_augmented_syscall_tp_args(evsel))
- goto out;
+ if (evsel)
trace.syscalls.events.augmented = evsel;
- }
err = bpf__setup_stdout(trace.evlist);
if (err) {
@@ -3326,6 +3399,34 @@ int cmd_trace(int argc, const char **argv)
}
}
+ /*
+ * If we are augmenting syscalls, then combine what we put in the
+ * __augmented_syscalls__ BPF map with what is in the
+ * syscalls:sys_exit_FOO tracepoints, i.e. just like we do without BPF,
+ * combining raw_syscalls:sys_enter with raw_syscalls:sys_exit.
+ *
+ * We'll switch to look at two BPF maps, one for sys_enter and the
+ * other for sys_exit when we start augmenting the sys_exit paths with
+ * buffers that are being copied from kernel to userspace, think 'read'
+ * syscall.
+ */
+ if (trace.syscalls.events.augmented) {
+ evsel = trace.syscalls.events.augmented;
+
+ if (perf_evsel__init_augmented_syscall_tp(evsel) ||
+ perf_evsel__init_augmented_syscall_tp_args(evsel))
+ goto out;
+ evsel->handler = trace__sys_enter;
+
+ evlist__for_each_entry(trace.evlist, evsel) {
+ if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) {
+ perf_evsel__init_augmented_syscall_tp(evsel);
+ perf_evsel__init_augmented_syscall_tp_ret(evsel);
+ evsel->handler = trace__sys_exit;
+ }
+ }
+ }
+
if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
return trace__record(&trace, argc-1, &argv[1]);