diff options
Diffstat (limited to 'tools/perf/examples/bpf')
-rw-r--r-- | tools/perf/examples/bpf/5sec.c | 8 | ||||
-rw-r--r-- | tools/perf/examples/bpf/augmented_raw_syscalls.c | 175 | ||||
-rw-r--r-- | tools/perf/examples/bpf/augmented_syscalls.c | 169 | ||||
-rw-r--r-- | tools/perf/examples/bpf/empty.c | 13 | ||||
-rw-r--r-- | tools/perf/examples/bpf/etcsnoop.c | 76 | ||||
-rw-r--r-- | tools/perf/examples/bpf/hello.c | 24 |
6 files changed, 186 insertions, 279 deletions
diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c index e6b6181c6dc6..3bd7fc17631f 100644 --- a/tools/perf/examples/bpf/5sec.c +++ b/tools/perf/examples/bpf/5sec.c @@ -39,13 +39,15 @@ Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com> */ -#include <bpf.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> #define NSEC_PER_SEC 1000000000L -int probe(hrtimer_nanosleep, rqtp)(void *ctx, int err, long long sec) +SEC("hrtimer_nanosleep=hrtimer_nanosleep rqtp") +int hrtimer_nanosleep(void *ctx, int err, long long sec) { return sec / NSEC_PER_SEC == 5ULL; } -license(GPL); +char _license[] SEC("license") = "GPL"; diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index a262dcd020f4..9a03189d33d3 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -14,39 +14,52 @@ * code that will combine entry/exit in a strace like way. */ -#include <unistd.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> #include <linux/limits.h> -#include <linux/socket.h> -#include <pid_filter.h> -/* bpf-output associated map */ -bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); +// FIXME: These should come from system headers +typedef char bool; +typedef int pid_t; +typedef long long int __s64; +typedef __s64 time64_t; -/* - * string_args_len: one per syscall arg, 0 means not a string or don't copy it, - * PATH_MAX for copying everything, any other value to limit - * it a la 'strace -s strsize'. - */ -struct syscall { - bool enabled; - u16 string_args_len[6]; +struct timespec64 { + time64_t tv_sec; + long int tv_nsec; }; -bpf_map(syscalls, ARRAY, int, struct syscall, 512); +/* bpf-output associated map */ +struct __augmented_syscalls__ { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __type(key, int); + __type(value, __u32); + __uint(max_entries, __NR_CPUS__); +} __augmented_syscalls__ SEC(".maps"); /* * What to augment at entry? * * Pointer arg payloads (filenames, etc) passed from userspace to the kernel */ -bpf_map(syscalls_sys_enter, PROG_ARRAY, u32, u32, 512); +struct syscalls_sys_enter { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 512); +} syscalls_sys_enter SEC(".maps"); /* * What to augment at exit? * * Pointer arg payloads returned from the kernel (struct stat, etc) to userspace. */ -bpf_map(syscalls_sys_exit, PROG_ARRAY, u32, u32, 512); +struct syscalls_sys_exit { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 512); +} syscalls_sys_exit SEC(".maps"); struct syscall_enter_args { unsigned long long common_tp_fields; @@ -66,7 +79,38 @@ struct augmented_arg { char value[PATH_MAX]; }; -pid_filter(pids_filtered); +struct pids_filtered { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, pid_t); + __type(value, bool); + __uint(max_entries, 64); +} pids_filtered SEC(".maps"); + +/* + * Desired design of maximum size and alignment (see RFC2553) + */ +#define SS_MAXSIZE 128 /* Implementation specific max size */ + +typedef unsigned short sa_family_t; + +/* + * FIXME: Should come from system headers + * + * The definition uses anonymous union and struct in order to control the + * default alignment. + */ +struct sockaddr_storage { + union { + struct { + sa_family_t ss_family; /* address family */ + /* Following field(s) are implementation specific */ + char __data[SS_MAXSIZE - sizeof(unsigned short)]; + /* space to achieve desired size, */ + /* _SS_MAXSIZE value minus size of ss_family */ + }; + void *__align; /* implementation specific desired alignment */ + }; +}; struct augmented_args_payload { struct syscall_enter_args args; @@ -75,11 +119,17 @@ struct augmented_args_payload { struct augmented_arg arg, arg2; }; struct sockaddr_storage saddr; + char __data[sizeof(struct augmented_arg)]; }; }; // We need more tmp space than the BPF stack can give us -bpf_map(augmented_args_tmp, PERCPU_ARRAY, int, struct augmented_args_payload, 1); +struct augmented_args_tmp { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, int); + __type(value, struct augmented_args_payload); + __uint(max_entries, 1); +} augmented_args_tmp SEC(".maps"); static inline struct augmented_args_payload *augmented_args_payload(void) { @@ -90,14 +140,14 @@ static inline struct augmented_args_payload *augmented_args_payload(void) static inline int augmented__output(void *ctx, struct augmented_args_payload *args, int len) { /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ - return perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len); + return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len); } static inline unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len) { unsigned int augmented_len = sizeof(*augmented_arg); - int string_len = probe_read_str(&augmented_arg->value, arg_len, arg); + int string_len = bpf_probe_read_str(&augmented_arg->value, arg_len, arg); augmented_arg->size = augmented_arg->err = 0; /* @@ -146,7 +196,7 @@ int sys_enter_connect(struct syscall_enter_args *args) if (socklen > sizeof(augmented_args->saddr)) socklen = sizeof(augmented_args->saddr); - probe_read(&augmented_args->saddr, socklen, sockaddr_arg); + bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg); return augmented__output(args, augmented_args, len + socklen); } @@ -165,7 +215,7 @@ int sys_enter_sendto(struct syscall_enter_args *args) if (socklen > sizeof(augmented_args->saddr)) socklen = sizeof(augmented_args->saddr); - probe_read(&augmented_args->saddr, socklen, sockaddr_arg); + bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg); return augmented__output(args, augmented_args, len + socklen); } @@ -234,6 +284,80 @@ int sys_enter_renameat(struct syscall_enter_args *args) return augmented__output(args, augmented_args, len); } +#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ + +// we need just the start, get the size to then copy it +struct perf_event_attr_size { + __u32 type; + /* + * Size of the attr structure, for fwd/bwd compat. + */ + __u32 size; +}; + +SEC("!syscalls:sys_enter_perf_event_open") +int sys_enter_perf_event_open(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args = augmented_args_payload(); + const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read; + unsigned int len = sizeof(augmented_args->args); + + if (augmented_args == NULL) + goto failure; + + if (bpf_probe_read(&augmented_args->__data, sizeof(*attr), attr) < 0) + goto failure; + + attr_read = (const struct perf_event_attr_size *)augmented_args->__data; + + __u32 size = attr_read->size; + + if (!size) + size = PERF_ATTR_SIZE_VER0; + + if (size > sizeof(augmented_args->__data)) + goto failure; + + // Now that we read attr->size and tested it against the size limits, read it completely + if (bpf_probe_read(&augmented_args->__data, size, attr) < 0) + goto failure; + + return augmented__output(args, augmented_args, len + size); +failure: + return 1; /* Failure: don't filter */ +} + +SEC("!syscalls:sys_enter_clock_nanosleep") +int sys_enter_clock_nanosleep(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args = augmented_args_payload(); + const void *rqtp_arg = (const void *)args->args[2]; + unsigned int len = sizeof(augmented_args->args); + __u32 size = sizeof(struct timespec64); + + if (augmented_args == NULL) + goto failure; + + if (size > sizeof(augmented_args->__data)) + goto failure; + + bpf_probe_read(&augmented_args->__data, size, rqtp_arg); + + return augmented__output(args, augmented_args, len + size); +failure: + return 1; /* Failure: don't filter */ +} + +static pid_t getpid(void) +{ + return bpf_get_current_pid_tgid(); +} + +static bool pid_filter__has(struct pids_filtered *pids, pid_t pid) +{ + return bpf_map_lookup_elem(pids, &pid) != NULL; +} + SEC("raw_syscalls:sys_enter") int sys_enter(struct syscall_enter_args *args) { @@ -248,7 +372,6 @@ int sys_enter(struct syscall_enter_args *args) * initial, non-augmented raw_syscalls:sys_enter payload. */ unsigned int len = sizeof(augmented_args->args); - struct syscall *syscall; if (pid_filter__has(&pids_filtered, getpid())) return 0; @@ -257,7 +380,7 @@ int sys_enter(struct syscall_enter_args *args) if (augmented_args == NULL) return 1; - probe_read(&augmented_args->args, sizeof(augmented_args->args), args); + bpf_probe_read(&augmented_args->args, sizeof(augmented_args->args), args); /* * Jump to syscall specific augmenter, even if the default one, @@ -278,7 +401,7 @@ int sys_exit(struct syscall_exit_args *args) if (pid_filter__has(&pids_filtered, getpid())) return 0; - probe_read(&exit_args, sizeof(exit_args), args); + bpf_probe_read(&exit_args, sizeof(exit_args), args); /* * Jump to syscall specific return augmenter, even if the default one, * "!raw_syscalls:unaugmented" that will just return 1 to return the @@ -291,4 +414,4 @@ int sys_exit(struct syscall_exit_args *args) return 0; } -license(GPL); +char _license[] SEC("license") = "GPL"; diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c deleted file mode 100644 index 524fdb8534b3..000000000000 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ /dev/null @@ -1,169 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Augment syscalls with the contents of the pointer arguments. - * - * Test it with: - * - * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null - * - * It'll catch some openat syscalls related to the dynamic linked and - * the last one should be the one for '/etc/passwd'. - * - * This matches what is marshalled into the raw_syscall:sys_enter payload - * expected by the 'perf trace' beautifiers, and can be used by them, that will - * check if perf_sample->raw_data is more than what is expected for each - * syscalls:sys_{enter,exit}_SYSCALL tracepoint, uing the extra data as the - * contents of pointer arguments. - */ - -#include <stdio.h> -#include <linux/socket.h> - -/* bpf-output associated map */ -bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); - -struct syscall_exit_args { - unsigned long long common_tp_fields; - long syscall_nr; - long ret; -}; - -struct augmented_filename { - unsigned int size; - int reserved; - char value[256]; -}; - -#define augmented_filename_syscall(syscall) \ -struct augmented_enter_##syscall##_args { \ - struct syscall_enter_##syscall##_args args; \ - struct augmented_filename filename; \ -}; \ -int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ -{ \ - struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; \ - unsigned int len = sizeof(augmented_args); \ - probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ - augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \ - sizeof(augmented_args.filename.value), \ - args->filename_ptr); \ - if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { \ - len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \ - len &= sizeof(augmented_args.filename.value) - 1; \ - } \ - /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ - return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, len); \ -} \ -int syscall_exit(syscall)(struct syscall_exit_args *args) \ -{ \ - return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ \ -} - -struct syscall_enter_openat_args { - unsigned long long common_tp_fields; - long syscall_nr; - long dfd; - char *filename_ptr; - long flags; - long mode; -}; - -augmented_filename_syscall(openat); - -struct syscall_enter_open_args { - unsigned long long common_tp_fields; - long syscall_nr; - char *filename_ptr; - long flags; - long mode; -}; - -augmented_filename_syscall(open); - -struct syscall_enter_inotify_add_watch_args { - unsigned long long common_tp_fields; - long syscall_nr; - long fd; - char *filename_ptr; - long mask; -}; - -augmented_filename_syscall(inotify_add_watch); - -struct statbuf; - -struct syscall_enter_newstat_args { - unsigned long long common_tp_fields; - long syscall_nr; - char *filename_ptr; - struct stat *statbuf; -}; - -augmented_filename_syscall(newstat); - -#ifndef _K_SS_MAXSIZE -#define _K_SS_MAXSIZE 128 -#endif - -#define augmented_sockaddr_syscall(syscall) \ -struct augmented_enter_##syscall##_args { \ - struct syscall_enter_##syscall##_args args; \ - struct sockaddr_storage addr; \ -}; \ -int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ -{ \ - struct augmented_enter_##syscall##_args augmented_args; \ - unsigned long addrlen = sizeof(augmented_args.addr); \ - probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ -/* FIXME_CLANG_OPTIMIZATION_THAT_ACCESSES_USER_CONTROLLED_ADDRLEN_DESPITE_THIS_CHECK */ \ -/* if (addrlen > augmented_args.args.addrlen) */ \ -/* addrlen = augmented_args.args.addrlen; */ \ -/* */ \ - probe_read(&augmented_args.addr, addrlen, args->addr_ptr); \ - /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ - return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);\ -} \ -int syscall_exit(syscall)(struct syscall_exit_args *args) \ -{ \ - return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ \ -} - -struct sockaddr; - -struct syscall_enter_bind_args { - unsigned long long common_tp_fields; - long syscall_nr; - long fd; - struct sockaddr *addr_ptr; - unsigned long addrlen; -}; - -augmented_sockaddr_syscall(bind); - -struct syscall_enter_connect_args { - unsigned long long common_tp_fields; - long syscall_nr; - long fd; - struct sockaddr *addr_ptr; - unsigned long addrlen; -}; - -augmented_sockaddr_syscall(connect); - -struct syscall_enter_sendto_args { - unsigned long long common_tp_fields; - long syscall_nr; - long fd; - void *buff; - long len; - unsigned long flags; - struct sockaddr *addr_ptr; - long addr_len; -}; - -augmented_sockaddr_syscall(sendto); - -license(GPL); diff --git a/tools/perf/examples/bpf/empty.c b/tools/perf/examples/bpf/empty.c index 7d7fb0c9fe76..3e296c0c53d7 100644 --- a/tools/perf/examples/bpf/empty.c +++ b/tools/perf/examples/bpf/empty.c @@ -1,3 +1,12 @@ -#include <bpf/bpf.h> +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> -license(GPL); +struct syscall_enter_args; + +SEC("raw_syscalls:sys_enter") +int sys_enter(struct syscall_enter_args *args) +{ + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c deleted file mode 100644 index e81b535346c0..000000000000 --- a/tools/perf/examples/bpf/etcsnoop.c +++ /dev/null @@ -1,76 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Augment the filename syscalls with the contents of the filename pointer argument - * filtering only those that do not start with /etc/. - * - * Test it with: - * - * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null - * - * It'll catch some openat syscalls related to the dynamic linked and - * the last one should be the one for '/etc/passwd'. - * - * This matches what is marshalled into the raw_syscall:sys_enter payload - * expected by the 'perf trace' beautifiers, and can be used by them unmodified, - * which will be done as that feature is implemented in the next csets, for now - * it will appear in a dump done by the default tracepoint handler in 'perf trace', - * that uses bpf_output__fprintf() to just dump those contents, as done with - * the bpf-output event associated with the __bpf_output__ map declared in - * tools/perf/include/bpf/stdio.h. - */ - -#include <stdio.h> - -/* bpf-output associated map */ -bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); - -struct augmented_filename { - int size; - int reserved; - char value[64]; -}; - -#define augmented_filename_syscall_enter(syscall) \ -struct augmented_enter_##syscall##_args { \ - struct syscall_enter_##syscall##_args args; \ - struct augmented_filename filename; \ -}; \ -int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ -{ \ - char etc[6] = "/etc/"; \ - struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; \ - probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ - augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \ - sizeof(augmented_args.filename.value), \ - args->filename_ptr); \ - if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0) \ - return 0; \ - /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ - return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ - augmented_args.filename.size)); \ -} - -struct syscall_enter_openat_args { - unsigned long long common_tp_fields; - long syscall_nr; - long dfd; - char *filename_ptr; - long flags; - long mode; -}; - -augmented_filename_syscall_enter(openat); - -struct syscall_enter_open_args { - unsigned long long common_tp_fields; - long syscall_nr; - char *filename_ptr; - long flags; - long mode; -}; - -augmented_filename_syscall_enter(open); - -license(GPL); diff --git a/tools/perf/examples/bpf/hello.c b/tools/perf/examples/bpf/hello.c index cf3c2fdc7f79..e9080b0df158 100644 --- a/tools/perf/examples/bpf/hello.c +++ b/tools/perf/examples/bpf/hello.c @@ -1,9 +1,27 @@ -#include <stdio.h> +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> -int syscall_enter(openat)(void *args) +struct __bpf_stdout__ { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __type(key, int); + __type(value, __u32); + __uint(max_entries, __NR_CPUS__); +} __bpf_stdout__ SEC(".maps"); + +#define puts(from) \ + ({ const int __len = sizeof(from); \ + char __from[sizeof(from)] = from; \ + bpf_perf_event_output(args, &__bpf_stdout__, BPF_F_CURRENT_CPU, \ + &__from, __len & (sizeof(from) - 1)); }) + +struct syscall_enter_args; + +SEC("raw_syscalls:sys_enter") +int sys_enter(struct syscall_enter_args *args) { puts("Hello, world\n"); return 0; } -license(GPL); +char _license[] SEC("license") = "GPL"; |