From 65bb2e0fc5a67200db95911207ef9662aaf7ab9d Mon Sep 17 00:00:00 2001
From: Xu Wang
Date: Tue, 18 Aug 2020 07:16:11 +0000
Subject: libbpf: Convert comma to semicolon

Replace a comma between expression statements by a semicolon.

Signed-off-by: Xu Wang
Signed-off-by: Alexei Starovoitov
Acked-by: Yonghong Song
Link: https://lore.kernel.org/bpf/20200818071611.21923-1-vulab@iscas.ac.cn
---
 tools/lib/bpf/libbpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 5d20b2da4427..5055e1531e43 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8594,7 +8594,7 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
 	struct perf_buffer_params p = {};
 	struct perf_event_attr attr = { 0, };

-	attr.config = PERF_COUNT_SW_BPF_OUTPUT,
+	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
 	attr.type = PERF_TYPE_SOFTWARE;
 	attr.sample_type = PERF_SAMPLE_RAW;
 	attr.sample_period = 1;
--
cgit v1.2.3

From 8d708236058601f1b08b0ca2a1cd9fff28034416 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko
Date: Tue, 18 Aug 2020 14:33:50 -0700
Subject: libbpf: Disable -Wswitch-enum compiler warning

That compilation warning is more annoying than helpful.

Signed-off-by: Andrii Nakryiko
Signed-off-by: Alexei Starovoitov
Link: https://lore.kernel.org/bpf/20200818213356.2629020-2-andriin@fb.com
---
 tools/lib/bpf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'tools')

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index bf8ed134cb8a..95c946e94ca5 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -107,7 +107,7 @@ ifeq ($(feature-reallocarray), 0)
 endif

 # Append required CFLAGS
-override CFLAGS += $(EXTRA_WARNINGS)
+override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum
 override CFLAGS += -Werror -Wall
 override CFLAGS += -fPIC
 override CFLAGS += $(INCLUDES)
--
cgit v1.2.3

From 47b6cb4d0add454859f386337e6079e01ac7db8f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko
Date: Tue, 18 Aug 2020 14:33:51 -0700
Subject: libbpf: Make kernel feature probing lazy

Turn libbpf's kernel feature probing into lazily-performed checks. This
allows skipping unnecessary feature checks if a given BPF application
doesn't rely on a particular kernel feature. As the number of feature
probes grows, this lets libbpf perform fewer unnecessary syscalls and
scale better long-term.

By decoupling feature checks from bpf_object, it's also possible to
perform feature probing from libbpf static helpers and low-level APIs,
if necessary.

Signed-off-by: Andrii Nakryiko
Signed-off-by: Alexei Starovoitov
Link: https://lore.kernel.org/bpf/20200818213356.2629020-3-andriin@fb.com
---
 tools/lib/bpf/libbpf.c | 150 ++++++++++++++++++++++++++++---------------------
 1 file changed, 86 insertions(+), 64 deletions(-)
(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 5055e1531e43..2ee168822d33 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -165,23 +165,26 @@ static inline __u64 ptr_to_u64(const void *ptr)
 	return (__u64) (unsigned long) ptr;
 }

-struct bpf_capabilities {
+enum kern_feature_id {
 	/* v4.14: kernel support for program & map names. */
-	__u32 name:1;
+	FEAT_PROG_NAME,
 	/* v5.2: kernel support for global data sections.
*/ - __u32 global_data:1; + FEAT_GLOBAL_DATA, /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */ - __u32 btf_func:1; + FEAT_BTF_FUNC, /* BTF_KIND_VAR and BTF_KIND_DATASEC support */ - __u32 btf_datasec:1; - /* BPF_F_MMAPABLE is supported for arrays */ - __u32 array_mmap:1; + FEAT_BTF_DATASEC, /* BTF_FUNC_GLOBAL is supported */ - __u32 btf_func_global:1; + FEAT_BTF_GLOBAL_FUNC, + /* BPF_F_MMAPABLE is supported for arrays */ + FEAT_ARRAY_MMAP, /* kernel support for expected_attach_type in BPF_PROG_LOAD */ - __u32 exp_attach_type:1; + FEAT_EXP_ATTACH_TYPE, + __FEAT_CNT, }; +static bool kernel_supports(enum kern_feature_id feat_id); + enum reloc_type { RELO_LD64, RELO_CALL, @@ -253,8 +256,6 @@ struct bpf_program { __u32 func_info_rec_size; __u32 func_info_cnt; - struct bpf_capabilities *caps; - void *line_info; __u32 line_info_rec_size; __u32 line_info_cnt; @@ -436,8 +437,6 @@ struct bpf_object { void *priv; bpf_object_clear_priv_t clear_priv; - struct bpf_capabilities caps; - char path[]; }; #define obj_elf_valid(o) ((o)->efile.elf) @@ -561,7 +560,6 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, if (err) return err; - prog.caps = &obj->caps; progs = obj->programs; nr_progs = obj->nr_programs; @@ -2340,18 +2338,18 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx) static bool btf_needs_sanitization(struct bpf_object *obj) { - bool has_func_global = obj->caps.btf_func_global; - bool has_datasec = obj->caps.btf_datasec; - bool has_func = obj->caps.btf_func; + bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC); + bool has_datasec = kernel_supports(FEAT_BTF_DATASEC); + bool has_func = kernel_supports(FEAT_BTF_FUNC); return !has_func || !has_datasec || !has_func_global; } static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) { - bool has_func_global = obj->caps.btf_func_global; - bool has_datasec = obj->caps.btf_datasec; - bool has_func = obj->caps.btf_func; + bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC); + bool has_datasec = kernel_supports(FEAT_BTF_DATASEC); + bool has_func = kernel_supports(FEAT_BTF_FUNC); struct btf_type *t; int i, j, vlen; @@ -3433,8 +3431,7 @@ bpf_object__probe_loading(struct bpf_object *obj) return 0; } -static int -bpf_object__probe_name(struct bpf_object *obj) +static int probe_kern_prog_name(void) { struct bpf_load_program_attr attr; struct bpf_insn insns[] = { @@ -3453,15 +3450,14 @@ bpf_object__probe_name(struct bpf_object *obj) attr.name = "test"; ret = bpf_load_program_xattr(&attr, NULL, 0); if (ret >= 0) { - obj->caps.name = 1; close(ret); + return 1; } return 0; } -static int -bpf_object__probe_global_data(struct bpf_object *obj) +static int probe_kern_global_data(void) { struct bpf_load_program_attr prg_attr; struct bpf_create_map_attr map_attr; @@ -3498,16 +3494,16 @@ bpf_object__probe_global_data(struct bpf_object *obj) prg_attr.license = "GPL"; ret = bpf_load_program_xattr(&prg_attr, NULL, 0); + close(map); if (ret >= 0) { - obj->caps.global_data = 1; close(ret); + return 1; } - close(map); return 0; } -static int bpf_object__probe_btf_func(struct bpf_object *obj) +static int probe_kern_btf_func(void) { static const char strs[] = "\0int\0x\0a"; /* void x(int a) {} */ @@ -3525,7 +3521,6 @@ static int bpf_object__probe_btf_func(struct bpf_object *obj) btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); if (btf_fd >= 0) { - obj->caps.btf_func = 1; close(btf_fd); return 1; } @@ -3533,7 +3528,7 @@ static int 
bpf_object__probe_btf_func(struct bpf_object *obj) return 0; } -static int bpf_object__probe_btf_func_global(struct bpf_object *obj) +static int probe_kern_btf_func_global(void) { static const char strs[] = "\0int\0x\0a"; /* static void x(int a) {} */ @@ -3551,7 +3546,6 @@ static int bpf_object__probe_btf_func_global(struct bpf_object *obj) btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); if (btf_fd >= 0) { - obj->caps.btf_func_global = 1; close(btf_fd); return 1; } @@ -3559,7 +3553,7 @@ static int bpf_object__probe_btf_func_global(struct bpf_object *obj) return 0; } -static int bpf_object__probe_btf_datasec(struct bpf_object *obj) +static int probe_kern_btf_datasec(void) { static const char strs[] = "\0x\0.data"; /* static int a; */ @@ -3578,7 +3572,6 @@ static int bpf_object__probe_btf_datasec(struct bpf_object *obj) btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); if (btf_fd >= 0) { - obj->caps.btf_datasec = 1; close(btf_fd); return 1; } @@ -3586,7 +3579,7 @@ static int bpf_object__probe_btf_datasec(struct bpf_object *obj) return 0; } -static int bpf_object__probe_array_mmap(struct bpf_object *obj) +static int probe_kern_array_mmap(void) { struct bpf_create_map_attr attr = { .map_type = BPF_MAP_TYPE_ARRAY, @@ -3599,16 +3592,13 @@ static int bpf_object__probe_array_mmap(struct bpf_object *obj) fd = bpf_create_map_xattr(&attr); if (fd >= 0) { - obj->caps.array_mmap = 1; close(fd); return 1; } - return 0; } -static int -bpf_object__probe_exp_attach_type(struct bpf_object *obj) +static int probe_kern_exp_attach_type(void) { struct bpf_load_program_attr attr; struct bpf_insn insns[] = { @@ -3631,34 +3621,67 @@ bpf_object__probe_exp_attach_type(struct bpf_object *obj) fd = bpf_load_program_xattr(&attr, NULL, 0); if (fd >= 0) { - obj->caps.exp_attach_type = 1; close(fd); return 1; } return 0; } -static int -bpf_object__probe_caps(struct bpf_object *obj) -{ - int (*probe_fn[])(struct bpf_object *obj) = { - bpf_object__probe_name, - bpf_object__probe_global_data, - bpf_object__probe_btf_func, - bpf_object__probe_btf_func_global, - bpf_object__probe_btf_datasec, - bpf_object__probe_array_mmap, - bpf_object__probe_exp_attach_type, - }; - int i, ret; +enum kern_feature_result { + FEAT_UNKNOWN = 0, + FEAT_SUPPORTED = 1, + FEAT_MISSING = 2, +}; - for (i = 0; i < ARRAY_SIZE(probe_fn); i++) { - ret = probe_fn[i](obj); - if (ret < 0) - pr_debug("Probe #%d failed with %d.\n", i, ret); +typedef int (*feature_probe_fn)(void); + +static struct kern_feature_desc { + const char *desc; + feature_probe_fn probe; + enum kern_feature_result res; +} feature_probes[__FEAT_CNT] = { + [FEAT_PROG_NAME] = { + "BPF program name", probe_kern_prog_name, + }, + [FEAT_GLOBAL_DATA] = { + "global variables", probe_kern_global_data, + }, + [FEAT_BTF_FUNC] = { + "BTF functions", probe_kern_btf_func, + }, + [FEAT_BTF_GLOBAL_FUNC] = { + "BTF global function", probe_kern_btf_func_global, + }, + [FEAT_BTF_DATASEC] = { + "BTF data section and variable", probe_kern_btf_datasec, + }, + [FEAT_ARRAY_MMAP] = { + "ARRAY map mmap()", probe_kern_array_mmap, + }, + [FEAT_EXP_ATTACH_TYPE] = { + "BPF_PROG_LOAD expected_attach_type attribute", + probe_kern_exp_attach_type, + }, +}; + +static bool kernel_supports(enum kern_feature_id feat_id) +{ + struct kern_feature_desc *feat = &feature_probes[feat_id]; + int ret; + + if (READ_ONCE(feat->res) == FEAT_UNKNOWN) { + ret = feat->probe(); + if (ret > 0) { + WRITE_ONCE(feat->res, FEAT_SUPPORTED); + } else if (ret == 0) { + 
WRITE_ONCE(feat->res, FEAT_MISSING); + } else { + pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); + WRITE_ONCE(feat->res, FEAT_MISSING); + } } - return 0; + return READ_ONCE(feat->res) == FEAT_SUPPORTED; } static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) @@ -3760,7 +3783,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map) memset(&create_attr, 0, sizeof(create_attr)); - if (obj->caps.name) + if (kernel_supports(FEAT_PROG_NAME)) create_attr.name = map->name; create_attr.map_ifindex = map->map_ifindex; create_attr.map_type = def->type; @@ -5364,12 +5387,12 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); load_attr.prog_type = prog->type; /* old kernels might not support specifying expected_attach_type */ - if (!prog->caps->exp_attach_type && prog->sec_def && + if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def && prog->sec_def->is_exp_attach_type_optional) load_attr.expected_attach_type = 0; else load_attr.expected_attach_type = prog->expected_attach_type; - if (prog->caps->name) + if (kernel_supports(FEAT_PROG_NAME)) load_attr.name = prog->name; load_attr.insns = insns; load_attr.insns_cnt = insns_cnt; @@ -5387,7 +5410,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, } /* specify func_info/line_info only if kernel supports them */ btf_fd = bpf_object__btf_fd(prog->obj); - if (btf_fd >= 0 && prog->obj->caps.btf_func) { + if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) { load_attr.prog_btf_fd = btf_fd; load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; @@ -5750,11 +5773,11 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj) bpf_object__for_each_map(m, obj) { if (!bpf_map__is_internal(m)) continue; - if (!obj->caps.global_data) { + if (!kernel_supports(FEAT_GLOBAL_DATA)) { pr_warn("kernel doesn't support global data\n"); return -ENOTSUP; } - if (!obj->caps.array_mmap) + if (!kernel_supports(FEAT_ARRAY_MMAP)) m->def.map_flags ^= BPF_F_MMAPABLE; } @@ -5904,7 +5927,6 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) } err = bpf_object__probe_loading(obj); - err = err ? : bpf_object__probe_caps(obj); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); err = err ? : bpf_object__sanitize_and_load_btf(obj); err = err ? : bpf_object__sanitize_maps(obj); -- cgit v1.2.3 From bb180fb2401dfed0ba85699064e60d62af351ff9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 14:33:52 -0700 Subject: libbpf: Factor out common logic of testing and closing FD Factor out common piece of logic that detects support for a feature based on successfully created FD. Also take care of closing FD, if it was created. 
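For illustration only (not part of this patch): with probe_fd() factored
out, any probe that just needs to know whether the kernel accepted an
object collapses into a single call. A hypothetical future probe might
look like the sketch below, where probe_kern_some_flag and the flag
under test are placeholders, while probe_fd(), struct
bpf_create_map_attr, and bpf_create_map_xattr() are the libbpf-internal
APIs the real probes in this patch use:

	/* hypothetical probe: does the kernel accept maps with some new flag? */
	static int probe_kern_some_flag(void)
	{
		struct bpf_create_map_attr attr = {
			.map_type = BPF_MAP_TYPE_ARRAY,
			.map_flags = 0,		/* would set the flag under test here */
			.key_size = sizeof(int),
			.value_size = sizeof(int),
			.max_entries = 1,
		};

		/* FD-or-error from map creation is the entire probe result */
		return probe_fd(bpf_create_map_xattr(&attr));
	}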
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200818213356.2629020-4-andriin@fb.com --- tools/lib/bpf/libbpf.c | 70 ++++++++++++-------------------------------------- 1 file changed, 17 insertions(+), 53 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2ee168822d33..08875ab9fe35 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3431,6 +3431,13 @@ bpf_object__probe_loading(struct bpf_object *obj) return 0; } +static int probe_fd(int fd) +{ + if (fd >= 0) + close(fd); + return fd >= 0; +} + static int probe_kern_prog_name(void) { struct bpf_load_program_attr attr; @@ -3449,12 +3456,7 @@ static int probe_kern_prog_name(void) attr.license = "GPL"; attr.name = "test"; ret = bpf_load_program_xattr(&attr, NULL, 0); - if (ret >= 0) { - close(ret); - return 1; - } - - return 0; + return probe_fd(ret); } static int probe_kern_global_data(void) @@ -3495,12 +3497,7 @@ static int probe_kern_global_data(void) ret = bpf_load_program_xattr(&prg_attr, NULL, 0); close(map); - if (ret >= 0) { - close(ret); - return 1; - } - - return 0; + return probe_fd(ret); } static int probe_kern_btf_func(void) @@ -3516,16 +3513,9 @@ static int probe_kern_btf_func(void) /* FUNC x */ /* [3] */ BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), }; - int btf_fd; - btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs)); - if (btf_fd >= 0) { - close(btf_fd); - return 1; - } - - return 0; + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); } static int probe_kern_btf_func_global(void) @@ -3541,16 +3531,9 @@ static int probe_kern_btf_func_global(void) /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), }; - int btf_fd; - btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs)); - if (btf_fd >= 0) { - close(btf_fd); - return 1; - } - - return 0; + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); } static int probe_kern_btf_datasec(void) @@ -3567,16 +3550,9 @@ static int probe_kern_btf_datasec(void) BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), BTF_VAR_SECINFO_ENC(2, 0, 4), }; - int btf_fd; - btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs)); - if (btf_fd >= 0) { - close(btf_fd); - return 1; - } - - return 0; + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); } static int probe_kern_array_mmap(void) @@ -3588,14 +3564,8 @@ static int probe_kern_array_mmap(void) .value_size = sizeof(int), .max_entries = 1, }; - int fd; - fd = bpf_create_map_xattr(&attr); - if (fd >= 0) { - close(fd); - return 1; - } - return 0; + return probe_fd(bpf_create_map_xattr(&attr)); } static int probe_kern_exp_attach_type(void) @@ -3605,7 +3575,6 @@ static int probe_kern_exp_attach_type(void) BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; - int fd; memset(&attr, 0, sizeof(attr)); /* use any valid combination of program type and (optional) @@ -3619,12 +3588,7 @@ static int probe_kern_exp_attach_type(void) attr.insns_cnt = ARRAY_SIZE(insns); attr.license = "GPL"; - fd = bpf_load_program_xattr(&attr, NULL, 0); - if (fd >= 0) { - close(fd); - return 1; - } - return 0; + return probe_fd(bpf_load_program_xattr(&attr, NULL, 0)); } enum kern_feature_result { -- cgit v1.2.3 From 109cea5a594f921632ca599b385aaa0575971045 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko 
Date: Tue, 18 Aug 2020 14:33:53 -0700 Subject: libbpf: Sanitize BPF program code for bpf_probe_read_{kernel, user}[_str] Add BPF program code sanitization pass, replacing calls to BPF bpf_probe_read_{kernel,user}[_str]() helpers with bpf_probe_read[_str](), if libbpf detects that kernel doesn't support new variants. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200818213356.2629020-5-andriin@fb.com --- tools/lib/bpf/libbpf.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 08875ab9fe35..e3ab1794d2c3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -180,6 +180,8 @@ enum kern_feature_id { FEAT_ARRAY_MMAP, /* kernel support for expected_attach_type in BPF_PROG_LOAD */ FEAT_EXP_ATTACH_TYPE, + /* bpf_probe_read_{kernel,user}[_str] helpers */ + FEAT_PROBE_READ_KERN, __FEAT_CNT, }; @@ -3591,6 +3593,27 @@ static int probe_kern_exp_attach_type(void) return probe_fd(bpf_load_program_xattr(&attr, NULL, 0)); } +static int probe_kern_probe_read_kernel(void) +{ + struct bpf_load_program_attr attr; + struct bpf_insn insns[] = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ + BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ + BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), + BPF_EXIT_INSN(), + }; + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_KPROBE; + attr.insns = insns; + attr.insns_cnt = ARRAY_SIZE(insns); + attr.license = "GPL"; + + return probe_fd(bpf_load_program_xattr(&attr, NULL, 0)); +} + enum kern_feature_result { FEAT_UNKNOWN = 0, FEAT_SUPPORTED = 1, @@ -3626,6 +3649,9 @@ static struct kern_feature_desc { "BPF_PROG_LOAD expected_attach_type attribute", probe_kern_exp_attach_type, }, + [FEAT_PROBE_READ_KERN] = { + "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, + } }; static bool kernel_supports(enum kern_feature_id feat_id) @@ -5335,6 +5361,53 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) return 0; } +static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id) +{ + __u8 class = BPF_CLASS(insn->code); + + if ((class == BPF_JMP || class == BPF_JMP32) && + BPF_OP(insn->code) == BPF_CALL && + BPF_SRC(insn->code) == BPF_K && + insn->src_reg == 0 && insn->dst_reg == 0) { + if (func_id) + *func_id = insn->imm; + return true; + } + return false; +} + +static int bpf_object__sanitize_prog(struct bpf_object* obj, struct bpf_program *prog) +{ + struct bpf_insn *insn = prog->insns; + enum bpf_func_id func_id; + int i; + + for (i = 0; i < prog->insns_cnt; i++, insn++) { + if (!insn_is_helper_call(insn, &func_id)) + continue; + + /* on kernels that don't yet support + * bpf_probe_read_{kernel,user}[_str] helpers, fall back + * to bpf_probe_read() which works well for old kernels + */ + switch (func_id) { + case BPF_FUNC_probe_read_kernel: + case BPF_FUNC_probe_read_user: + if (!kernel_supports(FEAT_PROBE_READ_KERN)) + insn->imm = BPF_FUNC_probe_read; + break; + case BPF_FUNC_probe_read_kernel_str: + case BPF_FUNC_probe_read_user_str: + if (!kernel_supports(FEAT_PROBE_READ_KERN)) + insn->imm = BPF_FUNC_probe_read_str; + break; + default: + break; + } + } + return 0; +} + static int load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, char *license, __u32 kern_version, int *pfd) @@ 
-5549,6 +5622,13 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
 	size_t i;
 	int err;

+	for (i = 0; i < obj->nr_programs; i++) {
+		prog = &obj->programs[i];
+		err = bpf_object__sanitize_prog(obj, prog);
+		if (err)
+			return err;
+	}
+
 	for (i = 0; i < obj->nr_programs; i++) {
 		prog = &obj->programs[i];
 		if (bpf_program__is_function_storage(prog, obj))
--
cgit v1.2.3

From 02f47faa25db134f6043fb6b12a68b5d4c980bb6 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko
Date: Tue, 18 Aug 2020 14:33:54 -0700
Subject: selftests/bpf: Fix test_vmlinux test to use bpf_probe_read_user()

The test is reading a UAPI kernel structure from user-space, so it
doesn't need CO-RE relocations and has to use bpf_probe_read_user().

Fixes: acbd06206bbb ("selftests/bpf: Add vmlinux.h selftest exercising tracing of syscalls")
Signed-off-by: Andrii Nakryiko
Signed-off-by: Alexei Starovoitov
Link: https://lore.kernel.org/bpf/20200818213356.2629020-6-andriin@fb.com
---
 tools/testing/selftests/bpf/progs/test_vmlinux.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)
(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c
index 29fa09d6a6c6..e9dfa0313d1b 100644
--- a/tools/testing/selftests/bpf/progs/test_vmlinux.c
+++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c
@@ -19,12 +19,14 @@ SEC("tp/syscalls/sys_enter_nanosleep")
 int handle__tp(struct trace_event_raw_sys_enter *args)
 {
 	struct __kernel_timespec *ts;
+	long tv_nsec;

 	if (args->id != __NR_nanosleep)
 		return 0;

 	ts = (void *)args->args[0];
-	if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+	if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+	    tv_nsec != MY_TV_NSEC)
 		return 0;

 	tp_called = true;
@@ -35,12 +37,14 @@ SEC("raw_tp/sys_enter")
 int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id)
 {
 	struct __kernel_timespec *ts;
+	long tv_nsec;

 	if (id != __NR_nanosleep)
 		return 0;

 	ts = (void *)PT_REGS_PARM1_CORE(regs);
-	if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+	if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+	    tv_nsec != MY_TV_NSEC)
 		return 0;

 	raw_tp_called = true;
@@ -51,12 +55,14 @@ SEC("tp_btf/sys_enter")
 int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id)
 {
 	struct __kernel_timespec *ts;
+	long tv_nsec;

 	if (id != __NR_nanosleep)
 		return 0;

 	ts = (void *)PT_REGS_PARM1_CORE(regs);
-	if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+	if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+	    tv_nsec != MY_TV_NSEC)
 		return 0;

 	tp_btf_called = true;
--
cgit v1.2.3

From 70785cfb19287b4b075a266887be5e024bf1206a Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko
Date: Tue, 18 Aug 2020 14:33:55 -0700
Subject: libbpf: Switch tracing and CO-RE helper macros to bpf_probe_read_kernel()

Now that libbpf can automatically fall back to bpf_probe_read() on old
kernels not yet supporting bpf_probe_read_kernel(), switch libbpf
BPF-side helper macros to use the appropriate BPF helper for reading
kernel data.
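A minimal BPF-side sketch of the net effect (illustrative, not taken
from this patch; the kprobe target, program name, and buffer size are
arbitrary, and the includes assume libbpf's installed headers):

	#include <linux/bpf.h>
	#include <linux/ptrace.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	char LICENSE[] SEC("license") = "GPL";

	SEC("kprobe/do_nanosleep")
	int probe_read_demo(struct pt_regs *ctx)
	{
		char buf[16];

		/* Written against the new kernel-only helper; on pre-5.5
		 * kernels libbpf's sanitization pass rewrites this call to
		 * the old bpf_probe_read() at load time, so the same object
		 * loads everywhere.
		 */
		bpf_probe_read_kernel(buf, sizeof(buf), (void *)PT_REGS_PARM1(ctx));
		return 0;
	}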
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Cc: Ilya Leoshkevich Link: https://lore.kernel.org/bpf/20200818213356.2629020-7-andriin@fb.com --- tools/lib/bpf/bpf_core_read.h | 40 ++++++++++++++++++++++------------------ tools/lib/bpf/bpf_tracing.h | 4 ++-- 2 files changed, 24 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index eae5cccff761..03152cb143b7 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -24,27 +24,29 @@ enum bpf_field_info_kind { #if __BYTE_ORDER == __LITTLE_ENDIAN #define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ - bpf_probe_read((void *)dst, \ - __CORE_RELO(src, fld, BYTE_SIZE), \ - (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) + bpf_probe_read_kernel( \ + (void *)dst, \ + __CORE_RELO(src, fld, BYTE_SIZE), \ + (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) #else /* semantics of LSHIFT_64 assumes loading values into low-ordered bytes, so * for big-endian we need to adjust destination pointer accordingly, based on * field byte size */ #define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ - bpf_probe_read((void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \ - __CORE_RELO(src, fld, BYTE_SIZE), \ - (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) + bpf_probe_read_kernel( \ + (void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \ + __CORE_RELO(src, fld, BYTE_SIZE), \ + (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) #endif /* * Extract bitfield, identified by s->field, and return its value as u64. * All this is done in relocatable manner, so bitfield changes such as * signedness, bit size, offset changes, this will be handled automatically. - * This version of macro is using bpf_probe_read() to read underlying integer - * storage. Macro functions as an expression and its return type is - * bpf_probe_read()'s return value: 0, on success, <0 on error. + * This version of macro is using bpf_probe_read_kernel() to read underlying + * integer storage. Macro functions as an expression and its return type is + * bpf_probe_read_kernel()'s return value: 0, on success, <0 on error. */ #define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({ \ unsigned long long val = 0; \ @@ -99,8 +101,8 @@ enum bpf_field_info_kind { __builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE) /* - * bpf_core_read() abstracts away bpf_probe_read() call and captures offset - * relocation for source address using __builtin_preserve_access_index() + * bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures + * offset relocation for source address using __builtin_preserve_access_index() * built-in, provided by Clang. * * __builtin_preserve_access_index() takes as an argument an expression of @@ -115,8 +117,8 @@ enum bpf_field_info_kind { * (local) BTF, used to record relocation. */ #define bpf_core_read(dst, sz, src) \ - bpf_probe_read(dst, sz, \ - (const void *)__builtin_preserve_access_index(src)) + bpf_probe_read_kernel(dst, sz, \ + (const void *)__builtin_preserve_access_index(src)) /* * bpf_core_read_str() is a thin wrapper around bpf_probe_read_str() @@ -124,8 +126,8 @@ enum bpf_field_info_kind { * argument. 
*/ #define bpf_core_read_str(dst, sz, src) \ - bpf_probe_read_str(dst, sz, \ - (const void *)__builtin_preserve_access_index(src)) + bpf_probe_read_kernel_str(dst, sz, \ + (const void *)__builtin_preserve_access_index(src)) #define ___concat(a, b) a ## b #define ___apply(fn, n) ___concat(fn, n) @@ -239,15 +241,17 @@ enum bpf_field_info_kind { * int x = BPF_CORE_READ(s, a.b.c, d.e, f, g); * * BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF - * CO-RE relocatable bpf_probe_read() wrapper) calls, logically equivalent to: + * CO-RE relocatable bpf_probe_read_kernel() wrapper) calls, logically + * equivalent to: * 1. const void *__t = s->a.b.c; * 2. __t = __t->d.e; * 3. __t = __t->f; * 4. return __t->g; * * Equivalence is logical, because there is a heavy type casting/preservation - * involved, as well as all the reads are happening through bpf_probe_read() - * calls using __builtin_preserve_access_index() to emit CO-RE relocations. + * involved, as well as all the reads are happening through + * bpf_probe_read_kernel() calls using __builtin_preserve_access_index() to + * emit CO-RE relocations. * * N.B. Only up to 9 "field accessors" are supported, which should be more * than enough for any practical purpose. diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index eebf020cbe3e..f9ef37707888 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -289,9 +289,9 @@ struct pt_regs; #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP #else #define BPF_KPROBE_READ_RET_IP(ip, ctx) \ - ({ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) + ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \ - ({ bpf_probe_read(&(ip), sizeof(ip), \ + ({ bpf_probe_read_kernel(&(ip), sizeof(ip), \ (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) #endif -- cgit v1.2.3 From 68b08647c7d56ea15114435c3023adc017d11b81 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 14:33:56 -0700 Subject: libbpf: Detect minimal BTF support and skip BTF loading, if missing Detect whether a kernel supports any BTF at all, and if not, don't even attempt loading BTF to avoid unnecessary log messages like: libbpf: Error loading BTF: Invalid argument(22) libbpf: Error loading .BTF into kernel: -22. BTF is optional, ignoring. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200818213356.2629020-8-andriin@fb.com --- tools/lib/bpf/libbpf.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e3ab1794d2c3..775fa6317483 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -170,6 +170,8 @@ enum kern_feature_id { FEAT_PROG_NAME, /* v5.2: kernel support for global data sections. 
*/ FEAT_GLOBAL_DATA, + /* BTF support */ + FEAT_BTF, /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */ FEAT_BTF_FUNC, /* BTF_KIND_VAR and BTF_KIND_DATASEC support */ @@ -2533,6 +2535,15 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) if (!obj->btf) return 0; + if (!kernel_supports(FEAT_BTF)) { + if (kernel_needs_btf(obj)) { + err = -EOPNOTSUPP; + goto report; + } + pr_debug("Kernel doesn't support BTF, skipping uploading it.\n"); + return 0; + } + sanitize = btf_needs_sanitization(obj); if (sanitize) { const void *raw_data; @@ -2558,6 +2569,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) } btf__free(kern_btf); } +report: if (err) { btf_mandatory = kernel_needs_btf(obj); pr_warn("Error loading .BTF into kernel: %d. %s\n", err, @@ -3502,6 +3514,18 @@ static int probe_kern_global_data(void) return probe_fd(ret); } +static int probe_kern_btf(void) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + static int probe_kern_btf_func(void) { static const char strs[] = "\0int\0x\0a"; @@ -3633,6 +3657,9 @@ static struct kern_feature_desc { [FEAT_GLOBAL_DATA] = { "global variables", probe_kern_global_data, }, + [FEAT_BTF] = { + "minimal BTF", probe_kern_btf, + }, [FEAT_BTF_FUNC] = { "BTF functions", probe_kern_btf_func, }, -- cgit v1.2.3 From 81ba0889027505b7d5136319117e473a69a923c4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 15:39:13 -0700 Subject: libbpf: Improve error logging for mismatched BTF kind cases Instead of printing out integer value of BTF kind, print out a string representation of a kind. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200818223921.2911963-2-andriin@fb.com --- tools/lib/bpf/libbpf.c | 59 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 775fa6317483..94d92763bcb1 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1886,6 +1886,29 @@ resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id) return btf_is_func_proto(t) ? t : NULL; } +static const char *btf_kind_str(const struct btf_type *t) +{ + switch (btf_kind(t)) { + case BTF_KIND_UNKN: return "void"; + case BTF_KIND_INT: return "int"; + case BTF_KIND_PTR: return "ptr"; + case BTF_KIND_ARRAY: return "array"; + case BTF_KIND_STRUCT: return "struct"; + case BTF_KIND_UNION: return "union"; + case BTF_KIND_ENUM: return "enum"; + case BTF_KIND_FWD: return "fwd"; + case BTF_KIND_TYPEDEF: return "typedef"; + case BTF_KIND_VOLATILE: return "volatile"; + case BTF_KIND_CONST: return "const"; + case BTF_KIND_RESTRICT: return "restrict"; + case BTF_KIND_FUNC: return "func"; + case BTF_KIND_FUNC_PROTO: return "func_proto"; + case BTF_KIND_VAR: return "var"; + case BTF_KIND_DATASEC: return "datasec"; + default: return "unknown"; + } +} + /* * Fetch integer attribute of BTF map definition. 
Such attributes are * represented using a pointer to an array, in which dimensionality of array @@ -1902,8 +1925,8 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf, const struct btf_type *arr_t; if (!btf_is_ptr(t)) { - pr_warn("map '%s': attr '%s': expected PTR, got %u.\n", - map_name, name, btf_kind(t)); + pr_warn("map '%s': attr '%s': expected PTR, got %s.\n", + map_name, name, btf_kind_str(t)); return false; } @@ -1914,8 +1937,8 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf, return false; } if (!btf_is_array(arr_t)) { - pr_warn("map '%s': attr '%s': expected ARRAY, got %u.\n", - map_name, name, btf_kind(arr_t)); + pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n", + map_name, name, btf_kind_str(arr_t)); return false; } arr_info = btf_array(arr_t); @@ -2009,8 +2032,8 @@ static int parse_btf_map_def(struct bpf_object *obj, return -EINVAL; } if (!btf_is_ptr(t)) { - pr_warn("map '%s': key spec is not PTR: %u.\n", - map->name, btf_kind(t)); + pr_warn("map '%s': key spec is not PTR: %s.\n", + map->name, btf_kind_str(t)); return -EINVAL; } sz = btf__resolve_size(obj->btf, t->type); @@ -2051,8 +2074,8 @@ static int parse_btf_map_def(struct bpf_object *obj, return -EINVAL; } if (!btf_is_ptr(t)) { - pr_warn("map '%s': value spec is not PTR: %u.\n", - map->name, btf_kind(t)); + pr_warn("map '%s': value spec is not PTR: %s.\n", + map->name, btf_kind_str(t)); return -EINVAL; } sz = btf__resolve_size(obj->btf, t->type); @@ -2109,14 +2132,14 @@ static int parse_btf_map_def(struct bpf_object *obj, t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type, NULL); if (!btf_is_ptr(t)) { - pr_warn("map '%s': map-in-map inner def is of unexpected kind %u.\n", - map->name, btf_kind(t)); + pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", + map->name, btf_kind_str(t)); return -EINVAL; } t = skip_mods_and_typedefs(obj->btf, t->type, NULL); if (!btf_is_struct(t)) { - pr_warn("map '%s': map-in-map inner def is of unexpected kind %u.\n", - map->name, btf_kind(t)); + pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", + map->name, btf_kind_str(t)); return -EINVAL; } @@ -2207,8 +2230,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, return -EINVAL; } if (!btf_is_var(var)) { - pr_warn("map '%s': unexpected var kind %u.\n", - map_name, btf_kind(var)); + pr_warn("map '%s': unexpected var kind %s.\n", + map_name, btf_kind_str(var)); return -EINVAL; } if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED && @@ -2220,8 +2243,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, def = skip_mods_and_typedefs(obj->btf, var->type, NULL); if (!btf_is_struct(def)) { - pr_warn("map '%s': unexpected def kind %u.\n", - map_name, btf_kind(var)); + pr_warn("map '%s': unexpected def kind %s.\n", + map_name, btf_kind_str(var)); return -EINVAL; } if (def->size > vi->size) { @@ -4212,8 +4235,8 @@ static int bpf_core_spec_parse(const struct btf *btf, return sz; spec->bit_offset += access_idx * sz * 8; } else { - pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n", - type_id, spec_str, i, id, btf_kind(t)); + pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", + type_id, spec_str, i, id, btf_kind_str(t)); return -EINVAL; } } -- cgit v1.2.3 From 28b93c64499ae09d9dc8c04123b15f8654a93c4c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 15:39:14 -0700 Subject: libbpf: Clean up and improve CO-RE reloc logging Add logging of 
local/target type kind (struct/union/typedef/etc). Preserve unresolved root type ID (for cases of typedef). Improve the format of CO-RE reloc spec output format to contain only relevant and succinct info. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200818223921.2911963-3-andriin@fb.com --- tools/lib/bpf/btf.c | 17 ++--- tools/lib/bpf/btf.h | 38 --------- tools/lib/bpf/libbpf.c | 165 +++++++++++++++++++++++++--------------- tools/lib/bpf/libbpf_internal.h | 78 ++++++++++++++----- 4 files changed, 169 insertions(+), 129 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 7dfca7016aaa..1deedbd19c6c 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1131,14 +1131,14 @@ static int btf_ext_setup_line_info(struct btf_ext *btf_ext) return btf_ext_setup_info(btf_ext, ¶m); } -static int btf_ext_setup_field_reloc(struct btf_ext *btf_ext) +static int btf_ext_setup_core_relos(struct btf_ext *btf_ext) { struct btf_ext_sec_setup_param param = { - .off = btf_ext->hdr->field_reloc_off, - .len = btf_ext->hdr->field_reloc_len, - .min_rec_size = sizeof(struct bpf_field_reloc), - .ext_info = &btf_ext->field_reloc_info, - .desc = "field_reloc", + .off = btf_ext->hdr->core_relo_off, + .len = btf_ext->hdr->core_relo_len, + .min_rec_size = sizeof(struct bpf_core_relo), + .ext_info = &btf_ext->core_relo_info, + .desc = "core_relo", }; return btf_ext_setup_info(btf_ext, ¶m); @@ -1217,10 +1217,9 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size) if (err) goto done; - if (btf_ext->hdr->hdr_len < - offsetofend(struct btf_ext_header, field_reloc_len)) + if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) goto done; - err = btf_ext_setup_field_reloc(btf_ext); + err = btf_ext_setup_core_relos(btf_ext); if (err) goto done; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 1ca14448df4c..91f0ad0e0325 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -24,44 +24,6 @@ struct btf_type; struct bpf_object; -/* - * The .BTF.ext ELF section layout defined as - * struct btf_ext_header - * func_info subsection - * - * The func_info subsection layout: - * record size for struct bpf_func_info in the func_info subsection - * struct btf_sec_func_info for section #1 - * a list of bpf_func_info records for section #1 - * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h - * but may not be identical - * struct btf_sec_func_info for section #2 - * a list of bpf_func_info records for section #2 - * ...... - * - * Note that the bpf_func_info record size in .BTF.ext may not - * be the same as the one defined in include/uapi/linux/bpf.h. - * The loader should ensure that record_size meets minimum - * requirement and pass the record as is to the kernel. The - * kernel will handle the func_info properly based on its contents. 
- */ -struct btf_ext_header { - __u16 magic; - __u8 version; - __u8 flags; - __u32 hdr_len; - - /* All offsets are in bytes relative to the end of this header */ - __u32 func_info_off; - __u32 func_info_len; - __u32 line_info_off; - __u32 line_info_len; - - /* optional part of .BTF.ext header */ - __u32 field_reloc_off; - __u32 field_reloc_len; -}; - LIBBPF_API void btf__free(struct btf *btf); LIBBPF_API struct btf *btf__new(const void *data, __u32 size); LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 94d92763bcb1..894d388094e3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2524,7 +2524,7 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj) int err; /* CO-RE relocations need kernel BTF */ - if (obj->btf_ext && obj->btf_ext->field_reloc_info.len) + if (obj->btf_ext && obj->btf_ext->core_relo_info.len) need_vmlinux_btf = true; bpf_object__for_each_program(prog, obj) { @@ -4074,6 +4074,10 @@ struct bpf_core_spec { const struct btf *btf; /* high-level spec: named fields and array indices only */ struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; + /* original unresolved (no skip_mods_or_typedefs) root type ID */ + __u32 root_type_id; + /* CO-RE relocation kind */ + enum bpf_core_relo_kind relo_kind; /* high-level spec length */ int len; /* raw, low-level spec: 1-to-1 with accessor spec string */ @@ -4104,8 +4108,36 @@ static bool is_flex_arr(const struct btf *btf, return acc->idx == btf_vlen(t) - 1; } +static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) +{ + switch (kind) { + case BPF_FIELD_BYTE_OFFSET: return "byte_off"; + case BPF_FIELD_BYTE_SIZE: return "byte_sz"; + case BPF_FIELD_EXISTS: return "field_exists"; + case BPF_FIELD_SIGNED: return "signed"; + case BPF_FIELD_LSHIFT_U64: return "lshift_u64"; + case BPF_FIELD_RSHIFT_U64: return "rshift_u64"; + default: return "unknown"; + } +} + +static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) +{ + switch (kind) { + case BPF_FIELD_BYTE_OFFSET: + case BPF_FIELD_BYTE_SIZE: + case BPF_FIELD_EXISTS: + case BPF_FIELD_SIGNED: + case BPF_FIELD_LSHIFT_U64: + case BPF_FIELD_RSHIFT_U64: + return true; + default: + return false; + } +} + /* - * Turn bpf_field_reloc into a low- and high-level spec representation, + * Turn bpf_core_relo into a low- and high-level spec representation, * validating correctness along the way, as well as calculating resulting * field bit offset, specified by accessor string. Low-level spec captures * every single level of nestedness, including traversing anonymous @@ -4135,9 +4167,10 @@ static bool is_flex_arr(const struct btf *btf, * - array element #3 access (corresponds to '3' in low-level spec). 
* */ -static int bpf_core_spec_parse(const struct btf *btf, +static int bpf_core_parse_spec(const struct btf *btf, __u32 type_id, const char *spec_str, + enum bpf_core_relo_kind relo_kind, struct bpf_core_spec *spec) { int access_idx, parsed_len, i; @@ -4152,6 +4185,8 @@ static int bpf_core_spec_parse(const struct btf *btf, memset(spec, 0, sizeof(*spec)); spec->btf = btf; + spec->root_type_id = type_id; + spec->relo_kind = relo_kind; /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */ while (*spec_str) { @@ -4178,6 +4213,9 @@ static int bpf_core_spec_parse(const struct btf *btf, spec->spec[0].idx = access_idx; spec->len++; + if (!core_relo_is_field_based(relo_kind)) + return -EINVAL; + sz = btf__resolve_size(btf, id); if (sz < 0) return sz; @@ -4285,17 +4323,17 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, const struct btf *targ_btf) { size_t local_essent_len, targ_essent_len; - const char *local_name, *targ_name; - const struct btf_type *t; + const char *local_name, *targ_name, *targ_kind; + const struct btf_type *t, *local_t; struct ids_vec *cand_ids; __u32 *new_ids; int i, err, n; - t = btf__type_by_id(local_btf, local_type_id); - if (!t) + local_t = btf__type_by_id(local_btf, local_type_id); + if (!local_t) return ERR_PTR(-EINVAL); - local_name = btf__name_by_offset(local_btf, t->name_off); + local_name = btf__name_by_offset(local_btf, local_t->name_off); if (str_is_empty(local_name)) return ERR_PTR(-EINVAL); local_essent_len = bpf_core_essential_name_len(local_name); @@ -4310,6 +4348,7 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, targ_name = btf__name_by_offset(targ_btf, t->name_off); if (str_is_empty(targ_name)) continue; + targ_kind = btf_kind_str(t); t = skip_mods_and_typedefs(targ_btf, i, NULL); if (!btf_is_composite(t) && !btf_is_array(t)) @@ -4320,8 +4359,9 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, continue; if (strncmp(local_name, targ_name, local_essent_len) == 0) { - pr_debug("[%d] %s: found candidate [%d] %s\n", - local_type_id, local_name, i, targ_name); + pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s\n", + local_type_id, btf_kind_str(local_t), + local_name, i, targ_kind, targ_name); new_ids = reallocarray(cand_ids->data, cand_ids->len + 1, sizeof(*cand_ids->data)); @@ -4510,6 +4550,8 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, memset(targ_spec, 0, sizeof(*targ_spec)); targ_spec->btf = targ_btf; + targ_spec->root_type_id = targ_id; + targ_spec->relo_kind = local_spec->relo_kind; local_acc = &local_spec->spec[0]; targ_acc = &targ_spec->spec[0]; @@ -4570,7 +4612,7 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, } static int bpf_core_calc_field_relo(const struct bpf_program *prog, - const struct bpf_field_reloc *relo, + const struct bpf_core_relo *relo, const struct bpf_core_spec *spec, __u32 *val, bool *validate) { @@ -4691,7 +4733,7 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, * 2. 
rX += (arithmetic operations with immediate operand); */ static int bpf_core_reloc_insn(struct bpf_program *prog, - const struct bpf_field_reloc *relo, + const struct bpf_core_relo *relo, int relo_idx, const struct bpf_core_spec *local_spec, const struct bpf_core_spec *targ_spec) @@ -4795,25 +4837,30 @@ static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) __u32 type_id; int i; - type_id = spec->spec[0].type_id; + type_id = spec->root_type_id; t = btf__type_by_id(spec->btf, type_id); s = btf__name_by_offset(spec->btf, t->name_off); - libbpf_print(level, "[%u] %s + ", type_id, s); - for (i = 0; i < spec->raw_len; i++) - libbpf_print(level, "%d%s", spec->raw_spec[i], - i == spec->raw_len - 1 ? " => " : ":"); + libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "" : s); + + if (core_relo_is_field_based(spec->relo_kind)) { + for (i = 0; i < spec->len; i++) { + if (spec->spec[i].name) + libbpf_print(level, ".%s", spec->spec[i].name); + else if (i > 0 || spec->spec[i].idx > 0) + libbpf_print(level, "[%u]", spec->spec[i].idx); + } - libbpf_print(level, "%u.%u @ &x", - spec->bit_offset / 8, spec->bit_offset % 8); + libbpf_print(level, " ("); + for (i = 0; i < spec->raw_len; i++) + libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); - for (i = 0; i < spec->len; i++) { - if (spec->spec[i].name) - libbpf_print(level, ".%s", spec->spec[i].name); + if (spec->bit_offset % 8) + libbpf_print(level, " @ offset %u.%u)", + spec->bit_offset / 8, spec->bit_offset % 8); else - libbpf_print(level, "[%u]", spec->spec[i].idx); + libbpf_print(level, " @ offset %u)", spec->bit_offset / 8); } - } static size_t bpf_core_hash_fn(const void *key, void *ctx) @@ -4877,12 +4924,12 @@ static void *u32_as_hash_key(__u32 x) * CPU-wise compared to prebuilding a map from all local type names to * a list of candidate type names. It's also sped up by caching resolved * list of matching candidates per each local "root" type ID, that has at - * least one bpf_field_reloc associated with it. This list is shared + * least one bpf_core_relo associated with it. This list is shared * between multiple relocations for the same type ID and is updated as some * of the candidates are pruned due to structural incompatibility. 
*/ static int bpf_core_reloc_field(struct bpf_program *prog, - const struct bpf_field_reloc *relo, + const struct bpf_core_relo *relo, int relo_idx, const struct btf *local_btf, const struct btf *targ_btf, @@ -4891,8 +4938,8 @@ static int bpf_core_reloc_field(struct bpf_program *prog, const char *prog_name = bpf_program__title(prog, false); struct bpf_core_spec local_spec, cand_spec, targ_spec; const void *type_key = u32_as_hash_key(relo->type_id); - const struct btf_type *local_type, *cand_type; - const char *local_name, *cand_name; + const struct btf_type *local_type; + const char *local_name; struct ids_vec *cand_ids; __u32 local_id, cand_id; const char *spec_str; @@ -4911,24 +4958,24 @@ static int bpf_core_reloc_field(struct bpf_program *prog, if (str_is_empty(spec_str)) return -EINVAL; - err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec); + err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec); if (err) { - pr_warn("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n", - prog_name, relo_idx, local_id, local_name, spec_str, - err); + pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", + prog_name, relo_idx, local_id, btf_kind_str(local_type), + local_name, spec_str, err); return -EINVAL; } - pr_debug("prog '%s': relo #%d: kind %d, spec is ", prog_name, relo_idx, - relo->kind); + pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name, + relo_idx, core_relo_kind_str(relo->kind), relo->kind); bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); libbpf_print(LIBBPF_DEBUG, "\n"); if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) { cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf); if (IS_ERR(cand_ids)) { - pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld", - prog_name, relo_idx, local_id, local_name, + pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld", + prog_name, relo_idx, local_id, btf_kind_str(local_type), local_name, PTR_ERR(cand_ids)); return PTR_ERR(cand_ids); } @@ -4941,20 +4988,20 @@ static int bpf_core_reloc_field(struct bpf_program *prog, for (i = 0, j = 0; i < cand_ids->len; i++) { cand_id = cand_ids->data[i]; - cand_type = btf__type_by_id(targ_btf, cand_id); - cand_name = btf__name_by_offset(targ_btf, cand_type->name_off); - - err = bpf_core_spec_match(&local_spec, targ_btf, - cand_id, &cand_spec); - pr_debug("prog '%s': relo #%d: matching candidate #%d %s against spec ", - prog_name, relo_idx, i, cand_name); - bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); - libbpf_print(LIBBPF_DEBUG, ": %d\n", err); + err = bpf_core_spec_match(&local_spec, targ_btf, cand_id, &cand_spec); if (err < 0) { - pr_warn("prog '%s': relo #%d: matching error: %d\n", - prog_name, relo_idx, err); + pr_warn("prog '%s': relo #%d: error matching candidate #%d ", + prog_name, relo_idx, i); + bpf_core_dump_spec(LIBBPF_WARN, &cand_spec); + libbpf_print(LIBBPF_WARN, ": %d\n", err); return err; } + + pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name, + relo_idx, err == 0 ? "non-matching" : "matching", i); + bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); + libbpf_print(LIBBPF_DEBUG, "\n"); + if (err == 0) continue; @@ -4996,8 +5043,8 @@ static int bpf_core_reloc_field(struct bpf_program *prog, * to a specific instruction number in its log. 
*/ if (j == 0) - pr_debug("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", - prog_name, relo_idx, local_id, local_name, spec_str); + pr_debug("prog '%s': relo #%d: no matching targets found\n", + prog_name, relo_idx); /* bpf_core_reloc_insn should know how to handle missing targ_spec */ err = bpf_core_reloc_insn(prog, relo, relo_idx, &local_spec, @@ -5012,10 +5059,10 @@ static int bpf_core_reloc_field(struct bpf_program *prog, } static int -bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) +bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) { const struct btf_ext_info_sec *sec; - const struct bpf_field_reloc *rec; + const struct bpf_core_relo *rec; const struct btf_ext_info *seg; struct hashmap_entry *entry; struct hashmap *cand_cache = NULL; @@ -5024,6 +5071,9 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) const char *sec_name; int i, err = 0; + if (obj->btf_ext->core_relo_info.len == 0) + return 0; + if (targ_btf_path) targ_btf = btf__parse_elf(targ_btf_path, NULL); else @@ -5039,7 +5089,7 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) goto out; } - seg = &obj->btf_ext->field_reloc_info; + seg = &obj->btf_ext->core_relo_info; for_each_btf_ext_sec(seg, sec) { sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); if (str_is_empty(sec_name)) { @@ -5087,17 +5137,6 @@ out: return err; } -static int -bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) -{ - int err = 0; - - if (obj->btf_ext->field_reloc_info.len) - err = bpf_core_reloc_fields(obj, targ_btf_path); - - return err; -} - static int bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, struct reloc_desc *relo) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 50d70e90d5f1..b776a7125c92 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -138,6 +138,44 @@ struct btf_ext_info { i < (sec)->num_info; \ i++, rec = (void *)rec + (seg)->rec_size) +/* + * The .BTF.ext ELF section layout defined as + * struct btf_ext_header + * func_info subsection + * + * The func_info subsection layout: + * record size for struct bpf_func_info in the func_info subsection + * struct btf_sec_func_info for section #1 + * a list of bpf_func_info records for section #1 + * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h + * but may not be identical + * struct btf_sec_func_info for section #2 + * a list of bpf_func_info records for section #2 + * ...... + * + * Note that the bpf_func_info record size in .BTF.ext may not + * be the same as the one defined in include/uapi/linux/bpf.h. + * The loader should ensure that record_size meets minimum + * requirement and pass the record as is to the kernel. The + * kernel will handle the func_info properly based on its contents. 
+ */ +struct btf_ext_header { + __u16 magic; + __u8 version; + __u8 flags; + __u32 hdr_len; + + /* All offsets are in bytes relative to the end of this header */ + __u32 func_info_off; + __u32 func_info_len; + __u32 line_info_off; + __u32 line_info_len; + + /* optional part of .BTF.ext header */ + __u32 core_relo_off; + __u32 core_relo_len; +}; + struct btf_ext { union { struct btf_ext_header *hdr; @@ -145,7 +183,7 @@ struct btf_ext { }; struct btf_ext_info func_info; struct btf_ext_info line_info; - struct btf_ext_info field_reloc_info; + struct btf_ext_info core_relo_info; __u32 data_size; }; @@ -170,32 +208,34 @@ struct bpf_line_info_min { __u32 line_col; }; -/* bpf_field_info_kind encodes which aspect of captured field has to be - * adjusted by relocations. Currently supported values are: - * - BPF_FIELD_BYTE_OFFSET: field offset (in bytes); - * - BPF_FIELD_EXISTS: field existence (1, if field exists; 0, otherwise); +/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value + * has to be adjusted by relocations. */ -enum bpf_field_info_kind { +enum bpf_core_relo_kind { BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ - BPF_FIELD_BYTE_SIZE = 1, + BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */ BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ - BPF_FIELD_SIGNED = 3, - BPF_FIELD_LSHIFT_U64 = 4, - BPF_FIELD_RSHIFT_U64 = 5, + BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ + BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ + BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ }; -/* The minimum bpf_field_reloc checked by the loader +/* The minimum bpf_core_relo checked by the loader * - * Field relocation captures the following data: + * CO-RE relocation captures the following data: * - insn_off - instruction offset (in bytes) within a BPF program that needs * its insn->imm field to be relocated with actual field info; * - type_id - BTF type ID of the "root" (containing) entity of a relocatable - * field; + * type or field; * - access_str_off - offset into corresponding .BTF string section. String - * itself encodes an accessed field using a sequence of field and array - * indicies, separated by colon (:). It's conceptually very close to LLVM's - * getelementptr ([0]) instruction's arguments for identifying offset to - * a field. + * interpretation depends on specific relocation kind: + * - for field-based relocations, string encodes an accessed field using + * a sequence of field and array indices, separated by colon (:). It's + * conceptually very close to LLVM's getelementptr ([0]) instruction's + * arguments for identifying offset to a field. + * - for type-based relocations, strings is expected to be just "0"; + * - for enum value-based relocations, string contains an index of enum + * value within its enum type; * * Example to provide a better feel. * @@ -226,11 +266,11 @@ enum bpf_field_info_kind { * * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction */ -struct bpf_field_reloc { +struct bpf_core_relo { __u32 insn_off; __u32 type_id; __u32 access_str_off; - enum bpf_field_info_kind kind; + enum bpf_core_relo_kind kind; }; #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ -- cgit v1.2.3 From 353c788c2054257359114f12827d86f8e3332e10 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 15:39:15 -0700 Subject: libbpf: Improve relocation ambiguity detection Split the instruction patching logic into relocation value calculation and application of relocation to instruction. 
Using this, evaluate relocation against each matching candidate and validate that all candidates agree on relocated value. If not, report ambiguity and fail load. This logic is necessary to avoid dangerous (however unlikely) accidental match against two incompatible candidate types. Without this change, libbpf will pick a random type as *the* candidate and apply potentially invalid relocation. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200818223921.2911963-4-andriin@fb.com --- tools/lib/bpf/libbpf.c | 170 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 124 insertions(+), 46 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 894d388094e3..0276cf85d763 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4616,14 +4616,25 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, const struct bpf_core_spec *spec, __u32 *val, bool *validate) { - const struct bpf_core_accessor *acc = &spec->spec[spec->len - 1]; - const struct btf_type *t = btf__type_by_id(spec->btf, acc->type_id); + const struct bpf_core_accessor *acc; + const struct btf_type *t; __u32 byte_off, byte_sz, bit_off, bit_sz; const struct btf_member *m; const struct btf_type *mt; bool bitfield; __s64 sz; + if (relo->kind == BPF_FIELD_EXISTS) { + *val = spec ? 1 : 0; + return 0; + } + + if (!spec) + return -EUCLEAN; /* request instruction poisoning */ + + acc = &spec->spec[spec->len - 1]; + t = btf__type_by_id(spec->btf, acc->type_id); + /* a[n] accessor needs special handling */ if (!acc->name) { if (relo->kind == BPF_FIELD_BYTE_OFFSET) { @@ -4709,21 +4720,88 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, break; case BPF_FIELD_EXISTS: default: - pr_warn("prog '%s': unknown relo %d at insn #%d\n", - bpf_program__title(prog, false), - relo->kind, relo->insn_off / 8); - return -EINVAL; + return -EOPNOTSUPP; } return 0; } +struct bpf_core_relo_res +{ + /* expected value in the instruction, unless validate == false */ + __u32 orig_val; + /* new value that needs to be patched up to */ + __u32 new_val; + /* relocation unsuccessful, poison instruction, but don't fail load */ + bool poison; + /* some relocations can't be validated against orig_val */ + bool validate; +}; + +/* Calculate original and target relocation values, given local and target + * specs and relocation kind. These values are calculated for each candidate. + * If there are multiple candidates, resulting values should all be consistent + * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity. + * If instruction has to be poisoned, *poison will be set to true. 
+ */ +static int bpf_core_calc_relo(const struct bpf_program *prog, + const struct bpf_core_relo *relo, + int relo_idx, + const struct bpf_core_spec *local_spec, + const struct bpf_core_spec *targ_spec, + struct bpf_core_relo_res *res) +{ + int err = -EOPNOTSUPP; + + res->orig_val = 0; + res->new_val = 0; + res->poison = false; + res->validate = true; + + if (core_relo_is_field_based(relo->kind)) { + err = bpf_core_calc_field_relo(prog, relo, local_spec, &res->orig_val, &res->validate); + err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec, &res->new_val, NULL); + } + + if (err == -EUCLEAN) { + /* EUCLEAN is used to signal instruction poisoning request */ + res->poison = true; + err = 0; + } else if (err == -EOPNOTSUPP) { + /* EOPNOTSUPP means unknown/unsupported relocation */ + pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n", + bpf_program__title(prog, false), relo_idx, + core_relo_kind_str(relo->kind), relo->kind, relo->insn_off / 8); + } + + return err; +} + +/* + * Turn instruction for which CO_RE relocation failed into invalid one with + * distinct signature. + */ +static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx, + int insn_idx, struct bpf_insn *insn) +{ + pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n", + bpf_program__title(prog, false), relo_idx, insn_idx); + insn->code = BPF_JMP | BPF_CALL; + insn->dst_reg = 0; + insn->src_reg = 0; + insn->off = 0; + /* if this instruction is reachable (not a dead code), + * verifier will complain with the following message: + * invalid func unknown#195896080 + */ + insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ +} + /* * Patch relocatable BPF instruction. * * Patched value is determined by relocation kind and target specification. - * For field existence relocation target spec will be NULL if field is not - * found. + * For existence relocations target spec will be NULL if field/type is not found. * Expected insn->imm value is determined using relocation kind and local * spec, and is checked before patching instruction. If actual insn->imm value * is wrong, bail out with error. @@ -4732,16 +4810,14 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, * 1. rX = (assignment with immediate operand); * 2. rX += (arithmetic operations with immediate operand); */ -static int bpf_core_reloc_insn(struct bpf_program *prog, +static int bpf_core_patch_insn(struct bpf_program *prog, const struct bpf_core_relo *relo, int relo_idx, - const struct bpf_core_spec *local_spec, - const struct bpf_core_spec *targ_spec) + const struct bpf_core_relo_res *res) { __u32 orig_val, new_val; struct bpf_insn *insn; - bool validate = true; - int insn_idx, err; + int insn_idx; __u8 class; if (relo->insn_off % sizeof(struct bpf_insn)) @@ -4750,39 +4826,20 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, insn = &prog->insns[insn_idx]; class = BPF_CLASS(insn->code); - if (relo->kind == BPF_FIELD_EXISTS) { - orig_val = 1; /* can't generate EXISTS relo w/o local field */ - new_val = targ_spec ? 
1 : 0; - } else if (!targ_spec) { - pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n", - bpf_program__title(prog, false), relo_idx, insn_idx); - insn->code = BPF_JMP | BPF_CALL; - insn->dst_reg = 0; - insn->src_reg = 0; - insn->off = 0; - /* if this instruction is reachable (not a dead code), - * verifier will complain with the following message: - * invalid func unknown#195896080 - */ - insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ + if (res->poison) { + bpf_core_poison_insn(prog, relo_idx, insn_idx, insn); return 0; - } else { - err = bpf_core_calc_field_relo(prog, relo, local_spec, - &orig_val, &validate); - if (err) - return err; - err = bpf_core_calc_field_relo(prog, relo, targ_spec, - &new_val, NULL); - if (err) - return err; } + orig_val = res->orig_val; + new_val = res->new_val; + switch (class) { case BPF_ALU: case BPF_ALU64: if (BPF_SRC(insn->code) != BPF_K) return -EINVAL; - if (validate && insn->imm != orig_val) { + if (res->validate && insn->imm != orig_val) { pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", bpf_program__title(prog, false), relo_idx, insn_idx, insn->imm, orig_val, new_val); @@ -4797,7 +4854,7 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, case BPF_LDX: case BPF_ST: case BPF_STX: - if (validate && insn->off != orig_val) { + if (res->validate && insn->off != orig_val) { pr_warn("prog '%s': relo #%d: unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n", bpf_program__title(prog, false), relo_idx, insn_idx, insn->off, orig_val, new_val); @@ -4938,6 +4995,7 @@ static int bpf_core_reloc_field(struct bpf_program *prog, const char *prog_name = bpf_program__title(prog, false); struct bpf_core_spec local_spec, cand_spec, targ_spec; const void *type_key = u32_as_hash_key(relo->type_id); + struct bpf_core_relo_res cand_res, targ_res; const struct btf_type *local_type; const char *local_name; struct ids_vec *cand_ids; @@ -5005,16 +5063,31 @@ static int bpf_core_reloc_field(struct bpf_program *prog, if (err == 0) continue; + err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res); + if (err) + return err; + if (j == 0) { + targ_res = cand_res; targ_spec = cand_spec; } else if (cand_spec.bit_offset != targ_spec.bit_offset) { - /* if there are many candidates, they should all - * resolve to the same bit offset + /* if there are many field relo candidates, they + * should all resolve to the same bit offset */ - pr_warn("prog '%s': relo #%d: offset ambiguity: %u != %u\n", + pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n", prog_name, relo_idx, cand_spec.bit_offset, targ_spec.bit_offset); return -EINVAL; + } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) { + /* all candidates should result in the same relocation + * decision and value, otherwise it's dangerous to + * proceed due to ambiguity + */ + pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n", + prog_name, relo_idx, + cand_res.poison ? "failure" : "success", cand_res.new_val, + targ_res.poison ? "failure" : "success", targ_res.new_val); + return -EINVAL; } cand_ids->data[j++] = cand_spec.spec[0].type_id; @@ -5042,13 +5115,18 @@ static int bpf_core_reloc_field(struct bpf_program *prog, * verifier. If it was an error, then verifier will complain and point * to a specific instruction number in its log. 
*/ - if (j == 0) + if (j == 0) { pr_debug("prog '%s': relo #%d: no matching targets found\n", prog_name, relo_idx); - /* bpf_core_reloc_insn should know how to handle missing targ_spec */ - err = bpf_core_reloc_insn(prog, relo, relo_idx, &local_spec, - j ? &targ_spec : NULL); + /* calculate single target relo result explicitly */ + err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res); + if (err) + return err; + } + + /* bpf_core_patch_insn() should know how to handle missing targ_spec */ + err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res); if (err) { pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n", prog_name, relo_idx, relo->insn_off, err); -- cgit v1.2.3 From 00b2e95325f8c9dd56a7dae691b01e321838a557 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 15:39:16 -0700 Subject: selftests/bpf: Add test validating failure on ambiguous relocation value Add test simulating ambiguous field size relocation, while fields themselves are at the exact same offset. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200818223921.2911963-5-andriin@fb.com --- .../testing/selftests/bpf/prog_tests/core_reloc.c | 1 + .../progs/btf__core_reloc_size___err_ambiguous.c | 4 ++++ .../testing/selftests/bpf/progs/core_reloc_types.h | 25 ++++++++++++++++++++++ 3 files changed, 30 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index a54eafc5e4b3..4d650e99be28 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -452,6 +452,7 @@ static struct core_reloc_test_case test_cases[] = { /* size relocation checks */ SIZE_CASE(size), SIZE_CASE(size___diff_sz), + SIZE_ERR_CASE(size___err_ambiguous), }; struct data { diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c new file mode 100644 index 000000000000..f3e9904df9c2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c @@ -0,0 +1,4 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_size___err_ambiguous1 x, + struct core_reloc_size___err_ambiguous2 y) {} diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index 69139ed66216..3b1126c0bc8f 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -809,3 +809,28 @@ struct core_reloc_size___diff_sz { void *ptr_field; enum { OTHER_VALUE = 0xFFFFFFFFFFFFFFFF } enum_field; }; + +/* Error case of two candidates with the fields (int_field) at the same + * offset, but with differing final relocation values: size 4 vs size 1 + */ +struct core_reloc_size___err_ambiguous1 { + /* int at offset 0 */ + int int_field; + + struct { int x; } struct_field; + union { int x; } union_field; + int arr_field[4]; + void *ptr_field; + enum { VALUE___1 = 123 } enum_field; +}; + +struct core_reloc_size___err_ambiguous2 { + /* char at offset 0 */ + char int_field; + + struct { int x; } struct_field; + union { int x; } union_field; + int arr_field[4]; + void *ptr_field; + enum { VALUE___2 = 123 } enum_field; +}; -- cgit v1.2.3 From 029258d7b22894fabcecb1626e1b87d18a6823f4 Mon 
Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 18:36:04 -0700 Subject: libbpf: Remove any use of reallocarray() in libbpf Re-implement glibc's reallocarray() for libbpf internal-only use. reallocarray(), unfortunately, is not available in all versions of glibc, so requires extra feature detection and using reallocarray() stub from and COMPAT_NEED_REALLOCARRAY. All this complicates build of libbpf unnecessarily and is just a maintenance burden. Instead, it's trivial to implement libbpf-specific internal version and use it throughout libbpf. Which is what this patch does, along with converting some realloc() uses that should really have been reallocarray() in the first place. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200819013607.3607269-2-andriin@fb.com --- tools/lib/bpf/Makefile | 6 +----- tools/lib/bpf/btf.c | 11 +++++------ tools/lib/bpf/btf_dump.c | 6 ++---- tools/lib/bpf/libbpf.c | 21 ++++++++++----------- tools/lib/bpf/libbpf_internal.h | 25 +++++++++++++++++++++++++ tools/lib/bpf/ringbuf.c | 5 ++--- 6 files changed, 45 insertions(+), 29 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 95c946e94ca5..621ad96d06fd 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -56,7 +56,7 @@ ifndef VERBOSE endif FEATURE_USER = .libbpf -FEATURE_TESTS = libelf libelf-mmap zlib bpf reallocarray +FEATURE_TESTS = libelf libelf-mmap zlib bpf FEATURE_DISPLAY = libelf zlib bpf INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi @@ -102,10 +102,6 @@ ifeq ($(feature-libelf-mmap), 1) override CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT endif -ifeq ($(feature-reallocarray), 0) - override CFLAGS += -DCOMPAT_NEED_REALLOCARRAY -endif - # Append required CFLAGS override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum override CFLAGS += -Werror -Wall diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 1deedbd19c6c..1b7d85d94a07 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -61,7 +61,7 @@ static int btf_add_type(struct btf *btf, struct btf_type *t) expand_by = max(btf->types_size >> 2, 16U); new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by); - new_types = realloc(btf->types, sizeof(*new_types) * new_size); + new_types = libbpf_reallocarray(btf->types, new_size, sizeof(*new_types)); if (!new_types) return -ENOMEM; @@ -1574,7 +1574,7 @@ static int btf_dedup_hypot_map_add(struct btf_dedup *d, __u32 *new_list; d->hypot_cap += max((size_t)16, d->hypot_cap / 2); - new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap); + new_list = libbpf_reallocarray(d->hypot_list, d->hypot_cap, sizeof(__u32)); if (!new_list) return -ENOMEM; d->hypot_list = new_list; @@ -1870,8 +1870,7 @@ static int btf_dedup_strings(struct btf_dedup *d) struct btf_str_ptr *new_ptrs; strs.cap += max(strs.cnt / 2, 16U); - new_ptrs = realloc(strs.ptrs, - sizeof(strs.ptrs[0]) * strs.cap); + new_ptrs = libbpf_reallocarray(strs.ptrs, strs.cap, sizeof(strs.ptrs[0])); if (!new_ptrs) { err = -ENOMEM; goto done; @@ -2956,8 +2955,8 @@ static int btf_dedup_compact_types(struct btf_dedup *d) d->btf->nr_types = next_type_id - 1; d->btf->types_size = d->btf->nr_types; d->btf->hdr->type_len = p - types_start; - new_types = realloc(d->btf->types, - (1 + d->btf->nr_types) * sizeof(struct btf_type *)); + new_types = libbpf_reallocarray(d->btf->types, (1 + d->btf->nr_types), + sizeof(struct btf_type *)); if (!new_types) return 
-ENOMEM; d->btf->types = new_types; diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index fe39bd774697..1ad852ad0a86 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -323,8 +323,7 @@ static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id) if (d->emit_queue_cnt >= d->emit_queue_cap) { new_cap = max(16, d->emit_queue_cap * 3 / 2); - new_queue = realloc(d->emit_queue, - new_cap * sizeof(new_queue[0])); + new_queue = libbpf_reallocarray(d->emit_queue, new_cap, sizeof(new_queue[0])); if (!new_queue) return -ENOMEM; d->emit_queue = new_queue; @@ -1003,8 +1002,7 @@ static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id) if (d->decl_stack_cnt >= d->decl_stack_cap) { new_cap = max(16, d->decl_stack_cap * 3 / 2); - new_stack = realloc(d->decl_stack, - new_cap * sizeof(new_stack[0])); + new_stack = libbpf_reallocarray(d->decl_stack, new_cap, sizeof(new_stack[0])); if (!new_stack) return -ENOMEM; d->decl_stack = new_stack; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 0276cf85d763..2653bcee73b7 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -44,7 +44,6 @@ #include #include #include -#include #include #include #include @@ -567,7 +566,7 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, progs = obj->programs; nr_progs = obj->nr_programs; - progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0])); + progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(progs[0])); if (!progs) { /* * In this case the original obj->programs @@ -1292,7 +1291,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) return &obj->maps[obj->nr_maps++]; new_cap = max((size_t)4, obj->maps_cap * 3 / 2); - new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps)); + new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps)); if (!new_maps) { pr_warn("alloc maps for object failed\n"); return ERR_PTR(-ENOMEM); @@ -2721,8 +2720,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) continue; } - sects = reallocarray(sects, nr_sects + 1, - sizeof(*obj->efile.reloc_sects)); + sects = libbpf_reallocarray(sects, nr_sects + 1, + sizeof(*obj->efile.reloc_sects)); if (!sects) { pr_warn("reloc_sects realloc failed\n"); return -ENOMEM; @@ -2925,7 +2924,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) continue; ext = obj->externs; - ext = reallocarray(ext, obj->nr_extern + 1, sizeof(*ext)); + ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext)); if (!ext) return -ENOMEM; obj->externs = ext; @@ -4362,9 +4361,9 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s\n", local_type_id, btf_kind_str(local_t), local_name, i, targ_kind, targ_name); - new_ids = reallocarray(cand_ids->data, - cand_ids->len + 1, - sizeof(*cand_ids->data)); + new_ids = libbpf_reallocarray(cand_ids->data, + cand_ids->len + 1, + sizeof(*cand_ids->data)); if (!new_ids) { err = -ENOMEM; goto err_out; @@ -5231,7 +5230,7 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, return -LIBBPF_ERRNO__RELOC; } new_cnt = prog->insns_cnt + text->insns_cnt; - new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn)); + new_insn = libbpf_reallocarray(prog->insns, new_cnt, sizeof(*insn)); if (!new_insn) { pr_warn("oom in prog realloc\n"); return -ENOMEM; @@ -5473,7 +5472,7 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, moff /= bpf_ptr_sz; if (moff 
>= map->init_slots_sz) { new_sz = moff + 1; - tmp = realloc(map->init_slots, new_sz * host_ptr_sz); + tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz); if (!tmp) return -ENOMEM; map->init_slots = tmp; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index b776a7125c92..954bc2bd040c 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -9,6 +9,7 @@ #ifndef __LIBBPF_LIBBPF_INTERNAL_H #define __LIBBPF_LIBBPF_INTERNAL_H +#include #include "libbpf.h" #define BTF_INFO_ENC(kind, kind_flag, vlen) \ @@ -23,6 +24,12 @@ #define BTF_PARAM_ENC(name, type) (name), (type) #define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size) +#ifndef likely +#define likely(x) __builtin_expect(!!(x), 1) +#endif +#ifndef unlikely +#define unlikely(x) __builtin_expect(!!(x), 0) +#endif #ifndef min # define min(x, y) ((x) < (y) ? (x) : (y)) #endif @@ -63,6 +70,24 @@ do { \ #define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) #define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) +/* + * Re-implement glibc's reallocarray() for libbpf internal-only use. + * reallocarray(), unfortunately, is not available in all versions of glibc, + * so requires extra feature detection and using reallocarray() stub from + * and COMPAT_NEED_REALLOCARRAY. All this complicates + * build of libbpf unnecessarily and is just a maintenance burden. Instead, + * it's trivial to implement libbpf-specific internal version and use it + * throughout libbpf. + */ +static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size) +{ + size_t total; + + if (unlikely(__builtin_mul_overflow(nmemb, size, &total))) + return NULL; + return realloc(ptr, total); +} + static inline bool libbpf_validate_opts(const char *opts, size_t opts_sz, size_t user_sz, const char *type_name) diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 4fc6c6cbb4eb..5bd234be8a14 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -16,7 +16,6 @@ #include #include #include -#include #include "libbpf.h" #include "libbpf_internal.h" @@ -82,12 +81,12 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, return -EINVAL; } - tmp = reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings)); + tmp = libbpf_reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings)); if (!tmp) return -ENOMEM; rb->rings = tmp; - tmp = reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events)); + tmp = libbpf_reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events)); if (!tmp) return -ENOMEM; rb->events = tmp; -- cgit v1.2.3 From 7084566a236fbc98beb11430d8d67dd08b2ac151 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 18:36:05 -0700 Subject: tools/bpftool: Remove libbpf_internal.h usage in bpftool Most netlink-related functions were unique to bpftool usage, so I moved them into net.c. Few functions are still used by both bpftool and libbpf itself internally, so I've copy-pasted them (libbpf_nl_get_link, libbpf_netlink_open). It's a bit of duplication of code, but better separation of libbpf as a library with public API and bpftool, relying on unexposed functions in libbpf. 
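As an aside from the series itself, here is a minimal, self-contained sketch of the grow-on-demand pattern that the reallocarray conversion above enables. It is illustrative only, not part of any patch: the helper body is copied from the libbpf_internal.h hunk, while main() and the growth policy are hypothetical.

#include <stdio.h>
#include <stdlib.h>

/* overflow-safe reallocarray() replacement, as added to libbpf_internal.h */
static void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size)
{
        size_t total;

        if (__builtin_mul_overflow(nmemb, size, &total))
                return NULL;
        return realloc(ptr, total);
}

int main(void)
{
        size_t cnt = 0, cap = 0;
        int *arr = NULL, i;

        for (i = 0; i < 1000; i++) {
                if (cnt == cap) {
                        /* same 3/2 growth policy libbpf uses elsewhere */
                        size_t new_cap = cap * 3 / 2 < 16 ? 16 : cap * 3 / 2;
                        int *tmp = libbpf_reallocarray(arr, new_cap, sizeof(*arr));

                        if (!tmp) {
                                free(arr);
                                return 1;
                        }
                        arr = tmp;
                        cap = new_cap;
                }
                arr[cnt++] = i;
        }
        printf("stored %zu ints, final capacity %zu\n", cnt, cap);
        free(arr);
        return 0;
}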
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200819013607.3607269-3-andriin@fb.com --- tools/bpf/bpftool/gen.c | 2 - tools/bpf/bpftool/net.c | 299 +++++++++++++++++++++++++++++++++++++--- tools/lib/bpf/libbpf_internal.h | 12 -- tools/lib/bpf/netlink.c | 125 +---------------- 4 files changed, 288 insertions(+), 150 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index f61184653633..4033c46d83e7 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -19,11 +19,9 @@ #include #include -#include "bpf/libbpf_internal.h" #include "json_writer.h" #include "main.h" - #define MAX_OBJ_NAME_LEN 64 static void sanitize_identifier(char *name) diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 56c3a2bae3ef..910e7bac6e9e 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -6,22 +6,27 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include #include #include "bpf/nlattr.h" -#include "bpf/libbpf_internal.h" #include "main.h" #include "netlink_dumper.h" +#ifndef SOL_NETLINK +#define SOL_NETLINK 270 +#endif + struct ip_devname_ifindex { char devname[64]; int ifindex; @@ -85,6 +90,266 @@ static enum net_attach_type parse_attach_type(const char *str) return net_attach_type_size; } +typedef int (*dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); + +typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, dump_nlmsg_t, void *cookie); + +static int netlink_open(__u32 *nl_pid) +{ + struct sockaddr_nl sa; + socklen_t addrlen; + int one = 1, ret; + int sock; + + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) + return -errno; + + if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)) < 0) { + p_err("Netlink error reporting not supported"); + } + + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + ret = -errno; + goto cleanup; + } + + addrlen = sizeof(sa); + if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) { + ret = -errno; + goto cleanup; + } + + if (addrlen != sizeof(sa)) { + ret = -LIBBPF_ERRNO__INTERNAL; + goto cleanup; + } + + *nl_pid = sa.nl_pid; + return sock; + +cleanup: + close(sock); + return ret; +} + +static int netlink_recv(int sock, __u32 nl_pid, __u32 seq, + __dump_nlmsg_t _fn, dump_nlmsg_t fn, + void *cookie) +{ + bool multipart = true; + struct nlmsgerr *err; + struct nlmsghdr *nh; + char buf[4096]; + int len, ret; + + while (multipart) { + multipart = false; + len = recv(sock, buf, sizeof(buf), 0); + if (len < 0) { + ret = -errno; + goto done; + } + + if (len == 0) + break; + + for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); + nh = NLMSG_NEXT(nh, len)) { + if (nh->nlmsg_pid != nl_pid) { + ret = -LIBBPF_ERRNO__WRNGPID; + goto done; + } + if (nh->nlmsg_seq != seq) { + ret = -LIBBPF_ERRNO__INVSEQ; + goto done; + } + if (nh->nlmsg_flags & NLM_F_MULTI) + multipart = true; + switch (nh->nlmsg_type) { + case NLMSG_ERROR: + err = (struct nlmsgerr *)NLMSG_DATA(nh); + if (!err->error) + continue; + ret = err->error; + libbpf_nla_dump_errormsg(nh); + goto done; + case NLMSG_DONE: + return 0; + default: + break; + } + if (_fn) { + ret = _fn(nh, fn, cookie); + if (ret) + return ret; + } + } + } + ret = 0; +done: + return ret; +} + +static int __dump_class_nlmsg(struct nlmsghdr *nlh, + dump_nlmsg_t dump_class_nlmsg, + void *cookie) +{ + struct nlattr 
*tb[TCA_MAX + 1], *attr; + struct tcmsg *t = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); + attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); + if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_class_nlmsg(cookie, t, tb); +} + +static int netlink_get_class(int sock, unsigned int nl_pid, int ifindex, + dump_nlmsg_t dump_class_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct tcmsg t; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .nlh.nlmsg_type = RTM_GETTCLASS, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .t.tcm_family = AF_UNSPEC, + .t.tcm_ifindex = ifindex, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg, + dump_class_nlmsg, cookie); +} + +static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh, + dump_nlmsg_t dump_qdisc_nlmsg, + void *cookie) +{ + struct nlattr *tb[TCA_MAX + 1], *attr; + struct tcmsg *t = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); + attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); + if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_qdisc_nlmsg(cookie, t, tb); +} + +static int netlink_get_qdisc(int sock, unsigned int nl_pid, int ifindex, + dump_nlmsg_t dump_qdisc_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct tcmsg t; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .nlh.nlmsg_type = RTM_GETQDISC, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .t.tcm_family = AF_UNSPEC, + .t.tcm_ifindex = ifindex, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg, + dump_qdisc_nlmsg, cookie); +} + +static int __dump_filter_nlmsg(struct nlmsghdr *nlh, + dump_nlmsg_t dump_filter_nlmsg, + void *cookie) +{ + struct nlattr *tb[TCA_MAX + 1], *attr; + struct tcmsg *t = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); + attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); + if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_filter_nlmsg(cookie, t, tb); +} + +static int netlink_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, + dump_nlmsg_t dump_filter_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct tcmsg t; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .nlh.nlmsg_type = RTM_GETTFILTER, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .t.tcm_family = AF_UNSPEC, + .t.tcm_ifindex = ifindex, + .t.tcm_parent = handle, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg, + dump_filter_nlmsg, cookie); +} + +static int __dump_link_nlmsg(struct nlmsghdr *nlh, + dump_nlmsg_t dump_link_nlmsg, void *cookie) +{ + struct nlattr *tb[IFLA_MAX + 1], *attr; + struct ifinfomsg *ifi = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi)); + attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi))); + if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return 
dump_link_nlmsg(cookie, ifi, tb); +} + +static int netlink_get_link(int sock, unsigned int nl_pid, + dump_nlmsg_t dump_link_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct ifinfomsg ifm; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + .nlh.nlmsg_type = RTM_GETLINK, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .ifm.ifi_family = AF_PACKET, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg, + dump_link_nlmsg, cookie); +} + static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb) { struct bpf_netdev_t *netinfo = cookie; @@ -168,14 +433,14 @@ static int show_dev_tc_bpf(int sock, unsigned int nl_pid, tcinfo.array_len = 0; tcinfo.is_qdisc = false; - ret = libbpf_nl_get_class(sock, nl_pid, dev->ifindex, - dump_class_qdisc_nlmsg, &tcinfo); + ret = netlink_get_class(sock, nl_pid, dev->ifindex, + dump_class_qdisc_nlmsg, &tcinfo); if (ret) goto out; tcinfo.is_qdisc = true; - ret = libbpf_nl_get_qdisc(sock, nl_pid, dev->ifindex, - dump_class_qdisc_nlmsg, &tcinfo); + ret = netlink_get_qdisc(sock, nl_pid, dev->ifindex, + dump_class_qdisc_nlmsg, &tcinfo); if (ret) goto out; @@ -183,9 +448,9 @@ static int show_dev_tc_bpf(int sock, unsigned int nl_pid, filter_info.ifindex = dev->ifindex; for (i = 0; i < tcinfo.used_len; i++) { filter_info.kind = tcinfo.handle_array[i].kind; - ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, - tcinfo.handle_array[i].handle, - dump_filter_nlmsg, &filter_info); + ret = netlink_get_filter(sock, nl_pid, dev->ifindex, + tcinfo.handle_array[i].handle, + dump_filter_nlmsg, &filter_info); if (ret) goto out; } @@ -193,22 +458,22 @@ static int show_dev_tc_bpf(int sock, unsigned int nl_pid, /* root, ingress and egress handle */ handle = TC_H_ROOT; filter_info.kind = "root"; - ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle, - dump_filter_nlmsg, &filter_info); + ret = netlink_get_filter(sock, nl_pid, dev->ifindex, handle, + dump_filter_nlmsg, &filter_info); if (ret) goto out; handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS); filter_info.kind = "clsact/ingress"; - ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle, - dump_filter_nlmsg, &filter_info); + ret = netlink_get_filter(sock, nl_pid, dev->ifindex, handle, + dump_filter_nlmsg, &filter_info); if (ret) goto out; handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS); filter_info.kind = "clsact/egress"; - ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle, - dump_filter_nlmsg, &filter_info); + ret = netlink_get_filter(sock, nl_pid, dev->ifindex, handle, + dump_filter_nlmsg, &filter_info); if (ret) goto out; @@ -386,7 +651,7 @@ static int do_show(int argc, char **argv) struct bpf_attach_info attach_info = {}; int i, sock, ret, filter_idx = -1; struct bpf_netdev_t dev_array; - unsigned int nl_pid; + unsigned int nl_pid = 0; char err_buf[256]; if (argc == 2) { @@ -401,7 +666,7 @@ static int do_show(int argc, char **argv) if (ret) return -1; - sock = libbpf_netlink_open(&nl_pid); + sock = netlink_open(&nl_pid); if (sock < 0) { fprintf(stderr, "failed to open netlink sock\n"); return -1; @@ -416,7 +681,7 @@ static int do_show(int argc, char **argv) jsonw_start_array(json_wtr); NET_START_OBJECT; NET_START_ARRAY("xdp", "%s:\n"); - ret = libbpf_nl_get_link(sock, nl_pid, dump_link_nlmsg, &dev_array); + ret = netlink_get_link(sock, nl_pid, dump_link_nlmsg, &dev_array); NET_END_ARRAY("\n"); if (!ret) { diff 
--git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 954bc2bd040c..65931e989eea 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -130,18 +130,6 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name, int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, __u32 *off); -struct nlattr; -typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); -int libbpf_netlink_open(unsigned int *nl_pid); -int libbpf_nl_get_link(int sock, unsigned int nl_pid, - libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie); -int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex, - libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie); -int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, - libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie); -int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, - libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie); - struct btf_ext_info { /* * info points to the individual info section (e.g. func_info and diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 312f887570b2..2465538a5ba9 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -22,6 +22,8 @@ #define SOL_NETLINK 270 #endif +typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); + typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t, void *cookie); @@ -31,7 +33,7 @@ struct xdp_id_md { struct xdp_link_info info; }; -int libbpf_netlink_open(__u32 *nl_pid) +static int libbpf_netlink_open(__u32 *nl_pid) { struct sockaddr_nl sa; socklen_t addrlen; @@ -283,6 +285,9 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb) return 0; } +static int libbpf_nl_get_link(int sock, unsigned int nl_pid, + libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie); + int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, size_t info_size, __u32 flags) { @@ -368,121 +373,3 @@ int libbpf_nl_get_link(int sock, unsigned int nl_pid, return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg, dump_link_nlmsg, cookie); } - -static int __dump_class_nlmsg(struct nlmsghdr *nlh, - libbpf_dump_nlmsg_t dump_class_nlmsg, - void *cookie) -{ - struct nlattr *tb[TCA_MAX + 1], *attr; - struct tcmsg *t = NLMSG_DATA(nlh); - int len; - - len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); - attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); - if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) - return -LIBBPF_ERRNO__NLPARSE; - - return dump_class_nlmsg(cookie, t, tb); -} - -int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex, - libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie) -{ - struct { - struct nlmsghdr nlh; - struct tcmsg t; - } req = { - .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), - .nlh.nlmsg_type = RTM_GETTCLASS, - .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, - .t.tcm_family = AF_UNSPEC, - .t.tcm_ifindex = ifindex, - }; - int seq = time(NULL); - - req.nlh.nlmsg_seq = seq; - if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) - return -errno; - - return bpf_netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg, - dump_class_nlmsg, cookie); -} - -static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh, - libbpf_dump_nlmsg_t dump_qdisc_nlmsg, - void *cookie) -{ - struct nlattr *tb[TCA_MAX + 1], *attr; - struct tcmsg *t = NLMSG_DATA(nlh); - int len; - - len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); - attr = (struct nlattr *) ((void *) t + 
NLMSG_ALIGN(sizeof(*t))); - if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) - return -LIBBPF_ERRNO__NLPARSE; - - return dump_qdisc_nlmsg(cookie, t, tb); -} - -int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, - libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie) -{ - struct { - struct nlmsghdr nlh; - struct tcmsg t; - } req = { - .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), - .nlh.nlmsg_type = RTM_GETQDISC, - .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, - .t.tcm_family = AF_UNSPEC, - .t.tcm_ifindex = ifindex, - }; - int seq = time(NULL); - - req.nlh.nlmsg_seq = seq; - if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) - return -errno; - - return bpf_netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg, - dump_qdisc_nlmsg, cookie); -} - -static int __dump_filter_nlmsg(struct nlmsghdr *nlh, - libbpf_dump_nlmsg_t dump_filter_nlmsg, - void *cookie) -{ - struct nlattr *tb[TCA_MAX + 1], *attr; - struct tcmsg *t = NLMSG_DATA(nlh); - int len; - - len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); - attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); - if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) - return -LIBBPF_ERRNO__NLPARSE; - - return dump_filter_nlmsg(cookie, t, tb); -} - -int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, - libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie) -{ - struct { - struct nlmsghdr nlh; - struct tcmsg t; - } req = { - .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), - .nlh.nlmsg_type = RTM_GETTFILTER, - .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, - .t.tcm_family = AF_UNSPEC, - .t.tcm_ifindex = ifindex, - .t.tcm_parent = handle, - }; - int seq = time(NULL); - - req.nlh.nlmsg_seq = seq; - if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) - return -errno; - - return bpf_netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg, - dump_filter_nlmsg, cookie); -} -- cgit v1.2.3 From 85367030a6c7ef3373347cf816c698995074f6f0 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 18:36:06 -0700 Subject: libbpf: Centralize poisoning and poison reallocarray() Most of libbpf source files already include libbpf_internal.h, so it's a good place to centralize identifier poisoning. So move kernel integer type poisoning there. And also add reallocarray to a poison list to prevent accidental use of it. libbpf_reallocarray() should be used universally instead. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200819013607.3607269-4-andriin@fb.com --- tools/lib/bpf/bpf.c | 3 --- tools/lib/bpf/bpf_prog_linfo.c | 3 --- tools/lib/bpf/btf.c | 3 --- tools/lib/bpf/btf_dump.c | 3 --- tools/lib/bpf/hashmap.c | 3 +++ tools/lib/bpf/libbpf.c | 3 --- tools/lib/bpf/libbpf_internal.h | 7 +++++++ tools/lib/bpf/libbpf_probes.c | 3 --- tools/lib/bpf/netlink.c | 3 --- tools/lib/bpf/nlattr.c | 9 +++------ tools/lib/bpf/ringbuf.c | 3 --- tools/lib/bpf/xsk.c | 3 --- 12 files changed, 13 insertions(+), 33 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 0750681057c2..82b983ff6569 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -32,9 +32,6 @@ #include "libbpf.h" #include "libbpf_internal.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - /* * When building perf, unistd.h is overridden. __NR_bpf is * required to be defined explicitly. 
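For readers unfamiliar with identifier poisoning, the mechanism this patch centralizes boils down to the following standalone sketch; the function and variable names are made up for illustration and do not appear in the tree.

#include <stdlib.h>

/* after this pragma, any mention of the poisoned identifier is a hard
 * compile-time error; this is how libbpf now enforces using
 * libbpf_reallocarray() instead of raw reallocarray()
 */
#pragma GCC poison reallocarray

void *grow_buf(void *buf, size_t nmemb, size_t size)
{
        /* return reallocarray(buf, nmemb, size); would abort compilation
         * with: error: attempt to use poisoned "reallocarray"
         */
        return realloc(buf, nmemb * size); /* realloc itself is not poisoned;
                                            * a real caller would use the
                                            * overflow-checked helper instead
                                            */
}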
diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c index bafca49cb1e6..3ed1a27b5f7c 100644 --- a/tools/lib/bpf/bpf_prog_linfo.c +++ b/tools/lib/bpf/bpf_prog_linfo.c @@ -8,9 +8,6 @@ #include "libbpf.h" #include "libbpf_internal.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - struct bpf_prog_linfo { void *raw_linfo; void *raw_jited_linfo; diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 1b7d85d94a07..a3d259e614b0 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -21,9 +21,6 @@ #include "libbpf_internal.h" #include "hashmap.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - #define BTF_MAX_NR_TYPES 0x7fffffffU #define BTF_MAX_STR_OFFSET 0x7fffffffU diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 1ad852ad0a86..0eaafd9bcfea 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -19,9 +19,6 @@ #include "libbpf.h" #include "libbpf_internal.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t"; static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1; diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c index a405dad068f5..3c20b126d60d 100644 --- a/tools/lib/bpf/hashmap.c +++ b/tools/lib/bpf/hashmap.c @@ -15,6 +15,9 @@ /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 +/* prevent accidental re-addition of reallocarray() */ +#pragma GCC poison reallocarray + /* start with 4 buckets */ #define HASHMAP_MIN_CAP_BITS 2 diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2653bcee73b7..4b96e0eefea8 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -55,9 +55,6 @@ #include "libbpf_internal.h" #include "hashmap.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - #ifndef EM_BPF #define EM_BPF 247 #endif diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 65931e989eea..c8ed352671d5 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -10,6 +10,13 @@ #define __LIBBPF_LIBBPF_INTERNAL_H #include + +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + +/* prevent accidental re-addition of reallocarray() */ +#pragma GCC poison reallocarray + #include "libbpf.h" #define BTF_INFO_ENC(kind, kind_flag, vlen) \ diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 5a3d3f078408..010c9a76fd2b 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -17,9 +17,6 @@ #include "libbpf.h" #include "libbpf_internal.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - static bool grep(const char *buffer, const char *pattern) { return !!strstr(buffer, pattern); diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 2465538a5ba9..4dd73de00b6f 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -15,9 +15,6 @@ #include "libbpf_internal.h" #include "nlattr.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - #ifndef SOL_NETLINK #define SOL_NETLINK 270 #endif diff --git 
a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index 0ad41dfea8eb..b607fa9852b1 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -7,14 +7,11 @@ */ #include -#include "nlattr.h" -#include "libbpf_internal.h" -#include #include #include - -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 +#include +#include "nlattr.h" +#include "libbpf_internal.h" static uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = { [LIBBPF_NLA_U8] = sizeof(uint8_t), diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 5bd234be8a14..5c6522c89af1 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -21,9 +21,6 @@ #include "libbpf_internal.h" #include "bpf.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - struct ring { ring_buffer_sample_fn sample_cb; void *ctx; diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index f7f4efb70a4c..a9b02103767b 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -32,9 +32,6 @@ #include "libbpf_internal.h" #include "xsk.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - #ifndef SOL_XDP #define SOL_XDP 283 #endif -- cgit v1.2.3 From 22dd1ac91a77675238d10059c479ab6fa6160c60 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 18:36:07 -0700 Subject: tools: Remove feature-libelf-mmap feature detection It's trivial to handle missing ELF_C_MMAP_READ support in libelf the way that objtool has solved it in ("774bec3fddcc objtool: Add fallback from ELF_C_READ_MMAP to ELF_C_READ"). So instead of having an entire feature detector for that, just do what objtool does for perf and libbpf. And keep their Makefiles a bit simpler. 
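To make the fallback concrete, here is a minimal sketch of the idiom this patch adopts; open_elf() and its error handling are illustrative, not code from the tree.

#include <fcntl.h>
#include <libelf.h>

/* if libelf is too old to offer ELF_C_READ_MMAP, fall back to plain
 * read(), exactly as the libbpf.c hunk below does
 */
#ifndef ELF_C_READ_MMAP
#define ELF_C_READ_MMAP ELF_C_READ
#endif

Elf *open_elf(const char *path, int *fd)
{
        if (elf_version(EV_CURRENT) == EV_NONE)
                return NULL; /* libelf initialization failed */

        *fd = open(path, O_RDONLY | O_CLOEXEC);
        if (*fd < 0)
                return NULL;

        return elf_begin(*fd, ELF_C_READ_MMAP, NULL);
}

/* build with: cc -o sketch sketch.c -lelf */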
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200819013607.3607269-5-andriin@fb.com --- tools/build/Makefile.feature | 1 - tools/build/feature/Makefile | 4 ---- tools/build/feature/test-all.c | 4 ---- tools/build/feature/test-libelf-mmap.c | 9 --------- tools/lib/bpf/Makefile | 6 +----- tools/lib/bpf/libbpf.c | 14 ++++++-------- tools/perf/Makefile.config | 4 ---- tools/perf/util/symbol.h | 2 +- 8 files changed, 8 insertions(+), 36 deletions(-) delete mode 100644 tools/build/feature/test-libelf-mmap.c (limited to 'tools') diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index c1daf4d57518..38415d251075 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -46,7 +46,6 @@ FEATURE_TESTS_BASIC := \ libelf-getphdrnum \ libelf-gelf_getnote \ libelf-getshdrstrndx \ - libelf-mmap \ libnuma \ numa_num_possible_cpus \ libperl \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index d220fe952747..b2a2347c67ed 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -25,7 +25,6 @@ FILES= \ test-libelf-getphdrnum.bin \ test-libelf-gelf_getnote.bin \ test-libelf-getshdrstrndx.bin \ - test-libelf-mmap.bin \ test-libdebuginfod.bin \ test-libnuma.bin \ test-numa_num_possible_cpus.bin \ @@ -146,9 +145,6 @@ $(OUTPUT)test-dwarf.bin: $(OUTPUT)test-dwarf_getlocations.bin: $(BUILD) $(DWARFLIBS) -$(OUTPUT)test-libelf-mmap.bin: - $(BUILD) -lelf - $(OUTPUT)test-libelf-getphdrnum.bin: $(BUILD) -lelf diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 5479e543b194..5284e6e9c756 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -30,10 +30,6 @@ # include "test-libelf.c" #undef main -#define main main_test_libelf_mmap -# include "test-libelf-mmap.c" -#undef main - #define main main_test_get_current_dir_name # include "test-get_current_dir_name.c" #undef main diff --git a/tools/build/feature/test-libelf-mmap.c b/tools/build/feature/test-libelf-mmap.c deleted file mode 100644 index 2c3ef81affe2..000000000000 --- a/tools/build/feature/test-libelf-mmap.c +++ /dev/null @@ -1,9 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include - -int main(void) -{ - Elf *elf = elf_begin(0, ELF_C_READ_MMAP, 0); - - return (long)elf; -} diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 621ad96d06fd..c5dbfafdf889 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -56,7 +56,7 @@ ifndef VERBOSE endif FEATURE_USER = .libbpf -FEATURE_TESTS = libelf libelf-mmap zlib bpf +FEATURE_TESTS = libelf zlib bpf FEATURE_DISPLAY = libelf zlib bpf INCLUDES = -I. 
-I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi @@ -98,10 +98,6 @@ else CFLAGS := -g -Wall endif -ifeq ($(feature-libelf-mmap), 1) - override CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT -endif - # Append required CFLAGS override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum override CFLAGS += -Werror -Wall diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4b96e0eefea8..4a81c6b2d21b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -150,12 +150,6 @@ static void pr_perm_msg(int err) ___err; }) #endif -#ifdef HAVE_LIBELF_MMAP_SUPPORT -# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ_MMAP -#else -# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ -#endif - static inline __u64 ptr_to_u64(const void *ptr) { return (__u64) (unsigned long) ptr; @@ -1064,6 +1058,11 @@ static void bpf_object__elf_finish(struct bpf_object *obj) obj->efile.obj_buf_sz = 0; } +/* if libelf is old and doesn't support mmap(), fall back to read() */ +#ifndef ELF_C_READ_MMAP +#define ELF_C_READ_MMAP ELF_C_READ +#endif + static int bpf_object__elf_init(struct bpf_object *obj) { int err = 0; @@ -1092,8 +1091,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) return err; } - obj->efile.elf = elf_begin(obj->efile.fd, - LIBBPF_ELF_C_READ_MMAP, NULL); + obj->efile.elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL); } if (!obj->efile.elf) { diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 190be4fa5c21..81bb099f6f06 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -483,10 +483,6 @@ ifndef NO_LIBELF EXTLIBS += -lelf $(call detected,CONFIG_LIBELF) - ifeq ($(feature-libelf-mmap), 1) - CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT - endif - ifeq ($(feature-libelf-getphdrnum), 1) CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT endif diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index ff4f4c47e148..03e264a27cd3 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -28,7 +28,7 @@ struct option; * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP; * for newer versions we can use mmap to reduce memory usage: */ -#ifdef HAVE_LIBELF_MMAP_SUPPORT +#ifdef ELF_C_READ_MMAP # define PERF_ELF_C_READ_MMAP ELF_C_READ_MMAP #else # define PERF_ELF_C_READ_MMAP ELF_C_READ -- cgit v1.2.3 From 3fc32f40c40207bf85ce1b007f18981c4673df96 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Aug 2020 12:45:15 -0700 Subject: libbpf: Implement type-based CO-RE relocations support Implement support for TYPE_EXISTS/TYPE_SIZE/TYPE_ID_LOCAL/TYPE_ID_REMOTE relocations. These are examples of type-based relocations, as opposed to field-based relocations supported already. The difference is that they are calculating relocation values based on the type itself, not a field within a struct/union. Type-based relos have slightly different semantics when matching local types to kernel target types, see comments in bpf_core_types_are_compat() for details. Their behavior on failure to find target type in kernel BTF also differs. Instead of "poisoning" relocatable instruction and failing load subsequently in kernel, they return 0 (which is rarely a valid return result, so user BPF code can use that to detect success/failure of the relocation and deal with it without extra "guarding" relocations). Also, it's always possible to check existence of the type in target kernel with TYPE_EXISTS relocation, similarly to a field-based FIELD_EXISTS. 
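(Illustration, not part of the commit message: a hypothetical BPF program exercising the new relocations. It assumes a bpftool-generated vmlinux.h and a Clang new enough to provide the underlying built-ins; only the bpf_core_type_*() macros themselves come from this patch.)

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>

SEC("raw_tp/sched_switch")
int probe_types(void *ctx)
{
        /* all three values are filled in at load time by libbpf, via
         * TYPE_EXISTS, TYPE_ID_LOCAL/TYPE_ID_TARGET and TYPE_SIZE relocations
         */
        if (bpf_core_type_exists(struct task_struct)) {
                __u32 local_id = bpf_core_type_id_local(struct task_struct);
                __u32 kern_id = bpf_core_type_id_kernel(struct task_struct);
                __u32 sz = bpf_core_type_size(struct task_struct);

                bpf_printk("task_struct: local id %u, kernel id %u, size %u",
                           local_id, kern_id, sz);
        }
        return 0;
}

char LICENSE[] SEC("license") = "GPL";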
TYPE_ID_LOCAL relocation is a bit special in that it always succeeds (barring any libbpf/Clang bugs) and resolved to BTF ID using **local** BTF info of BPF program itself. Tests in subsequent patches demonstrate the usage and semantics of new relocations. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200819194519.3375898-2-andriin@fb.com --- tools/lib/bpf/bpf_core_read.h | 52 ++++++++- tools/lib/bpf/libbpf.c | 231 ++++++++++++++++++++++++++++++++++++---- tools/lib/bpf/libbpf_internal.h | 4 + 3 files changed, 263 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 03152cb143b7..684bfb86361a 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -19,6 +19,18 @@ enum bpf_field_info_kind { BPF_FIELD_RSHIFT_U64 = 5, }; +/* second argument to __builtin_btf_type_id() built-in */ +enum bpf_type_id_kind { + BPF_TYPE_ID_LOCAL = 0, /* BTF type ID in local program */ + BPF_TYPE_ID_TARGET = 1, /* BTF type ID in target kernel */ +}; + +/* second argument to __builtin_preserve_type_info() built-in */ +enum bpf_type_info_kind { + BPF_TYPE_EXISTS = 0, /* type existence in target kernel */ + BPF_TYPE_SIZE = 1, /* type size in target kernel */ +}; + #define __CORE_RELO(src, field, info) \ __builtin_preserve_field_info((src)->field, BPF_FIELD_##info) @@ -94,12 +106,50 @@ enum bpf_field_info_kind { __builtin_preserve_field_info(field, BPF_FIELD_EXISTS) /* - * Convenience macro to get byte size of a field. Works for integers, + * Convenience macro to get the byte size of a field. Works for integers, * struct/unions, pointers, arrays, and enums. */ #define bpf_core_field_size(field) \ __builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE) +/* + * Convenience macro to get BTF type ID of a specified type, using a local BTF + * information. Return 32-bit unsigned integer with type ID from program's own + * BTF. Always succeeds. + */ +#define bpf_core_type_id_local(type) \ + __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_LOCAL) + +/* + * Convenience macro to get BTF type ID of a target kernel's type that matches + * specified local type. + * Returns: + * - valid 32-bit unsigned type ID in kernel BTF; + * - 0, if no matching type was found in a target kernel BTF. + */ +#define bpf_core_type_id_kernel(type) \ + __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_TARGET) + +/* + * Convenience macro to check that provided named type + * (struct/union/enum/typedef) exists in a target kernel. + * Returns: + * 1, if such type is present in target kernel's BTF; + * 0, if no matching type is found. + */ +#define bpf_core_type_exists(type) \ + __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS) + +/* + * Convenience macro to get the byte size of a provided named type + * (struct/union/enum/typedef) in a target kernel. + * Returns: + * >= 0 size (in bytes), if type is present in target kernel's BTF; + * 0, if no matching type is found. 
+ */ +#define bpf_core_type_size(type) \ + __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_SIZE) + /* * bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures * offset relocation for source address using __builtin_preserve_access_index() diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4a81c6b2d21b..882759dfc33e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4111,6 +4111,10 @@ static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) case BPF_FIELD_SIGNED: return "signed"; case BPF_FIELD_LSHIFT_U64: return "lshift_u64"; case BPF_FIELD_RSHIFT_U64: return "rshift_u64"; + case BPF_TYPE_ID_LOCAL: return "local_type_id"; + case BPF_TYPE_ID_TARGET: return "target_type_id"; + case BPF_TYPE_EXISTS: return "type_exists"; + case BPF_TYPE_SIZE: return "type_size"; default: return "unknown"; } } @@ -4130,6 +4134,19 @@ static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) } } +static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) +{ + switch (kind) { + case BPF_TYPE_ID_LOCAL: + case BPF_TYPE_ID_TARGET: + case BPF_TYPE_EXISTS: + case BPF_TYPE_SIZE: + return true; + default: + return false; + } +} + /* * Turn bpf_core_relo into a low- and high-level spec representation, * validating correctness along the way, as well as calculating resulting @@ -4160,6 +4177,9 @@ static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) * - field 'a' access (corresponds to '2' in low-level spec); * - array element #3 access (corresponds to '3' in low-level spec). * + * Type-based relocations (TYPE_EXISTS/TYPE_SIZE, + * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their + * spec and raw_spec are kept empty. */ static int bpf_core_parse_spec(const struct btf *btf, __u32 type_id, @@ -4182,6 +4202,13 @@ static int bpf_core_parse_spec(const struct btf *btf, spec->root_type_id = type_id; spec->relo_kind = relo_kind; + /* type-based relocations don't have a field access string */ + if (core_relo_is_type_based(relo_kind)) { + if (strcmp(spec_str, "0")) + return -EINVAL; + return 0; + } + /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */ while (*spec_str) { if (*spec_str == ':') @@ -4317,7 +4344,7 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, const struct btf *targ_btf) { size_t local_essent_len, targ_essent_len; - const char *local_name, *targ_name, *targ_kind; + const char *local_name, *targ_name; const struct btf_type *t, *local_t; struct ids_vec *cand_ids; __u32 *new_ids; @@ -4339,13 +4366,11 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, n = btf__get_nr_types(targ_btf); for (i = 1; i <= n; i++) { t = btf__type_by_id(targ_btf, i); - targ_name = btf__name_by_offset(targ_btf, t->name_off); - if (str_is_empty(targ_name)) + if (btf_kind(t) != btf_kind(local_t)) continue; - targ_kind = btf_kind_str(t); - t = skip_mods_and_typedefs(targ_btf, i, NULL); - if (!btf_is_composite(t) && !btf_is_array(t)) + targ_name = btf__name_by_offset(targ_btf, t->name_off); + if (str_is_empty(targ_name)) continue; targ_essent_len = bpf_core_essential_name_len(targ_name); @@ -4355,7 +4380,7 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, if (strncmp(local_name, targ_name, local_essent_len) == 0) { pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s\n", local_type_id, btf_kind_str(local_t), - local_name, i, targ_kind, targ_name); + local_name, i, btf_kind_str(t), targ_name); new_ids = 
libbpf_reallocarray(cand_ids->data, cand_ids->len + 1, sizeof(*cand_ids->data)); @@ -4373,8 +4398,9 @@ err_out: return ERR_PTR(err); } -/* Check two types for compatibility, skipping const/volatile/restrict and - * typedefs, to ensure we are relocating compatible entities: +/* Check two types for compatibility for the purpose of field access + * relocation. const/volatile/restrict and typedefs are skipped to ensure we + * are relocating semantically compatible entities: * - any two STRUCTs/UNIONs are compatible and can be mixed; * - any two FWDs are compatible, if their names match (modulo flavor suffix); * - any two PTRs are always compatible; @@ -4529,6 +4555,100 @@ static int bpf_core_match_member(const struct btf *local_btf, return 0; } +/* Check local and target types for compatibility. This check is used for + * type-based CO-RE relocations and follows slightly different rules than + * field-based relocations. This function assumes that root types were already + * checked for name match. Beyond that initial root-level name check, names + * are completely ignored. Compatibility rules are as follows: + * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but + * kind should match for local and target types (i.e., STRUCT is not + * compatible with UNION); + * - for ENUMs, the size is ignored; + * - for INT, size and signedness are ignored; + * - for ARRAY, dimensionality is ignored, element types are checked for + * compatibility recursively; + * - CONST/VOLATILE/RESTRICT modifiers are ignored; + * - TYPEDEFs/PTRs are compatible if the types they point to are compatible; + * - FUNC_PROTOs are compatible if they have compatible signatures: same + * number of input args and compatible return and argument types. + * These rules are not set in stone and probably will be adjusted as we get + * more experience with using BPF CO-RE relocations.
+ */ +static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, + const struct btf *targ_btf, __u32 targ_id) +{ + const struct btf_type *local_type, *targ_type; + int depth = 32; /* max recursion depth */ + + /* caller made sure that names match (ignoring flavor suffix) */ + local_type = btf__type_by_id(local_btf, local_id); + targ_type = btf__type_by_id(targ_btf, targ_id); + if (btf_kind(local_type) != btf_kind(targ_type)) + return 0; + +recur: + depth--; + if (depth < 0) + return -EINVAL; + + local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!local_type || !targ_type) + return -EINVAL; + + if (btf_kind(local_type) != btf_kind(targ_type)) + return 0; + + switch (btf_kind(local_type)) { + case BTF_KIND_UNKN: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_ENUM: + case BTF_KIND_FWD: + return 1; + case BTF_KIND_INT: + /* just reject deprecated bitfield-like integers; all other + * integers are by default compatible between each other + */ + return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0; + case BTF_KIND_PTR: + local_id = local_type->type; + targ_id = targ_type->type; + goto recur; + case BTF_KIND_ARRAY: + local_id = btf_array(local_type)->type; + targ_id = btf_array(targ_type)->type; + goto recur; + case BTF_KIND_FUNC_PROTO: { + struct btf_param *local_p = btf_params(local_type); + struct btf_param *targ_p = btf_params(targ_type); + __u16 local_vlen = btf_vlen(local_type); + __u16 targ_vlen = btf_vlen(targ_type); + int i, err; + + if (local_vlen != targ_vlen) + return 0; + + for (i = 0; i < local_vlen; i++, local_p++, targ_p++) { + skip_mods_and_typedefs(local_btf, local_p->type, &local_id); + skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id); + err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id); + if (err <= 0) + return err; + } + + /* tail recurse for return type check */ + skip_mods_and_typedefs(local_btf, local_type->type, &local_id); + skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id); + goto recur; + } + default: + pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n", + btf_kind_str(local_type), local_id, targ_id); + return 0; + } +} + /* * Try to match local spec to a target type and, if successful, produce full * target spec (high-level, low-level + bit offset).
@@ -4547,6 +4667,12 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, targ_spec->root_type_id = targ_id; targ_spec->relo_kind = local_spec->relo_kind; + if (core_relo_is_type_based(local_spec->relo_kind)) { + return bpf_core_types_are_compat(local_spec->btf, + local_spec->root_type_id, + targ_btf, targ_id); + } + local_acc = &local_spec->spec[0]; targ_acc = &targ_spec->spec[0]; @@ -4720,6 +4846,40 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, return 0; } +static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, + const struct bpf_core_spec *spec, + __u32 *val) +{ + __s64 sz; + + /* type-based relos return zero when target type is not found */ + if (!spec) { + *val = 0; + return 0; + } + + switch (relo->kind) { + case BPF_TYPE_ID_TARGET: + *val = spec->root_type_id; + break; + case BPF_TYPE_EXISTS: + *val = 1; + break; + case BPF_TYPE_SIZE: + sz = btf__resolve_size(spec->btf, spec->root_type_id); + if (sz < 0) + return -EINVAL; + *val = sz; + break; + case BPF_TYPE_ID_LOCAL: + /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */ + default: + return -EOPNOTSUPP; + } + + return 0; +} + struct bpf_core_relo_res { /* expected value in the instruction, unless validate == false */ @@ -4755,6 +4915,9 @@ static int bpf_core_calc_relo(const struct bpf_program *prog, if (core_relo_is_field_based(relo->kind)) { err = bpf_core_calc_field_relo(prog, relo, local_spec, &res->orig_val, &res->validate); err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec, &res->new_val, NULL); + } else if (core_relo_is_type_based(relo->kind)) { + err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); + err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); } if (err == -EUCLEAN) { @@ -4894,6 +5057,9 @@ static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "" : s); + if (core_relo_is_type_based(spec->relo_kind)) + return; + if (core_relo_is_field_based(spec->relo_kind)) { for (i = 0; i < spec->len; i++) { if (spec->spec[i].name) @@ -4911,6 +5077,7 @@ static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) spec->bit_offset / 8, spec->bit_offset % 8); else libbpf_print(level, " @ offset %u)", spec->bit_offset / 8); + return; } } @@ -4979,12 +5146,12 @@ static void *u32_as_hash_key(__u32 x) * between multiple relocations for the same type ID and is updated as some * of the candidates are pruned due to structural incompatibility. 
*/ -static int bpf_core_reloc_field(struct bpf_program *prog, - const struct bpf_core_relo *relo, - int relo_idx, - const struct btf *local_btf, - const struct btf *targ_btf, - struct hashmap *cand_cache) +static int bpf_core_apply_relo(struct bpf_program *prog, + const struct bpf_core_relo *relo, + int relo_idx, + const struct btf *local_btf, + const struct btf *targ_btf, + struct hashmap *cand_cache) { const char *prog_name = bpf_program__title(prog, false); struct bpf_core_spec local_spec, cand_spec, targ_spec; @@ -5003,7 +5170,7 @@ static int bpf_core_reloc_field(struct bpf_program *prog, return -EINVAL; local_name = btf__name_by_offset(local_btf, local_type->name_off); - if (str_is_empty(local_name)) + if (!local_name) return -EINVAL; spec_str = btf__name_by_offset(local_btf, relo->access_str_off); @@ -5014,7 +5181,8 @@ static int bpf_core_reloc_field(struct bpf_program *prog, if (err) { pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", prog_name, relo_idx, local_id, btf_kind_str(local_type), - local_name, spec_str, err); + str_is_empty(local_name) ? "" : local_name, + spec_str, err); return -EINVAL; } @@ -5023,12 +5191,28 @@ static int bpf_core_reloc_field(struct bpf_program *prog, bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); libbpf_print(LIBBPF_DEBUG, "\n"); + /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ + if (relo->kind == BPF_TYPE_ID_LOCAL) { + targ_res.validate = true; + targ_res.poison = false; + targ_res.orig_val = local_spec.root_type_id; + targ_res.new_val = local_spec.root_type_id; + goto patch_insn; + } + + /* libbpf doesn't support candidate search for anonymous types */ + if (str_is_empty(spec_str)) { + pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n", + prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); + return -EOPNOTSUPP; + } + if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) { cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf); if (IS_ERR(cand_ids)) { pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld", - prog_name, relo_idx, local_id, btf_kind_str(local_type), local_name, - PTR_ERR(cand_ids)); + prog_name, relo_idx, local_id, btf_kind_str(local_type), + local_name, PTR_ERR(cand_ids)); return PTR_ERR(cand_ids); } err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL); @@ -5084,7 +5268,7 @@ static int bpf_core_reloc_field(struct bpf_program *prog, return -EINVAL; } - cand_ids->data[j++] = cand_spec.spec[0].type_id; + cand_ids->data[j++] = cand_spec.root_type_id; } /* @@ -5103,7 +5287,7 @@ static int bpf_core_reloc_field(struct bpf_program *prog, * as well as expected case, depending whether instruction w/ * relocation is guarded in some way that makes it unreachable (dead * code) if relocation can't be resolved. This is handled in - * bpf_core_reloc_insn() uniformly by replacing that instruction with + * bpf_core_patch_insn() uniformly by replacing that instruction with * BPF helper call insn (using invalid helper ID). If that instruction * is indeed unreachable, then it will be ignored and eliminated by * verifier. 
If it was an error, then verifier will complain and point @@ -5119,6 +5303,7 @@ static int bpf_core_reloc_field(struct bpf_program *prog, return err; } +patch_insn: /* bpf_core_patch_insn() should know how to handle missing targ_spec */ err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res); if (err) { @@ -5186,8 +5371,8 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) sec_name, sec->num_info); for_each_btf_ext_rec(seg, sec, i, rec) { - err = bpf_core_reloc_field(prog, rec, i, obj->btf, - targ_btf, cand_cache); + err = bpf_core_apply_relo(prog, rec, i, obj->btf, + targ_btf, cand_cache); if (err) { pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", sec_name, i, err); diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index c8ed352671d5..edd3511aa242 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -238,6 +238,10 @@ enum bpf_core_relo_kind { BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ + BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ + BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */ + BPF_TYPE_EXISTS = 8, /* type existence in target kernel */ + BPF_TYPE_SIZE = 9, /* type size in bytes */ }; /* The minimum bpf_core_relo checked by the loader -- cgit v1.2.3 From 124a892d1c416dc4f639031afb79d47857d9b0b0 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Aug 2020 12:45:16 -0700 Subject: selftests/bpf: Test TYPE_EXISTS and TYPE_SIZE CO-RE relocations Add selftests for TYPE_EXISTS and TYPE_SIZE relocations, testing correctness of relocations and handling of type compatibility/incompatibility. If __builtin_preserve_type_info() is not supported by the compiler, the tests are skipped.
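For reference, these relocations surface to BPF programs via the bpf_core_type_exists() and bpf_core_type_size() macros added earlier in this series. A minimal usage sketch (illustrative only; it borrows the a_struct type from the selftests, while the function and global variable names are invented):

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <stdbool.h>
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_core_read.h>

    char _license[] SEC("license") = "GPL";

    /* local BTF definition; only its name and shape matter for CO-RE */
    struct a_struct {
        int x;
    };

    bool a_struct_exists; /* 1 if the target BTF has a matching type */
    int a_struct_sz;      /* target type's byte size, 0 if missing */

    SEC("raw_tracepoint/sys_enter")
    int probe_type_info(void *ctx)
    {
        a_struct_exists = bpf_core_type_exists(struct a_struct);
        a_struct_sz = bpf_core_type_size(struct a_struct);
        return 0;
    }

Both macros are resolved at load time against the target BTF, so the program can branch on the results without any runtime probing.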
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200819194519.3375898-3-andriin@fb.com --- .../testing/selftests/bpf/prog_tests/core_reloc.c | 125 ++++++++++--- .../bpf/progs/btf__core_reloc_type_based.c | 3 + .../btf__core_reloc_type_based___all_missing.c | 3 + .../progs/btf__core_reloc_type_based___diff_sz.c | 3 + .../btf__core_reloc_type_based___fn_wrong_args.c | 3 + .../progs/btf__core_reloc_type_based___incompat.c | 3 + .../testing/selftests/bpf/progs/core_reloc_types.h | 202 ++++++++++++++++++++- .../selftests/bpf/progs/test_core_reloc_kernel.c | 2 + .../bpf/progs/test_core_reloc_type_based.c | 124 +++++++++++++ 9 files changed, 446 insertions(+), 22 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 4d650e99be28..b775ce0ede41 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -177,14 +177,13 @@ .fails = true, \ } -#define EXISTENCE_CASE_COMMON(name) \ +#define FIELD_EXISTS_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_existence.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o", \ - .relaxed_core_relocs = true + .btf_src_file = "btf__core_reloc_" #name ".o" \ -#define EXISTENCE_ERR_CASE(name) { \ - EXISTENCE_CASE_COMMON(name), \ +#define FIELD_EXISTS_ERR_CASE(name) { \ + FIELD_EXISTS_CASE_COMMON(name), \ .fails = true, \ } @@ -253,6 +252,23 @@ .fails = true, \ } +#define TYPE_BASED_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_type_based.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" \ + +#define TYPE_BASED_CASE(name, ...) 
{ \ + TYPE_BASED_CASE_COMMON(name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_type_based_output) \ + __VA_ARGS__, \ + .output_len = sizeof(struct core_reloc_type_based_output), \ +} + +#define TYPE_BASED_ERR_CASE(name) { \ + TYPE_BASED_CASE_COMMON(name), \ + .fails = true, \ +} + struct core_reloc_test_case { const char *case_name; const char *bpf_obj_file; @@ -364,7 +380,7 @@ static struct core_reloc_test_case test_cases[] = { /* validate field existence checks */ { - EXISTENCE_CASE_COMMON(existence), + FIELD_EXISTS_CASE_COMMON(existence), .input = STRUCT_TO_CHAR_PTR(core_reloc_existence) { .a = 1, .b = 2, @@ -388,7 +404,7 @@ static struct core_reloc_test_case test_cases[] = { .output_len = sizeof(struct core_reloc_existence_output), }, { - EXISTENCE_CASE_COMMON(existence___minimal), + FIELD_EXISTS_CASE_COMMON(existence___minimal), .input = STRUCT_TO_CHAR_PTR(core_reloc_existence___minimal) { .a = 42, }, @@ -408,12 +424,12 @@ static struct core_reloc_test_case test_cases[] = { .output_len = sizeof(struct core_reloc_existence_output), }, - EXISTENCE_ERR_CASE(existence__err_int_sz), - EXISTENCE_ERR_CASE(existence__err_int_type), - EXISTENCE_ERR_CASE(existence__err_int_kind), - EXISTENCE_ERR_CASE(existence__err_arr_kind), - EXISTENCE_ERR_CASE(existence__err_arr_value_type), - EXISTENCE_ERR_CASE(existence__err_struct_type), + FIELD_EXISTS_ERR_CASE(existence__err_int_sz), + FIELD_EXISTS_ERR_CASE(existence__err_int_type), + FIELD_EXISTS_ERR_CASE(existence__err_int_kind), + FIELD_EXISTS_ERR_CASE(existence__err_arr_kind), + FIELD_EXISTS_ERR_CASE(existence__err_arr_value_type), + FIELD_EXISTS_ERR_CASE(existence__err_struct_type), /* bitfield relocation checks */ BITFIELDS_CASE(bitfields, { @@ -453,11 +469,73 @@ static struct core_reloc_test_case test_cases[] = { SIZE_CASE(size), SIZE_CASE(size___diff_sz), SIZE_ERR_CASE(size___err_ambiguous), + + /* validate type existence and size relocations */ + TYPE_BASED_CASE(type_based, { + .struct_exists = 1, + .union_exists = 1, + .enum_exists = 1, + .typedef_named_struct_exists = 1, + .typedef_anon_struct_exists = 1, + .typedef_struct_ptr_exists = 1, + .typedef_int_exists = 1, + .typedef_enum_exists = 1, + .typedef_void_ptr_exists = 1, + .typedef_func_proto_exists = 1, + .typedef_arr_exists = 1, + .struct_sz = sizeof(struct a_struct), + .union_sz = sizeof(union a_union), + .enum_sz = sizeof(enum an_enum), + .typedef_named_struct_sz = sizeof(named_struct_typedef), + .typedef_anon_struct_sz = sizeof(anon_struct_typedef), + .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef), + .typedef_int_sz = sizeof(int_typedef), + .typedef_enum_sz = sizeof(enum_typedef), + .typedef_void_ptr_sz = sizeof(void_ptr_typedef), + .typedef_func_proto_sz = sizeof(func_proto_typedef), + .typedef_arr_sz = sizeof(arr_typedef), + }), + TYPE_BASED_CASE(type_based___all_missing, { + /* all zeros */ + }), + TYPE_BASED_CASE(type_based___diff_sz, { + .struct_exists = 1, + .union_exists = 1, + .enum_exists = 1, + .typedef_named_struct_exists = 1, + .typedef_anon_struct_exists = 1, + .typedef_struct_ptr_exists = 1, + .typedef_int_exists = 1, + .typedef_enum_exists = 1, + .typedef_void_ptr_exists = 1, + .typedef_func_proto_exists = 1, + .typedef_arr_exists = 1, + .struct_sz = sizeof(struct a_struct___diff_sz), + .union_sz = sizeof(union a_union___diff_sz), + .enum_sz = sizeof(enum an_enum___diff_sz), + .typedef_named_struct_sz = sizeof(named_struct_typedef___diff_sz), + .typedef_anon_struct_sz = sizeof(anon_struct_typedef___diff_sz), + .typedef_struct_ptr_sz = 
sizeof(struct_ptr_typedef___diff_sz), + .typedef_int_sz = sizeof(int_typedef___diff_sz), + .typedef_enum_sz = sizeof(enum_typedef___diff_sz), + .typedef_void_ptr_sz = sizeof(void_ptr_typedef___diff_sz), + .typedef_func_proto_sz = sizeof(func_proto_typedef___diff_sz), + .typedef_arr_sz = sizeof(arr_typedef___diff_sz), + }), + TYPE_BASED_CASE(type_based___incompat, { + .enum_exists = 1, + .enum_sz = sizeof(enum an_enum), + }), + TYPE_BASED_CASE(type_based___fn_wrong_args, { + .struct_exists = 1, + .struct_sz = sizeof(struct a_struct), + }), }; struct data { char in[256]; char out[256]; + bool skip; uint64_t my_pid_tgid; }; @@ -516,15 +594,10 @@ void test_core_reloc(void) load_attr.log_level = 0; load_attr.target_btf_path = test_case->btf_src_file; err = bpf_object__load_xattr(&load_attr); - if (test_case->fails) { - CHECK(!err, "obj_load_fail", - "should fail to load prog '%s'\n", probe_name); + if (err) { + if (!test_case->fails) + CHECK(false, "obj_load", "failed to load prog '%s': %d\n", probe_name, err); goto cleanup; - } else { - if (CHECK(err, "obj_load", - "failed to load prog '%s': %d\n", - probe_name, err)) - goto cleanup; } data_map = bpf_object__find_map_by_name(obj, "test_cor.bss"); @@ -552,6 +625,16 @@ void test_core_reloc(void) /* trigger test run */ usleep(1); + if (data->skip) { + test__skip(); + goto cleanup; + } + + if (test_case->fails) { + CHECK(false, "obj_load_fail", "should fail to load prog '%s'\n", probe_name); + goto cleanup; + } + equal = memcmp(data->out, test_case->output, test_case->output_len) == 0; if (CHECK(!equal, "check_result", diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c new file mode 100644 index 000000000000..fc3f69e58c71 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c new file mode 100644 index 000000000000..51511648b4ec --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based___all_missing x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c new file mode 100644 index 000000000000..67db3dceb279 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based___diff_sz x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c new file mode 100644 index 000000000000..b357fc65431d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based___fn_wrong_args x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c new file mode 100644 index 000000000000..8ddf20d33d9e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c @@ -0,0 +1,3 @@ 
+#include "core_reloc_types.h" + +void f(struct core_reloc_type_based___incompat x) {} diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index 3b1126c0bc8f..d998537867a2 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -652,7 +652,7 @@ struct core_reloc_misc_extensible { }; /* - * EXISTENCE + * FIELD EXISTENCE */ struct core_reloc_existence_output { int a_exists; @@ -834,3 +834,203 @@ struct core_reloc_size___err_ambiguous2 { void *ptr_field; enum { VALUE___2 = 123 } enum_field; }; + +/* + * TYPE EXISTENCE & SIZE + */ +struct core_reloc_type_based_output { + bool struct_exists; + bool union_exists; + bool enum_exists; + bool typedef_named_struct_exists; + bool typedef_anon_struct_exists; + bool typedef_struct_ptr_exists; + bool typedef_int_exists; + bool typedef_enum_exists; + bool typedef_void_ptr_exists; + bool typedef_func_proto_exists; + bool typedef_arr_exists; + + int struct_sz; + int union_sz; + int enum_sz; + int typedef_named_struct_sz; + int typedef_anon_struct_sz; + int typedef_struct_ptr_sz; + int typedef_int_sz; + int typedef_enum_sz; + int typedef_void_ptr_sz; + int typedef_func_proto_sz; + int typedef_arr_sz; +}; + +struct a_struct { + int x; +}; + +union a_union { + int y; + int z; +}; + +typedef struct a_struct named_struct_typedef; + +typedef struct { int x, y, z; } anon_struct_typedef; + +typedef struct { + int a, b, c; +} *struct_ptr_typedef; + +enum an_enum { + AN_ENUM_VAL1 = 1, + AN_ENUM_VAL2 = 2, + AN_ENUM_VAL3 = 3, +}; + +typedef int int_typedef; + +typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef; + +typedef void *void_ptr_typedef; + +typedef int (*func_proto_typedef)(long); + +typedef char arr_typedef[20]; + +struct core_reloc_type_based { + struct a_struct f1; + union a_union f2; + enum an_enum f3; + named_struct_typedef f4; + anon_struct_typedef f5; + struct_ptr_typedef f6; + int_typedef f7; + enum_typedef f8; + void_ptr_typedef f9; + func_proto_typedef f10; + arr_typedef f11; +}; + +/* no types in target */ +struct core_reloc_type_based___all_missing { +}; + +/* different type sizes, extra modifiers, anon vs named enums, etc */ +struct a_struct___diff_sz { + long x; + int y; + char z; +}; + +union a_union___diff_sz { + char yy; + char zz; +}; + +typedef struct a_struct___diff_sz named_struct_typedef___diff_sz; + +typedef struct { long xx, yy, zzz; } anon_struct_typedef___diff_sz; + +typedef struct { + char aa[1], bb[2], cc[3]; +} *struct_ptr_typedef___diff_sz; + +enum an_enum___diff_sz { + AN_ENUM_VAL1___diff_sz = 0x123412341234, + AN_ENUM_VAL2___diff_sz = 2, +}; + +typedef unsigned long int_typedef___diff_sz; + +typedef enum an_enum___diff_sz enum_typedef___diff_sz; + +typedef const void * const void_ptr_typedef___diff_sz; + +typedef int_typedef___diff_sz (*func_proto_typedef___diff_sz)(char); + +typedef int arr_typedef___diff_sz[2]; + +struct core_reloc_type_based___diff_sz { + struct a_struct___diff_sz f1; + union a_union___diff_sz f2; + enum an_enum___diff_sz f3; + named_struct_typedef___diff_sz f4; + anon_struct_typedef___diff_sz f5; + struct_ptr_typedef___diff_sz f6; + int_typedef___diff_sz f7; + enum_typedef___diff_sz f8; + void_ptr_typedef___diff_sz f9; + func_proto_typedef___diff_sz f10; + arr_typedef___diff_sz f11; +}; + +/* incompatibilities between target and local types */ +union a_struct___incompat { /* union instead of struct */ + int x; +}; + +struct a_union___incompat { /* struct 
instead of union */ + int y; + int z; +}; + +/* typedef to union, not to struct */ +typedef union a_struct___incompat named_struct_typedef___incompat; + +/* typedef to void pointer, instead of struct */ +typedef void *anon_struct_typedef___incompat; + +/* extra pointer indirection */ +typedef struct { + int a, b, c; +} **struct_ptr_typedef___incompat; + +/* typedef of a struct with int, instead of int */ +typedef struct { int x; } int_typedef___incompat; + +/* typedef to func_proto, instead of enum */ +typedef int (*enum_typedef___incompat)(void); + +/* pointer to char instead of void */ +typedef char *void_ptr_typedef___incompat; + +/* void return type instead of int */ +typedef void (*func_proto_typedef___incompat)(long); + +/* multi-dimensional array instead of a single-dimensional */ +typedef int arr_typedef___incompat[20][2]; + +struct core_reloc_type_based___incompat { + union a_struct___incompat f1; + struct a_union___incompat f2; + /* the only valid one is enum, to check that something still succeeds */ + enum an_enum f3; + named_struct_typedef___incompat f4; + anon_struct_typedef___incompat f5; + struct_ptr_typedef___incompat f6; + int_typedef___incompat f7; + enum_typedef___incompat f8; + void_ptr_typedef___incompat f9; + func_proto_typedef___incompat f10; + arr_typedef___incompat f11; +}; + +/* func_proto with incompatible signature */ +typedef void (*func_proto_typedef___fn_wrong_ret1)(long); +typedef int * (*func_proto_typedef___fn_wrong_ret2)(long); +typedef struct { int x; } int_struct_typedef; +typedef int_struct_typedef (*func_proto_typedef___fn_wrong_ret3)(long); +typedef int (*func_proto_typedef___fn_wrong_arg)(void *); +typedef int (*func_proto_typedef___fn_wrong_arg_cnt1)(long, long); +typedef int (*func_proto_typedef___fn_wrong_arg_cnt2)(void); + +struct core_reloc_type_based___fn_wrong_args { + /* one valid type to make sure relos still work */ + struct a_struct f1; + func_proto_typedef___fn_wrong_ret1 f2; + func_proto_typedef___fn_wrong_ret2 f3; + func_proto_typedef___fn_wrong_ret3 f4; + func_proto_typedef___fn_wrong_arg f5; + func_proto_typedef___fn_wrong_arg_cnt1 f6; + func_proto_typedef___fn_wrong_arg_cnt2 f7; +}; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c index aba928fd60d3..145028b52ad8 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -11,6 +12,7 @@ char _license[] SEC("license") = "GPL"; struct { char in[256]; char out[256]; + bool skip; uint64_t my_pid_tgid; } data = {}; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c new file mode 100644 index 000000000000..6ab259d02dc0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include +#include +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; + bool skip; +} data = {}; + +struct a_struct { + int x; +}; + +union a_union { + int y; + int z; +}; + +typedef struct a_struct named_struct_typedef; + +typedef struct { int x, y, z; } anon_struct_typedef; + +typedef struct { + int a, b, c; +} *struct_ptr_typedef; + +enum an_enum { + AN_ENUM_VAL1 = 1, + AN_ENUM_VAL2 = 2, + AN_ENUM_VAL3 = 3, +}; + +typedef 
int int_typedef; + +typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef; + +typedef void *void_ptr_typedef; + +typedef int (*func_proto_typedef)(long); + +typedef char arr_typedef[20]; + +struct core_reloc_type_based { + struct a_struct f1; + union a_union f2; + enum an_enum f3; + named_struct_typedef f4; + anon_struct_typedef f5; + struct_ptr_typedef f6; + int_typedef f7; + enum_typedef f8; + void_ptr_typedef f9; + func_proto_typedef f10; + arr_typedef f11; +}; + +struct core_reloc_type_based_output { + bool struct_exists; + bool union_exists; + bool enum_exists; + bool typedef_named_struct_exists; + bool typedef_anon_struct_exists; + bool typedef_struct_ptr_exists; + bool typedef_int_exists; + bool typedef_enum_exists; + bool typedef_void_ptr_exists; + bool typedef_func_proto_exists; + bool typedef_arr_exists; + + int struct_sz; + int union_sz; + int enum_sz; + int typedef_named_struct_sz; + int typedef_anon_struct_sz; + int typedef_struct_ptr_sz; + int typedef_int_sz; + int typedef_enum_sz; + int typedef_void_ptr_sz; + int typedef_func_proto_sz; + int typedef_arr_sz; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_type_based(void *ctx) +{ +#if __has_builtin(__builtin_preserve_type_info) + struct core_reloc_type_based_output *out = (void *)&data.out; + + out->struct_exists = bpf_core_type_exists(struct a_struct); + out->union_exists = bpf_core_type_exists(union a_union); + out->enum_exists = bpf_core_type_exists(enum an_enum); + out->typedef_named_struct_exists = bpf_core_type_exists(named_struct_typedef); + out->typedef_anon_struct_exists = bpf_core_type_exists(anon_struct_typedef); + out->typedef_struct_ptr_exists = bpf_core_type_exists(struct_ptr_typedef); + out->typedef_int_exists = bpf_core_type_exists(int_typedef); + out->typedef_enum_exists = bpf_core_type_exists(enum_typedef); + out->typedef_void_ptr_exists = bpf_core_type_exists(void_ptr_typedef); + out->typedef_func_proto_exists = bpf_core_type_exists(func_proto_typedef); + out->typedef_arr_exists = bpf_core_type_exists(arr_typedef); + + out->struct_sz = bpf_core_type_size(struct a_struct); + out->union_sz = bpf_core_type_size(union a_union); + out->enum_sz = bpf_core_type_size(enum an_enum); + out->typedef_named_struct_sz = bpf_core_type_size(named_struct_typedef); + out->typedef_anon_struct_sz = bpf_core_type_size(anon_struct_typedef); + out->typedef_struct_ptr_sz = bpf_core_type_size(struct_ptr_typedef); + out->typedef_int_sz = bpf_core_type_size(int_typedef); + out->typedef_enum_sz = bpf_core_type_size(enum_typedef); + out->typedef_void_ptr_sz = bpf_core_type_size(void_ptr_typedef); + out->typedef_func_proto_sz = bpf_core_type_size(func_proto_typedef); + out->typedef_arr_sz = bpf_core_type_size(arr_typedef); +#else + data.skip = true; +#endif + return 0; +} -- cgit v1.2.3 From 4836bf5e2ec6e410c7f0c044402951484901a676 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Aug 2020 12:45:17 -0700 Subject: selftests/bpf: Add CO-RE relo test for TYPE_ID_LOCAL/TYPE_ID_TARGET Add tests for BTF type ID relocations. To allow testing this, enhance core_relo.c test runner to allow dynamic initialization of test inputs. If Clang doesn't have necessary support for new functionality, test is skipped. 
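As a rough sketch of what this test exercises from the program side (illustrative, not the selftest code itself; it reuses the selftests' a_struct type and invents the variable names):

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_core_read.h>

    char _license[] SEC("license") = "GPL";

    struct a_struct {
        int x;
    };

    __u32 local_id;  /* always resolves, against the program's own BTF */
    __u32 kernel_id; /* 0 if no matching type exists in the target BTF */

    SEC("raw_tracepoint/sys_enter")
    int probe_type_ids(void *ctx)
    {
        local_id = bpf_core_type_id_local(struct a_struct);
        kernel_id = bpf_core_type_id_kernel(struct a_struct);
        return 0;
    }

Since actual BTF type IDs depend on how each object's BTF was laid out, the expected values are computed at runtime by the setup callbacks in the diff below, using btf__parse() and btf__find_by_name_kind(), rather than being hard-coded.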
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200819194519.3375898-4-andriin@fb.com --- .../testing/selftests/bpf/prog_tests/core_reloc.c | 168 ++++++++++++++++++++- .../selftests/bpf/progs/btf__core_reloc_type_id.c | 3 + .../btf__core_reloc_type_id___missing_targets.c | 3 + .../testing/selftests/bpf/progs/core_reloc_types.h | 41 +++++ .../bpf/progs/test_core_reloc_type_based.c | 14 -- .../selftests/bpf/progs/test_core_reloc_type_id.c | 113 ++++++++++++++ 6 files changed, 323 insertions(+), 19 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index b775ce0ede41..ad550510ef69 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -3,6 +3,9 @@ #include "progs/core_reloc_types.h" #include #include +#include + +static int duration = 0; #define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name) @@ -269,6 +272,27 @@ .fails = true, \ } +#define TYPE_ID_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_type_id.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" \ + +#define TYPE_ID_CASE(name, setup_fn) { \ + TYPE_ID_CASE_COMMON(name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_type_id_output) {}, \ + .output_len = sizeof(struct core_reloc_type_id_output), \ + .setup = setup_fn, \ +} + +#define TYPE_ID_ERR_CASE(name) { \ + TYPE_ID_CASE_COMMON(name), \ + .fails = true, \ +} + +struct core_reloc_test_case; + +typedef int (*setup_test_fn)(struct core_reloc_test_case *test); + struct core_reloc_test_case { const char *case_name; const char *bpf_obj_file; @@ -280,8 +304,136 @@ struct core_reloc_test_case { bool fails; bool relaxed_core_relocs; bool direct_raw_tp; + setup_test_fn setup; }; +static int find_btf_type(const struct btf *btf, const char *name, __u32 kind) +{ + int id; + + id = btf__find_by_name_kind(btf, name, kind); + if (CHECK(id <= 0, "find_type_id", "failed to find '%s', kind %d: %d\n", name, kind, id)) + return -1; + + return id; +} + +static int setup_type_id_case_local(struct core_reloc_test_case *test) +{ + struct core_reloc_type_id_output *exp = (void *)test->output; + struct btf *local_btf = btf__parse(test->bpf_obj_file, NULL); + struct btf *targ_btf = btf__parse(test->btf_src_file, NULL); + const struct btf_type *t; + const char *name; + int i; + + if (CHECK(IS_ERR(local_btf), "local_btf", "failed: %ld\n", PTR_ERR(local_btf)) || + CHECK(IS_ERR(targ_btf), "targ_btf", "failed: %ld\n", PTR_ERR(targ_btf))) { + btf__free(local_btf); + btf__free(targ_btf); + return -EINVAL; + } + + exp->local_anon_struct = -1; + exp->local_anon_union = -1; + exp->local_anon_enum = -1; + exp->local_anon_func_proto_ptr = -1; + exp->local_anon_void_ptr = -1; + exp->local_anon_arr = -1; + + for (i = 1; i <= btf__get_nr_types(local_btf); i++) + { + t = btf__type_by_id(local_btf, i); + /* we are interested only in anonymous types */ + if (t->name_off) + continue; + + if (btf_is_struct(t) && btf_vlen(t) && + (name = btf__name_by_offset(local_btf, btf_members(t)[0].name_off)) && + strcmp(name, "marker_field") == 0) { + exp->local_anon_struct = i; + } else if 
(btf_is_union(t) && btf_vlen(t) && + (name = btf__name_by_offset(local_btf, btf_members(t)[0].name_off)) && + strcmp(name, "marker_field") == 0) { + exp->local_anon_union = i; + } else if (btf_is_enum(t) && btf_vlen(t) && + (name = btf__name_by_offset(local_btf, btf_enum(t)[0].name_off)) && + strcmp(name, "MARKER_ENUM_VAL") == 0) { + exp->local_anon_enum = i; + } else if (btf_is_ptr(t) && (t = btf__type_by_id(local_btf, t->type))) { + if (btf_is_func_proto(t) && (t = btf__type_by_id(local_btf, t->type)) && + btf_is_int(t) && (name = btf__name_by_offset(local_btf, t->name_off)) && + strcmp(name, "_Bool") == 0) { + /* ptr -> func_proto -> _Bool */ + exp->local_anon_func_proto_ptr = i; + } else if (btf_is_void(t)) { + /* ptr -> void */ + exp->local_anon_void_ptr = i; + } + } else if (btf_is_array(t) && (t = btf__type_by_id(local_btf, btf_array(t)->type)) && + btf_is_int(t) && (name = btf__name_by_offset(local_btf, t->name_off)) && + strcmp(name, "_Bool") == 0) { + /* _Bool[] */ + exp->local_anon_arr = i; + } + } + + exp->local_struct = find_btf_type(local_btf, "a_struct", BTF_KIND_STRUCT); + exp->local_union = find_btf_type(local_btf, "a_union", BTF_KIND_UNION); + exp->local_enum = find_btf_type(local_btf, "an_enum", BTF_KIND_ENUM); + exp->local_int = find_btf_type(local_btf, "int", BTF_KIND_INT); + exp->local_struct_typedef = find_btf_type(local_btf, "named_struct_typedef", BTF_KIND_TYPEDEF); + exp->local_func_proto_typedef = find_btf_type(local_btf, "func_proto_typedef", BTF_KIND_TYPEDEF); + exp->local_arr_typedef = find_btf_type(local_btf, "arr_typedef", BTF_KIND_TYPEDEF); + + btf__free(local_btf); + btf__free(targ_btf); + return 0; +} + +static int setup_type_id_case_success(struct core_reloc_test_case *test) { + struct core_reloc_type_id_output *exp = (void *)test->output; + struct btf *targ_btf = btf__parse(test->btf_src_file, NULL); + int err; + + err = setup_type_id_case_local(test); + if (err) + return err; + + targ_btf = btf__parse(test->btf_src_file, NULL); + + exp->targ_struct = find_btf_type(targ_btf, "a_struct", BTF_KIND_STRUCT); + exp->targ_union = find_btf_type(targ_btf, "a_union", BTF_KIND_UNION); + exp->targ_enum = find_btf_type(targ_btf, "an_enum", BTF_KIND_ENUM); + exp->targ_int = find_btf_type(targ_btf, "int", BTF_KIND_INT); + exp->targ_struct_typedef = find_btf_type(targ_btf, "named_struct_typedef", BTF_KIND_TYPEDEF); + exp->targ_func_proto_typedef = find_btf_type(targ_btf, "func_proto_typedef", BTF_KIND_TYPEDEF); + exp->targ_arr_typedef = find_btf_type(targ_btf, "arr_typedef", BTF_KIND_TYPEDEF); + + btf__free(targ_btf); + return 0; +} + +static int setup_type_id_case_failure(struct core_reloc_test_case *test) +{ + struct core_reloc_type_id_output *exp = (void *)test->output; + int err; + + err = setup_type_id_case_local(test); + if (err) + return err; + + exp->targ_struct = 0; + exp->targ_union = 0; + exp->targ_enum = 0; + exp->targ_int = 0; + exp->targ_struct_typedef = 0; + exp->targ_func_proto_typedef = 0; + exp->targ_arr_typedef = 0; + + return 0; +} + static struct core_reloc_test_case test_cases[] = { /* validate we can find kernel image and use its BTF for relocs */ { @@ -530,6 +682,10 @@ static struct core_reloc_test_case test_cases[] = { .struct_exists = 1, .struct_sz = sizeof(struct a_struct), }), + + /* BTF_TYPE_ID_LOCAL/BTF_TYPE_ID_TARGET tests */ + TYPE_ID_CASE(type_id, setup_type_id_case_success), + TYPE_ID_CASE(type_id___missing_targets, setup_type_id_case_failure), }; struct data { @@ -551,7 +707,7 @@ void test_core_reloc(void) struct bpf_object_load_attr 
load_attr = {}; struct core_reloc_test_case *test_case; const char *tp_name, *probe_name; - int err, duration = 0, i, equal; + int err, i, equal; struct bpf_link *link = NULL; struct bpf_map *data_map; struct bpf_program *prog; @@ -567,11 +723,13 @@ void test_core_reloc(void) if (!test__start_subtest(test_case->case_name)) continue; - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, - .relaxed_core_relocs = test_case->relaxed_core_relocs, - ); + if (test_case->setup) { + err = test_case->setup(test_case); + if (CHECK(err, "test_setup", "test #%d setup failed: %d\n", i, err)) + continue; + } - obj = bpf_object__open_file(test_case->bpf_obj_file, &opts); + obj = bpf_object__open_file(test_case->bpf_obj_file, NULL); if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n", test_case->bpf_obj_file, PTR_ERR(obj))) continue; diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c new file mode 100644 index 000000000000..abbe5bddcefd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_id x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c new file mode 100644 index 000000000000..24e7caf4f013 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_id___missing_targets x) {} diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index d998537867a2..10afcc5f219f 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -1034,3 +1034,44 @@ struct core_reloc_type_based___fn_wrong_args { func_proto_typedef___fn_wrong_arg_cnt1 f6; func_proto_typedef___fn_wrong_arg_cnt2 f7; }; + +/* + * TYPE ID MAPPING (LOCAL AND TARGET) + */ +struct core_reloc_type_id_output { + int local_anon_struct; + int local_anon_union; + int local_anon_enum; + int local_anon_func_proto_ptr; + int local_anon_void_ptr; + int local_anon_arr; + + int local_struct; + int local_union; + int local_enum; + int local_int; + int local_struct_typedef; + int local_func_proto_typedef; + int local_arr_typedef; + + int targ_struct; + int targ_union; + int targ_enum; + int targ_int; + int targ_struct_typedef; + int targ_func_proto_typedef; + int targ_arr_typedef; +}; + +struct core_reloc_type_id { + struct a_struct f1; + union a_union f2; + enum an_enum f3; + named_struct_typedef f4; + func_proto_typedef f5; + arr_typedef f6; +}; + +struct core_reloc_type_id___missing_targets { + /* nothing */ +}; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c index 6ab259d02dc0..fb60f8195c53 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c @@ -48,20 +48,6 @@ typedef int (*func_proto_typedef)(long); typedef char arr_typedef[20]; -struct core_reloc_type_based { - struct a_struct f1; - union a_union f2; - enum an_enum f3; - named_struct_typedef f4; - anon_struct_typedef f5; - struct_ptr_typedef f6; - int_typedef f7; - enum_typedef f8; - void_ptr_typedef f9; - func_proto_typedef f10; - arr_typedef f11; -}; - struct 
core_reloc_type_based_output { bool struct_exists; bool union_exists; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c new file mode 100644 index 000000000000..23e6e6bf276c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include +#include +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; + bool skip; +} data = {}; + +/* some types are shared with test_core_reloc_type_based.c */ +struct a_struct { + int x; +}; + +union a_union { + int y; + int z; +}; + +enum an_enum { + AN_ENUM_VAL1 = 1, + AN_ENUM_VAL2 = 2, + AN_ENUM_VAL3 = 3, +}; + +typedef struct a_struct named_struct_typedef; + +typedef int (*func_proto_typedef)(long); + +typedef char arr_typedef[20]; + +struct core_reloc_type_id_output { + int local_anon_struct; + int local_anon_union; + int local_anon_enum; + int local_anon_func_proto_ptr; + int local_anon_void_ptr; + int local_anon_arr; + + int local_struct; + int local_union; + int local_enum; + int local_int; + int local_struct_typedef; + int local_func_proto_typedef; + int local_arr_typedef; + + int targ_struct; + int targ_union; + int targ_enum; + int targ_int; + int targ_struct_typedef; + int targ_func_proto_typedef; + int targ_arr_typedef; +}; + +/* preserve types even if Clang doesn't support built-in */ +struct a_struct t1 = {}; +union a_union t2 = {}; +enum an_enum t3 = 0; +named_struct_typedef t4 = {}; +func_proto_typedef t5 = 0; +arr_typedef t6 = {}; + +SEC("raw_tracepoint/sys_enter") +int test_core_type_id(void *ctx) +{ + /* We use __builtin_btf_type_id() in this tests, but up until the time + * __builtin_preserve_type_info() was added it contained a bug that + * would make this test fail. The bug was fixed with addition of + * __builtin_preserve_type_info(), though, so that's what we are using + * to detect whether this test has to be executed, however strange + * that might look like. 
+ */ +#if __has_builtin(__builtin_preserve_type_info) + struct core_reloc_type_id_output *out = (void *)&data.out; + + out->local_anon_struct = bpf_core_type_id_local(struct { int marker_field; }); + out->local_anon_union = bpf_core_type_id_local(union { int marker_field; }); + out->local_anon_enum = bpf_core_type_id_local(enum { MARKER_ENUM_VAL = 123 }); + out->local_anon_func_proto_ptr = bpf_core_type_id_local(_Bool(*)(int)); + out->local_anon_void_ptr = bpf_core_type_id_local(void *); + out->local_anon_arr = bpf_core_type_id_local(_Bool[47]); + + out->local_struct = bpf_core_type_id_local(struct a_struct); + out->local_union = bpf_core_type_id_local(union a_union); + out->local_enum = bpf_core_type_id_local(enum an_enum); + out->local_int = bpf_core_type_id_local(int); + out->local_struct_typedef = bpf_core_type_id_local(named_struct_typedef); + out->local_func_proto_typedef = bpf_core_type_id_local(func_proto_typedef); + out->local_arr_typedef = bpf_core_type_id_local(arr_typedef); + + out->targ_struct = bpf_core_type_id_kernel(struct a_struct); + out->targ_union = bpf_core_type_id_kernel(union a_union); + out->targ_enum = bpf_core_type_id_kernel(enum an_enum); + out->targ_int = bpf_core_type_id_kernel(int); + out->targ_struct_typedef = bpf_core_type_id_kernel(named_struct_typedef); + out->targ_func_proto_typedef = bpf_core_type_id_kernel(func_proto_typedef); + out->targ_arr_typedef = bpf_core_type_id_kernel(arr_typedef); +#else + data.skip = true; +#endif + + return 0; +} -- cgit v1.2.3 From eacaaed784e2c9da69dea3030c81062c1fd66a37 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Aug 2020 12:45:18 -0700 Subject: libbpf: Implement enum value-based CO-RE relocations Implement two relocations of a new enumerator value-based CO-RE relocation kind: ENUMVAL_EXISTS and ENUMVAL_VALUE. The first, ENUMVAL_EXISTS, allows detecting the presence of a named enumerator value in the target (kernel) BTF. This is useful for doing BPF helper/map/program type support detection from the BPF program side. The bpf_core_enum_value_exists() macro helper is provided to simplify the built-in's usage. The second, ENUMVAL_VALUE, allows capturing the enumerator's integer value and relocating it according to the target BTF, if it changes. This is useful as a guarantee against intentional or accidental re-ordering/re-numbering of some of the internal (non-UAPI) enumerations, where kernel developers don't care about UAPI backwards compatibility concerns. bpf_core_enum_value() allows capturing this succinctly and using correct enum values in code. LLVM uses the ldimm64 instruction to capture enumerator value-based relocations, so add support for ldimm64 instruction patching as well.
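To make the intended usage concrete, here is a minimal sketch of both macros (a hypothetical program; the named_enum type matches the one used by the selftests later in this series, and the globals are invented):

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <stdbool.h>
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_core_read.h>

    char _license[] SEC("license") = "GPL";

    /* local mirror; the target kernel may use different values */
    enum named_enum {
        NAMED_ENUM_VAL1 = 1,
        NAMED_ENUM_VAL2 = 2,
    };

    bool val2_exists; /* 1 if target BTF defines NAMED_ENUM_VAL2 */
    long val2;        /* value NAMED_ENUM_VAL2 has on the target */

    SEC("raw_tracepoint/sys_enter")
    int probe_enum_vals(void *ctx)
    {
        val2_exists = bpf_core_enum_value_exists(enum named_enum, NAMED_ENUM_VAL2);
        val2 = bpf_core_enum_value(enum named_enum, NAMED_ENUM_VAL2);
        return 0;
    }

Because ENUMVAL_VALUE is patched via ldimm64 at load time, the relocated value behaves like a plain constant from the verifier's point of view.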
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200819194519.3375898-5-andriin@fb.com --- tools/lib/bpf/bpf_core_read.h | 28 ++++++++ tools/lib/bpf/libbpf.c | 145 ++++++++++++++++++++++++++++++++++++++-- tools/lib/bpf/libbpf_internal.h | 2 + 3 files changed, 170 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 684bfb86361a..bbcefb3ff5a5 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -31,6 +31,12 @@ enum bpf_type_info_kind { BPF_TYPE_SIZE = 1, /* type size in target kernel */ }; +/* second argument to __builtin_preserve_enum_value() built-in */ +enum bpf_enum_value_kind { + BPF_ENUMVAL_EXISTS = 0, /* enum value existence in kernel */ + BPF_ENUMVAL_VALUE = 1, /* enum value value relocation */ +}; + #define __CORE_RELO(src, field, info) \ __builtin_preserve_field_info((src)->field, BPF_FIELD_##info) @@ -150,6 +156,28 @@ enum bpf_type_info_kind { #define bpf_core_type_size(type) \ __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_SIZE) +/* + * Convenience macro to check that a provided enumerator value is defined in + * the target kernel. + * Returns: + * 1, if the specified enum type and its enumerator value are present in the + * target kernel's BTF; + * 0, if no matching enum and/or enum value within that enum is found. + */ +#define bpf_core_enum_value_exists(enum_type, enum_value) \ + __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_EXISTS) + +/* + * Convenience macro to get the integer value of an enumerator value in + * the target kernel. + * Returns: + * 64-bit value, if the specified enum type and its enumerator value are + * present in the target kernel's BTF; + * 0, if no matching enum and/or enum value within that enum is found. + */ +#define bpf_core_enum_value(enum_type, enum_value) \ + __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_VALUE) + /* * bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures * offset relocation for source address using __builtin_preserve_access_index() diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 882759dfc33e..77d420c02094 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4115,6 +4115,8 @@ static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) case BPF_TYPE_ID_TARGET: return "target_type_id"; case BPF_TYPE_EXISTS: return "type_exists"; case BPF_TYPE_SIZE: return "type_size"; + case BPF_ENUMVAL_EXISTS: return "enumval_exists"; + case BPF_ENUMVAL_VALUE: return "enumval_value"; default: return "unknown"; } } @@ -4147,6 +4149,17 @@ static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) } } +static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) +{ + switch (kind) { + case BPF_ENUMVAL_EXISTS: + case BPF_ENUMVAL_VALUE: + return true; + default: + return false; + } +} + /* * Turn bpf_core_relo into a low- and high-level spec representation, * validating correctness along the way, as well as calculating resulting @@ -4180,6 +4193,9 @@ static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) * Type-based relocations (TYPE_EXISTS/TYPE_SIZE, * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their * spec and raw_spec are kept empty. + * + * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use the access + * string to specify the enumerator's value index that needs to be relocated.
*/ static int bpf_core_parse_spec(const struct btf *btf, __u32 type_id, @@ -4224,16 +4240,25 @@ static int bpf_core_parse_spec(const struct btf *btf, if (spec->raw_len == 0) return -EINVAL; - /* first spec value is always reloc type array index */ t = skip_mods_and_typedefs(btf, type_id, &id); if (!t) return -EINVAL; access_idx = spec->raw_spec[0]; - spec->spec[0].type_id = id; - spec->spec[0].idx = access_idx; + acc = &spec->spec[0]; + acc->type_id = id; + acc->idx = access_idx; spec->len++; + if (core_relo_is_enumval_based(relo_kind)) { + if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) + return -EINVAL; + + /* record enumerator name in a first accessor */ + acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off); + return 0; + } + if (!core_relo_is_field_based(relo_kind)) return -EINVAL; @@ -4676,6 +4701,39 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, local_acc = &local_spec->spec[0]; targ_acc = &targ_spec->spec[0]; + if (core_relo_is_enumval_based(local_spec->relo_kind)) { + size_t local_essent_len, targ_essent_len; + const struct btf_enum *e; + const char *targ_name; + + /* has to resolve to an enum */ + targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id); + if (!btf_is_enum(targ_type)) + return 0; + + local_essent_len = bpf_core_essential_name_len(local_acc->name); + + for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) { + targ_name = btf__name_by_offset(targ_spec->btf, e->name_off); + targ_essent_len = bpf_core_essential_name_len(targ_name); + if (targ_essent_len != local_essent_len) + continue; + if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) { + targ_acc->type_id = targ_id; + targ_acc->idx = i; + targ_acc->name = targ_name; + targ_spec->len++; + targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; + targ_spec->raw_len++; + return 1; + } + } + return 0; + } + + if (!core_relo_is_field_based(local_spec->relo_kind)) + return -EINVAL; + for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) { targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id); @@ -4880,6 +4938,31 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, return 0; } +static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, + const struct bpf_core_spec *spec, + __u32 *val) +{ + const struct btf_type *t; + const struct btf_enum *e; + + switch (relo->kind) { + case BPF_ENUMVAL_EXISTS: + *val = spec ? 
1 : 0; + break; + case BPF_ENUMVAL_VALUE: + if (!spec) + return -EUCLEAN; /* request instruction poisoning */ + t = btf__type_by_id(spec->btf, spec->spec[0].type_id); + e = btf_enum(t) + spec->spec[0].idx; + *val = e->val; + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + struct bpf_core_relo_res { /* expected value in the instruction, unless validate == false */ @@ -4918,6 +5001,9 @@ static int bpf_core_calc_relo(const struct bpf_program *prog, } else if (core_relo_is_type_based(relo->kind)) { err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); + } else if (core_relo_is_enumval_based(relo->kind)) { + err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val); + err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val); } if (err == -EUCLEAN) { @@ -4954,6 +5040,11 @@ static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx, insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ } +static bool is_ldimm64(struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW); +} + /* * Patch relocatable BPF instruction. * @@ -4966,6 +5057,7 @@ static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx, * Currently three kinds of BPF instructions are supported: * 1. rX = (assignment with immediate operand); * 2. rX += (arithmetic operations with immediate operand); + * 3. rX = (load with 64-bit immediate value). */ static int bpf_core_patch_insn(struct bpf_program *prog, const struct bpf_core_relo *relo, @@ -4984,6 +5076,11 @@ static int bpf_core_patch_insn(struct bpf_program *prog, class = BPF_CLASS(insn->code); if (res->poison) { + /* poison second part of ldimm64 to avoid confusing error from + * verifier about "unknown opcode 00" + */ + if (is_ldimm64(insn)) + bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1); bpf_core_poison_insn(prog, relo_idx, insn_idx, insn); return 0; } @@ -5012,7 +5109,7 @@ static int bpf_core_patch_insn(struct bpf_program *prog, case BPF_ST: case BPF_STX: if (res->validate && insn->off != orig_val) { - pr_warn("prog '%s': relo #%d: unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n", + pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n", bpf_program__title(prog, false), relo_idx, insn_idx, insn->off, orig_val, new_val); return -EINVAL; @@ -5029,8 +5126,36 @@ static int bpf_core_patch_insn(struct bpf_program *prog, bpf_program__title(prog, false), relo_idx, insn_idx, orig_val, new_val); break; + case BPF_LD: { + __u64 imm; + + if (!is_ldimm64(insn) || + insn[0].src_reg != 0 || insn[0].off != 0 || + insn_idx + 1 >= prog->insns_cnt || + insn[1].code != 0 || insn[1].dst_reg != 0 || + insn[1].src_reg != 0 || insn[1].off != 0) { + pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n", + bpf_program__title(prog, false), relo_idx, insn_idx); + return -EINVAL; + } + + imm = insn[0].imm + ((__u64)insn[1].imm << 32); + if (res->validate && imm != orig_val) { + pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n", + bpf_program__title(prog, false), relo_idx, + insn_idx, imm, orig_val, new_val); + return -EINVAL; + } + + insn[0].imm = new_val; + insn[1].imm = 0; /* currently only 32-bit values are supported */ + pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n", + bpf_program__title(prog, false), relo_idx, insn_idx, + imm, new_val); + break; + } default: - pr_warn("prog 
'%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n", bpf_program__title(prog, false), relo_idx, insn_idx, insn->code, insn->src_reg, insn->dst_reg, insn->off, insn->imm); @@ -5047,6 +5172,7 @@ static int bpf_core_patch_insn(struct bpf_program *prog, static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) { const struct btf_type *t; + const struct btf_enum *e; const char *s; __u32 type_id; int i; @@ -5060,6 +5186,15 @@ static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) if (core_relo_is_type_based(spec->relo_kind)) return; + if (core_relo_is_enumval_based(spec->relo_kind)) { + t = skip_mods_and_typedefs(spec->btf, type_id, NULL); + e = btf_enum(t) + spec->raw_spec[0]; + s = btf__name_by_offset(spec->btf, e->name_off); + + libbpf_print(level, "::%s = %u", s, e->val); + return; + } + if (core_relo_is_field_based(spec->relo_kind)) { for (i = 0; i < spec->len; i++) { if (spec->spec[i].name) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index edd3511aa242..61dff515a2f0 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -242,6 +242,8 @@ enum bpf_core_relo_kind { BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */ BPF_TYPE_EXISTS = 8, /* type existence in target kernel */ BPF_TYPE_SIZE = 9, /* type size in bytes */ + BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ + BPF_ENUMVAL_VALUE = 11, /* enum value integer value */ }; /* The minimum bpf_core_relo checked by the loader -- cgit v1.2.3 From 3357490555eacd39c2bf3e355a0e435f658d806f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Aug 2020 12:45:19 -0700 Subject: selftests/bpf: Add tests for ENUMVAL_EXISTS/ENUMVAL_VALUE relocations Add tests validating existence and value relocations for enum value-based relocations. If the __builtin_preserve_enum_value() built-in is not supported by the compiler, the tests are skipped.
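The compiler-support check mirrors the pattern used by the earlier type-based tests: the BPF side sets a skip flag that the userspace runner turns into test__skip(). Roughly (a sketch under that assumption, not the actual test file):

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <stdbool.h>
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_core_read.h>

    char _license[] SEC("license") = "GPL";

    struct {
        char in[256];
        char out[256];
        bool skip;
    } data = {};

    enum named_enum {
        NAMED_ENUM_VAL1 = 1,
    };

    SEC("raw_tracepoint/sys_enter")
    int test_core_enumval(void *ctx)
    {
    #if __has_builtin(__builtin_preserve_enum_value)
        bool *out = (void *)&data.out;

        /* relocated against the target BTF at load time */
        out[0] = bpf_core_enum_value_exists(enum named_enum, NAMED_ENUM_VAL1);
    #else
        /* old Clang: tell the userspace runner to skip this subtest */
        data.skip = true;
    #endif
        return 0;
    }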
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200819194519.3375898-6-andriin@fb.com --- .../testing/selftests/bpf/prog_tests/core_reloc.c | 56 +++++++++++++++ .../selftests/bpf/progs/btf__core_reloc_enumval.c | 3 + .../bpf/progs/btf__core_reloc_enumval___diff.c | 3 + .../progs/btf__core_reloc_enumval___err_missing.c | 3 + .../progs/btf__core_reloc_enumval___val3_missing.c | 3 + .../testing/selftests/bpf/progs/core_reloc_types.h | 84 ++++++++++++++++++++++ .../selftests/bpf/progs/test_core_reloc_enumval.c | 72 +++++++++++++++++++ 7 files changed, 224 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index ad550510ef69..30e40ff4b0d8 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -289,6 +289,23 @@ static int duration = 0; .fails = true, \ } +#define ENUMVAL_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_enumval.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" \ + +#define ENUMVAL_CASE(name, ...) { \ + ENUMVAL_CASE_COMMON(name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_enumval_output) \ + __VA_ARGS__, \ + .output_len = sizeof(struct core_reloc_enumval_output), \ +} + +#define ENUMVAL_ERR_CASE(name) { \ + ENUMVAL_CASE_COMMON(name), \ + .fails = true, \ +} + struct core_reloc_test_case; typedef int (*setup_test_fn)(struct core_reloc_test_case *test); @@ -686,6 +703,45 @@ static struct core_reloc_test_case test_cases[] = { /* BTF_TYPE_ID_LOCAL/BTF_TYPE_ID_TARGET tests */ TYPE_ID_CASE(type_id, setup_type_id_case_success), TYPE_ID_CASE(type_id___missing_targets, setup_type_id_case_failure), + + /* Enumerator value existence and value relocations */ + ENUMVAL_CASE(enumval, { + .named_val1_exists = true, + .named_val2_exists = true, + .named_val3_exists = true, + .anon_val1_exists = true, + .anon_val2_exists = true, + .anon_val3_exists = true, + .named_val1 = 1, + .named_val2 = 2, + .anon_val1 = 0x10, + .anon_val2 = 0x20, + }), + ENUMVAL_CASE(enumval___diff, { + .named_val1_exists = true, + .named_val2_exists = true, + .named_val3_exists = true, + .anon_val1_exists = true, + .anon_val2_exists = true, + .anon_val3_exists = true, + .named_val1 = 101, + .named_val2 = 202, + .anon_val1 = 0x11, + .anon_val2 = 0x22, + }), + ENUMVAL_CASE(enumval___val3_missing, { + .named_val1_exists = true, + .named_val2_exists = true, + .named_val3_exists = false, + .anon_val1_exists = true, + .anon_val2_exists = true, + .anon_val3_exists = false, + .named_val1 = 111, + .named_val2 = 222, + .anon_val1 = 0x111, + .anon_val2 = 0x222, + }), + ENUMVAL_ERR_CASE(enumval___err_missing), }; struct data { diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c new file mode 100644 index 000000000000..48e62f3f074f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c @@ -0,0 +1,3 @@ +#include 
"core_reloc_types.h" + +void f(struct core_reloc_enumval x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c new file mode 100644 index 000000000000..53e5e5a76888 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enumval___diff x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c new file mode 100644 index 000000000000..d024fb2ac06e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enumval___err_missing x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c new file mode 100644 index 000000000000..9de6595d250c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enumval___val3_missing x) {} diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index 10afcc5f219f..e6e616cb7bc9 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -1075,3 +1075,87 @@ struct core_reloc_type_id { struct core_reloc_type_id___missing_targets { /* nothing */ }; + +/* + * ENUMERATOR VALUE EXISTENCE AND VALUE RELOCATION + */ +struct core_reloc_enumval_output { + bool named_val1_exists; + bool named_val2_exists; + bool named_val3_exists; + bool anon_val1_exists; + bool anon_val2_exists; + bool anon_val3_exists; + + int named_val1; + int named_val2; + int anon_val1; + int anon_val2; +}; + +enum named_enum { + NAMED_ENUM_VAL1 = 1, + NAMED_ENUM_VAL2 = 2, + NAMED_ENUM_VAL3 = 3, +}; + +typedef enum { + ANON_ENUM_VAL1 = 0x10, + ANON_ENUM_VAL2 = 0x20, + ANON_ENUM_VAL3 = 0x30, +} anon_enum; + +struct core_reloc_enumval { + enum named_enum f1; + anon_enum f2; +}; + +/* differing enumerator values */ +enum named_enum___diff { + NAMED_ENUM_VAL1___diff = 101, + NAMED_ENUM_VAL2___diff = 202, + NAMED_ENUM_VAL3___diff = 303, +}; + +typedef enum { + ANON_ENUM_VAL1___diff = 0x11, + ANON_ENUM_VAL2___diff = 0x22, + ANON_ENUM_VAL3___diff = 0x33, +} anon_enum___diff; + +struct core_reloc_enumval___diff { + enum named_enum___diff f1; + anon_enum___diff f2; +}; + +/* missing (optional) third enum value */ +enum named_enum___val3_missing { + NAMED_ENUM_VAL1___val3_missing = 111, + NAMED_ENUM_VAL2___val3_missing = 222, +}; + +typedef enum { + ANON_ENUM_VAL1___val3_missing = 0x111, + ANON_ENUM_VAL2___val3_missing = 0x222, +} anon_enum___val3_missing; + +struct core_reloc_enumval___val3_missing { + enum named_enum___val3_missing f1; + anon_enum___val3_missing f2; +}; + +/* missing (mandatory) second enum value, should fail */ +enum named_enum___err_missing { + NAMED_ENUM_VAL1___err_missing = 1, + NAMED_ENUM_VAL3___err_missing = 3, +}; + +typedef enum { + ANON_ENUM_VAL1___err_missing = 0x111, + ANON_ENUM_VAL3___err_missing = 0x222, +} anon_enum___err_missing; + +struct core_reloc_enumval___err_missing { + enum named_enum___err_missing f1; + anon_enum___err_missing f2; +}; diff --git 
a/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c b/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c new file mode 100644 index 000000000000..e7ef3dada2bf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include +#include +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; + bool skip; +} data = {}; + +enum named_enum { + NAMED_ENUM_VAL1 = 1, + NAMED_ENUM_VAL2 = 2, + NAMED_ENUM_VAL3 = 3, +}; + +typedef enum { + ANON_ENUM_VAL1 = 0x10, + ANON_ENUM_VAL2 = 0x20, + ANON_ENUM_VAL3 = 0x30, +} anon_enum; + +struct core_reloc_enumval_output { + bool named_val1_exists; + bool named_val2_exists; + bool named_val3_exists; + bool anon_val1_exists; + bool anon_val2_exists; + bool anon_val3_exists; + + int named_val1; + int named_val2; + int anon_val1; + int anon_val2; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_enumval(void *ctx) +{ +#if __has_builtin(__builtin_preserve_enum_value) + struct core_reloc_enumval_output *out = (void *)&data.out; + enum named_enum named = 0; + anon_enum anon = 0; + + out->named_val1_exists = bpf_core_enum_value_exists(named, NAMED_ENUM_VAL1); + out->named_val2_exists = bpf_core_enum_value_exists(enum named_enum, NAMED_ENUM_VAL2); + out->named_val3_exists = bpf_core_enum_value_exists(enum named_enum, NAMED_ENUM_VAL3); + + out->anon_val1_exists = bpf_core_enum_value_exists(anon, ANON_ENUM_VAL1); + out->anon_val2_exists = bpf_core_enum_value_exists(anon_enum, ANON_ENUM_VAL2); + out->anon_val3_exists = bpf_core_enum_value_exists(anon_enum, ANON_ENUM_VAL3); + + out->named_val1 = bpf_core_enum_value(named, NAMED_ENUM_VAL1); + out->named_val2 = bpf_core_enum_value(named, NAMED_ENUM_VAL2); + /* NAMED_ENUM_VAL3 value is optional */ + + out->anon_val1 = bpf_core_enum_value(anon, ANON_ENUM_VAL1); + out->anon_val2 = bpf_core_enum_value(anon, ANON_ENUM_VAL2); + /* ANON_ENUM_VAL3 value is optional */ +#else + data.skip = true; +#endif + + return 0; +} -- cgit v1.2.3 From 6e9cab2e3fa597419ec8a117a9f3e3ebc251c120 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Wed, 19 Aug 2020 02:53:24 +0000 Subject: libbpf: Simplify the return expression of build_map_pin_path() Simplify the return expression. Signed-off-by: Xu Wang Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200819025324.14680-1-vulab@iscas.ac.cn --- tools/lib/bpf/libbpf.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 77d420c02094..829d62a3ad5f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1943,7 +1943,7 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf, static int build_map_pin_path(struct bpf_map *map, const char *path) { char buf[PATH_MAX]; - int err, len; + int len; if (!path) path = "/sys/fs/bpf"; @@ -1954,11 +1954,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path) else if (len >= PATH_MAX) return -ENAMETOOLONG; - err = bpf_map__set_pin_path(map, buf); - if (err) - return err; - - return 0; + return bpf_map__set_pin_path(map, buf); } -- cgit v1.2.3 From d71fa5c9763c24dd997a2fa4feb7a13a95bab42c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 18 Aug 2020 21:27:58 -0700 Subject: bpf: Add kernel module with user mode driver that populates bpffs. 
Add kernel module with user mode driver that populates bpffs with BPF iterators. $ mount bpffs /my/bpffs/ -t bpf $ ls -la /my/bpffs/ total 4 drwxrwxrwt 2 root root 0 Jul 2 00:27 . drwxr-xr-x 19 root root 4096 Jul 2 00:09 .. -rw------- 1 root root 0 Jul 2 00:27 maps.debug -rw------- 1 root root 0 Jul 2 00:27 progs.debug The user mode driver will load BPF Type Formats, create BPF maps, populate BPF maps, load two BPF programs, attach them to BPF iterators, and finally send two bpf_link IDs back to the kernel. The kernel will pin two bpf_links into newly mounted bpffs instance under names "progs.debug" and "maps.debug". These two files become human readable. $ cat /my/bpffs/progs.debug id name attached 11 dump_bpf_map bpf_iter_bpf_map 12 dump_bpf_prog bpf_iter_bpf_prog 27 test_pkt_access 32 test_main test_pkt_access test_pkt_access 33 test_subprog1 test_pkt_access_subprog1 test_pkt_access 34 test_subprog2 test_pkt_access_subprog2 test_pkt_access 35 test_subprog3 test_pkt_access_subprog3 test_pkt_access 36 new_get_skb_len get_skb_len test_pkt_access 37 new_get_skb_ifindex get_skb_ifindex test_pkt_access 38 new_get_constant get_constant test_pkt_access The BPF program dump_bpf_prog() in iterators.bpf.c is printing this data about all BPF programs currently loaded in the system. This information is unstable and will change from kernel to kernel as ".debug" suffix conveys. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200819042759.51280-4-alexei.starovoitov@gmail.com --- tools/lib/bpf/Makefile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index c5dbfafdf889..66b2cfadf262 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -1,6 +1,9 @@ # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) # Most of this file is copied from tools/lib/traceevent/Makefile +RM ?= rm +srctree = $(abs_srctree) + LIBBPF_VERSION := $(shell \ grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | \ sort -rV | head -n1 | cut -d'_' -f2) @@ -188,7 +191,7 @@ $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) @ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION) $(OUTPUT)libbpf.a: $(BPF_IN_STATIC) - $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ + $(QUIET_LINK)$(RM) -f $@; $(AR) rcs $@ $^ $(OUTPUT)libbpf.pc: $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \ @@ -291,7 +294,7 @@ cscope: cscope -b -q -I $(srctree)/include -f cscope.out tags: - rm -f TAGS tags + $(RM) -f TAGS tags ls *.c *.h | xargs $(TAGS_PROG) -a # Declare the contents of the .PHONY variable as phony. We keep that -- cgit v1.2.3 From edb65ee5aa2550f6035c8fb169a359bc18acb84a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 18 Aug 2020 21:27:59 -0700 Subject: selftests/bpf: Add bpffs preload test. Add a test that mounts two bpffs instances and checks progs.debug and maps.debug for sanity data. 
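At its core, the test setup is a couple of mount(2) calls; below is a minimal sketch of the equivalent manual step (the mount point is illustrative, not from the patch):

  #include <sys/mount.h>

  /* same as: mount -t bpf bpf /tmp/fs1 */
  if (mount("bpf", "/tmp/fs1", "bpf", 0, NULL))
  	return -1; /* errno describes the failure */
  /* with the preload module from the previous patch, /tmp/fs1 now
   * contains maps.debug and progs.debug
   */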
Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200819042759.51280-5-alexei.starovoitov@gmail.com --- .../testing/selftests/bpf/prog_tests/test_bpffs.c | 94 ++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/test_bpffs.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c new file mode 100644 index 000000000000..172c999e523c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +#define TDIR "/sys/kernel/debug" + +static int read_iter(char *file) +{ + /* 1024 should be enough to get contiguous 4 "iter" letters at some point */ + char buf[1024]; + int fd, len; + + fd = open(file, 0); + if (fd < 0) + return -1; + while ((len = read(fd, buf, sizeof(buf))) > 0) + if (strstr(buf, "iter")) { + close(fd); + return 0; + } + close(fd); + return -1; +} + +static int fn(void) +{ + int err, duration = 0; + + err = unshare(CLONE_NEWNS); + if (CHECK(err, "unshare", "failed: %d\n", errno)) + goto out; + + err = mount("", "/", "", MS_REC | MS_PRIVATE, NULL); + if (CHECK(err, "mount /", "failed: %d\n", errno)) + goto out; + + err = umount(TDIR); + if (CHECK(err, "umount " TDIR, "failed: %d\n", errno)) + goto out; + + err = mount("none", TDIR, "tmpfs", 0, NULL); + if (CHECK(err, "mount", "mount root failed: %d\n", errno)) + goto out; + + err = mkdir(TDIR "/fs1", 0777); + if (CHECK(err, "mkdir "TDIR"/fs1", "failed: %d\n", errno)) + goto out; + err = mkdir(TDIR "/fs2", 0777); + if (CHECK(err, "mkdir "TDIR"/fs2", "failed: %d\n", errno)) + goto out; + + err = mount("bpf", TDIR "/fs1", "bpf", 0, NULL); + if (CHECK(err, "mount bpffs "TDIR"/fs1", "failed: %d\n", errno)) + goto out; + err = mount("bpf", TDIR "/fs2", "bpf", 0, NULL); + if (CHECK(err, "mount bpffs " TDIR "/fs2", "failed: %d\n", errno)) + goto out; + + err = read_iter(TDIR "/fs1/maps.debug"); + if (CHECK(err, "reading " TDIR "/fs1/maps.debug", "failed\n")) + goto out; + err = read_iter(TDIR "/fs2/progs.debug"); + if (CHECK(err, "reading " TDIR "/fs2/progs.debug", "failed\n")) + goto out; +out: + umount(TDIR "/fs1"); + umount(TDIR "/fs2"); + rmdir(TDIR "/fs1"); + rmdir(TDIR "/fs2"); + umount(TDIR); + exit(err); +} + +void test_test_bpffs(void) +{ + int err, duration = 0, status = 0; + pid_t pid; + + pid = fork(); + if (CHECK(pid == -1, "clone", "clone failed %d", errno)) + return; + if (pid == 0) + fn(); + err = waitpid(pid, &status, 0); + if (CHECK(err == -1 && errno != ECHILD, "waitpid", "failed %d", errno)) + return; + if (CHECK(WEXITSTATUS(status), "bpffs test ", "failed %d", WEXITSTATUS(status))) + return; +} -- cgit v1.2.3 From 9b2f6fecf3b77d2457a13f77a563c07304f15775 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Aug 2020 23:14:08 -0700 Subject: libbpf: Fix detection of BPF helper call instruction BPF_CALL | BPF_JMP32 is explicitly not allowed by verifier for BPF helper calls, so don't detect it as a valid call. Also drop the check on func_id pointer, as it's currently always non-null. 
Fixes: 109cea5a594f ("libbpf: Sanitize BPF program code for bpf_probe_read_{kernel, user}[_str]") Reported-by: Yonghong Song Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200820061411.1755905-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 829d62a3ad5f..0bc1fd813408 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5840,14 +5840,12 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id) { - __u8 class = BPF_CLASS(insn->code); - - if ((class == BPF_JMP || class == BPF_JMP32) && + if (BPF_CLASS(insn->code) == BPF_JMP && BPF_OP(insn->code) == BPF_CALL && BPF_SRC(insn->code) == BPF_K && - insn->src_reg == 0 && insn->dst_reg == 0) { - if (func_id) - *func_id = insn->imm; + insn->src_reg == 0 && + insn->dst_reg == 0) { + *func_id = insn->imm; return true; } return false; -- cgit v1.2.3 From dda1ec9fc7f8383cb469a82614dbce61f357f3f8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Aug 2020 23:14:09 -0700 Subject: libbpf: Fix libbpf build on compilers missing __builtin_mul_overflow GCC compilers older than version 5 don't support __builtin_mul_overflow yet. Given GCC 4.9 is the minimal supported compiler for building the kernel and the fact that libbpf is a dependency of resolve_btfids, which is a dependency of CONFIG_DEBUG_INFO_BTF=y, this needs to be handled. This patch fixes the issue by falling back to slower detection of integer overflow in such cases. Fixes: 029258d7b228 ("libbpf: Remove any use of reallocarray() in libbpf") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200820061411.1755905-2-andriin@fb.com --- tools/lib/bpf/libbpf_internal.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 61dff515a2f0..4d1c366fca2c 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -10,6 +10,7 @@ #define __LIBBPF_LIBBPF_INTERNAL_H #include +#include /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 @@ -77,6 +78,9 @@ do { \ #define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) #define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif /* * Re-implement glibc's reallocarray() for libbpf internal-only use. * reallocarray(), unfortunately, is not available in all versions of glibc, @@ -90,8 +94,14 @@ static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size) { size_t total; +#if __has_builtin(__builtin_mul_overflow) if (unlikely(__builtin_mul_overflow(nmemb, size, &total))) return NULL; +#else + if (size == 0 || nmemb > ULONG_MAX / size) + return NULL; + total = nmemb * size; +#endif return realloc(ptr, total); } -- cgit v1.2.3 From 37a6a9e7676381cdf6868d2c013893074a1ee7b6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Aug 2020 23:14:10 -0700 Subject: selftests/bpf: Fix two minor compilation warnings reported by GCC 4.9 GCC 4.9 seems to be more strict in some regards. Fix two minor issues it reported.
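Both fixes follow the usual C89-friendly pattern, roughly (the loop body and bound are placeholders, not from the patch):

  int i;

  /* GCC 4.9 rejects for-init declarations outside C99 mode */
  for (i = 0; i < n; i++)
  	handle(i);

The other fix simply adds the missing return statement at the end of a value-returning function.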
Fixes: 1c1052e0140a ("tools/testing/selftests/bpf: Add self-tests for new helper bpf_get_ns_current_pid_tgid.") Fixes: 2d7824ffd25c ("selftests: bpf: Add test for sk_assign") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200820061411.1755905-3-andriin@fb.com --- tools/testing/selftests/bpf/prog_tests/sk_assign.c | 3 ++- tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c index 47fa04adc147..d43038d2b9e1 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c @@ -268,6 +268,7 @@ void test_sk_assign(void) int server = -1; int server_map; int self_net; + int i; self_net = open(NS_SELF, O_RDONLY); if (CHECK_FAIL(self_net < 0)) { @@ -286,7 +287,7 @@ void test_sk_assign(void) goto cleanup; } - for (int i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) { + for (i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) { struct test_sk_cfg *test = &tests[i]; const struct sockaddr *addr; const int zero = 0; diff --git a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c index ed253f252cd0..ec53b1ef90d2 100644 --- a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c +++ b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c @@ -156,4 +156,5 @@ cleanup: bpf_object__close(obj); } } + return 0; } -- cgit v1.2.3 From 149cb3395504736485df1b93133e710dcf215c56 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Aug 2020 23:14:11 -0700 Subject: selftests/bpf: List newest Clang built-ins needed for some CO-RE selftests Record which built-ins are optional and needed for some of recent BPF CO-RE subtests. Document Clang diff that fixed corner-case issue with __builtin_btf_type_id(). Suggested-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200820061411.1755905-4-andriin@fb.com --- tools/testing/selftests/bpf/README.rst | 21 +++++++++++++++++++++ .../selftests/bpf/progs/test_core_reloc_type_id.c | 4 +++- 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index e885d351595f..66acfcf15ff2 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -43,3 +43,24 @@ This is due to a llvm BPF backend bug. The fix https://reviews.llvm.org/D78466 has been pushed to llvm 10.x release branch and will be available in 10.0.1. The fix is available in llvm 11.0.0 trunk. + +BPF CO-RE-based tests and Clang version +======================================= + +A set of selftests use BPF target-specific built-ins, which might require +bleeding-edge Clang versions (Clang 12 nightly at this time). + +Few sub-tests of core_reloc test suit (part of test_progs test runner) require +the following built-ins, listed with corresponding Clang diffs introducing +them to Clang/LLVM. These sub-tests are going to be skipped if Clang is too +old to support them, they shouldn't cause build failures or runtime test +failures: + + - __builtin_btf_type_id() ([0], [1], [2]); + - __builtin_preserve_type_info(), __builtin_preserve_enum_value() ([3], [4]). 
+ + [0] https://reviews.llvm.org/D74572 + [1] https://reviews.llvm.org/D74668 + [2] https://reviews.llvm.org/D85174 + [3] https://reviews.llvm.org/D83878 + [4] https://reviews.llvm.org/D83242 diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c index 23e6e6bf276c..22aba3f6e344 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c @@ -75,10 +75,12 @@ int test_core_type_id(void *ctx) { /* We use __builtin_btf_type_id() in this tests, but up until the time * __builtin_preserve_type_info() was added it contained a bug that - * would make this test fail. The bug was fixed with addition of + * would make this test fail. The bug was fixed ([0]) with addition of * __builtin_preserve_type_info(), though, so that's what we are using * to detect whether this test has to be executed, however strange * that might look like. + * + * [0] https://reviews.llvm.org/D85174 */ #if __has_builtin(__builtin_preserve_type_info) struct core_reloc_type_id_output *out = (void *)&data.out; -- cgit v1.2.3 From 6b0a249a301e2af9adda84adbced3a2988248b95 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 21 Aug 2020 11:44:18 -0700 Subject: bpf: Implement link_query for bpf iterators This patch implements the bpf_link callback functions show_fdinfo and fill_link_info to support the link_query interface. The general interface for show_fdinfo and fill_link_info will print/fill the target_name. Each target can register show_fdinfo and fill_link_info callbacks to print/fill more target-specific information. For example, below is the fdinfo output for a bpf task iterator. $ cat /proc/1749/fdinfo/7 pos: 0 flags: 02000000 mnt_id: 14 link_type: iter link_id: 11 prog_tag: 990e1f8152f7e54f prog_id: 59 target_name: task Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200821184418.574122-1-yhs@fb.com --- tools/include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0480f893facd..a1bbaff7a0af 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4071,6 +4071,13 @@ struct bpf_link_info { __u64 cgroup_id; __u32 attach_type; } cgroup; + struct { + __aligned_u64 target_name; /* in/out: target_name buffer ptr */ + __u32 target_name_len; /* in/out: target_name buffer len */ + union { + __u32 map_id; + } map; + } iter; struct { __u32 netns_ino; __u32 attach_type; -- cgit v1.2.3 From e60495eafdba24a466a6ae62da86245a958954bc Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 21 Aug 2020 11:44:20 -0700 Subject: bpftool: Implement link_query for bpf iterators The link query for bpf iterators is implemented. Besides showing the user what bpf iterator the link represents, the target_name is also used to filter what additional information should be printed, e.g., whether map_id should be shown or not. The following is an example of a bpf_iter link dump, in plain and pretty output.
$ bpftool link show 11: iter prog 59 target_name task pids test_progs(1749) 34: iter prog 173 target_name bpf_map_elem map_id 127 pids test_progs_1(1753) $ bpftool -p link show [{ "id": 11, "type": "iter", "prog_id": 59, "target_name": "task", "pids": [{ "pid": 1749, "comm": "test_progs" } ] },{ "id": 34, "type": "iter", "prog_id": 173, "target_name": "bpf_map_elem", "map_id": 127, "pids": [{ "pid": 1753, "comm": "test_progs_1" } ] } ] Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200821184420.574430-1-yhs@fb.com --- tools/bpf/bpftool/link.c | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index a89f09e3c848..e77e1525d20a 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -77,6 +77,22 @@ static void show_link_attach_type_json(__u32 attach_type, json_writer_t *wtr) jsonw_uint_field(wtr, "attach_type", attach_type); } +static bool is_iter_map_target(const char *target_name) +{ + return strcmp(target_name, "bpf_map_elem") == 0 || + strcmp(target_name, "bpf_sk_storage_map") == 0; +} + +static void show_iter_json(struct bpf_link_info *info, json_writer_t *wtr) +{ + const char *target_name = u64_to_ptr(info->iter.target_name); + + jsonw_string_field(wtr, "target_name", target_name); + + if (is_iter_map_target(target_name)) + jsonw_uint_field(wtr, "map_id", info->iter.map.map_id); +} + static int get_prog_info(int prog_id, struct bpf_prog_info *info) { __u32 len = sizeof(*info); @@ -128,6 +144,9 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) info->cgroup.cgroup_id); show_link_attach_type_json(info->cgroup.attach_type, json_wtr); break; + case BPF_LINK_TYPE_ITER: + show_iter_json(info, json_wtr); + break; case BPF_LINK_TYPE_NETNS: jsonw_uint_field(json_wtr, "netns_ino", info->netns.netns_ino); @@ -175,6 +194,16 @@ static void show_link_attach_type_plain(__u32 attach_type) printf("attach_type %u ", attach_type); } +static void show_iter_plain(struct bpf_link_info *info) +{ + const char *target_name = u64_to_ptr(info->iter.target_name); + + printf("target_name %s ", target_name); + + if (is_iter_map_target(target_name)) + printf("map_id %u ", info->iter.map.map_id); +} + static int show_link_close_plain(int fd, struct bpf_link_info *info) { struct bpf_prog_info prog_info; @@ -204,6 +233,9 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) printf("\n\tcgroup_id %zu ", (size_t)info->cgroup.cgroup_id); show_link_attach_type_plain(info->cgroup.attach_type); break; + case BPF_LINK_TYPE_ITER: + show_iter_plain(info); + break; case BPF_LINK_TYPE_NETNS: printf("\n\tnetns_ino %u ", info->netns.netns_ino); show_link_attach_type_plain(info->netns.attach_type); @@ -231,7 +263,7 @@ static int do_show_link(int fd) { struct bpf_link_info info; __u32 len = sizeof(info); - char raw_tp_name[256]; + char buf[256]; int err; memset(&info, 0, sizeof(info)); @@ -245,8 +277,14 @@ again: } if (info.type == BPF_LINK_TYPE_RAW_TRACEPOINT && !info.raw_tracepoint.tp_name) { - info.raw_tracepoint.tp_name = (unsigned long)&raw_tp_name; - info.raw_tracepoint.tp_name_len = sizeof(raw_tp_name); + info.raw_tracepoint.tp_name = (unsigned long)&buf; + info.raw_tracepoint.tp_name_len = sizeof(buf); + goto again; + } + if (info.type == BPF_LINK_TYPE_ITER && + !info.iter.target_name) { + info.iter.target_name = (unsigned long)&buf; + info.iter.target_name_len = 
sizeof(buf); goto again; } -- cgit v1.2.3 From dca5612f8eb9d0cf1dc254eb2adff1f16a588a7d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 21 Aug 2020 09:59:27 -0700 Subject: libbpf: Add perf_buffer APIs for better integration with outside epoll loop Add a set of APIs to the perf_buffer manager to allow applications to integrate perf buffer polling into existing epoll-based infrastructure. One example is applications using libevent already and wanting to plug in perf_buffer polling, instead of relying on perf_buffer__poll() and wasting an extra thread to do it. But perf_buffer is still extremely useful to set up and consume perf buffer rings even for such use cases. So to accommodate such new use cases, add three new APIs: - perf_buffer__buffer_cnt() returns the number of per-CPU buffers maintained by a given instance of the perf_buffer manager; - perf_buffer__buffer_fd() returns the FD of the perf_event corresponding to a specified per-CPU buffer; this FD is then polled independently; - perf_buffer__consume_buffer() consumes data from a single per-CPU buffer, identified by its slot index. To support a simpler, but less efficient, way to integrate perf_buffer into external polling logic, also expose the underlying epoll FD through the perf_buffer__epoll_fd() API. It will need to be followed by perf_buffer__poll(), wasting an extra syscall, or perf_buffer__consume(), wasting CPU to iterate buffers with no data. But it could be simpler and more convenient in some cases. These APIs allow for great flexibility, but do not sacrifice general usability of perf_buffer. Also exercise and check the new APIs in the perf_buffer selftest. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Reviewed-by: Alan Maguire Link: https://lore.kernel.org/bpf/20200821165927.849538-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 56 ++++++++++++++++++- tools/lib/bpf/libbpf.h | 4 ++ tools/lib/bpf/libbpf.map | 8 +++ .../testing/selftests/bpf/prog_tests/perf_buffer.c | 65 ++++++++++++++++++---- 4 files changed, 121 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 0bc1fd813408..210429c5b772 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9373,6 +9373,11 @@ static int perf_buffer__process_records(struct perf_buffer *pb, return 0; } +int perf_buffer__epoll_fd(const struct perf_buffer *pb) +{ + return pb->epoll_fd; +} + int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) { int i, cnt, err; @@ -9390,6 +9395,55 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) return cnt < 0 ? -errno : cnt; } +/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer + * manager. + */ +size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb) +{ + return pb->cpu_cnt; +} + +/* + * Return perf_event FD of a ring buffer in *buf_idx* slot of + * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using + * select()/poll()/epoll() Linux syscalls. + */ +int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx) +{ + struct perf_cpu_buf *cpu_buf; + + if (buf_idx >= pb->cpu_cnt) + return -EINVAL; + + cpu_buf = pb->cpu_bufs[buf_idx]; + if (!cpu_buf) + return -ENOENT; + + return cpu_buf->fd; + } + +/* + * Consume data from perf ring buffer corresponding to slot *buf_idx* in + * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to + * consume, do nothing and return success. + * Returns: + * - 0 on success; + * - <0 on failure.
+ */ +int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx) +{ + struct perf_cpu_buf *cpu_buf; + + if (buf_idx >= pb->cpu_cnt) + return -EINVAL; + + cpu_buf = pb->cpu_bufs[buf_idx]; + if (!cpu_buf) + return -ENOENT; + + return perf_buffer__process_records(pb, cpu_buf); +} + int perf_buffer__consume(struct perf_buffer *pb) { int i, err; @@ -9402,7 +9456,7 @@ int perf_buffer__consume(struct perf_buffer *pb) err = perf_buffer__process_records(pb, cpu_buf); if (err) { - pr_warn("error while processing records: %d\n", err); + pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err); return err; } } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 5ecb4069a9f0..308e0ded8f14 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -588,8 +588,12 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt, const struct perf_buffer_raw_opts *opts); LIBBPF_API void perf_buffer__free(struct perf_buffer *pb); +LIBBPF_API int perf_buffer__epoll_fd(const struct perf_buffer *pb); LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms); LIBBPF_API int perf_buffer__consume(struct perf_buffer *pb); +LIBBPF_API int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx); +LIBBPF_API size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb); +LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx); typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index e35bd6cdbdbf..66a6286d0716 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -299,3 +299,11 @@ LIBBPF_0.1.0 { btf__set_fd; btf__set_pointer_size; } LIBBPF_0.0.9; + +LIBBPF_0.2.0 { + global: + perf_buffer__buffer_cnt; + perf_buffer__buffer_fd; + perf_buffer__epoll_fd; + perf_buffer__consume_buffer; +} LIBBPF_0.1.0; diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index c33ec180b3f2..ca9f0895ec84 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -7,6 +7,8 @@ #include "test_perf_buffer.skel.h" #include "bpf/libbpf_internal.h" +static int duration; + /* AddressSanitizer sometimes crashes due to data dereference below, due to * this being mmap()'ed memory. 
Disable instrumentation with * no_sanitize_address attribute @@ -24,13 +26,31 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size) CPU_SET(cpu, cpu_seen); } +int trigger_on_cpu(int cpu) +{ + cpu_set_t cpu_set; + int err; + + CPU_ZERO(&cpu_set); + CPU_SET(cpu, &cpu_set); + + err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set); + if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n", cpu, err)) + return err; + + usleep(1); + + return 0; +} + void test_perf_buffer(void) { - int err, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0; + int err, on_len, nr_on_cpus = 0, nr_cpus, i; struct perf_buffer_opts pb_opts = {}; struct test_perf_buffer *skel; - cpu_set_t cpu_set, cpu_seen; + cpu_set_t cpu_seen; struct perf_buffer *pb; + int last_fd = -1, fd; bool *online; nr_cpus = libbpf_num_possible_cpus(); @@ -63,6 +83,9 @@ void test_perf_buffer(void) if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) goto out_close; + CHECK(perf_buffer__epoll_fd(pb) < 0, "epoll_fd", + "bad fd: %d\n", perf_buffer__epoll_fd(pb)); + /* trigger kprobe on every CPU */ CPU_ZERO(&cpu_seen); for (i = 0; i < nr_cpus; i++) { @@ -71,16 +94,8 @@ void test_perf_buffer(void) continue; } - CPU_ZERO(&cpu_set); - CPU_SET(i, &cpu_set); - - err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), - &cpu_set); - if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n", - i, err)) + if (trigger_on_cpu(i)) goto out_close; - - usleep(1); } /* read perf buffer */ @@ -92,6 +107,34 @@ void test_perf_buffer(void) "expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen))) goto out_free_pb; + if (CHECK(perf_buffer__buffer_cnt(pb) != nr_cpus, "buf_cnt", + "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_cpus)) + goto out_close; + + for (i = 0; i < nr_cpus; i++) { + if (i >= on_len || !online[i]) + continue; + + fd = perf_buffer__buffer_fd(pb, i); + CHECK(fd < 0 || last_fd == fd, "fd_check", "last fd %d == fd %d\n", last_fd, fd); + last_fd = fd; + + err = perf_buffer__consume_buffer(pb, i); + if (CHECK(err, "drain_buf", "cpu %d, err %d\n", i, err)) + goto out_close; + + CPU_CLR(i, &cpu_seen); + if (trigger_on_cpu(i)) + goto out_close; + + err = perf_buffer__consume_buffer(pb, i); + if (CHECK(err, "consume_buf", "cpu %d, err %d\n", i, err)) + goto out_close; + + if (CHECK(!CPU_ISSET(i, &cpu_seen), "cpu_seen", "cpu %d not seen\n", i)) + goto out_close; + } + out_free_pb: perf_buffer__free(pb); out_close: -- cgit v1.2.3 From bb23c0e1c57f3b40c8d2713401c1b42df911d424 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 21 Aug 2020 11:29:48 +0100 Subject: selftests: bpf: Test sockmap update from BPF Add a test which copies a socket from a sockmap into another sockmap or sockhash. This exercises bpf_map_update_elem support from BPF context. Compare the socket cookies from source and destination to ensure that the copy succeeded. Also check that the verifier rejects map_update from unsafe contexts.
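From BPF program context, the copy under test boils down to a lookup plus an update with the socket pointer; roughly (map and key names are illustrative, flags value 0 means BPF_ANY):

  struct bpf_sock *sk;
  __u32 key = 0;

  sk = bpf_map_lookup_elem(&src, &key);
  if (sk) {
  	bpf_map_update_elem(&dst, &key, sk, 0); /* copy the socket over */
  	bpf_sk_release(sk); /* sockmap lookups take a reference */
  }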
Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200821102948.21918-7-lmb@cloudflare.com --- .../selftests/bpf/prog_tests/sockmap_basic.c | 78 ++++++++++++++++++++++ .../bpf/progs/test_sockmap_invalid_update.c | 23 +++++++ .../selftests/bpf/progs/test_sockmap_update.c | 48 +++++++++++++ 3 files changed, 149 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c create mode 100644 tools/testing/selftests/bpf/progs/test_sockmap_update.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 96e7b7f84c65..65ce7c289534 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -4,6 +4,8 @@ #include "test_progs.h" #include "test_skmsg_load_helpers.skel.h" +#include "test_sockmap_update.skel.h" +#include "test_sockmap_invalid_update.skel.h" #define TCP_REPAIR 19 /* TCP sock is under repair right now */ @@ -101,6 +103,76 @@ out: test_skmsg_load_helpers__destroy(skel); } +static void test_sockmap_update(enum bpf_map_type map_type) +{ + struct bpf_prog_test_run_attr tattr; + int err, prog, src, dst, duration = 0; + struct test_sockmap_update *skel; + __u64 src_cookie, dst_cookie; + const __u32 zero = 0; + char dummy[14] = {0}; + __s64 sk; + + sk = connected_socket_v4(); + if (CHECK(sk == -1, "connected_socket_v4", "cannot connect\n")) + return; + + skel = test_sockmap_update__open_and_load(); + if (CHECK(!skel, "open_and_load", "cannot load skeleton\n")) { + close(sk); + return; + } + + prog = bpf_program__fd(skel->progs.copy_sock_map); + src = bpf_map__fd(skel->maps.src); + if (map_type == BPF_MAP_TYPE_SOCKMAP) + dst = bpf_map__fd(skel->maps.dst_sock_map); + else + dst = bpf_map__fd(skel->maps.dst_sock_hash); + + err = bpf_map_update_elem(src, &zero, &sk, BPF_NOEXIST); + if (CHECK(err, "update_elem(src)", "errno=%u\n", errno)) + goto out; + + err = bpf_map_lookup_elem(src, &zero, &src_cookie); + if (CHECK(err, "lookup_elem(src, cookie)", "errno=%u\n", errno)) + goto out; + + tattr = (struct bpf_prog_test_run_attr){ + .prog_fd = prog, + .repeat = 1, + .data_in = dummy, + .data_size_in = sizeof(dummy), + }; + + err = bpf_prog_test_run_xattr(&tattr); + if (CHECK_ATTR(err || !tattr.retval, "bpf_prog_test_run", + "errno=%u retval=%u\n", errno, tattr.retval)) + goto out; + + err = bpf_map_lookup_elem(dst, &zero, &dst_cookie); + if (CHECK(err, "lookup_elem(dst, cookie)", "errno=%u\n", errno)) + goto out; + + CHECK(dst_cookie != src_cookie, "cookie mismatch", "%llu != %llu\n", + dst_cookie, src_cookie); + +out: + close(sk); + test_sockmap_update__destroy(skel); +} + +static void test_sockmap_invalid_update(void) +{ + struct test_sockmap_invalid_update *skel; + int duration = 0; + + skel = test_sockmap_invalid_update__open_and_load(); + CHECK(skel, "open_and_load", "verifier accepted map_update\n"); + if (skel) + test_sockmap_invalid_update__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -111,4 +183,10 @@ void test_sockmap_basic(void) test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP); if (test__start_subtest("sockhash sk_msg load helpers")) test_skmsg_helpers(BPF_MAP_TYPE_SOCKHASH); + if (test__start_subtest("sockmap update")) + test_sockmap_update(BPF_MAP_TYPE_SOCKMAP); + if (test__start_subtest("sockhash update")) + test_sockmap_update(BPF_MAP_TYPE_SOCKHASH); + if 
(test__start_subtest("sockmap update in unsafe context")) + test_sockmap_invalid_update(); } diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c new file mode 100644 index 000000000000..02a59e220cbc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Cloudflare +#include "vmlinux.h" +#include + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} map SEC(".maps"); + +SEC("sockops") +int bpf_sockmap(struct bpf_sock_ops *skops) +{ + __u32 key = 0; + + if (skops->sk) + bpf_map_update_elem(&map, &key, skops->sk, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_update.c new file mode 100644 index 000000000000..9d0c9f28cab2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_update.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Cloudflare +#include "vmlinux.h" +#include + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} src SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} dst_sock_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} dst_sock_hash SEC(".maps"); + +SEC("classifier/copy_sock_map") +int copy_sock_map(void *ctx) +{ + struct bpf_sock *sk; + bool failed = false; + __u32 key = 0; + + sk = bpf_map_lookup_elem(&src, &key); + if (!sk) + return SK_DROP; + + if (bpf_map_update_elem(&dst_sock_map, &key, sk, 0)) + failed = true; + + if (bpf_map_update_elem(&dst_sock_hash, &key, sk, 0)) + failed = true; + + bpf_sk_release(sk); + return failed ? SK_DROP : SK_PASS; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 3ac2e20fba07e57cd229091a568ac14a5434e6bb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 20 Aug 2020 16:12:35 -0700 Subject: selftests/bpf: BPF object files should depend only on libbpf headers There is no need to re-build BPF object files if any of the sources of libbpf change. So record more precise dependency only on libbpf/bpf_*.h headers. This eliminates unnecessary re-builds. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200820231250.1293069-2-andriin@fb.com --- tools/testing/selftests/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index a83b5827532f..09657d0afb5c 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -316,7 +316,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_BPF_PROGS_DIR)/%.c \ $(TRUNNER_BPF_PROGS_DIR)/*.h \ $$(INCLUDE_DIR)/vmlinux.h \ - $$(BPFOBJ) | $(TRUNNER_OUTPUT) + $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT) $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ $(TRUNNER_BPF_CFLAGS), \ $(TRUNNER_BPF_LDFLAGS)) -- cgit v1.2.3 From 88a82120282bdef4c331e20991e3057f417beae4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 20 Aug 2020 16:12:36 -0700 Subject: libbpf: Factor out common ELF operations and improve logging Factor out common ELF operations done throughout libbpf. This simplifies usage across multiple places in libbpf, as well as hiding error reporting from higher-level functions and making error logging more consistent. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200820231250.1293069-3-andriin@fb.com --- tools/lib/bpf/libbpf.c | 367 +++++++++++++++++++++++++++---------------------- 1 file changed, 206 insertions(+), 161 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 210429c5b772..dfb18d3ee590 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -398,6 +398,7 @@ struct bpf_object { Elf_Data *rodata; Elf_Data *bss; Elf_Data *st_ops_data; + size_t shstrndx; /* section index for section name strings */ size_t strtabidx; struct { GElf_Shdr shdr; @@ -435,6 +436,14 @@ struct bpf_object { }; #define obj_elf_valid(o) ((o)->efile.elf) +static const char *elf_sym_str(const struct bpf_object *obj, size_t off); +static const char *elf_sec_str(const struct bpf_object *obj, size_t off); +static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx); +static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name); +static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr); +static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn); +static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn); + void bpf_program__unload(struct bpf_program *prog) { int i; @@ -496,7 +505,7 @@ static char *__bpf_program__pin_name(struct bpf_program *prog) } static int -bpf_program__init(void *data, size_t size, char *section_name, int idx, +bpf_program__init(void *data, size_t size, const char *section_name, int idx, struct bpf_program *prog) { const size_t bpf_insn_sz = sizeof(struct bpf_insn); @@ -545,7 +554,7 @@ errout: static int bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, - char *section_name, int idx) + const char *section_name, int idx) { struct bpf_program prog, *progs; int nr_progs, err; @@ -570,7 +579,7 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, return -ENOMEM; } - pr_debug("found program %s\n", prog.section_name); + pr_debug("elf: found program '%s'\n", prog.section_name); obj->programs = progs; obj->nr_programs = nr_progs + 1; prog.obj = obj; @@ -590,8 +599,7 @@ bpf_object__init_prog_names(struct bpf_object *obj) prog = &obj->programs[pi]; - for (si 
= 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; - si++) { + for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; si++) { GElf_Sym sym; if (!gelf_getsym(symbols, si, &sym)) @@ -601,11 +609,9 @@ bpf_object__init_prog_names(struct bpf_object *obj) if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL) continue; - name = elf_strptr(obj->efile.elf, - obj->efile.strtabidx, - sym.st_name); + name = elf_sym_str(obj, sym.st_name); if (!name) { - pr_warn("failed to get sym name string for prog %s\n", + pr_warn("prog '%s': failed to get symbol name\n", prog->section_name); return -LIBBPF_ERRNO__LIBELF; } @@ -615,17 +621,14 @@ bpf_object__init_prog_names(struct bpf_object *obj) name = ".text"; if (!name) { - pr_warn("failed to find sym for prog %s\n", + pr_warn("prog '%s': failed to find program symbol\n", prog->section_name); return -EINVAL; } prog->name = strdup(name); - if (!prog->name) { - pr_warn("failed to allocate memory for prog sym %s\n", - name); + if (!prog->name) return -ENOMEM; - } } return 0; @@ -1069,7 +1072,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) GElf_Ehdr *ep; if (obj_elf_valid(obj)) { - pr_warn("elf init: internal error\n"); + pr_warn("elf: init internal error\n"); return -LIBBPF_ERRNO__LIBELF; } @@ -1087,7 +1090,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) err = -errno; cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("failed to open %s: %s\n", obj->path, cp); + pr_warn("elf: failed to open %s: %s\n", obj->path, cp); return err; } @@ -1095,22 +1098,36 @@ static int bpf_object__elf_init(struct bpf_object *obj) } if (!obj->efile.elf) { - pr_warn("failed to open %s as ELF file\n", obj->path); + pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1)); err = -LIBBPF_ERRNO__LIBELF; goto errout; } if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) { - pr_warn("failed to get EHDR from %s\n", obj->path); + pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1)); err = -LIBBPF_ERRNO__FORMAT; goto errout; } ep = &obj->efile.ehdr; + if (elf_getshdrstrndx(obj->efile.elf, &obj->efile.shstrndx)) { + pr_warn("elf: failed to get section names section index for %s: %s\n", + obj->path, elf_errmsg(-1)); + err = -LIBBPF_ERRNO__FORMAT; + goto errout; + } + + /* Elf is corrupted/truncated, avoid calling elf_strptr. 
*/ + if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) { + pr_warn("elf: failed to get section names strings from %s: %s\n", + obj->path, elf_errmsg(-1)); + return -LIBBPF_ERRNO__FORMAT; + } + /* Old LLVM set e_machine to EM_NONE */ if (ep->e_type != ET_REL || (ep->e_machine && ep->e_machine != EM_BPF)) { - pr_warn("%s is not an eBPF object file\n", obj->path); + pr_warn("elf: %s is not a valid eBPF object file\n", obj->path); err = -LIBBPF_ERRNO__FORMAT; goto errout; } @@ -1132,7 +1149,7 @@ static int bpf_object__check_endianness(struct bpf_object *obj) #else # error "Unrecognized __BYTE_ORDER__" #endif - pr_warn("endianness mismatch.\n"); + pr_warn("elf: endianness mismatch in %s.\n", obj->path); return -LIBBPF_ERRNO__ENDIAN; } @@ -1167,55 +1184,10 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) return false; } -static int bpf_object_search_section_size(const struct bpf_object *obj, - const char *name, size_t *d_size) -{ - const GElf_Ehdr *ep = &obj->efile.ehdr; - Elf *elf = obj->efile.elf; - Elf_Scn *scn = NULL; - int idx = 0; - - while ((scn = elf_nextscn(elf, scn)) != NULL) { - const char *sec_name; - Elf_Data *data; - GElf_Shdr sh; - - idx++; - if (gelf_getshdr(scn, &sh) != &sh) { - pr_warn("failed to get section(%d) header from %s\n", - idx, obj->path); - return -EIO; - } - - sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); - if (!sec_name) { - pr_warn("failed to get section(%d) name from %s\n", - idx, obj->path); - return -EIO; - } - - if (strcmp(name, sec_name)) - continue; - - data = elf_getdata(scn, 0); - if (!data) { - pr_warn("failed to get section(%d) data from %s(%s)\n", - idx, name, obj->path); - return -EIO; - } - - *d_size = data->d_size; - return 0; - } - - return -ENOENT; -} - int bpf_object__section_size(const struct bpf_object *obj, const char *name, __u32 *size) { int ret = -ENOENT; - size_t d_size; *size = 0; if (!name) { @@ -1233,9 +1205,13 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name, if (obj->efile.st_ops_data) *size = obj->efile.st_ops_data->d_size; } else { - ret = bpf_object_search_section_size(obj, name, &d_size); - if (!ret) - *size = d_size; + Elf_Scn *scn = elf_sec_by_name(obj, name); + Elf_Data *data = elf_sec_data(obj, scn); + + if (data) { + ret = 0; /* found it */ + *size = data->d_size; + } } return *size ? 
0 : ret; @@ -1260,8 +1236,7 @@ int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, GELF_ST_TYPE(sym.st_info) != STT_OBJECT) continue; - sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx, - sym.st_name); + sname = elf_sym_str(obj, sym.st_name); if (!sname) { pr_warn("failed to get sym name string for var %s\n", name); @@ -1738,12 +1713,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) if (!symbols) return -EINVAL; - scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx); - if (scn) - data = elf_getdata(scn, NULL); + + scn = elf_sec_by_idx(obj, obj->efile.maps_shndx); + data = elf_sec_data(obj, scn); if (!scn || !data) { - pr_warn("failed to get Elf_Data from map section %d\n", - obj->efile.maps_shndx); + pr_warn("elf: failed to get legacy map definitions for %s\n", + obj->path); return -EINVAL; } @@ -1765,12 +1740,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) nr_maps++; } /* Assume equally sized map definitions */ - pr_debug("maps in %s: %d maps in %zd bytes\n", - obj->path, nr_maps, data->d_size); + pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n", + nr_maps, data->d_size, obj->path); if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) { - pr_warn("unable to determine map definition size section %s, %d maps in %zd bytes\n", - obj->path, nr_maps, data->d_size); + pr_warn("elf: unable to determine legacy map definition size in %s\n", + obj->path); return -EINVAL; } map_def_sz = data->d_size / nr_maps; @@ -1791,8 +1766,7 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) if (IS_ERR(map)) return PTR_ERR(map); - map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, - sym.st_name); + map_name = elf_sym_str(obj, sym.st_name); if (!map_name) { pr_warn("failed to get map #%d name sym string for obj %s\n", i, obj->path); @@ -2274,12 +2248,11 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, if (obj->efile.btf_maps_shndx < 0) return 0; - scn = elf_getscn(obj->efile.elf, obj->efile.btf_maps_shndx); - if (scn) - data = elf_getdata(scn, NULL); + scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx); + data = elf_sec_data(obj, scn); if (!scn || !data) { - pr_warn("failed to get Elf_Data from map section %d (%s)\n", - obj->efile.maps_shndx, MAPS_ELF_SEC); + pr_warn("elf: failed to get %s map definitions for %s\n", + MAPS_ELF_SEC, obj->path); return -EINVAL; } @@ -2337,20 +2310,12 @@ static int bpf_object__init_maps(struct bpf_object *obj, static bool section_have_execinstr(struct bpf_object *obj, int idx) { - Elf_Scn *scn; GElf_Shdr sh; - scn = elf_getscn(obj->efile.elf, idx); - if (!scn) - return false; - - if (gelf_getshdr(scn, &sh) != &sh) + if (elf_sec_hdr(obj, elf_sec_by_idx(obj, idx), &sh)) return false; - if (sh.sh_flags & SHF_EXECINSTR) - return true; - - return false; + return sh.sh_flags & SHF_EXECINSTR; } static bool btf_needs_sanitization(struct bpf_object *obj) @@ -2594,61 +2559,156 @@ report: return err; } +static const char *elf_sym_str(const struct bpf_object *obj, size_t off) +{ + const char *name; + + name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off); + if (!name) { + pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n", + off, obj->path, elf_errmsg(-1)); + return NULL; + } + + return name; +} + +static const char *elf_sec_str(const struct bpf_object *obj, size_t off) +{ + const char *name; + + name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off); + 
if (!name) { + pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n", + off, obj->path, elf_errmsg(-1)); + return NULL; + } + + return name; +} + +static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx) +{ + Elf_Scn *scn; + + scn = elf_getscn(obj->efile.elf, idx); + if (!scn) { + pr_warn("elf: failed to get section(%zu) from %s: %s\n", + idx, obj->path, elf_errmsg(-1)); + return NULL; + } + return scn; +} + +static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name) +{ + Elf_Scn *scn = NULL; + Elf *elf = obj->efile.elf; + const char *sec_name; + + while ((scn = elf_nextscn(elf, scn)) != NULL) { + sec_name = elf_sec_name(obj, scn); + if (!sec_name) + return NULL; + + if (strcmp(sec_name, name) != 0) + continue; + + return scn; + } + return NULL; +} + +static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr) +{ + if (!scn) + return -EINVAL; + + if (gelf_getshdr(scn, hdr) != hdr) { + pr_warn("elf: failed to get section(%zu) header from %s: %s\n", + elf_ndxscn(scn), obj->path, elf_errmsg(-1)); + return -EINVAL; + } + + return 0; +} + +static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn) +{ + const char *name; + GElf_Shdr sh; + + if (!scn) + return NULL; + + if (elf_sec_hdr(obj, scn, &sh)) + return NULL; + + name = elf_sec_str(obj, sh.sh_name); + if (!name) { + pr_warn("elf: failed to get section(%zu) name from %s: %s\n", + elf_ndxscn(scn), obj->path, elf_errmsg(-1)); + return NULL; + } + + return name; +} + +static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn) +{ + Elf_Data *data; + + if (!scn) + return NULL; + + data = elf_getdata(scn, 0); + if (!data) { + pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n", + elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "", + obj->path, elf_errmsg(-1)); + return NULL; + } + + return data; +} + static int bpf_object__elf_collect(struct bpf_object *obj) { Elf *elf = obj->efile.elf; - GElf_Ehdr *ep = &obj->efile.ehdr; Elf_Data *btf_ext_data = NULL; Elf_Data *btf_data = NULL; Elf_Scn *scn = NULL; int idx = 0, err = 0; - /* Elf is corrupted/truncated, avoid calling elf_strptr. 
*/ - if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) { - pr_warn("failed to get e_shstrndx from %s\n", obj->path); - return -LIBBPF_ERRNO__FORMAT; - } - while ((scn = elf_nextscn(elf, scn)) != NULL) { - char *name; + const char *name; GElf_Shdr sh; Elf_Data *data; idx++; - if (gelf_getshdr(scn, &sh) != &sh) { - pr_warn("failed to get section(%d) header from %s\n", - idx, obj->path); + + if (elf_sec_hdr(obj, scn, &sh)) return -LIBBPF_ERRNO__FORMAT; - } - name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); - if (!name) { - pr_warn("failed to get section(%d) name from %s\n", - idx, obj->path); + name = elf_sec_str(obj, sh.sh_name); + if (!name) return -LIBBPF_ERRNO__FORMAT; - } - data = elf_getdata(scn, 0); - if (!data) { - pr_warn("failed to get section(%d) data from %s(%s)\n", - idx, name, obj->path); + data = elf_sec_data(obj, scn); + if (!data) return -LIBBPF_ERRNO__FORMAT; - } - pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", + + pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", idx, name, (unsigned long)data->d_size, (int)sh.sh_link, (unsigned long)sh.sh_flags, (int)sh.sh_type); if (strcmp(name, "license") == 0) { - err = bpf_object__init_license(obj, - data->d_buf, - data->d_size); + err = bpf_object__init_license(obj, data->d_buf, data->d_size); if (err) return err; } else if (strcmp(name, "version") == 0) { - err = bpf_object__init_kversion(obj, - data->d_buf, - data->d_size); + err = bpf_object__init_kversion(obj, data->d_buf, data->d_size); if (err) return err; } else if (strcmp(name, "maps") == 0) { @@ -2661,8 +2721,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj) btf_ext_data = data; } else if (sh.sh_type == SHT_SYMTAB) { if (obj->efile.symbols) { - pr_warn("bpf: multiple SYMTAB in %s\n", - obj->path); + pr_warn("elf: multiple symbol tables in %s\n", obj->path); return -LIBBPF_ERRNO__FORMAT; } obj->efile.symbols = data; @@ -2675,16 +2734,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) err = bpf_object__add_program(obj, data->d_buf, data->d_size, name, idx); - if (err) { - char errmsg[STRERR_BUFSIZE]; - char *cp; - - cp = libbpf_strerror_r(-err, errmsg, - sizeof(errmsg)); - pr_warn("failed to alloc program %s (%s): %s", - name, obj->path, cp); + if (err) return err; - } } else if (strcmp(name, DATA_SEC) == 0) { obj->efile.data = data; obj->efile.data_shndx = idx; @@ -2695,7 +2746,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) obj->efile.st_ops_data = data; obj->efile.st_ops_shndx = idx; } else { - pr_debug("skip section(%d) %s\n", idx, name); + pr_debug("elf: skipping unrecognized data section(%d) %s\n", + idx, name); } } else if (sh.sh_type == SHT_REL) { int nr_sects = obj->efile.nr_reloc_sects; @@ -2706,34 +2758,32 @@ static int bpf_object__elf_collect(struct bpf_object *obj) if (!section_have_execinstr(obj, sec) && strcmp(name, ".rel" STRUCT_OPS_SEC) && strcmp(name, ".rel" MAPS_ELF_SEC)) { - pr_debug("skip relo %s(%d) for section(%d)\n", - name, idx, sec); + pr_debug("elf: skipping relo section(%d) %s for section(%d) %s\n", + idx, name, sec, + elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: ""); continue; } sects = libbpf_reallocarray(sects, nr_sects + 1, sizeof(*obj->efile.reloc_sects)); - if (!sects) { - pr_warn("reloc_sects realloc failed\n"); + if (!sects) return -ENOMEM; - } obj->efile.reloc_sects = sects; obj->efile.nr_reloc_sects++; obj->efile.reloc_sects[nr_sects].shdr = sh; obj->efile.reloc_sects[nr_sects].data = data; - } else if (sh.sh_type == SHT_NOBITS && - 
strcmp(name, BSS_SEC) == 0) { + } else if (sh.sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) { obj->efile.bss = data; obj->efile.bss_shndx = idx; } else { - pr_debug("skip section(%d) %s\n", idx, name); + pr_debug("elf: skipping section(%d) %s\n", idx, name); } } if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) { - pr_warn("Corrupted ELF file: index of strtab invalid\n"); + pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path); return -LIBBPF_ERRNO__FORMAT; } return bpf_object__init_btf(obj, btf_data, btf_ext_data); @@ -2894,14 +2944,13 @@ static int bpf_object__collect_externs(struct bpf_object *obj) if (!obj->efile.symbols) return 0; - scn = elf_getscn(obj->efile.elf, obj->efile.symbols_shndx); - if (!scn) - return -LIBBPF_ERRNO__FORMAT; - if (gelf_getshdr(scn, &sh) != &sh) + scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx); + if (elf_sec_hdr(obj, scn, &sh)) return -LIBBPF_ERRNO__FORMAT; - n = sh.sh_size / sh.sh_entsize; + n = sh.sh_size / sh.sh_entsize; pr_debug("looking for externs among %d symbols...\n", n); + for (i = 0; i < n; i++) { GElf_Sym sym; @@ -2909,8 +2958,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return -LIBBPF_ERRNO__FORMAT; if (!sym_is_extern(&sym)) continue; - ext_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, - sym.st_name); + ext_name = elf_sym_str(obj, sym.st_name); if (!ext_name || !ext_name[0]) continue; @@ -3289,16 +3337,15 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, return -LIBBPF_ERRNO__FORMAT; } if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { - pr_warn("relocation: symbol %"PRIx64" not found\n", - GELF_R_SYM(rel.r_info)); + pr_warn("relocation: symbol %zx not found\n", + (size_t)GELF_R_SYM(rel.r_info)); return -LIBBPF_ERRNO__FORMAT; } if (rel.r_offset % sizeof(struct bpf_insn)) return -LIBBPF_ERRNO__FORMAT; insn_idx = rel.r_offset / sizeof(struct bpf_insn); - name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, - sym.st_name) ? : ""; + name = elf_sym_str(obj, sym.st_name) ?: ""; pr_debug("relo for shdr %u, symb %zu, value %zu, type %d, bind %d, name %d (\'%s\'), insn %u\n", (__u32)sym.st_shndx, (size_t)GELF_R_SYM(rel.r_info), @@ -5720,8 +5767,7 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, i, (size_t)GELF_R_SYM(rel.r_info)); return -LIBBPF_ERRNO__FORMAT; } - name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, - sym.st_name) ? : ""; + name = elf_sym_str(obj, sym.st_name) ?: ""; if (sym.st_shndx != obj->efile.btf_maps_shndx) { pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", i, name); @@ -7663,8 +7709,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, return -LIBBPF_ERRNO__FORMAT; } - name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, - sym.st_name) ? : ""; + name = elf_sym_str(obj, sym.st_name) ?: ""; map = find_struct_ops_map_by_offset(obj, rel.r_offset); if (!map) { pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n", -- cgit v1.2.3 From 819c23af561c4dc8c35faaacacdfa9ba81bacf5f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 20 Aug 2020 16:12:37 -0700 Subject: libbpf: Add __noinline macro to bpf_helpers.h __noinline is pretty frequently used, especially with BPF subprograms, so add them along the __always_inline, for user convenience and completeness. 
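For reference, a minimal sketch of the intended usage (not part of this patch; the program and function names below are made up for illustration):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    /* __noinline keeps this as a separate BPF subprogram instead of
     * letting the compiler inline it into its caller.
     */
    static __noinline int count_one(void)
    {
        return 1;
    }

    SEC("xdp")
    int xdp_count(struct xdp_md *ctx)
    {
        return count_one() ? XDP_PASS : XDP_DROP;
    }

    char _license[] SEC("license") = "GPL";
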
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200820231250.1293069-4-andriin@fb.com --- tools/lib/bpf/bpf_helpers.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index e9a4ecddb7a5..1106777df00b 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -32,6 +32,9 @@ #ifndef __always_inline #define __always_inline __attribute__((always_inline)) #endif +#ifndef __noinline +#define __noinline __attribute__((noinline)) +#endif #ifndef __weak #define __weak __attribute__((weak)) #endif -- cgit v1.2.3 From 50e09460d9f8dee9fbaaa3194c7921753d75cd96 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 20 Aug 2020 16:12:38 -0700 Subject: libbpf: Skip well-known ELF sections when iterating ELF Skip and don't log ELF sections that libbpf knows about and ignores during ELF processing. This allows to not unnecessarily log details about those ELF sections and cleans up libbpf debug log. Ignored sections include DWARF data, string table, empty .text section and few special (e.g., .llvm_addrsig) useless sections. With such ELF sections out of the way, log unrecognized ELF sections at pr_info level to increase visibility. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200820231250.1293069-5-andriin@fb.com --- tools/lib/bpf/libbpf.c | 55 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index dfb18d3ee590..2ae89b84a2be 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2672,6 +2672,46 @@ static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn) return data; } +static bool is_sec_name_dwarf(const char *name) +{ + /* approximation, but the actual list is too long */ + return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0; +} + +static bool ignore_elf_section(GElf_Shdr *hdr, const char *name) +{ + /* no special handling of .strtab */ + if (hdr->sh_type == SHT_STRTAB) + return true; + + /* ignore .llvm_addrsig section as well */ + if (hdr->sh_type == 0x6FFF4C03 /* SHT_LLVM_ADDRSIG */) + return true; + + /* no subprograms will lead to an empty .text section, ignore it */ + if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 && + strcmp(name, ".text") == 0) + return true; + + /* DWARF sections */ + if (is_sec_name_dwarf(name)) + return true; + + if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) { + name += sizeof(".rel") - 1; + /* DWARF section relocations */ + if (is_sec_name_dwarf(name)) + return true; + + /* .BTF and .BTF.ext don't need relocations */ + if (strcmp(name, BTF_ELF_SEC) == 0 || + strcmp(name, BTF_EXT_ELF_SEC) == 0) + return true; + } + + return false; +} + static int bpf_object__elf_collect(struct bpf_object *obj) { Elf *elf = obj->efile.elf; @@ -2694,6 +2734,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj) if (!name) return -LIBBPF_ERRNO__FORMAT; + if (ignore_elf_section(&sh, name)) + continue; + data = elf_sec_data(obj, scn); if (!data) return -LIBBPF_ERRNO__FORMAT; @@ -2746,8 +2789,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) obj->efile.st_ops_data = data; obj->efile.st_ops_shndx = idx; } else { - pr_debug("elf: skipping unrecognized data section(%d) %s\n", - idx, name); + pr_info("elf: skipping unrecognized data section(%d) %s\n", + idx, name); } } else if (sh.sh_type == 
SHT_REL) { int nr_sects = obj->efile.nr_reloc_sects; @@ -2758,9 +2801,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj) if (!section_have_execinstr(obj, sec) && strcmp(name, ".rel" STRUCT_OPS_SEC) && strcmp(name, ".rel" MAPS_ELF_SEC)) { - pr_debug("elf: skipping relo section(%d) %s for section(%d) %s\n", - idx, name, sec, - elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: ""); + pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n", + idx, name, sec, + elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: ""); continue; } @@ -2778,7 +2821,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj) obj->efile.bss = data; obj->efile.bss_shndx = idx; } else { - pr_debug("elf: skipping section(%d) %s\n", idx, name); + pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name, sh.sh_size); } } -- cgit v1.2.3 From 9c0f8cbdc0e9cf8a2a3a96045778b8d759f172c2 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 20 Aug 2020 16:12:39 -0700 Subject: libbpf: Normalize and improve logging across few functions Make libbpf logs follow similar pattern and provide more context like section name or program name, where appropriate. Also, add BPF_INSN_SZ constant and use it throughout to clean up code a little bit. This commit doesn't have any functional changes and just removes some code changes out of the way before bigger refactoring in libbpf internals. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200820231250.1293069-6-andriin@fb.com --- tools/lib/bpf/libbpf.c | 117 ++++++++++++++++++++++++++++--------------------- 1 file changed, 67 insertions(+), 50 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2ae89b84a2be..fb7b8fb1d3fa 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -63,6 +63,8 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif +#define BPF_INSN_SZ (sizeof(struct bpf_insn)) + /* vsprintf() in __base_pr() uses nonliteral format string. It may break * compilation if user enables corresponding warning. Disable it explicitly. 
*/ @@ -3225,7 +3227,7 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) static int bpf_program__record_reloc(struct bpf_program *prog, struct reloc_desc *reloc_desc, - __u32 insn_idx, const char *name, + __u32 insn_idx, const char *sym_name, const GElf_Sym *sym, const GElf_Rel *rel) { struct bpf_insn *insn = &prog->insns[insn_idx]; @@ -3233,22 +3235,25 @@ static int bpf_program__record_reloc(struct bpf_program *prog, struct bpf_object *obj = prog->obj; __u32 shdr_idx = sym->st_shndx; enum libbpf_map_type type; + const char *sym_sec_name; struct bpf_map *map; /* sub-program call relocation */ if (insn->code == (BPF_JMP | BPF_CALL)) { if (insn->src_reg != BPF_PSEUDO_CALL) { - pr_warn("incorrect bpf_call opcode\n"); + pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name); return -LIBBPF_ERRNO__RELOC; } /* text_shndx can be 0, if no default "main" program exists */ if (!shdr_idx || shdr_idx != obj->efile.text_shndx) { - pr_warn("bad call relo against section %u\n", shdr_idx); + sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); + pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n", + prog->name, sym_name, sym_sec_name); return -LIBBPF_ERRNO__RELOC; } - if (sym->st_value % 8) { - pr_warn("bad call relo offset: %zu\n", - (size_t)sym->st_value); + if (sym->st_value % BPF_INSN_SZ) { + pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n", + prog->name, sym_name, (size_t)sym->st_value); return -LIBBPF_ERRNO__RELOC; } reloc_desc->type = RELO_CALL; @@ -3259,8 +3264,8 @@ static int bpf_program__record_reloc(struct bpf_program *prog, } if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) { - pr_warn("invalid relo for insns[%d].code 0x%x\n", - insn_idx, insn->code); + pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n", + prog->name, sym_name, insn_idx, insn->code); return -LIBBPF_ERRNO__RELOC; } @@ -3275,12 +3280,12 @@ static int bpf_program__record_reloc(struct bpf_program *prog, break; } if (i >= n) { - pr_warn("extern relo failed to find extern for sym %d\n", - sym_idx); + pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n", + prog->name, sym_name, sym_idx); return -LIBBPF_ERRNO__RELOC; } - pr_debug("found extern #%d '%s' (sym %d) for insn %u\n", - i, ext->name, ext->sym_idx, insn_idx); + pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n", + prog->name, i, ext->name, ext->sym_idx, insn_idx); reloc_desc->type = RELO_EXTERN; reloc_desc->insn_idx = insn_idx; reloc_desc->sym_off = i; /* sym_off stores extern index */ @@ -3288,18 +3293,19 @@ static int bpf_program__record_reloc(struct bpf_program *prog, } if (!shdr_idx || shdr_idx >= SHN_LORESERVE) { - pr_warn("invalid relo for \'%s\' in special section 0x%x; forgot to initialize global var?..\n", - name, shdr_idx); + pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n", + prog->name, sym_name, shdr_idx); return -LIBBPF_ERRNO__RELOC; } type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); + sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); /* generic map reference relocation */ if (type == LIBBPF_MAP_UNSPEC) { if (!bpf_object__shndx_is_maps(obj, shdr_idx)) { - pr_warn("bad map relo against section %u\n", - shdr_idx); + pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n", + prog->name, sym_name, sym_sec_name); return -LIBBPF_ERRNO__RELOC; } for (map_idx = 0; map_idx < nr_maps; map_idx++) { @@ -3308,14 +3314,14 @@ static int 
bpf_program__record_reloc(struct bpf_program *prog, map->sec_idx != sym->st_shndx || map->sec_offset != sym->st_value) continue; - pr_debug("found map %zd (%s, sec %d, off %zu) for insn %u\n", - map_idx, map->name, map->sec_idx, + pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n", + prog->name, map_idx, map->name, map->sec_idx, map->sec_offset, insn_idx); break; } if (map_idx >= nr_maps) { - pr_warn("map relo failed to find map for sec %u, off %zu\n", - shdr_idx, (size_t)sym->st_value); + pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n", + prog->name, sym_sec_name, (size_t)sym->st_value); return -LIBBPF_ERRNO__RELOC; } reloc_desc->type = RELO_LD64; @@ -3327,21 +3333,22 @@ static int bpf_program__record_reloc(struct bpf_program *prog, /* global data map relocation */ if (!bpf_object__shndx_is_data(obj, shdr_idx)) { - pr_warn("bad data relo against section %u\n", shdr_idx); + pr_warn("prog '%s': bad data relo against section '%s'\n", + prog->name, sym_sec_name); return -LIBBPF_ERRNO__RELOC; } for (map_idx = 0; map_idx < nr_maps; map_idx++) { map = &obj->maps[map_idx]; if (map->libbpf_type != type) continue; - pr_debug("found data map %zd (%s, sec %d, off %zu) for insn %u\n", - map_idx, map->name, map->sec_idx, map->sec_offset, - insn_idx); + pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n", + prog->name, map_idx, map->name, map->sec_idx, + map->sec_offset, insn_idx); break; } if (map_idx >= nr_maps) { - pr_warn("data relo failed to find map for sec %u\n", - shdr_idx); + pr_warn("prog '%s': data relo failed to find map for section '%s'\n", + prog->name, sym_sec_name); return -LIBBPF_ERRNO__RELOC; } @@ -3357,9 +3364,17 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, Elf_Data *data, struct bpf_object *obj) { Elf_Data *symbols = obj->efile.symbols; + const char *relo_sec_name, *sec_name; + size_t sec_idx = shdr->sh_info; int err, i, nrels; - pr_debug("collecting relocating info for: '%s'\n", prog->section_name); + relo_sec_name = elf_sec_str(obj, shdr->sh_name); + sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); + if (!relo_sec_name || !sec_name) + return -EINVAL; + + pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n", + relo_sec_name, sec_idx, sec_name); nrels = shdr->sh_size / shdr->sh_entsize; prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels); @@ -3370,34 +3385,34 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, prog->nr_reloc = nrels; for (i = 0; i < nrels; i++) { - const char *name; + const char *sym_name; __u32 insn_idx; GElf_Sym sym; GElf_Rel rel; if (!gelf_getrel(data, i, &rel)) { - pr_warn("relocation: failed to get %d reloc\n", i); + pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i); return -LIBBPF_ERRNO__FORMAT; } if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { - pr_warn("relocation: symbol %zx not found\n", - (size_t)GELF_R_SYM(rel.r_info)); + pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n", + relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i); return -LIBBPF_ERRNO__FORMAT; } - if (rel.r_offset % sizeof(struct bpf_insn)) + if (rel.r_offset % BPF_INSN_SZ) { + pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n", + relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i); return -LIBBPF_ERRNO__FORMAT; + } - insn_idx = rel.r_offset / sizeof(struct bpf_insn); - name = elf_sym_str(obj, sym.st_name) ?: ""; + insn_idx = rel.r_offset / BPF_INSN_SZ; + sym_name = elf_sym_str(obj, sym.st_name) ?: ""; - 
pr_debug("relo for shdr %u, symb %zu, value %zu, type %d, bind %d, name %d (\'%s\'), insn %u\n", - (__u32)sym.st_shndx, (size_t)GELF_R_SYM(rel.r_info), - (size_t)sym.st_value, GELF_ST_TYPE(sym.st_info), - GELF_ST_BIND(sym.st_info), sym.st_name, name, - insn_idx); + pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n", + relo_sec_name, i, insn_idx, sym_name); err = bpf_program__record_reloc(prog, &prog->reloc_desc[i], - insn_idx, name, &sym, &rel); + insn_idx, sym_name, &sym, &rel); if (err) return err; } @@ -5155,9 +5170,9 @@ static int bpf_core_patch_insn(struct bpf_program *prog, int insn_idx; __u8 class; - if (relo->insn_off % sizeof(struct bpf_insn)) + if (relo->insn_off % BPF_INSN_SZ) return -EINVAL; - insn_idx = relo->insn_off / sizeof(struct bpf_insn); + insn_idx = relo->insn_off / BPF_INSN_SZ; insn = &prog->insns[insn_idx]; class = BPF_CLASS(insn->code); @@ -5588,7 +5603,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) goto out; } - pr_debug("prog '%s': performing %d CO-RE offset relocs\n", + pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info); for_each_btf_ext_rec(seg, sec, i, rec) { @@ -5596,7 +5611,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) targ_btf, cand_cache); if (err) { pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", - sec_name, i, err); + prog->name, i, err); goto out; } } @@ -5716,7 +5731,8 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) return err; break; default: - pr_warn("relo #%d: bad relo type %d\n", i, relo->type); + pr_warn("prog '%s': relo #%d: bad relo type %d\n", + prog->name, i, relo->type); return -EINVAL; } } @@ -5751,7 +5767,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) err = bpf_program__relocate(prog, obj); if (err) { - pr_warn("failed to relocate '%s'\n", prog->section_name); + pr_warn("prog '%s': failed to relocate data references: %d\n", + prog->name, err); return err; } break; @@ -5766,7 +5783,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) err = bpf_program__relocate(prog, obj); if (err) { - pr_warn("failed to relocate '%s'\n", prog->section_name); + pr_warn("prog '%s': failed to relocate calls: %d\n", + prog->name, err); return err; } } @@ -6198,8 +6216,7 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) if (bpf_program__is_function_storage(prog, obj)) continue; if (!prog->load) { - pr_debug("prog '%s'('%s'): skipped loading\n", - prog->name, prog->section_name); + pr_debug("prog '%s': skipped loading\n", prog->name); continue; } prog->log_level |= log_level; @@ -7343,7 +7360,7 @@ int bpf_program__fd(const struct bpf_program *prog) size_t bpf_program__size(const struct bpf_program *prog) { - return prog->insns_cnt * sizeof(struct bpf_insn); + return prog->insns_cnt * BPF_INSN_SZ; } int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, -- cgit v1.2.3 From 2b8ee4f05d4f6a6c427ad30dd6c1bb49eb2efd3b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:00:21 -0700 Subject: tcp: bpf: Add TCP_BPF_DELACK_MAX setsockopt This change is mostly from an internal patch and adapts it from sysctl config to the bpf_setsockopt setup. The bpf_prog can set the max delay ack by using bpf_setsockopt(TCP_BPF_DELACK_MAX). This max delay ack can be communicated to its peer through bpf header option. 
The receiving peer can then use this max delay ack and set a potentially lower rto by using bpf_setsockopt(TCP_BPF_RTO_MIN) which will be introduced in the next patch. Another later selftest patch will also use it like the above to show how to write and parse bpf tcp header option. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200820190021.2884000-1-kafai@fb.com --- tools/include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a1bbaff7a0af..7b905cb0213e 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4257,6 +4257,7 @@ enum { enum { TCP_BPF_IW = 1001, /* Set TCP initial congestion window */ TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ + TCP_BPF_DELACK_MAX = 1003, /* Max delay ack in usecs */ }; struct bpf_perf_event_value { -- cgit v1.2.3 From ca584ba070864c606f3a54faaafe774726d5b4a1 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:00:27 -0700 Subject: tcp: bpf: Add TCP_BPF_RTO_MIN for bpf_setsockopt This patch adds bpf_setsockopt(TCP_BPF_RTO_MIN) to allow bpf prog to set the min rto of a connection. It could be used together with the earlier patch which has added bpf_setsockopt(TCP_BPF_DELACK_MAX). A later selftest patch will communicate the max delay ack in a bpf tcp header option and then the receiving side can use bpf_setsockopt(TCP_BPF_RTO_MIN) to set a shorter rto. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200820190027.2884170-1-kafai@fb.com --- tools/include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7b905cb0213e..1ae20058b574 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4258,6 +4258,7 @@ enum { TCP_BPF_IW = 1001, /* Set TCP initial congestion window */ TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ TCP_BPF_DELACK_MAX = 1003, /* Max delay ack in usecs */ + TCP_BPF_RTO_MIN = 1004, /* Min delay ack in usecs */ }; struct bpf_perf_event_value { -- cgit v1.2.3 From 00d211a4ea6f48e8e3b758813fe23ad28193d3bf Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:00:46 -0700 Subject: bpf: tcp: Add bpf_skops_parse_hdr() The patch adds a function bpf_skops_parse_hdr(). It will call the bpf prog to parse the TCP header received at a tcp_sock that has at least reached the ESTABLISHED state. For the packets received during the 3WHS (SYN, SYNACK and ACK), the received skb will be available to the bpf prog during the callback in bpf_skops_established() introduced in the previous patch and in the bpf_skops_write_hdr_opt() that will be added in the next patch. Calling bpf prog to parse header is controlled by two new flags in tp->bpf_sock_ops_cb_flags: BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG and BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG. When BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG is set, the bpf prog will only be called when there is unknown option in the TCP header. When BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG is set, the bpf prog will be called on all received TCP header. This function is half implemented to highlight the changes in TCP stack. 
The actual codes preparing the bpf running context and invoking the bpf prog will be added in the later patch with other necessary bpf pieces. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/bpf/20200820190046.2885054-1-kafai@fb.com --- tools/include/uapi/linux/bpf.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1ae20058b574..010ed2abcb66 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4173,8 +4173,10 @@ enum { BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1), BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2), BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), + BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4), + BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5), /* Mask of all currently supported cb flags */ - BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF, + BPF_SOCK_OPS_ALL_CB_FLAGS = 0x3F, }; /* List of known BPF sock_ops operators. -- cgit v1.2.3 From 331fca4315efa3bbd258fbdf8209d59d253c0480 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:00:52 -0700 Subject: bpf: tcp: Add bpf_skops_hdr_opt_len() and bpf_skops_write_hdr_opt() The bpf prog needs to parse the SYN header to learn what options have been sent by the peer's bpf-prog before writing its options into SYNACK. This patch adds a "syn_skb" arg to tcp_make_synack() and send_synack(). This syn_skb will eventually be made available (as read-only) to the bpf prog. This will be the only SYN packet available to the bpf prog during syncookie. For other regular cases, the bpf prog can also use the saved_syn. When writing options, the bpf prog will first be called to tell the kernel its required number of bytes. It is done by the new bpf_skops_hdr_opt_len(). The bpf prog will only be called when the new BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG is set in tp->bpf_sock_ops_cb_flags. When the bpf prog returns, the kernel will know how many bytes are needed and then update the "*remaining" arg accordingly. 4 byte alignment will be included in the "*remaining" before this function returns. The 4 byte aligned number of bytes will also be stored into the opts->bpf_opt_len. "bpf_opt_len" is a newly added member to the struct tcp_out_options. Then the new bpf_skops_write_hdr_opt() will call the bpf prog to write the header options. The bpf prog is only called if it has reserved spaces before (opts->bpf_opt_len > 0). The bpf prog is the last one getting a chance to reserve header space and writing the header option. These two functions are half implemented to highlight the changes in TCP stack. The actual codes preparing the bpf running context and invoking the bpf prog will be added in the later patch with other necessary bpf pieces. 
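As a rough illustration of the two-pass flow described above, a sock_ops program might first reserve space and then write the bytes. This is only a sketch: it assumes the bpf_reserve_hdr_opt()/bpf_store_hdr_opt() helpers and callback ops introduced later in this series, and the option payload is made up for the example:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("sockops")
    int toy_write_opt(struct bpf_sock_ops *skops)
    {
        /* kind=254 (experimental), kind-len=4, 2-byte magic 0xeB9F */
        __u8 opt[4] = { 254, 4, 0xeB, 0x9F };

        switch (skops->op) {
        case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
            /* Pass 1: tell the kernel how much room is needed. */
            bpf_reserve_hdr_opt(skops, sizeof(opt), 0);
            break;
        case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
            /* Pass 2: space has been reserved, write the option. */
            bpf_store_hdr_opt(skops, opt, sizeof(opt), 0);
            break;
        }
        return 1;
    }
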
Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/bpf/20200820190052.2885316-1-kafai@fb.com --- tools/include/uapi/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 010ed2abcb66..18d0e128bc3c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4175,8 +4175,9 @@ enum { BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4), BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5), + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), /* Mask of all currently supported cb flags */ - BPF_SOCK_OPS_ALL_CB_FLAGS = 0x3F, + BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, }; /* List of known BPF sock_ops operators. -- cgit v1.2.3 From 0813a841566f0962a5551be7749b43c45f0022a0 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:01:04 -0700 Subject: bpf: tcp: Allow bpf prog to write and parse TCP header option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Note: The TCP changes here is mainly to implement the bpf pieces into the bpf_skops_*() functions introduced in the earlier patches. ] The earlier effort in BPF-TCP-CC allows the TCP Congestion Control algorithm to be written in BPF. It opens up opportunities to allow a faster turnaround time in testing/releasing new congestion control ideas to production environment. The same flexibility can be extended to writing TCP header option. It is not uncommon that people want to test new TCP header option to improve the TCP performance. Another use case is for data-center that has a more controlled environment and has more flexibility in putting header options for internal only use. For example, we want to test the idea in putting maximum delay ACK in TCP header option which is similar to a draft RFC proposal [1]. This patch introduces the necessary BPF API and use them in the TCP stack to allow BPF_PROG_TYPE_SOCK_OPS program to parse and write TCP header options. It currently supports most of the TCP packet except RST. Supported TCP header option: ─────────────────────────── This patch allows the bpf-prog to write any option kind. Different bpf-progs can write its own option by calling the new helper bpf_store_hdr_opt(). The helper will ensure there is no duplicated option in the header. By allowing bpf-prog to write any option kind, this gives a lot of flexibility to the bpf-prog. Different bpf-prog can write its own option kind. It could also allow the bpf-prog to support a recently standardized option on an older kernel. Sockops Callback Flags: ────────────────────── The bpf program will only be called to parse/write tcp header option if the following newly added callback flags are enabled in tp->bpf_sock_ops_cb_flags: BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG A few words on the PARSE CB flags. When the above PARSE CB flags are turned on, the bpf-prog will be called on packets received at a sk that has at least reached the ESTABLISHED state. The parsing of the SYN-SYNACK-ACK will be discussed in the "3 Way HandShake" section. The default is off for all of the above new CB flags, i.e. the bpf prog will not be called to parse or write bpf hdr option. There are details comment on these new cb flags in the UAPI bpf.h. 
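For example, a hypothetical sock_ops program could opt in to the new callbacks once a connection is established, using the pre-existing bpf_sock_ops_cb_flags_set() helper (the program below is illustrative only):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("sockops")
    int enable_hdr_opt_cb(struct bpf_sock_ops *skops)
    {
        switch (skops->op) {
        case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
        case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
            /* Ask to be called back for unknown header options
             * and whenever the kernel writes header options.
             */
            bpf_sock_ops_cb_flags_set(skops,
                    skops->bpf_sock_ops_cb_flags |
                    BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
                    BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
            break;
        }
        return 1;
    }
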
sock_ops->skb_data and bpf_load_hdr_opt() ───────────────────────────────────────── sock_ops->skb_data and sock_ops->skb_data_end covers the whole TCP header and its options. They are read only. The new bpf_load_hdr_opt() helps to read a particular option "kind" from the skb_data. Please refer to the comment in UAPI bpf.h. It has details on what skb_data contains under different sock_ops->op. 3 Way HandShake ─────────────── The bpf-prog can learn if it is sending SYN or SYNACK by reading the sock_ops->skb_tcp_flags. * Passive side When writing SYNACK (i.e. sock_ops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB), the received SYN skb will be available to the bpf prog. The bpf prog can use the SYN skb (which may carry the header option sent from the remote bpf prog) to decide what bpf header option should be written to the outgoing SYNACK skb. The SYN packet can be obtained by getsockopt(TCP_BPF_SYN*). More on this later. Also, the bpf prog can learn if it is in syncookie mode (by checking sock_ops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE). The bpf prog can store the received SYN pkt by using the existing bpf_setsockopt(TCP_SAVE_SYN). The example in a later patch does it. [ Note that the fullsock here is a listen sk, bpf_sk_storage is not very useful here since the listen sk will be shared by many concurrent connection requests. Extending bpf_sk_storage support to request_sock will add weight to the minisock and it is not necessary better than storing the whole ~100 bytes SYN pkt. ] When the connection is established, the bpf prog will be called in the existing PASSIVE_ESTABLISHED_CB callback. At that time, the bpf prog can get the header option from the saved syn and then apply the needed operation to the newly established socket. The later patch will use the max delay ack specified in the SYN header and set the RTO of this newly established connection as an example. The received ACK (that concludes the 3WHS) will also be available to the bpf prog during PASSIVE_ESTABLISHED_CB through the sock_ops->skb_data. It could be useful in syncookie scenario. More on this later. There is an existing getsockopt "TCP_SAVED_SYN" to return the whole saved syn pkt which includes the IP[46] header and the TCP header. A few "TCP_BPF_SYN*" getsockopt has been added to allow specifying where to start getting from, e.g. starting from TCP header, or from IP[46] header. The new getsockopt(TCP_BPF_SYN*) will also know where it can get the SYN's packet from: - (a) the just received syn (available when the bpf prog is writing SYNACK) and it is the only way to get SYN during syncookie mode. or - (b) the saved syn (available in PASSIVE_ESTABLISHED_CB and also other existing CB). The bpf prog does not need to know where the SYN pkt is coming from. The getsockopt(TCP_BPF_SYN*) will hide this details. Similarly, a flags "BPF_LOAD_HDR_OPT_TCP_SYN" is also added to bpf_load_hdr_opt() to read a particular header option from the SYN packet. * Fastopen Fastopen should work the same as the regular non fastopen case. This is a test in a later patch. * Syncookie For syncookie, the later example patch asks the active side's bpf prog to resend the header options in ACK. The server can use bpf_load_hdr_opt() to look at the options in this received ACK during PASSIVE_ESTABLISHED_CB. * Active side The bpf prog will get a chance to write the bpf header option in the SYN packet during WRITE_HDR_OPT_CB. 
The received SYNACK pkt will also be available to the bpf prog during the existing ACTIVE_ESTABLISHED_CB callback through the sock_ops->skb_data and bpf_load_hdr_opt(). * Turn off header CB flags after 3WHS If the bpf prog does not need to write/parse header options beyond the 3WHS, the bpf prog can clear the bpf_sock_ops_cb_flags to avoid being called for header options. Or the bpf-prog can select to leave the UNKNOWN_HDR_OPT_CB_FLAG on so that the kernel will only call it when there is option that the kernel cannot handle. [1]: draft-wang-tcpm-low-latency-opt-00 https://tools.ietf.org/html/draft-wang-tcpm-low-latency-opt-00 Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200820190104.2885895-1-kafai@fb.com --- tools/include/uapi/linux/bpf.h | 300 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 299 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 18d0e128bc3c..f67ec5d9e57d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3395,6 +3395,120 @@ union bpf_attr { * A non-negative value equal to or less than *size* on success, * or a negative error in case of failure. * + * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags) + * Description + * Load header option. Support reading a particular TCP header + * option for bpf program (BPF_PROG_TYPE_SOCK_OPS). + * + * If *flags* is 0, it will search the option from the + * sock_ops->skb_data. The comment in "struct bpf_sock_ops" + * has details on what skb_data contains under different + * sock_ops->op. + * + * The first byte of the *searchby_res* specifies the + * kind that it wants to search. + * + * If the searching kind is an experimental kind + * (i.e. 253 or 254 according to RFC6994). It also + * needs to specify the "magic" which is either + * 2 bytes or 4 bytes. It then also needs to + * specify the size of the magic by using + * the 2nd byte which is "kind-length" of a TCP + * header option and the "kind-length" also + * includes the first 2 bytes "kind" and "kind-length" + * itself as a normal TCP header option also does. + * + * For example, to search experimental kind 254 with + * 2 byte magic 0xeB9F, the searchby_res should be + * [ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ]. + * + * To search for the standard window scale option (3), + * the searchby_res should be [ 3, 0, 0, .... 0 ]. + * Note, kind-length must be 0 for regular option. + * + * Searching for No-Op (0) and End-of-Option-List (1) are + * not supported. + * + * *len* must be at least 2 bytes which is the minimal size + * of a header option. + * + * Supported flags: + * * **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the + * saved_syn packet or the just-received syn packet. + * + * Return + * >0 when found, the header option is copied to *searchby_res*. + * The return value is the total length copied. + * + * **-EINVAL** If param is invalid + * + * **-ENOMSG** The option is not found + * + * **-ENOENT** No syn packet available when + * **BPF_LOAD_HDR_OPT_TCP_SYN** is used + * + * **-ENOSPC** Not enough space. Only *len* number of + * bytes are copied. + * + * **-EFAULT** Cannot parse the header options in the packet + * + * **-EPERM** This helper cannot be used under the + * current sock_ops->op. + * + * long bpf_store_hdr_opt(struct bpf_sock_ops *skops, const void *from, u32 len, u64 flags) + * Description + * Store header option. 
The data will be copied + * from buffer *from* with length *len* to the TCP header. + * + * The buffer *from* should have the whole option that + * includes the kind, kind-length, and the actual + * option data. The *len* must be at least kind-length + * long. The kind-length does not have to be 4 byte + * aligned. The kernel will take care of the padding + * and setting the 4 bytes aligned value to th->doff. + * + * This helper will check for duplicated option + * by searching the same option in the outgoing skb. + * + * This helper can only be called during + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * + * Return + * 0 on success, or negative error in case of failure: + * + * **-EINVAL** If param is invalid + * + * **-ENOSPC** Not enough space in the header. + * Nothing has been written + * + * **-EEXIST** The option has already existed + * + * **-EFAULT** Cannot parse the existing header options + * + * **-EPERM** This helper cannot be used under the + * current sock_ops->op. + * + * long bpf_reserve_hdr_opt(struct bpf_sock_ops *skops, u32 len, u64 flags) + * Description + * Reserve *len* bytes for the bpf header option. The + * space will be used by bpf_store_hdr_opt() later in + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * + * If bpf_reserve_hdr_opt() is called multiple times, + * the total number of bytes will be reserved. + * + * This helper can only be called during + * BPF_SOCK_OPS_HDR_OPT_LEN_CB. + * + * Return + * 0 on success, or negative error in case of failure: + * + * **-EINVAL** if param is invalid + * + * **-ENOSPC** Not enough space in the header. + * + * **-EPERM** This helper cannot be used under the + * current sock_ops->op. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3539,6 +3653,9 @@ union bpf_attr { FN(skc_to_tcp_request_sock), \ FN(skc_to_udp6_sock), \ FN(get_task_stack), \ + FN(load_hdr_opt), \ + FN(store_hdr_opt), \ + FN(reserve_hdr_opt), /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4165,6 +4282,36 @@ struct bpf_sock_ops { __u64 bytes_received; __u64 bytes_acked; __bpf_md_ptr(struct bpf_sock *, sk); + /* [skb_data, skb_data_end) covers the whole TCP header. + * + * BPF_SOCK_OPS_PARSE_HDR_OPT_CB: The packet received + * BPF_SOCK_OPS_HDR_OPT_LEN_CB: Not useful because the + * header has not been written. + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB: The header and options have + * been written so far. + * BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: The SYNACK that concludes + * the 3WHS. + * BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: The ACK that concludes + * the 3WHS. + * + * bpf_load_hdr_opt() can also be used to read a particular option. + */ + __bpf_md_ptr(void *, skb_data); + __bpf_md_ptr(void *, skb_data_end); + __u32 skb_len; /* The total length of a packet. + * It includes the header, options, + * and payload. + */ + __u32 skb_tcp_flags; /* tcp_flags of the header. It provides + * an easy way to check for tcp_flags + * without parsing skb_data. + * + * In particular, the skb_tcp_flags + * will still be available in + * BPF_SOCK_OPS_HDR_OPT_LEN even though + * the outgoing header has not + * been written yet. + */ }; /* Definitions for bpf_sock_ops_cb_flags */ @@ -4173,8 +4320,48 @@ enum { BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1), BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2), BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), - BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4), + /* Call bpf for all received TCP headers. 
The bpf prog will be + * called under sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB + * + * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB + * for the header option related helpers that will be useful + * to the bpf programs. + * + * It could be used at the client/active side (i.e. connect() side) + * when the server told it that the server was in syncookie + * mode and required the active side to resend the bpf-written + * options. The active side can keep writing the bpf-options until + * it received a valid packet from the server side to confirm + * the earlier packet (and options) has been received. The later + * example patch is using it like this at the active side when the + * server is in syncookie mode. + * + * The bpf prog will usually turn this off in the common cases. + */ + BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4), + /* Call bpf when kernel has received a header option that + * the kernel cannot handle. The bpf prog will be called under + * sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB. + * + * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB + * for the header option related helpers that will be useful + * to the bpf programs. + */ BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5), + /* Call bpf when the kernel is writing header options for the + * outgoing packet. The bpf prog will first be called + * to reserve space in a skb under + * sock_ops->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB. Then + * the bpf prog will be called to write the header option(s) + * under sock_ops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * + * Please refer to the comment in BPF_SOCK_OPS_HDR_OPT_LEN_CB + * and BPF_SOCK_OPS_WRITE_HDR_OPT_CB for the header option + * related helpers that will be useful to the bpf programs. + * + * The kernel gets its chance to reserve space and write + * options first before the BPF program does. + */ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), /* Mask of all currently supported cb flags */ BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, @@ -4233,6 +4420,63 @@ enum { */ BPF_SOCK_OPS_RTT_CB, /* Called on every RTT. */ + BPF_SOCK_OPS_PARSE_HDR_OPT_CB, /* Parse the header option. + * It will be called to handle + * the packets received at + * an already established + * connection. + * + * sock_ops->skb_data: + * Referring to the received skb. + * It covers the TCP header only. + * + * bpf_load_hdr_opt() can also + * be used to search for a + * particular option. + */ + BPF_SOCK_OPS_HDR_OPT_LEN_CB, /* Reserve space for writing the + * header option later in + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * Arg1: bool want_cookie. (in + * writing SYNACK only) + * + * sock_ops->skb_data: + * Not available because no header has + * been written yet. + * + * sock_ops->skb_tcp_flags: + * The tcp_flags of the + * outgoing skb. (e.g. SYN, ACK, FIN). + * + * bpf_reserve_hdr_opt() should + * be used to reserve space. + */ + BPF_SOCK_OPS_WRITE_HDR_OPT_CB, /* Write the header options + * Arg1: bool want_cookie. (in + * writing SYNACK only) + * + * sock_ops->skb_data: + * Referring to the outgoing skb. + * It covers the TCP header + * that has already been written + * by the kernel and the + * earlier bpf-progs. + * + * sock_ops->skb_tcp_flags: + * The tcp_flags of the outgoing + * skb. (e.g. SYN, ACK, FIN). + * + * bpf_store_hdr_opt() should + * be used to write the + * option. + * + * bpf_load_hdr_opt() can also + * be used to search for a + * particular option that + * has already been written + * by the kernel or the + * earlier bpf-progs. + */ }; /* List of TCP states. 
There is a build check in net/ipv4/tcp.c to detect @@ -4262,6 +4506,60 @@ enum { TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ TCP_BPF_DELACK_MAX = 1003, /* Max delay ack in usecs */ TCP_BPF_RTO_MIN = 1004, /* Min delay ack in usecs */ + /* Copy the SYN pkt to optval + * + * BPF_PROG_TYPE_SOCK_OPS only. It is similar to the + * bpf_getsockopt(TCP_SAVED_SYN) but it does not limit + * to only getting from the saved_syn. It can either get the + * syn packet from: + * + * 1. the just-received SYN packet (only available when writing the + * SYNACK). It will be useful when it is not necessary to + * save the SYN packet for latter use. It is also the only way + * to get the SYN during syncookie mode because the syn + * packet cannot be saved during syncookie. + * + * OR + * + * 2. the earlier saved syn which was done by + * bpf_setsockopt(TCP_SAVE_SYN). + * + * The bpf_getsockopt(TCP_BPF_SYN*) option will hide where the + * SYN packet is obtained. + * + * If the bpf-prog does not need the IP[46] header, the + * bpf-prog can avoid parsing the IP header by using + * TCP_BPF_SYN. Otherwise, the bpf-prog can get both + * IP[46] and TCP header by using TCP_BPF_SYN_IP. + * + * >0: Total number of bytes copied + * -ENOSPC: Not enough space in optval. Only optlen number of + * bytes is copied. + * -ENOENT: The SYN skb is not available now and the earlier SYN pkt + * is not saved by setsockopt(TCP_SAVE_SYN). + */ + TCP_BPF_SYN = 1005, /* Copy the TCP header */ + TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */ +}; + +enum { + BPF_LOAD_HDR_OPT_TCP_SYN = (1ULL << 0), +}; + +/* args[0] value during BPF_SOCK_OPS_HDR_OPT_LEN_CB and + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + */ +enum { + BPF_WRITE_HDR_TCP_CURRENT_MSS = 1, /* Kernel is finding the + * total option spaces + * required for an established + * sk in order to calculate the + * MSS. No skb is actually + * sent. + */ + BPF_WRITE_HDR_TCP_SYNACK_COOKIE = 2, /* Kernel is in syncookie mode + * when sending a SYN. + */ }; struct bpf_perf_event_value { -- cgit v1.2.3 From 8085e1dc1f3c885e1e9c1ef8031b3eabc1cccf25 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:01:11 -0700 Subject: bpf: selftests: Add fastopen_connect to network_helpers This patch adds a fastopen_connect() helper which will be used in a later test. 
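A hypothetical call site might look like the following, assuming TCP Fast Open has already been enabled via the net.ipv4.tcp_fastopen sysctl (error handling omitted for brevity):

    #include <sys/socket.h>
    #include <unistd.h>
    #include "network_helpers.h"

    static void fastopen_example(void)
    {
        int srv_fd, client_fd, accepted_fd;
        char buf[8];

        srv_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
        /* Sends "data" inside the SYN via sendto(MSG_FASTOPEN). */
        client_fd = fastopen_connect(srv_fd, "data", 4, 0);
        accepted_fd = accept(srv_fd, NULL, 0);
        /* The TFO payload is readable right after accept(). */
        read(accepted_fd, buf, sizeof(buf));

        close(accepted_fd);
        close(client_fd);
        close(srv_fd);
    }
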
Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200820190111.2886196-1-kafai@fb.com --- tools/testing/selftests/bpf/network_helpers.c | 37 +++++++++++++++++++++++++++ tools/testing/selftests/bpf/network_helpers.h | 2 ++ 2 files changed, 39 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index f56655690f9b..12ee40284da0 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -104,6 +104,43 @@ error_close: return -1; } +int fastopen_connect(int server_fd, const char *data, unsigned int data_len, + int timeout_ms) +{ + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + struct sockaddr_in *addr_in; + int fd, ret; + + if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { + log_err("Failed to get server addr"); + return -1; + } + + addr_in = (struct sockaddr_in *)&addr; + fd = socket(addr_in->sin_family, SOCK_STREAM, 0); + if (fd < 0) { + log_err("Failed to create client socket"); + return -1; + } + + if (settimeo(fd, timeout_ms)) + goto error_close; + + ret = sendto(fd, data, data_len, MSG_FASTOPEN, (struct sockaddr *)&addr, + addrlen); + if (ret != data_len) { + log_err("sendto(data, %u) != %d\n", data_len, ret); + goto error_close; + } + + return fd; + +error_close: + save_errno_close(fd); + return -1; +} + static int connect_fd_to_addr(int fd, const struct sockaddr_storage *addr, socklen_t addrlen) diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index c3728f6667e4..7205f8afdba1 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -37,6 +37,8 @@ int start_server(int family, int type, const char *addr, __u16 port, int timeout_ms); int connect_to_fd(int server_fd, int timeout_ms); int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms); +int fastopen_connect(int server_fd, const char *data, unsigned int data_len, + int timeout_ms); int make_sockaddr(int family, const char *addr_str, __u16 port, struct sockaddr_storage *addr, socklen_t *len); -- cgit v1.2.3 From ad2f8eb0095e9036724d9cf0eb6960f1e6d52d21 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:01:17 -0700 Subject: bpf: selftests: Tcp header options This patch adds tests for the new bpf tcp header option feature. test_tcp_hdr_options.c: - It tests header option writing and parsing in 3WHS: regular connection establishment, fastopen, and syncookie. - In syncookie, the passive side's bpf prog is asking the active side to resend its bpf header option by specifying a RESEND bit in the outgoing SYNACK. handle_active_estab() and write_nodata_opt() has some details. - handle_passive_estab() has comments on fastopen. - It also has test for header writing and parsing in FIN packet. - Most of the tests is writing an experimental option 254 with magic 0xeB9F. - The no_exprm_estab() also tests writing a regular TCP option without any magic. test_misc_tcp_options.c: - It is an one directional test. Active side writes option and passive side parses option. The focus is to exercise the new helpers and API. - Testing the new helper: bpf_load_hdr_opt() and bpf_store_hdr_opt(). - Testing the bpf_getsockopt(TCP_BPF_SYN). - Negative tests for the above helpers. - Testing the sock_ops->skb_data. 
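As a taste of what the tests above exercise, a minimal (purely illustrative) parser for the experimental option used throughout the tests might look like:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("sockops")
    int toy_parse_opt(struct bpf_sock_ops *skops)
    {
        /* Search for kind 254 with the 2-byte magic 0xeB9F; on
         * success the full option is copied back into opt[].
         */
        __u8 opt[8] = { 254, 4, 0xeB, 0x9F, };
        int ret;

        if (skops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB) {
            ret = bpf_load_hdr_opt(skops, opt, sizeof(opt), 0);
            if (ret < 0)
                return 1; /* option absent or bad param */
            /* opt[0..ret-1] now holds kind, len, and data. */
        }
        return 1;
    }
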
Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200820190117.2886749-1-kafai@fb.com --- .../selftests/bpf/prog_tests/tcp_hdr_options.c | 622 ++++++++++++++++++++ .../bpf/progs/test_misc_tcp_hdr_options.c | 325 +++++++++++ .../selftests/bpf/progs/test_tcp_hdr_options.c | 623 +++++++++++++++++++++ tools/testing/selftests/bpf/test_tcp_hdr_options.h | 151 +++++ 4 files changed, 1721 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c create mode 100644 tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c create mode 100644 tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c create mode 100644 tools/testing/selftests/bpf/test_tcp_hdr_options.h (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c new file mode 100644 index 000000000000..24ba0d21b641 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -0,0 +1,622 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +#include "test_progs.h" +#include "cgroup_helpers.h" +#include "network_helpers.h" +#include "test_tcp_hdr_options.h" +#include "test_tcp_hdr_options.skel.h" +#include "test_misc_tcp_hdr_options.skel.h" + +#define LO_ADDR6 "::eB9F" +#define CG_NAME "/tcpbpf-hdr-opt-test" + +struct bpf_test_option exp_passive_estab_in; +struct bpf_test_option exp_active_estab_in; +struct bpf_test_option exp_passive_fin_in; +struct bpf_test_option exp_active_fin_in; +struct hdr_stg exp_passive_hdr_stg; +struct hdr_stg exp_active_hdr_stg = { .active = true, }; + +static struct test_misc_tcp_hdr_options *misc_skel; +static struct test_tcp_hdr_options *skel; +static int lport_linum_map_fd; +static int hdr_stg_map_fd; +static __u32 duration; +static int cg_fd; + +struct sk_fds { + int srv_fd; + int passive_fd; + int active_fd; + int passive_lport; + int active_lport; +}; + +static int add_lo_addr(void) +{ + char ip_addr_cmd[256]; + int cmdlen; + + cmdlen = snprintf(ip_addr_cmd, sizeof(ip_addr_cmd), + "ip -6 addr add %s/128 dev lo scope host", + LO_ADDR6); + + if (CHECK(cmdlen >= sizeof(ip_addr_cmd), "compile ip cmd", + "failed to add host addr %s to lo. ip cmdlen is too long\n", + LO_ADDR6)) + return -1; + + if (CHECK(system(ip_addr_cmd), "run ip cmd", + "failed to add host addr %s to lo\n", LO_ADDR6)) + return -1; + + return 0; +} + +static int create_netns(void) +{ + if (CHECK(unshare(CLONE_NEWNET), "create netns", + "unshare(CLONE_NEWNET): %s (%d)", + strerror(errno), errno)) + return -1; + + if (CHECK(system("ip link set dev lo up"), "run ip cmd", + "failed to bring lo link up\n")) + return -1; + + if (add_lo_addr()) + return -1; + + return 0; +} + +static int write_sysctl(const char *sysctl, const char *value) +{ + int fd, err, len; + + fd = open(sysctl, O_WRONLY); + if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n", + sysctl, strerror(errno), errno)) + return -1; + + len = strlen(value); + err = write(fd, value, len); + close(fd); + if (CHECK(err != len, "write sysctl", + "write(%s, %s): err:%d %s (%d)\n", + sysctl, value, err, strerror(errno), errno)) + return -1; + + return 0; +} + +static void print_hdr_stg(const struct hdr_stg *hdr_stg, const char *prefix) +{ + fprintf(stderr, "%s{active:%u, resend_syn:%u, syncookie:%u, fastopen:%u}\n", + prefix ? 
: "", hdr_stg->active, hdr_stg->resend_syn, + hdr_stg->syncookie, hdr_stg->fastopen); +} + +static void print_option(const struct bpf_test_option *opt, const char *prefix) +{ + fprintf(stderr, "%s{flags:0x%x, max_delack_ms:%u, rand:0x%x}\n", + prefix ? : "", opt->flags, opt->max_delack_ms, opt->rand); +} + +static void sk_fds_close(struct sk_fds *sk_fds) +{ + close(sk_fds->srv_fd); + close(sk_fds->passive_fd); + close(sk_fds->active_fd); +} + +static int sk_fds_shutdown(struct sk_fds *sk_fds) +{ + int ret, abyte; + + shutdown(sk_fds->active_fd, SHUT_WR); + ret = read(sk_fds->passive_fd, &abyte, sizeof(abyte)); + if (CHECK(ret != 0, "read-after-shutdown(passive_fd):", + "ret:%d %s (%d)\n", + ret, strerror(errno), errno)) + return -1; + + shutdown(sk_fds->passive_fd, SHUT_WR); + ret = read(sk_fds->active_fd, &abyte, sizeof(abyte)); + if (CHECK(ret != 0, "read-after-shutdown(active_fd):", + "ret:%d %s (%d)\n", + ret, strerror(errno), errno)) + return -1; + + return 0; +} + +static int sk_fds_connect(struct sk_fds *sk_fds, bool fast_open) +{ + const char fast[] = "FAST!!!"; + struct sockaddr_in6 addr6; + socklen_t len; + + sk_fds->srv_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0); + if (CHECK(sk_fds->srv_fd == -1, "start_server", "%s (%d)\n", + strerror(errno), errno)) + goto error; + + if (fast_open) + sk_fds->active_fd = fastopen_connect(sk_fds->srv_fd, fast, + sizeof(fast), 0); + else + sk_fds->active_fd = connect_to_fd(sk_fds->srv_fd, 0); + + if (CHECK_FAIL(sk_fds->active_fd == -1)) { + close(sk_fds->srv_fd); + goto error; + } + + len = sizeof(addr6); + if (CHECK(getsockname(sk_fds->srv_fd, (struct sockaddr *)&addr6, + &len), "getsockname(srv_fd)", "%s (%d)\n", + strerror(errno), errno)) + goto error_close; + sk_fds->passive_lport = ntohs(addr6.sin6_port); + + len = sizeof(addr6); + if (CHECK(getsockname(sk_fds->active_fd, (struct sockaddr *)&addr6, + &len), "getsockname(active_fd)", "%s (%d)\n", + strerror(errno), errno)) + goto error_close; + sk_fds->active_lport = ntohs(addr6.sin6_port); + + sk_fds->passive_fd = accept(sk_fds->srv_fd, NULL, 0); + if (CHECK(sk_fds->passive_fd == -1, "accept(srv_fd)", "%s (%d)\n", + strerror(errno), errno)) + goto error_close; + + if (fast_open) { + char bytes_in[sizeof(fast)]; + int ret; + + ret = read(sk_fds->passive_fd, bytes_in, sizeof(bytes_in)); + if (CHECK(ret != sizeof(fast), "read fastopen syn data", + "expected=%lu actual=%d\n", sizeof(fast), ret)) { + close(sk_fds->passive_fd); + goto error_close; + } + } + + return 0; + +error_close: + close(sk_fds->active_fd); + close(sk_fds->srv_fd); + +error: + memset(sk_fds, -1, sizeof(*sk_fds)); + return -1; +} + +static int check_hdr_opt(const struct bpf_test_option *exp, + const struct bpf_test_option *act, + const char *hdr_desc) +{ + if (CHECK(memcmp(exp, act, sizeof(*exp)), + "expected-vs-actual", "unexpected %s\n", hdr_desc)) { + print_option(exp, "expected: "); + print_option(act, " actual: "); + return -1; + } + + return 0; +} + +static int check_hdr_stg(const struct hdr_stg *exp, int fd, + const char *stg_desc) +{ + struct hdr_stg act; + + if (CHECK(bpf_map_lookup_elem(hdr_stg_map_fd, &fd, &act), + "map_lookup(hdr_stg_map_fd)", "%s %s (%d)\n", + stg_desc, strerror(errno), errno)) + return -1; + + if (CHECK(memcmp(exp, &act, sizeof(*exp)), + "expected-vs-actual", "unexpected %s\n", stg_desc)) { + print_hdr_stg(exp, "expected: "); + print_hdr_stg(&act, " actual: "); + return -1; + } + + return 0; +} + +static int check_error_linum(const struct sk_fds *sk_fds) +{ + unsigned int nr_errors 
= 0; + struct linum_err linum_err; + int lport; + + lport = sk_fds->passive_lport; + if (!bpf_map_lookup_elem(lport_linum_map_fd, &lport, &linum_err)) { + fprintf(stderr, + "bpf prog error out at lport:passive(%d), linum:%u err:%d\n", + lport, linum_err.linum, linum_err.err); + nr_errors++; + } + + lport = sk_fds->active_lport; + if (!bpf_map_lookup_elem(lport_linum_map_fd, &lport, &linum_err)) { + fprintf(stderr, + "bpf prog error out at lport:active(%d), linum:%u err:%d\n", + lport, linum_err.linum, linum_err.err); + nr_errors++; + } + + return nr_errors; +} + +static void check_hdr_and_close_fds(struct sk_fds *sk_fds) +{ + if (sk_fds_shutdown(sk_fds)) + goto check_linum; + + if (check_hdr_stg(&exp_passive_hdr_stg, sk_fds->passive_fd, + "passive_hdr_stg")) + goto check_linum; + + if (check_hdr_stg(&exp_active_hdr_stg, sk_fds->active_fd, + "active_hdr_stg")) + goto check_linum; + + if (check_hdr_opt(&exp_passive_estab_in, &skel->bss->passive_estab_in, + "passive_estab_in")) + goto check_linum; + + if (check_hdr_opt(&exp_active_estab_in, &skel->bss->active_estab_in, + "active_estab_in")) + goto check_linum; + + if (check_hdr_opt(&exp_passive_fin_in, &skel->bss->passive_fin_in, + "passive_fin_in")) + goto check_linum; + + check_hdr_opt(&exp_active_fin_in, &skel->bss->active_fin_in, + "active_fin_in"); + +check_linum: + CHECK_FAIL(check_error_linum(sk_fds)); + sk_fds_close(sk_fds); +} + +static void prepare_out(void) +{ + skel->bss->active_syn_out = exp_passive_estab_in; + skel->bss->passive_synack_out = exp_active_estab_in; + + skel->bss->active_fin_out = exp_passive_fin_in; + skel->bss->passive_fin_out = exp_active_fin_in; +} + +static void reset_test(void) +{ + size_t optsize = sizeof(struct bpf_test_option); + int lport, err; + + memset(&skel->bss->passive_synack_out, 0, optsize); + memset(&skel->bss->passive_fin_out, 0, optsize); + + memset(&skel->bss->passive_estab_in, 0, optsize); + memset(&skel->bss->passive_fin_in, 0, optsize); + + memset(&skel->bss->active_syn_out, 0, optsize); + memset(&skel->bss->active_fin_out, 0, optsize); + + memset(&skel->bss->active_estab_in, 0, optsize); + memset(&skel->bss->active_fin_in, 0, optsize); + + skel->data->test_kind = TCPOPT_EXP; + skel->data->test_magic = 0xeB9F; + + memset(&exp_passive_estab_in, 0, optsize); + memset(&exp_active_estab_in, 0, optsize); + memset(&exp_passive_fin_in, 0, optsize); + memset(&exp_active_fin_in, 0, optsize); + + memset(&exp_passive_hdr_stg, 0, sizeof(exp_passive_hdr_stg)); + memset(&exp_active_hdr_stg, 0, sizeof(exp_active_hdr_stg)); + exp_active_hdr_stg.active = true; + + err = bpf_map_get_next_key(lport_linum_map_fd, NULL, &lport); + while (!err) { + bpf_map_delete_elem(lport_linum_map_fd, &lport); + err = bpf_map_get_next_key(lport_linum_map_fd, &lport, &lport); + } +} + +static void fastopen_estab(void) +{ + struct bpf_link *link; + struct sk_fds sk_fds; + + hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map); + lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map); + + exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS; + exp_passive_estab_in.rand = 0xfa; + exp_passive_estab_in.max_delack_ms = 11; + + exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS; + exp_active_estab_in.rand = 0xce; + exp_active_estab_in.max_delack_ms = 22; + + exp_passive_hdr_stg.fastopen = true; + + prepare_out(); + + /* Allow fastopen without fastopen cookie */ + if (write_sysctl("/proc/sys/net/ipv4/tcp_fastopen", "1543")) + return; + + link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); + 
if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", + PTR_ERR(link))) + return; + + if (sk_fds_connect(&sk_fds, true)) { + bpf_link__destroy(link); + return; + } + + check_hdr_and_close_fds(&sk_fds); + bpf_link__destroy(link); +} + +static void syncookie_estab(void) +{ + struct bpf_link *link; + struct sk_fds sk_fds; + + hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map); + lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map); + + exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS; + exp_passive_estab_in.rand = 0xfa; + exp_passive_estab_in.max_delack_ms = 11; + + exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS | + OPTION_F_RESEND; + exp_active_estab_in.rand = 0xce; + exp_active_estab_in.max_delack_ms = 22; + + exp_passive_hdr_stg.syncookie = true; + exp_active_hdr_stg.resend_syn = true; + + prepare_out(); + + /* Clear the RESEND to ensure the bpf prog can learn + * want_cookie and set the RESEND by itself. + */ + skel->bss->passive_synack_out.flags &= ~OPTION_F_RESEND; + + /* Enforce syncookie mode */ + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2")) + return; + + link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); + if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", + PTR_ERR(link))) + return; + + if (sk_fds_connect(&sk_fds, false)) { + bpf_link__destroy(link); + return; + } + + check_hdr_and_close_fds(&sk_fds); + bpf_link__destroy(link); +} + +static void fin(void) +{ + struct bpf_link *link; + struct sk_fds sk_fds; + + hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map); + lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map); + + exp_passive_fin_in.flags = OPTION_F_RAND; + exp_passive_fin_in.rand = 0xfa; + + exp_active_fin_in.flags = OPTION_F_RAND; + exp_active_fin_in.rand = 0xce; + + prepare_out(); + + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1")) + return; + + link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); + if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", + PTR_ERR(link))) + return; + + if (sk_fds_connect(&sk_fds, false)) { + bpf_link__destroy(link); + return; + } + + check_hdr_and_close_fds(&sk_fds); + bpf_link__destroy(link); +} + +static void __simple_estab(bool exprm) +{ + struct bpf_link *link; + struct sk_fds sk_fds; + + hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map); + lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map); + + exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS; + exp_passive_estab_in.rand = 0xfa; + exp_passive_estab_in.max_delack_ms = 11; + + exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS; + exp_active_estab_in.rand = 0xce; + exp_active_estab_in.max_delack_ms = 22; + + prepare_out(); + + if (!exprm) { + skel->data->test_kind = 0xB9; + skel->data->test_magic = 0; + } + + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1")) + return; + + link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); + if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", + PTR_ERR(link))) + return; + + if (sk_fds_connect(&sk_fds, false)) { + bpf_link__destroy(link); + return; + } + + check_hdr_and_close_fds(&sk_fds); + bpf_link__destroy(link); +} + +static void no_exprm_estab(void) +{ + __simple_estab(false); +} + +static void simple_estab(void) +{ + __simple_estab(true); +} + +static void misc(void) +{ + const char send_msg[] = "MISC!!!"; + char recv_msg[sizeof(send_msg)]; + const unsigned int nr_data = 2; + struct bpf_link *link; + struct sk_fds sk_fds; + int i, ret; + +
lport_linum_map_fd = bpf_map__fd(misc_skel->maps.lport_linum_map); + + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1")) + return; + + link = bpf_program__attach_cgroup(misc_skel->progs.misc_estab, cg_fd); + if (CHECK(IS_ERR(link), "attach_cgroup(misc_estab)", "err: %ld\n", + PTR_ERR(link))) + return; + + if (sk_fds_connect(&sk_fds, false)) { + bpf_link__destroy(link); + return; + } + + for (i = 0; i < nr_data; i++) { + /* MSG_EOR to ensure skb will not be combined */ + ret = send(sk_fds.active_fd, send_msg, sizeof(send_msg), + MSG_EOR); + if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n", + ret)) + goto check_linum; + + ret = read(sk_fds.passive_fd, recv_msg, sizeof(recv_msg)); + if (CHECK(ret != sizeof(send_msg), "read(msg)", "ret:%d\n", + ret)) + goto check_linum; + } + + if (sk_fds_shutdown(&sk_fds)) + goto check_linum; + + CHECK(misc_skel->bss->nr_syn != 1, "unexpected nr_syn", + "expected (1) != actual (%u)\n", + misc_skel->bss->nr_syn); + + CHECK(misc_skel->bss->nr_data != nr_data, "unexpected nr_data", + "expected (%u) != actual (%u)\n", + nr_data, misc_skel->bss->nr_data); + + /* The last ACK may have been delayed, so it is either 1 or 2. */ + CHECK(misc_skel->bss->nr_pure_ack != 1 && + misc_skel->bss->nr_pure_ack != 2, + "unexpected nr_pure_ack", + "expected (1 or 2) != actual (%u)\n", + misc_skel->bss->nr_pure_ack); + + CHECK(misc_skel->bss->nr_fin != 1, "unexpected nr_fin", + "expected (1) != actual (%u)\n", + misc_skel->bss->nr_fin); + +check_linum: + CHECK_FAIL(check_error_linum(&sk_fds)); + sk_fds_close(&sk_fds); + bpf_link__destroy(link); +} + +struct test { + const char *desc; + void (*run)(void); +}; + +#define DEF_TEST(name) { #name, name } +static struct test tests[] = { + DEF_TEST(simple_estab), + DEF_TEST(no_exprm_estab), + DEF_TEST(syncookie_estab), + DEF_TEST(fastopen_estab), + DEF_TEST(fin), + DEF_TEST(misc), +}; + +void test_tcp_hdr_options(void) +{ + int i; + + skel = test_tcp_hdr_options__open_and_load(); + if (CHECK(!skel, "open and load skel", "failed")) + return; + + misc_skel = test_misc_tcp_hdr_options__open_and_load(); + if (CHECK(!misc_skel, "open and load misc test skel", "failed")) + goto skel_destroy; + + cg_fd = test__join_cgroup(CG_NAME); + if (CHECK_FAIL(cg_fd < 0)) + goto skel_destroy; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (!test__start_subtest(tests[i].desc)) + continue; + + if (create_netns()) + break; + + tests[i].run(); + + reset_test(); + } + + close(cg_fd); +skel_destroy: + test_misc_tcp_hdr_options__destroy(misc_skel); + test_tcp_hdr_options__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c new file mode 100644 index 000000000000..3a216d1d0226 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define BPF_PROG_TEST_TCP_HDR_OPTIONS +#include "test_tcp_hdr_options.h" + +__u16 last_addr16_n = __bpf_htons(0xeB9F); +__u16 active_lport_n = 0; +__u16 active_lport_h = 0; +__u16 passive_lport_n = 0; +__u16 passive_lport_h = 0; + +/* options received at passive side */ +unsigned int nr_pure_ack = 0; +unsigned int nr_data = 0; +unsigned int nr_syn = 0; +unsigned int nr_fin = 0; + +/* Check the header received from the active side */ +static int __check_active_hdr_in(struct 
bpf_sock_ops *skops, bool check_syn) +{ + union { + struct tcphdr th; + struct ipv6hdr ip6; + struct tcp_exprm_opt exprm_opt; + struct tcp_opt reg_opt; + __u8 data[100]; /* IPv6 (40) + Max TCP hdr (60) */ + } hdr = {}; + __u64 load_flags = check_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0; + struct tcphdr *pth; + int ret; + + hdr.reg_opt.kind = 0xB9; + + /* The option is 4 bytes long instead of 2 bytes */ + ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, 2, load_flags); + if (ret != -ENOSPC) + RET_CG_ERR(ret); + + /* Test searching magic with regular kind */ + hdr.reg_opt.len = 4; + ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt), + load_flags); + if (ret != -EINVAL) + RET_CG_ERR(ret); + + hdr.reg_opt.len = 0; + ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt), + load_flags); + if (ret != 4 || hdr.reg_opt.len != 4 || hdr.reg_opt.kind != 0xB9 || + hdr.reg_opt.data[0] != 0xfa || hdr.reg_opt.data[1] != 0xce) + RET_CG_ERR(ret); + + /* Test searching experimental option with invalid kind length */ + hdr.exprm_opt.kind = TCPOPT_EXP; + hdr.exprm_opt.len = 5; + hdr.exprm_opt.magic = 0; + ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt), + load_flags); + if (ret != -EINVAL) + RET_CG_ERR(ret); + + /* Test searching experimental option with 0 magic value */ + hdr.exprm_opt.len = 4; + ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt), + load_flags); + if (ret != -ENOMSG) + RET_CG_ERR(ret); + + hdr.exprm_opt.magic = __bpf_htons(0xeB9F); + ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt), + load_flags); + if (ret != 4 || hdr.exprm_opt.len != 4 || + hdr.exprm_opt.kind != TCPOPT_EXP || + hdr.exprm_opt.magic != __bpf_htons(0xeB9F)) + RET_CG_ERR(ret); + + if (!check_syn) + return CG_OK; + + /* Test loading from skops->syn_skb if sk_state == TCP_NEW_SYN_RECV + * + * Test loading from tp->saved_syn for other sk_state. + */ + ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr.ip6, + sizeof(hdr.ip6)); + if (ret != -ENOSPC) + RET_CG_ERR(ret); + + if (hdr.ip6.saddr.s6_addr16[7] != last_addr16_n || + hdr.ip6.daddr.s6_addr16[7] != last_addr16_n) + RET_CG_ERR(0); + + ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr, sizeof(hdr)); + if (ret < 0) + RET_CG_ERR(ret); + + pth = (struct tcphdr *)(&hdr.ip6 + 1); + if (pth->dest != passive_lport_n || pth->source != active_lport_n) + RET_CG_ERR(0); + + ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN, &hdr, sizeof(hdr)); + if (ret < 0) + RET_CG_ERR(ret); + + if (hdr.th.dest != passive_lport_n || hdr.th.source != active_lport_n) + RET_CG_ERR(0); + + return CG_OK; +} + +static int check_active_syn_in(struct bpf_sock_ops *skops) +{ + return __check_active_hdr_in(skops, true); +} + +static int check_active_hdr_in(struct bpf_sock_ops *skops) +{ + struct tcphdr *th; + + if (__check_active_hdr_in(skops, false) == CG_ERR) + return CG_ERR; + + th = skops->skb_data; + if (th + 1 > skops->skb_data_end) + RET_CG_ERR(0); + + if (tcp_hdrlen(th) < skops->skb_len) + nr_data++; + + if (th->fin) + nr_fin++; + + if (th->ack && !th->fin && tcp_hdrlen(th) == skops->skb_len) + nr_pure_ack++; + + return CG_OK; +} + +static int active_opt_len(struct bpf_sock_ops *skops) +{ + int err; + + /* Reserve more than enough to allow the -EEXIST test in + * the write_active_opt(). 
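+ * (write_active_opt() stores one 4-byte experimental option and one + * 4-byte regular option, 8 bytes in total, so 12 leaves spare room.)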
+ */ + err = bpf_reserve_hdr_opt(skops, 12, 0); + if (err) + RET_CG_ERR(err); + + return CG_OK; +} + +static int write_active_opt(struct bpf_sock_ops *skops) +{ + struct tcp_exprm_opt exprm_opt = {}; + struct tcp_opt win_scale_opt = {}; + struct tcp_opt reg_opt = {}; + struct tcphdr *th; + int err, ret; + + exprm_opt.kind = TCPOPT_EXP; + exprm_opt.len = 4; + exprm_opt.magic = __bpf_htons(0xeB9F); + + reg_opt.kind = 0xB9; + reg_opt.len = 4; + reg_opt.data[0] = 0xfa; + reg_opt.data[1] = 0xce; + + win_scale_opt.kind = TCPOPT_WINDOW; + + err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0); + if (err) + RET_CG_ERR(err); + + /* Store the same exprm option */ + err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0); + if (err != -EEXIST) + RET_CG_ERR(err); + + err = bpf_store_hdr_opt(skops, ®_opt, sizeof(reg_opt), 0); + if (err) + RET_CG_ERR(err); + err = bpf_store_hdr_opt(skops, ®_opt, sizeof(reg_opt), 0); + if (err != -EEXIST) + RET_CG_ERR(err); + + /* Check the option has been written and can be searched */ + ret = bpf_load_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0); + if (ret != 4 || exprm_opt.len != 4 || exprm_opt.kind != TCPOPT_EXP || + exprm_opt.magic != __bpf_htons(0xeB9F)) + RET_CG_ERR(ret); + + reg_opt.len = 0; + ret = bpf_load_hdr_opt(skops, ®_opt, sizeof(reg_opt), 0); + if (ret != 4 || reg_opt.len != 4 || reg_opt.kind != 0xB9 || + reg_opt.data[0] != 0xfa || reg_opt.data[1] != 0xce) + RET_CG_ERR(ret); + + th = skops->skb_data; + if (th + 1 > skops->skb_data_end) + RET_CG_ERR(0); + + if (th->syn) { + active_lport_h = skops->local_port; + active_lport_n = th->source; + + /* Search the win scale option written by kernel + * in the SYN packet. + */ + ret = bpf_load_hdr_opt(skops, &win_scale_opt, + sizeof(win_scale_opt), 0); + if (ret != 3 || win_scale_opt.len != 3 || + win_scale_opt.kind != TCPOPT_WINDOW) + RET_CG_ERR(ret); + + /* Write the win scale option that kernel + * has already written. + */ + err = bpf_store_hdr_opt(skops, &win_scale_opt, + sizeof(win_scale_opt), 0); + if (err != -EEXIST) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int handle_hdr_opt_len(struct bpf_sock_ops *skops) +{ + __u8 tcp_flags = skops_tcp_flags(skops); + + if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK) + /* Check the SYN from bpf_sock_ops_kern->syn_skb */ + return check_active_syn_in(skops); + + /* Passive side should have cleared the write hdr cb by now */ + if (skops->local_port == passive_lport_h) + RET_CG_ERR(0); + + return active_opt_len(skops); +} + +static int handle_write_hdr_opt(struct bpf_sock_ops *skops) +{ + if (skops->local_port == passive_lport_h) + RET_CG_ERR(0); + + return write_active_opt(skops); +} + +static int handle_parse_hdr(struct bpf_sock_ops *skops) +{ + /* Passive side is not writing any non-standard/unknown + * option, so the active side should never be called. 
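+ * (The parse callback is driven by BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG, + * so it only fires for options the kernel itself does not handle.)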
+ */ + if (skops->local_port == active_lport_h) + RET_CG_ERR(0); + + return check_active_hdr_in(skops); +} + +static int handle_passive_estab(struct bpf_sock_ops *skops) +{ + int err; + + /* No more write hdr cb */ + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags & + ~BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG); + + /* Recheck the SYN but check the tp->saved_syn this time */ + err = check_active_syn_in(skops); + if (err == CG_ERR) + return err; + + nr_syn++; + + /* The ack has header option written by the active side also */ + return check_active_hdr_in(skops); +} + +SEC("sockops/misc_estab") +int misc_estab(struct bpf_sock_ops *skops) +{ + int true_val = 1; + + switch (skops->op) { + case BPF_SOCK_OPS_TCP_LISTEN_CB: + passive_lport_h = skops->local_port; + passive_lport_n = __bpf_htons(passive_lport_h); + bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN, + &true_val, sizeof(true_val)); + set_hdr_cb_flags(skops); + break; + case BPF_SOCK_OPS_TCP_CONNECT_CB: + set_hdr_cb_flags(skops); + break; + case BPF_SOCK_OPS_PARSE_HDR_OPT_CB: + return handle_parse_hdr(skops); + case BPF_SOCK_OPS_HDR_OPT_LEN_CB: + return handle_hdr_opt_len(skops); + case BPF_SOCK_OPS_WRITE_HDR_OPT_CB: + return handle_write_hdr_opt(skops); + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + return handle_passive_estab(skops); + } + + return CG_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c new file mode 100644 index 000000000000..9197a23df3da --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c @@ -0,0 +1,623 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define BPF_PROG_TEST_TCP_HDR_OPTIONS +#include "test_tcp_hdr_options.h" + +#ifndef sizeof_field +#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) +#endif + +__u8 test_kind = TCPOPT_EXP; +__u16 test_magic = 0xeB9F; + +struct bpf_test_option passive_synack_out = {}; +struct bpf_test_option passive_fin_out = {}; + +struct bpf_test_option passive_estab_in = {}; +struct bpf_test_option passive_fin_in = {}; + +struct bpf_test_option active_syn_out = {}; +struct bpf_test_option active_fin_out = {}; + +struct bpf_test_option active_estab_in = {}; +struct bpf_test_option active_fin_in = {}; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct hdr_stg); +} hdr_stg_map SEC(".maps"); + +static bool skops_want_cookie(const struct bpf_sock_ops *skops) +{ + return skops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE; +} + +static bool skops_current_mss(const struct bpf_sock_ops *skops) +{ + return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS; +} + +static __u8 option_total_len(__u8 flags) +{ + __u8 i, len = 1; /* +1 for flags */ + + if (!flags) + return 0; + + /* RESEND bit does not use a byte */ + for (i = OPTION_RESEND + 1; i < __NR_OPTION_FLAGS; i++) + len += !!TEST_OPTION_FLAGS(flags, i); + + if (test_kind == TCPOPT_EXP) + return len + TCP_BPF_EXPOPT_BASE_LEN; + else + return len + 2; /* +1 kind, +1 kind-len */ +} + +static void write_test_option(const struct bpf_test_option *test_opt, + __u8 *data) +{ + __u8 offset = 0; + + data[offset++] = test_opt->flags; + if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS)) + data[offset++] = test_opt->max_delack_ms; + + if 
(TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND)) + data[offset++] = test_opt->rand; +} + +static int store_option(struct bpf_sock_ops *skops, + const struct bpf_test_option *test_opt) +{ + union { + struct tcp_exprm_opt exprm; + struct tcp_opt regular; + } write_opt; + int err; + + if (test_kind == TCPOPT_EXP) { + write_opt.exprm.kind = TCPOPT_EXP; + write_opt.exprm.len = option_total_len(test_opt->flags); + write_opt.exprm.magic = __bpf_htons(test_magic); + write_opt.exprm.data32 = 0; + write_test_option(test_opt, write_opt.exprm.data); + err = bpf_store_hdr_opt(skops, &write_opt.exprm, + sizeof(write_opt.exprm), 0); + } else { + write_opt.regular.kind = test_kind; + write_opt.regular.len = option_total_len(test_opt->flags); + write_opt.regular.data32 = 0; + write_test_option(test_opt, write_opt.regular.data); + err = bpf_store_hdr_opt(skops, &write_opt.regular, + sizeof(write_opt.regular), 0); + } + + if (err) + RET_CG_ERR(err); + + return CG_OK; +} + +static int parse_test_option(struct bpf_test_option *opt, const __u8 *start) +{ + opt->flags = *start++; + + if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS)) + opt->max_delack_ms = *start++; + + if (TEST_OPTION_FLAGS(opt->flags, OPTION_RAND)) + opt->rand = *start++; + + return 0; +} + +static int load_option(struct bpf_sock_ops *skops, + struct bpf_test_option *test_opt, bool from_syn) +{ + union { + struct tcp_exprm_opt exprm; + struct tcp_opt regular; + } search_opt; + int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0; + + if (test_kind == TCPOPT_EXP) { + search_opt.exprm.kind = TCPOPT_EXP; + search_opt.exprm.len = 4; + search_opt.exprm.magic = __bpf_htons(test_magic); + search_opt.exprm.data32 = 0; + ret = bpf_load_hdr_opt(skops, &search_opt.exprm, + sizeof(search_opt.exprm), load_flags); + if (ret < 0) + return ret; + return parse_test_option(test_opt, search_opt.exprm.data); + } else { + search_opt.regular.kind = test_kind; + search_opt.regular.len = 0; + search_opt.regular.data32 = 0; + ret = bpf_load_hdr_opt(skops, &search_opt.regular, + sizeof(search_opt.regular), load_flags); + if (ret < 0) + return ret; + return parse_test_option(test_opt, search_opt.regular.data); + } +} + +static int synack_opt_len(struct bpf_sock_ops *skops) +{ + struct bpf_test_option test_opt = {}; + __u8 optlen; + int err; + + if (!passive_synack_out.flags) + return CG_OK; + + err = load_option(skops, &test_opt, true); + + /* bpf_test_option is not found */ + if (err == -ENOMSG) + return CG_OK; + + if (err) + RET_CG_ERR(err); + + optlen = option_total_len(passive_synack_out.flags); + if (optlen) { + err = bpf_reserve_hdr_opt(skops, optlen, 0); + if (err) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int write_synack_opt(struct bpf_sock_ops *skops) +{ + struct bpf_test_option opt; + + if (!passive_synack_out.flags) + /* We should not even be called since no header + * space has been reserved. 
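+ * (synack_opt_len() returns early in that case, without + * calling bpf_reserve_hdr_opt().)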
+ */ + RET_CG_ERR(0); + + opt = passive_synack_out; + if (skops_want_cookie(skops)) + SET_OPTION_FLAGS(opt.flags, OPTION_RESEND); + + return store_option(skops, &opt); +} + +static int syn_opt_len(struct bpf_sock_ops *skops) +{ + __u8 optlen; + int err; + + if (!active_syn_out.flags) + return CG_OK; + + optlen = option_total_len(active_syn_out.flags); + if (optlen) { + err = bpf_reserve_hdr_opt(skops, optlen, 0); + if (err) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int write_syn_opt(struct bpf_sock_ops *skops) +{ + if (!active_syn_out.flags) + RET_CG_ERR(0); + + return store_option(skops, &active_syn_out); +} + +static int fin_opt_len(struct bpf_sock_ops *skops) +{ + struct bpf_test_option *opt; + struct hdr_stg *hdr_stg; + __u8 optlen; + int err; + + if (!skops->sk) + RET_CG_ERR(0); + + hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); + if (!hdr_stg) + RET_CG_ERR(0); + + if (hdr_stg->active) + opt = &active_fin_out; + else + opt = &passive_fin_out; + + optlen = option_total_len(opt->flags); + if (optlen) { + err = bpf_reserve_hdr_opt(skops, optlen, 0); + if (err) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int write_fin_opt(struct bpf_sock_ops *skops) +{ + struct bpf_test_option *opt; + struct hdr_stg *hdr_stg; + + if (!skops->sk) + RET_CG_ERR(0); + + hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); + if (!hdr_stg) + RET_CG_ERR(0); + + if (hdr_stg->active) + opt = &active_fin_out; + else + opt = &passive_fin_out; + + if (!opt->flags) + RET_CG_ERR(0); + + return store_option(skops, opt); +} + +static int resend_in_ack(struct bpf_sock_ops *skops) +{ + struct hdr_stg *hdr_stg; + + if (!skops->sk) + return -1; + + hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); + if (!hdr_stg) + return -1; + + return !!hdr_stg->resend_syn; +} + +static int nodata_opt_len(struct bpf_sock_ops *skops) +{ + int resend; + + resend = resend_in_ack(skops); + if (resend < 0) + RET_CG_ERR(0); + + if (resend) + return syn_opt_len(skops); + + return CG_OK; +} + +static int write_nodata_opt(struct bpf_sock_ops *skops) +{ + int resend; + + resend = resend_in_ack(skops); + if (resend < 0) + RET_CG_ERR(0); + + if (resend) + return write_syn_opt(skops); + + return CG_OK; +} + +static int data_opt_len(struct bpf_sock_ops *skops) +{ + /* Same as the nodata version. Mostly to show + * an example usage on skops->skb_len. 
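+ * (handle_hdr_opt_len() only dispatches here when skops->skb_len + * is non-zero, i.e. the outgoing skb carries data.)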
+ */ + return nodata_opt_len(skops); +} + +static int write_data_opt(struct bpf_sock_ops *skops) +{ + return write_nodata_opt(skops); +} + +static int current_mss_opt_len(struct bpf_sock_ops *skops) +{ + /* Reserve maximum that may be needed */ + int err; + + err = bpf_reserve_hdr_opt(skops, option_total_len(OPTION_MASK), 0); + if (err) + RET_CG_ERR(err); + + return CG_OK; +} + +static int handle_hdr_opt_len(struct bpf_sock_ops *skops) +{ + __u8 tcp_flags = skops_tcp_flags(skops); + + if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK) + return synack_opt_len(skops); + + if (tcp_flags & TCPHDR_SYN) + return syn_opt_len(skops); + + if (tcp_flags & TCPHDR_FIN) + return fin_opt_len(skops); + + if (skops_current_mss(skops)) + /* The kernel is calculating the MSS */ + return current_mss_opt_len(skops); + + if (skops->skb_len) + return data_opt_len(skops); + + return nodata_opt_len(skops); +} + +static int handle_write_hdr_opt(struct bpf_sock_ops *skops) +{ + __u8 tcp_flags = skops_tcp_flags(skops); + struct tcphdr *th; + + if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK) + return write_synack_opt(skops); + + if (tcp_flags & TCPHDR_SYN) + return write_syn_opt(skops); + + if (tcp_flags & TCPHDR_FIN) + return write_fin_opt(skops); + + th = skops->skb_data; + if (th + 1 > skops->skb_data_end) + RET_CG_ERR(0); + + if (skops->skb_len > tcp_hdrlen(th)) + return write_data_opt(skops); + + return write_nodata_opt(skops); +} + +static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms) +{ + __u32 max_delack_us = max_delack_ms * 1000; + + return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_DELACK_MAX, + &max_delack_us, sizeof(max_delack_us)); +} + +static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms) +{ + __u32 min_rto_us = peer_max_delack_ms * 1000; + + return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN, &min_rto_us, + sizeof(min_rto_us)); +} + +static int handle_active_estab(struct bpf_sock_ops *skops) +{ + struct hdr_stg init_stg = { + .active = true, + }; + int err; + + err = load_option(skops, &active_estab_in, false); + if (err && err != -ENOMSG) + RET_CG_ERR(err); + + init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags, + OPTION_RESEND); + if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk, + &init_stg, + BPF_SK_STORAGE_GET_F_CREATE)) + RET_CG_ERR(0); + + if (init_stg.resend_syn) + /* Don't clear the write_hdr cb now because + * the ACK may get lost and retransmit may + * be needed. + * + * PARSE_ALL_HDR cb flag is set to learn if this + * resend_syn option has received by the peer. + * + * The header option will be resent until a valid + * packet is received at handle_parse_hdr() + * and all hdr cb flags will be cleared in + * handle_parse_hdr(). + */ + set_parse_all_hdr_cb_flags(skops); + else if (!active_fin_out.flags) + /* No options will be written from now */ + clear_hdr_cb_flags(skops); + + if (active_syn_out.max_delack_ms) { + err = set_delack_max(skops, active_syn_out.max_delack_ms); + if (err) + RET_CG_ERR(err); + } + + if (active_estab_in.max_delack_ms) { + err = set_rto_min(skops, active_estab_in.max_delack_ms); + if (err) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int handle_passive_estab(struct bpf_sock_ops *skops) +{ + struct hdr_stg init_stg = {}; + struct tcphdr *th; + int err; + + err = load_option(skops, &passive_estab_in, true); + if (err == -ENOENT) { + /* saved_syn is not found. It was in syncookie mode. 
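+ * (With syncookies no request socket is created, so there is + * nothing to hold the saved SYN.)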
+ * We have asked the active side to resend the options + * in ACK, so try to find the bpf_test_option from ACK now. + */ + err = load_option(skops, &passive_estab_in, false); + init_stg.syncookie = true; + } + + /* ENOMSG: The bpf_test_option is not found which is fine. + * Bail out now for all other errors. + */ + if (err && err != -ENOMSG) + RET_CG_ERR(err); + + th = skops->skb_data; + if (th + 1 > skops->skb_data_end) + RET_CG_ERR(0); + + if (th->syn) { + /* Fastopen */ + + /* Cannot clear cb_flags to stop write_hdr cb. + * synack is not sent yet for fast open. + * Even it was, the synack may need to be retransmitted. + * + * PARSE_ALL_HDR cb flag is set to learn + * if synack has reached the peer. + * All cb_flags will be cleared in handle_parse_hdr(). + */ + set_parse_all_hdr_cb_flags(skops); + init_stg.fastopen = true; + } else if (!passive_fin_out.flags) { + /* No options will be written from now */ + clear_hdr_cb_flags(skops); + } + + if (!skops->sk || + !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg, + BPF_SK_STORAGE_GET_F_CREATE)) + RET_CG_ERR(0); + + if (passive_synack_out.max_delack_ms) { + err = set_delack_max(skops, passive_synack_out.max_delack_ms); + if (err) + RET_CG_ERR(err); + } + + if (passive_estab_in.max_delack_ms) { + err = set_rto_min(skops, passive_estab_in.max_delack_ms); + if (err) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int handle_parse_hdr(struct bpf_sock_ops *skops) +{ + struct hdr_stg *hdr_stg; + struct tcphdr *th; + + if (!skops->sk) + RET_CG_ERR(0); + + th = skops->skb_data; + if (th + 1 > skops->skb_data_end) + RET_CG_ERR(0); + + hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); + if (!hdr_stg) + RET_CG_ERR(0); + + if (hdr_stg->resend_syn || hdr_stg->fastopen) + /* The PARSE_ALL_HDR cb flag was turned on + * to ensure that the previously written + * options have reached the peer. + * Those previously written option includes: + * - Active side: resend_syn in ACK during syncookie + * or + * - Passive side: SYNACK during fastopen + * + * A valid packet has been received here after + * the 3WHS, so the PARSE_ALL_HDR cb flag + * can be cleared now. + */ + clear_parse_all_hdr_cb_flags(skops); + + if (hdr_stg->resend_syn && !active_fin_out.flags) + /* Active side resent the syn option in ACK + * because the server was in syncookie mode. + * A valid packet has been received, so + * clear header cb flags if there is no + * more option to send. + */ + clear_hdr_cb_flags(skops); + + if (hdr_stg->fastopen && !passive_fin_out.flags) + /* Passive side was in fastopen. + * A valid packet has been received, so + * the SYNACK has reached the peer. + * Clear header cb flags if there is no more + * option to send. 
+ */ + clear_hdr_cb_flags(skops); + + if (th->fin) { + struct bpf_test_option *fin_opt; + int err; + + if (hdr_stg->active) + fin_opt = &active_fin_in; + else + fin_opt = &passive_fin_in; + + err = load_option(skops, fin_opt, false); + if (err && err != -ENOMSG) + RET_CG_ERR(err); + } + + return CG_OK; +} + +SEC("sockops/estab") +int estab(struct bpf_sock_ops *skops) +{ + int true_val = 1; + + switch (skops->op) { + case BPF_SOCK_OPS_TCP_LISTEN_CB: + bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN, + &true_val, sizeof(true_val)); + set_hdr_cb_flags(skops); + break; + case BPF_SOCK_OPS_TCP_CONNECT_CB: + set_hdr_cb_flags(skops); + break; + case BPF_SOCK_OPS_PARSE_HDR_OPT_CB: + return handle_parse_hdr(skops); + case BPF_SOCK_OPS_HDR_OPT_LEN_CB: + return handle_hdr_opt_len(skops); + case BPF_SOCK_OPS_WRITE_HDR_OPT_CB: + return handle_write_hdr_opt(skops); + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + return handle_passive_estab(skops); + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + return handle_active_estab(skops); + } + + return CG_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tcp_hdr_options.h b/tools/testing/selftests/bpf/test_tcp_hdr_options.h new file mode 100644 index 000000000000..78a8cf9eab42 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcp_hdr_options.h @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020 Facebook */ + +#ifndef _TEST_TCP_HDR_OPTIONS_H +#define _TEST_TCP_HDR_OPTIONS_H + +struct bpf_test_option { + __u8 flags; + __u8 max_delack_ms; + __u8 rand; +} __attribute__((packed)); + +enum { + OPTION_RESEND, + OPTION_MAX_DELACK_MS, + OPTION_RAND, + __NR_OPTION_FLAGS, +}; + +#define OPTION_F_RESEND (1 << OPTION_RESEND) +#define OPTION_F_MAX_DELACK_MS (1 << OPTION_MAX_DELACK_MS) +#define OPTION_F_RAND (1 << OPTION_RAND) +#define OPTION_MASK ((1 << __NR_OPTION_FLAGS) - 1) + +#define TEST_OPTION_FLAGS(flags, option) (1 & ((flags) >> (option))) +#define SET_OPTION_FLAGS(flags, option) ((flags) |= (1 << (option))) + +/* Store in bpf_sk_storage */ +struct hdr_stg { + bool active; + bool resend_syn; /* active side only */ + bool syncookie; /* passive side only */ + bool fastopen; /* passive side only */ +}; + +struct linum_err { + unsigned int linum; + int err; +}; + +#define TCPHDR_FIN 0x01 +#define TCPHDR_SYN 0x02 +#define TCPHDR_RST 0x04 +#define TCPHDR_PSH 0x08 +#define TCPHDR_ACK 0x10 +#define TCPHDR_URG 0x20 +#define TCPHDR_ECE 0x40 +#define TCPHDR_CWR 0x80 +#define TCPHDR_SYNACK (TCPHDR_SYN | TCPHDR_ACK) + +#define TCPOPT_EOL 0 +#define TCPOPT_NOP 1 +#define TCPOPT_WINDOW 3 +#define TCPOPT_EXP 254 + +#define TCP_BPF_EXPOPT_BASE_LEN 4 +#define MAX_TCP_HDR_LEN 60 +#define MAX_TCP_OPTION_SPACE 40 + +#ifdef BPF_PROG_TEST_TCP_HDR_OPTIONS + +#define CG_OK 1 +#define CG_ERR 0 + +#ifndef SOL_TCP +#define SOL_TCP 6 +#endif + +struct tcp_exprm_opt { + __u8 kind; + __u8 len; + __u16 magic; + union { + __u8 data[4]; + __u32 data32; + }; +} __attribute__((packed)); + +struct tcp_opt { + __u8 kind; + __u8 len; + union { + __u8 data[4]; + __u32 data32; + }; +} __attribute__((packed)); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 2); + __type(key, int); + __type(value, struct linum_err); +} lport_linum_map SEC(".maps"); + +static inline unsigned int tcp_hdrlen(const struct tcphdr *th) +{ + return th->doff << 2; +} + +static inline __u8 skops_tcp_flags(const struct bpf_sock_ops *skops) +{ + return skops->skb_tcp_flags; +} + +static inline void clear_hdr_cb_flags(struct bpf_sock_ops *skops) +{ + 
bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags & + ~(BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG | + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG)); +} + +static inline void set_hdr_cb_flags(struct bpf_sock_ops *skops) +{ + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags | + BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG | + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG); +} +static inline void +clear_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops) +{ + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags & + ~BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG); +} + +static inline void +set_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops) +{ + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags | + BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG); +} + +#define RET_CG_ERR(__err) ({ \ + struct linum_err __linum_err; \ + int __lport; \ + \ + __linum_err.linum = __LINE__; \ + __linum_err.err = __err; \ + __lport = skops->local_port; \ + bpf_map_update_elem(&lport_linum_map, &__lport, &__linum_err, BPF_NOEXIST); \ + clear_hdr_cb_flags(skops); \ + clear_parse_all_hdr_cb_flags(skops); \ + return CG_ERR; \ +}) + +#endif /* BPF_PROG_TEST_TCP_HDR_OPTIONS */ + +#endif /* _TEST_TCP_HDR_OPTIONS_H */ -- cgit v1.2.3 From 267cf9fa43d1c9d525d5d818a8651f2900e3aa9e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:01:23 -0700 Subject: tcp: bpf: Optionally store mac header in TCP_SAVE_SYN This patch is adapted from Eric's patch in an earlier discussion [1]. TCP_SAVE_SYN currently only stores the network header and tcp header. This patch allows it to optionally store the mac header as well if the setsockopt's optval is 2. It requires one more bit for the "save_syn" bit field in tcp_sock. This patch achieves this by moving the syn_smc bit next to is_mptcp. The syn_smc is currently used with the TCP experimental option. Since syn_smc is only used when CONFIG_SMC is enabled, this patch also puts the "IS_ENABLED(CONFIG_SMC)" around it, as is_mptcp does with "IS_ENABLED(CONFIG_MPTCP)". The mac_hdrlen is also stored in "struct saved_syn" to allow a quick offset from the bpf prog if it chooses to start reading from the network header or the tcp header. [1]: https://lore.kernel.org/netdev/CANn89iLJNWh6bkH7DNhy_kmcAexuUCccqERqe7z2QsvPhGrYPQ@mail.gmail.com/ Suggested-by: Eric Dumazet Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/bpf/20200820190123.2886935-1-kafai@fb.com --- tools/include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f67ec5d9e57d..544b89a64918 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4540,6 +4540,7 @@ enum { */ TCP_BPF_SYN = 1005, /* Copy the TCP header */ TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */ + TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ }; enum { -- cgit v1.2.3 From 3418c56de81fd73f2265c8915f4b910bcc141cb7 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 21 Aug 2020 15:55:56 -0700 Subject: libbpf: Avoid false uninitialized variable warning in bpf_core_apply_relo Some versions of GCC report uninitialized targ_spec usage. GCC is wrong, but let's avoid unnecessary warnings.
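The shape of the false positive is roughly this (illustrative only, not libbpf code; lookup() and consume() are placeholders): initialization happens behind a condition the compiler cannot correlate with the later use.

	int val;			/* GCC: "'val' may be used uninitialized" */

	if (lookup(&val) == 0)		/* writes val only when it returns 0 */
		consume(val);		/* reachable only when val was written */

Zero-initializing the variable, as the 'targ_spec = {}' change below does, is the cheap way to keep -Werror builds quiet.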
Fixes: ddc7c3042614 ("libbpf: implement BPF CO-RE offset relocation algorithm") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200821225556.2178419-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index fb7b8fb1d3fa..cd4a2bba05ff 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5390,7 +5390,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, struct hashmap *cand_cache) { const char *prog_name = bpf_program__title(prog, false); - struct bpf_core_spec local_spec, cand_spec, targ_spec; + struct bpf_core_spec local_spec, cand_spec, targ_spec = {}; const void *type_key = u32_as_hash_key(relo->type_id); struct bpf_core_relo_res cand_res, targ_res; const struct btf_type *local_type; -- cgit v1.2.3 From f872e4bc47f100ccbbe9469cd12e5ac78e4b6eda Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 21 Aug 2020 15:56:53 -0700 Subject: libbpf: Fix type compatibility check copy-paste error Fix copy-paste error in types compatibility check. Local type is accidentally used instead of target type for the very first type check strictness check. This can result in potentially less strict candidate comparison. Fix the error. Fixes: 3fc32f40c402 ("libbpf: Implement type-based CO-RE relocations support") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200821225653.2180782-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index cd4a2bba05ff..2e2523d8bb6d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4708,7 +4708,7 @@ static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id /* caller made sure that names match (ignoring flavor suffix) */ local_type = btf__type_by_id(local_btf, local_id); - targ_type = btf__type_by_id(local_btf, local_id); + targ_type = btf__type_by_id(targ_btf, targ_id); if (btf_kind(local_type) != btf_kind(targ_type)) return 0; -- cgit v1.2.3 From 8c3b3d971f2e4d72d8768457c40b513ca5d1f906 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 24 Aug 2020 09:45:23 +0100 Subject: selftests: bpf: Fix sockmap update nits Address review by Yonghong, to bring the new tests in line with the usual code style. 
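The cleanup style being converged on is the usual goto-unwind ladder; schematically (a sketch with invented names, assuming the selftests' test_progs.h CHECK() macro and a generated skeleton):

static void test_example(void)
{
	struct example_skel *skel;
	int duration = 0, sk;

	sk = socket(AF_INET, SOCK_STREAM, 0);
	if (CHECK(sk < 0, "socket", "errno %d\n", errno))
		return;

	skel = example_skel__open_and_load();
	if (CHECK(!skel, "open_and_load", "cannot load skeleton\n"))
		goto close_sk;

	/* ... test body ... */

	example_skel__destroy(skel);
close_sk:
	close(sk);
}

Unwinding in reverse order of acquisition keeps each label responsible for exactly one resource.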
Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200824084523.13104-1-lmb@cloudflare.com --- tools/testing/selftests/bpf/prog_tests/sockmap_basic.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 65ce7c289534..0b79d78b98db 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -118,10 +118,8 @@ static void test_sockmap_update(enum bpf_map_type map_type) return; skel = test_sockmap_update__open_and_load(); - if (CHECK(!skel, "open_and_load", "cannot load skeleton\n")) { - close(sk); - return; - } + if (CHECK(!skel, "open_and_load", "cannot load skeleton\n")) + goto close_sk; prog = bpf_program__fd(skel->progs.copy_sock_map); src = bpf_map__fd(skel->maps.src); @@ -158,8 +156,9 @@ static void test_sockmap_update(enum bpf_map_type map_type) dst_cookie, src_cookie); out: - close(sk); test_sockmap_update__destroy(skel); +close_sk: + close(sk); } static void test_sockmap_invalid_update(void) @@ -168,8 +167,7 @@ static void test_sockmap_invalid_update(void) int duration = 0; skel = test_sockmap_invalid_update__open_and_load(); - CHECK(skel, "open_and_load", "verifier accepted map_update\n"); - if (skel) + if (CHECK(skel, "open_and_load", "verifier accepted map_update\n")) test_sockmap_invalid_update__destroy(skel); } -- cgit v1.2.3 From 0fcdfffe80346d015b920228203d0269284d8b13 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 24 Aug 2020 15:28:07 -0700 Subject: selftests/bpf: Enable tc verbose mode for test_sk_assign Currently, test_sk_assign fails the verifier with llvm11/llvm12. During debugging, I found the default verifier output is truncated like below Verifier analysis: Skipped 2200 bytes, use 'verb' option for the full verbose log. [...] off=23,r=34,imm=0) R5=inv0 R6=ctx(id=0,off=0,imm=0) R7=pkt(id=0,off=0,r=34,imm=0) R10=fp0 80: (0f) r7 += r2 last_idx 80 first_idx 21 regs=4 stack=0 before 78: (16) if w3 == 0x11 goto pc+1 when I am using "./test_progs -vv -t assign". The reason is tc verbose mode is not enabled. This patch enables tc verbose mode, and the output looks like below Verifier analysis: 0: (bf) r6 = r1 1: (b4) w0 = 2 2: (61) r1 = *(u32 *)(r6 +80) 3: (61) r7 = *(u32 *)(r6 +76) 4: (bf) r2 = r7 5: (07) r2 += 14 6: (2d) if r2 > r1 goto pc+61 R0_w=inv2 R1_w=pkt_end(id=0,off=0,imm=0) R2_w=pkt(id=0,off=14,r=14,imm=0) ... Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200824222807.100200-1-yhs@fb.com --- tools/testing/selftests/bpf/prog_tests/sk_assign.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c index d43038d2b9e1..a49a26f95a8b 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c @@ -49,7 +49,7 @@ configure_stack(void) sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf", "direct-action object-file ./test_sk_assign.o", "section classifier/sk_assign_test", - (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : ""); + (env.verbosity < VERBOSE_VERY) ?
" 2>/dev/null" : "verbose"); if (CHECK(system(tc_cmd), "BPF load failed;", "run with -vv for more info\n")) return false; -- cgit v1.2.3 From 1f00d375af84fbcdb6dd6c79fd7c3d02d2390338 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 25 Aug 2020 20:29:13 +0200 Subject: bpf: Renames in preparation for bpf_local_storage A purely mechanical change to split the renaming from the actual generalization. Flags/consts: SK_STORAGE_CREATE_FLAG_MASK BPF_LOCAL_STORAGE_CREATE_FLAG_MASK BPF_SK_STORAGE_CACHE_SIZE BPF_LOCAL_STORAGE_CACHE_SIZE MAX_VALUE_SIZE BPF_LOCAL_STORAGE_MAX_VALUE_SIZE Structs: bucket bpf_local_storage_map_bucket bpf_sk_storage_map bpf_local_storage_map bpf_sk_storage_data bpf_local_storage_data bpf_sk_storage_elem bpf_local_storage_elem bpf_sk_storage bpf_local_storage The "sk" member in bpf_local_storage is also updated to "owner" in preparation for changing the type to void * in a subsequent patch. Functions: selem_linked_to_sk selem_linked_to_storage selem_alloc bpf_selem_alloc __selem_unlink_sk bpf_selem_unlink_storage_nolock __selem_link_sk bpf_selem_link_storage_nolock selem_unlink_sk __bpf_selem_unlink_storage sk_storage_update bpf_local_storage_update __sk_storage_lookup bpf_local_storage_lookup bpf_sk_storage_map_free bpf_local_storage_map_free bpf_sk_storage_map_alloc bpf_local_storage_map_alloc bpf_sk_storage_map_alloc_check bpf_local_storage_map_alloc_check bpf_sk_storage_map_check_btf bpf_local_storage_map_check_btf Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200825182919.1118197-2-kpsingh@chromium.org --- tools/testing/selftests/bpf/progs/map_ptr_kern.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c index 473665cac67e..982a2d8aa844 100644 --- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -589,7 +589,7 @@ static inline int check_stack(void) return 1; } -struct bpf_sk_storage_map { +struct bpf_local_storage_map { struct bpf_map map; } __attribute__((preserve_access_index)); @@ -602,8 +602,8 @@ struct { static inline int check_sk_storage(void) { - struct bpf_sk_storage_map *sk_storage = - (struct bpf_sk_storage_map *)&m_sk_storage; + struct bpf_local_storage_map *sk_storage = + (struct bpf_local_storage_map *)&m_sk_storage; struct bpf_map *map = (struct bpf_map *)&m_sk_storage; VERIFY(check(&sk_storage->map, map, sizeof(__u32), sizeof(__u32), 0)); -- cgit v1.2.3 From f836a56e84ffc9f1a1cd73f77e10404ca46a4616 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 25 Aug 2020 20:29:15 +0200 Subject: bpf: Generalize bpf_sk_storage Refactor the functionality in bpf_sk_storage.c so that concept of storage linked to kernel objects can be extended to other objects like inode, task_struct etc. Each new local storage will still be a separate map and provide its own set of helpers. This allows for future object specific extensions and still share a lot of the underlying implementation. 
This includes the changes suggested by Martin in: https://lore.kernel.org/bpf/20200725013047.4006241-1-kafai@fb.com/ adding new map operations to support bpf_local_storage maps: * storages for different kernel objects to optionally have different memory charging strategy (map_local_storage_charge, map_local_storage_uncharge) * Functionality to extract the storage pointer from a pointer to the owning object (map_owner_storage_ptr) Co-developed-by: Martin KaFai Lau Signed-off-by: Martin KaFai Lau Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200825182919.1118197-4-kpsingh@chromium.org --- tools/include/uapi/linux/bpf.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 544b89a64918..2cbd137eed86 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3765,9 +3765,13 @@ enum { BPF_F_SYSCTL_BASE_NAME = (1ULL << 0), }; -/* BPF_FUNC_sk_storage_get flags */ +/* BPF_FUNC__storage_get flags */ enum { - BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0), + BPF_LOCAL_STORAGE_GET_F_CREATE = (1ULL << 0), + /* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility + * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead. + */ + BPF_SK_STORAGE_GET_F_CREATE = BPF_LOCAL_STORAGE_GET_F_CREATE, }; /* BPF_FUNC_read_branch_records flags. */ -- cgit v1.2.3 From 8ea636848aca35b9f97c5b5dee30225cf2dd0fe6 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 25 Aug 2020 20:29:17 +0200 Subject: bpf: Implement bpf_local_storage for inodes Similar to bpf_local_storage for sockets, add local storage for inodes. The life-cycle of storage is managed with the life-cycle of the inode. i.e. the storage is destroyed along with the owning inode. The BPF LSM allocates an __rcu pointer to the bpf_local_storage in the security blob which are now stackable and can co-exist with other LSMs. 
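For a feel of the new map type, an LSM program might count opens per inode roughly like this (a sketch only, assuming a BTF-enabled kernel with the BPF LSM active; the map and program names are invented, and the selftest added later in this series is the authoritative example):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

struct {
	__uint(type, BPF_MAP_TYPE_INODE_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, __u32);
} open_cnt_map SEC(".maps");

SEC("lsm/file_open")
int BPF_PROG(count_open, struct file *file)
{
	__u32 *cnt;

	/* Create the per-inode storage on first open */
	cnt = bpf_inode_storage_get(&open_cnt_map, file->f_inode, 0,
				    BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (cnt)
		__sync_fetch_and_add(cnt, 1);
	return 0;
}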
Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200825182919.1118197-6-kpsingh@chromium.org --- tools/bpf/bpftool/Documentation/bpftool-map.rst | 2 +- tools/bpf/bpftool/bash-completion/bpftool | 3 +- tools/bpf/bpftool/map.c | 3 +- tools/include/uapi/linux/bpf.h | 40 ++++++++++++++++++++++++- tools/lib/bpf/libbpf_probes.c | 5 ++-- 5 files changed, 47 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 41e2a74252d0..083db6c2fc67 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -49,7 +49,7 @@ MAP COMMANDS | | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps** | | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash** | | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** -| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** } +| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index f53ed2f1a4aa..7b68e3c0a5fb 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -704,7 +704,8 @@ _bpftool() lru_percpu_hash lpm_trie array_of_maps \ hash_of_maps devmap devmap_hash sockmap cpumap \ xskmap sockhash cgroup_storage reuseport_sockarray \ - percpu_cgroup_storage queue stack' -- \ + percpu_cgroup_storage queue stack sk_storage \ + struct_ops inode_storage' -- \ "$cur" ) ) return 0 ;; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 3a27d31a1856..bc0071228f88 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -50,6 +50,7 @@ const char * const map_type_name[] = { [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage", [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops", [BPF_MAP_TYPE_RINGBUF] = "ringbuf", + [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage", }; const size_t map_type_name_size = ARRAY_SIZE(map_type_name); @@ -1442,7 +1443,7 @@ static int do_help(int argc, char **argv) " lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n" " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" - " queue | stack | sk_storage | struct_ops | ringbuf }\n" + " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2]); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 2cbd137eed86..b6bfcd085a76 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -155,6 +155,7 @@ enum bpf_map_type { BPF_MAP_TYPE_DEVMAP_HASH, BPF_MAP_TYPE_STRUCT_OPS, BPF_MAP_TYPE_RINGBUF, + BPF_MAP_TYPE_INODE_STORAGE, }; /* Note that tracing related programs such as @@ -3509,6 +3510,41 @@ union bpf_attr { * * **-EPERM** This helper cannot be used under the * current sock_ops->op. + * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags) + * Description + * Get a bpf_local_storage from an *inode*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *inode* as the **key**. 
From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *inode*) except this + * helper enforces the key must be an inode and the map must also + * be a **BPF_MAP_TYPE_INODE_STORAGE**. + * + * Underneath, the value is stored locally at *inode* instead of + * the *map*. The *map* is used as the bpf-local-storage + * "type". The bpf-local-storage "type" (i.e. the *map*) is + * searched against all bpf_local_storage residing at *inode*. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * int bpf_inode_storage_delete(struct bpf_map *map, void *inode) + * Description + * Delete a bpf_local_storage from an *inode*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3655,7 +3691,9 @@ union bpf_attr { FN(get_task_stack), \ FN(load_hdr_opt), \ FN(store_hdr_opt), \ - FN(reserve_hdr_opt), + FN(reserve_hdr_opt), \ + FN(inode_storage_get), \ + FN(inode_storage_delete), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 010c9a76fd2b..5482a9b7ae2d 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -170,7 +170,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, return btf_fd; } -static int load_sk_storage_btf(void) +static int load_local_storage_btf(void) { const char strs[] = "\0bpf_spin_lock\0val\0cnt\0l"; /* struct bpf_spin_lock { @@ -229,12 +229,13 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) key_size = 0; break; case BPF_MAP_TYPE_SK_STORAGE: + case BPF_MAP_TYPE_INODE_STORAGE: btf_key_type_id = 1; btf_value_type_id = 3; value_size = 8; max_entries = 0; map_flags = BPF_F_NO_PREALLOC; - btf_fd = load_sk_storage_btf(); + btf_fd = load_local_storage_btf(); if (btf_fd < 0) return false; break; -- cgit v1.2.3 From 30897832d8b97e93833fb52c0a02951db3692ed2 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 25 Aug 2020 20:29:18 +0200 Subject: bpf: Allow local storage to be used from LSM programs Adds support for both bpf_{sk, inode}_storage_{get, delete} to be used in LSM programs. These helpers are not used for tracing programs (currently) as their usage is tied to the life-cycle of the object and should only be used where the owning object won't be freed (when the owning object is passed as an argument to the LSM hook). Thus, they are safer to use in LSM hooks than tracing. Usage of local storage in tracing programs will probably follow a per-function whitelist approach. Since the UAPI helper signature for bpf_sk_storage expects a bpf_sock, which leads to a compilation warning for LSM programs, it's also updated to accept a void * pointer instead.
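A sketch of what the relaxed signature permits (illustrative only; the map and program names are invented): an LSM hook can now hand the kernel struct sock it receives straight to the helper:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, __u32);
} bind_cnt_map SEC(".maps");

SEC("lsm/socket_bind")
int BPF_PROG(on_bind, struct socket *sock, struct sockaddr *address,
	     int addrlen)
{
	__u32 *cnt;

	/* sock->sk is a kernel 'struct sock *', accepted directly now */
	cnt = bpf_sk_storage_get(&bind_cnt_map, sock->sk, 0,
				 BPF_SK_STORAGE_GET_F_CREATE);
	if (cnt)
		(*cnt)++;
	return 0;
}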
Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200825182919.1118197-7-kpsingh@chromium.org --- tools/include/uapi/linux/bpf.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b6bfcd085a76..0e1cdf806fe1 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2808,7 +2808,7 @@ union bpf_attr { * * **-ERANGE** if resulting value was out of range. * - * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags) + * void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags) * Description * Get a bpf-local-storage from a *sk*. * @@ -2824,6 +2824,9 @@ union bpf_attr { * "type". The bpf-local-storage "type" (i.e. the *map*) is * searched against all bpf-local-storages residing at *sk*. * + * *sk* is a kernel **struct sock** pointer for LSM program. + * *sk* is a **struct bpf_sock** pointer for other program types. + * * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be * used such that a new bpf-local-storage will be * created if one does not exist. *value* can be used @@ -2836,7 +2839,7 @@ union bpf_attr { * **NULL** if not found or there was an error in adding * a new bpf-local-storage. * - * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) + * long bpf_sk_storage_delete(struct bpf_map *map, void *sk) * Description * Delete a bpf-local-storage from a *sk*. * Return -- cgit v1.2.3 From cd324d7abb3d850ae083bda1239f800abe21fe25 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 25 Aug 2020 20:29:19 +0200 Subject: bpf: Add selftests for local_storage inode_local_storage: * Hook to the file_open and inode_unlink LSM hooks. * Create and unlink a temporary file. * Store some information in the inode's bpf_local_storage during file_open. * Verify that this information exists when the file is unlinked. sk_local_storage: * Hook to the socket_post_create and socket_bind LSM hooks. * Open and bind a socket and set the sk_storage in the socket_post_create hook using the start_server helper. * Verify if the information is set in the socket_bind hook. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200825182919.1118197-8-kpsingh@chromium.org --- .../selftests/bpf/prog_tests/test_local_storage.c | 60 +++++++++ tools/testing/selftests/bpf/progs/local_storage.c | 140 +++++++++++++++++++++ 2 files changed, 200 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/test_local_storage.c create mode 100644 tools/testing/selftests/bpf/progs/local_storage.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c new file mode 100644 index 000000000000..91cd6f357246 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2020 Google LLC. 
+ */ + +#include +#include + +#include "local_storage.skel.h" +#include "network_helpers.h" + +int create_and_unlink_file(void) +{ + char fname[PATH_MAX] = "/tmp/fileXXXXXX"; + int fd; + + fd = mkstemp(fname); + if (fd < 0) + return fd; + + close(fd); + unlink(fname); + return 0; +} + +void test_test_local_storage(void) +{ + struct local_storage *skel = NULL; + int err, duration = 0, serv_sk = -1; + + skel = local_storage__open_and_load(); + if (CHECK(!skel, "skel_load", "lsm skeleton failed\n")) + goto close_prog; + + err = local_storage__attach(skel); + if (CHECK(err, "attach", "lsm attach failed: %d\n", err)) + goto close_prog; + + skel->bss->monitored_pid = getpid(); + + err = create_and_unlink_file(); + if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno)) + goto close_prog; + + CHECK(skel->data->inode_storage_result != 0, "inode_storage_result", + "inode_local_storage not set\n"); + + serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); + if (CHECK(serv_sk < 0, "start_server", "failed to start server\n")) + goto close_prog; + + CHECK(skel->data->sk_storage_result != 0, "sk_storage_result", + "sk_local_storage not set\n"); + + close(serv_sk); + +close_prog: + local_storage__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c new file mode 100644 index 000000000000..0758ba229ae0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2020 Google LLC. + */ + +#include +#include +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +#define DUMMY_STORAGE_VALUE 0xdeadbeef + +int monitored_pid = 0; +int inode_storage_result = -1; +int sk_storage_result = -1; + +struct dummy_storage { + __u32 value; +}; + +struct { + __uint(type, BPF_MAP_TYPE_INODE_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct dummy_storage); +} inode_storage_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE); + __type(key, int); + __type(value, struct dummy_storage); +} sk_storage_map SEC(".maps"); + +/* TODO Use vmlinux.h once BTF pruning for embedded types is fixed. 
+ */ +struct sock {} __attribute__((preserve_access_index)); +struct sockaddr {} __attribute__((preserve_access_index)); +struct socket { + struct sock *sk; +} __attribute__((preserve_access_index)); + +struct inode {} __attribute__((preserve_access_index)); +struct dentry { + struct inode *d_inode; +} __attribute__((preserve_access_index)); +struct file { + struct inode *f_inode; +} __attribute__((preserve_access_index)); + + +SEC("lsm/inode_unlink") +int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct dummy_storage *storage; + + if (pid != monitored_pid) + return 0; + + storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!storage) + return 0; + + if (storage->value == DUMMY_STORAGE_VALUE) + inode_storage_result = -1; + + inode_storage_result = + bpf_inode_storage_delete(&inode_storage_map, victim->d_inode); + + return 0; +} + +SEC("lsm/socket_bind") +int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, + int addrlen) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct dummy_storage *storage; + + if (pid != monitored_pid) + return 0; + + storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!storage) + return 0; + + if (storage->value == DUMMY_STORAGE_VALUE) + sk_storage_result = -1; + + sk_storage_result = bpf_sk_storage_delete(&sk_storage_map, sock->sk); + return 0; +} + +SEC("lsm/socket_post_create") +int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, + int protocol, int kern) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct dummy_storage *storage; + + if (pid != monitored_pid) + return 0; + + storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!storage) + return 0; + + storage->value = DUMMY_STORAGE_VALUE; + + return 0; +} + +SEC("lsm/file_open") +int BPF_PROG(file_open, struct file *file) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct dummy_storage *storage; + + if (pid != monitored_pid) + return 0; + + if (!file->f_inode) + return 0; + + storage = bpf_inode_storage_get(&inode_storage_map, file->f_inode, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!storage) + return 0; + + storage->value = DUMMY_STORAGE_VALUE; + return 0; +} -- cgit v1.2.3 From 193a983c5bc4c62f66d62a9a6f9084cf3f2a0db2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Aug 2020 21:21:11 +0200 Subject: tools resolve_btfids: Add size check to get_id function To make sure we don't crash on malformed symbols. 
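
As a standalone userspace illustration of the failure mode (not part of
the patch), the boundary check keeps a truncated symbol from being read
past its terminating NUL:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* mirrors the hardened get_id() parsing step */
static char *get_id(const char *prefix_end)
{
	int len = strlen(prefix_end);
	int pos = sizeof("__") - 1;	/* skip the "__" separator */

	if (pos >= len)
		return NULL;		/* malformed: "", "_" or "__" */

	return strdup(prefix_end + pos);
}

int main(void)
{
	/* well-formed suffix of __BTF_ID__func__vfs_truncate__0 */
	char *id = get_id("__0");

	printf("well-formed: %s\n", id ? id : "(null)");
	/* without the check, strdup() would read past the NUL here */
	printf("malformed:   %s\n", get_id("_") ? "oops" : "(null)");
	free(id);
	return 0;
}
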
Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200825192124.710397-2-jolsa@kernel.org --- tools/bpf/resolve_btfids/main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 4d9ecb975862..35a172d3d80d 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -199,9 +199,16 @@ static char *get_id(const char *prefix_end) /* * __BTF_ID__func__vfs_truncate__0 * prefix_end = ^ + * pos = ^ */ - char *p, *id = strdup(prefix_end + sizeof("__") - 1); + int len = strlen(prefix_end); + int pos = sizeof("__") - 1; + char *p, *id; + if (pos >= len) + return NULL; + + id = strdup(prefix_end + pos); if (id) { /* * __BTF_ID__func__vfs_truncate__0 -- cgit v1.2.3 From a5f53b1d59c2fd35ebb55bbe47b05d596c0d466c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Aug 2020 21:21:12 +0200 Subject: tools resolve_btfids: Add support for set symbols The set symbol does not have the unique number suffix, so we need to give it a special parsing function. This was omitted in the first batch, because there was no set support yet, so it slipped in the testing. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200825192124.710397-3-jolsa@kernel.org --- tools/bpf/resolve_btfids/main.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 35a172d3d80d..6152d1356d7b 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -227,6 +227,24 @@ static char *get_id(const char *prefix_end) return id; } +static struct btf_id *add_set(struct object *obj, char *name) +{ + /* + * __BTF_ID__set__name + * name = ^ + * id = ^ + */ + char *id = name + sizeof(BTF_SET "__") - 1; + int len = strlen(name); + + if (id >= name + len) { + pr_err("FAILED to parse set name: %s\n", name); + return NULL; + } + + return btf_id__add(&obj->sets, id, true); +} + static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size) { char *id; @@ -383,7 +401,7 @@ static int symbols_collect(struct object *obj) id = add_symbol(&obj->funcs, prefix, sizeof(BTF_FUNC) - 1); /* set */ } else if (!strncmp(prefix, BTF_SET, sizeof(BTF_SET) - 1)) { - id = add_symbol(&obj->sets, prefix, sizeof(BTF_SET) - 1); + id = add_set(obj, prefix); /* * SET objects store list's count, which is encoded * in symbol's size, together with 'cnt' field hence -- cgit v1.2.3 From eae2e83e62633a2659e3bc690facba1c2fc9c45b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Aug 2020 21:21:19 +0200 Subject: bpf: Add BTF_SET_START/END macros Adding support to define sorted set of BTF ID values. Following defines sorted set of BTF ID values: BTF_SET_START(btf_allowlist_d_path) BTF_ID(func, vfs_truncate) BTF_ID(func, vfs_fallocate) BTF_ID(func, dentry_open) BTF_ID(func, vfs_getattr) BTF_ID(func, filp_close) BTF_SET_END(btf_allowlist_d_path) It defines following 'struct btf_id_set' variable to access values and count: struct btf_id_set btf_allowlist_d_path; Adding 'allowed' callback to struct bpf_func_proto, to allow verifier the check on allowed callers. Adding btf_id_set_contains function, which will be used by allowed callbacks to verify the caller's BTF ID value is within allowed set. Also removing extra '\' in __BTF_ID_LIST macro. 
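
As a sketch of how a set is meant to be consumed (the helper-side
wiring lives outside tools/, so it is not visible in the diff below),
an allowed callback can reject programs whose attach point is not in
the set:

BTF_SET_START(btf_allowlist_d_path)
BTF_ID(func, vfs_truncate)
BTF_ID(func, filp_close)
BTF_SET_END(btf_allowlist_d_path)

static bool bpf_d_path_allowed(const struct bpf_prog *prog)
{
	/* binary search over the sorted set built by resolve_btfids */
	return btf_id_set_contains(&btf_allowlist_d_path,
				   prog->aux->attach_btf_id);
}

The verifier consults the new 'allowed' callback of the helper's
bpf_func_proto before permitting the call.
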
Added BTF_SET_START_GLOBAL macro for global sets. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200825192124.710397-10-jolsa@kernel.org --- tools/include/linux/btf_ids.h | 51 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h index 4867d549e3c1..210b086188a3 100644 --- a/tools/include/linux/btf_ids.h +++ b/tools/include/linux/btf_ids.h @@ -3,6 +3,11 @@ #ifndef _LINUX_BTF_IDS_H #define _LINUX_BTF_IDS_H +struct btf_id_set { + u32 cnt; + u32 ids[]; +}; + #ifdef CONFIG_DEBUG_INFO_BTF #include /* for __PASTE */ @@ -62,7 +67,7 @@ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ "." #scope " " #name "; \n" \ #name ":; \n" \ -".popsection; \n"); \ +".popsection; \n"); #define BTF_ID_LIST(name) \ __BTF_ID_LIST(name, local) \ @@ -88,12 +93,56 @@ asm( \ ".zero 4 \n" \ ".popsection; \n"); +/* + * The BTF_SET_START/END macros pair defines sorted list of + * BTF IDs plus its members count, with following layout: + * + * BTF_SET_START(list) + * BTF_ID(type1, name1) + * BTF_ID(type2, name2) + * BTF_SET_END(list) + * + * __BTF_ID__set__list: + * .zero 4 + * list: + * __BTF_ID__type1__name1__3: + * .zero 4 + * __BTF_ID__type2__name2__4: + * .zero 4 + * + */ +#define __BTF_SET_START(name, scope) \ +asm( \ +".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ +"." #scope " __BTF_ID__set__" #name "; \n" \ +"__BTF_ID__set__" #name ":; \n" \ +".zero 4 \n" \ +".popsection; \n"); + +#define BTF_SET_START(name) \ +__BTF_ID_LIST(name, local) \ +__BTF_SET_START(name, local) + +#define BTF_SET_START_GLOBAL(name) \ +__BTF_ID_LIST(name, globl) \ +__BTF_SET_START(name, globl) + +#define BTF_SET_END(name) \ +asm( \ +".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ +".size __BTF_ID__set__" #name ", .-" #name " \n" \ +".popsection; \n"); \ +extern struct btf_id_set name; + #else #define BTF_ID_LIST(name) static u32 name[5]; #define BTF_ID(prefix, name) #define BTF_ID_UNUSED #define BTF_ID_LIST_GLOBAL(name) u32 name[1]; +#define BTF_SET_START(name) static struct btf_id_set name = { 0 }; +#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 }; +#define BTF_SET_END(name) #endif /* CONFIG_DEBUG_INFO_BTF */ -- cgit v1.2.3 From 6e22ab9da79343532cd3cde39df25e5a5478c692 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Aug 2020 21:21:20 +0200 Subject: bpf: Add d_path helper Adding d_path helper function that returns full path for given 'struct path' object, which needs to be the kernel BTF 'path' object. The path is returned in buffer provided 'buf' of size 'sz' and is zero terminated. bpf_d_path(&file->f_path, buf, size); The helper calls directly d_path function, so there's only limited set of function it can be called from. Adding just very modest set for the start. Updating also bpf.h tools uapi header and adding 'path' to bpf_helpers_doc.py script. 
Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20200825192124.710397-11-jolsa@kernel.org --- tools/include/uapi/linux/bpf.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0e1cdf806fe1..0388bc0200b0 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3513,6 +3513,7 @@ union bpf_attr { * * **-EPERM** This helper cannot be used under the * current sock_ops->op. + * * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags) * Description * Get a bpf_local_storage from an *inode*. @@ -3548,6 +3549,18 @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf_local_storage cannot be found. + * + * long bpf_d_path(struct path *path, char *buf, u32 sz) + * Description + * Return full path for given 'struct path' object, which + * needs to be the kernel BTF 'path' object. The path is + * returned in the provided buffer 'buf' of size 'sz' and + * is zero terminated. + * + * Return + * On success, the strictly positive length of the string, + * including the trailing NUL character. On error, a negative + * value. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3697,6 +3710,7 @@ union bpf_attr { FN(reserve_hdr_opt), \ FN(inode_storage_get), \ FN(inode_storage_delete), \ + FN(d_path), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 762f85156835258c85a32bf81b283300b6c6bd17 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Aug 2020 21:21:22 +0200 Subject: selftests/bpf: Add verifier test for d_path helper Adding verifier test for attaching tracing program and calling d_path helper from within and testing that it's allowed for dentry_open function and denied for 'd_path' function with appropriate error. 
Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200825192124.710397-13-jolsa@kernel.org --- tools/testing/selftests/bpf/test_verifier.c | 19 +++++++++++++- tools/testing/selftests/bpf/verifier/d_path.c | 37 +++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/verifier/d_path.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 78a6bae56ea6..9be395d9dc64 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -114,6 +114,7 @@ struct bpf_test { bpf_testdata_struct_t retvals[MAX_TEST_RUNS]; }; enum bpf_attach_type expected_attach_type; + const char *kfunc; }; /* Note we want this to be 64 bit aligned so that the end of our array is @@ -984,8 +985,24 @@ static void do_test_single(struct bpf_test *test, bool unpriv, attr.log_level = 4; attr.prog_flags = pflags; + if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) { + attr.attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc, + attr.expected_attach_type); + if (attr.attach_btf_id < 0) { + printf("FAIL\nFailed to find BTF ID for '%s'!\n", + test->kfunc); + (*errors)++; + return; + } + } + fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog)); - if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) { + + /* BPF_PROG_TYPE_TRACING requires more setup and + * bpf_probe_prog_type won't give correct answer + */ + if (fd_prog < 0 && prog_type != BPF_PROG_TYPE_TRACING && + !bpf_probe_prog_type(prog_type, 0)) { printf("SKIP (unsupported program type %d)\n", prog_type); skips++; goto close_fds; diff --git a/tools/testing/selftests/bpf/verifier/d_path.c b/tools/testing/selftests/bpf/verifier/d_path.c new file mode 100644 index 000000000000..b988396379a7 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/d_path.c @@ -0,0 +1,37 @@ +{ + "d_path accept", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0), + BPF_LD_IMM64(BPF_REG_3, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_d_path), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .kfunc = "dentry_open", +}, +{ + "d_path reject", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0), + BPF_LD_IMM64(BPF_REG_3, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_d_path), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "helper call is not allowed in probe", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .kfunc = "d_path", +}, -- cgit v1.2.3 From e4d1af4b16f80a90d9cf3a09bee2012dcde45638 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Aug 2020 21:21:23 +0200 Subject: selftests/bpf: Add test for d_path helper Adding test for d_path helper which is pretty much copied from Wenbo Zhang's test for bpf_get_fd_path, which never made it in. The test is doing fstat/close on several fd types, and verifies we got the d_path helper working on kernel probes for vfs_getattr/filp_close functions. 
Original-patch-by: Wenbo Zhang Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200825192124.710397-14-jolsa@kernel.org --- tools/testing/selftests/bpf/prog_tests/d_path.c | 147 ++++++++++++++++++++++++ tools/testing/selftests/bpf/progs/test_d_path.c | 58 ++++++++++ 2 files changed, 205 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/d_path.c create mode 100644 tools/testing/selftests/bpf/progs/test_d_path.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c new file mode 100644 index 000000000000..058765da17e6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/d_path.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include +#include +#include +#include + +#define MAX_PATH_LEN 128 +#define MAX_FILES 7 + +#include "test_d_path.skel.h" + +static int duration; + +static struct { + __u32 cnt; + char paths[MAX_FILES][MAX_PATH_LEN]; +} src; + +static int set_pathname(int fd, pid_t pid) +{ + char buf[MAX_PATH_LEN]; + + snprintf(buf, MAX_PATH_LEN, "/proc/%d/fd/%d", pid, fd); + return readlink(buf, src.paths[src.cnt++], MAX_PATH_LEN); +} + +static int trigger_fstat_events(pid_t pid) +{ + int sockfd = -1, procfd = -1, devfd = -1; + int localfd = -1, indicatorfd = -1; + int pipefd[2] = { -1, -1 }; + struct stat fileStat; + int ret = -1; + + /* unmountable pseudo-filesystems */ + if (CHECK(pipe(pipefd) < 0, "trigger", "pipe failed\n")) + return ret; + /* unmountable pseudo-filesystems */ + sockfd = socket(AF_INET, SOCK_STREAM, 0); + if (CHECK(sockfd < 0, "trigger", "scoket failed\n")) + goto out_close; + /* mountable pseudo-filesystems */ + procfd = open("/proc/self/comm", O_RDONLY); + if (CHECK(procfd < 0, "trigger", "open /proc/self/comm failed\n")) + goto out_close; + devfd = open("/dev/urandom", O_RDONLY); + if (CHECK(devfd < 0, "trigger", "open /dev/urandom failed\n")) + goto out_close; + localfd = open("/tmp/d_path_loadgen.txt", O_CREAT | O_RDONLY); + if (CHECK(localfd < 0, "trigger", "open /tmp/d_path_loadgen.txt failed\n")) + goto out_close; + /* bpf_d_path will return path with (deleted) */ + remove("/tmp/d_path_loadgen.txt"); + indicatorfd = open("/tmp/", O_PATH); + if (CHECK(indicatorfd < 0, "trigger", "open /tmp/ failed\n")) + goto out_close; + + ret = set_pathname(pipefd[0], pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for pipe[0]\n")) + goto out_close; + ret = set_pathname(pipefd[1], pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for pipe[1]\n")) + goto out_close; + ret = set_pathname(sockfd, pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for socket\n")) + goto out_close; + ret = set_pathname(procfd, pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for proc\n")) + goto out_close; + ret = set_pathname(devfd, pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for dev\n")) + goto out_close; + ret = set_pathname(localfd, pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for file\n")) + goto out_close; + ret = set_pathname(indicatorfd, pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for dir\n")) + goto out_close; + + /* triggers vfs_getattr */ + fstat(pipefd[0], &fileStat); + fstat(pipefd[1], &fileStat); + fstat(sockfd, &fileStat); + fstat(procfd, &fileStat); + fstat(devfd, &fileStat); + fstat(localfd, &fileStat); + fstat(indicatorfd, &fileStat); + +out_close: + /* triggers filp_close */ + 
close(pipefd[0]); + close(pipefd[1]); + close(sockfd); + close(procfd); + close(devfd); + close(localfd); + close(indicatorfd); + return ret; +} + +void test_d_path(void) +{ + struct test_d_path__bss *bss; + struct test_d_path *skel; + int err; + + skel = test_d_path__open_and_load(); + if (CHECK(!skel, "setup", "d_path skeleton failed\n")) + goto cleanup; + + err = test_d_path__attach(skel); + if (CHECK(err, "setup", "attach failed: %d\n", err)) + goto cleanup; + + bss = skel->bss; + bss->my_pid = getpid(); + + err = trigger_fstat_events(bss->my_pid); + if (err < 0) + goto cleanup; + + for (int i = 0; i < MAX_FILES; i++) { + CHECK(strncmp(src.paths[i], bss->paths_stat[i], MAX_PATH_LEN), + "check", + "failed to get stat path[%d]: %s vs %s\n", + i, src.paths[i], bss->paths_stat[i]); + CHECK(strncmp(src.paths[i], bss->paths_close[i], MAX_PATH_LEN), + "check", + "failed to get close path[%d]: %s vs %s\n", + i, src.paths[i], bss->paths_close[i]); + /* The d_path helper returns size plus NUL char, hence + 1 */ + CHECK(bss->rets_stat[i] != strlen(bss->paths_stat[i]) + 1, + "check", + "failed to match stat return [%d]: %d vs %zd [%s]\n", + i, bss->rets_stat[i], strlen(bss->paths_stat[i]) + 1, + bss->paths_stat[i]); + CHECK(bss->rets_close[i] != strlen(bss->paths_stat[i]) + 1, + "check", + "failed to match stat return [%d]: %d vs %zd [%s]\n", + i, bss->rets_close[i], strlen(bss->paths_close[i]) + 1, + bss->paths_stat[i]); + } + +cleanup: + test_d_path__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_d_path.c b/tools/testing/selftests/bpf/progs/test_d_path.c new file mode 100644 index 000000000000..61f007855649 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_d_path.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include +#include + +#define MAX_PATH_LEN 128 +#define MAX_FILES 7 + +pid_t my_pid = 0; +__u32 cnt_stat = 0; +__u32 cnt_close = 0; +char paths_stat[MAX_FILES][MAX_PATH_LEN] = {}; +char paths_close[MAX_FILES][MAX_PATH_LEN] = {}; +int rets_stat[MAX_FILES] = {}; +int rets_close[MAX_FILES] = {}; + +SEC("fentry/vfs_getattr") +int BPF_PROG(prog_stat, struct path *path, struct kstat *stat, + __u32 request_mask, unsigned int query_flags) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + __u32 cnt = cnt_stat; + int ret; + + if (pid != my_pid) + return 0; + + if (cnt >= MAX_FILES) + return 0; + ret = bpf_d_path(path, paths_stat[cnt], MAX_PATH_LEN); + + rets_stat[cnt] = ret; + cnt_stat++; + return 0; +} + +SEC("fentry/filp_close") +int BPF_PROG(prog_close, struct file *file, void *id) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + __u32 cnt = cnt_close; + int ret; + + if (pid != my_pid) + return 0; + + if (cnt >= MAX_FILES) + return 0; + ret = bpf_d_path(&file->f_path, + paths_close[cnt], MAX_PATH_LEN); + + rets_close[cnt] = ret; + cnt_close++; + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From cd04b04de119a222c83936f7e9dbd46a650cb688 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Aug 2020 21:21:24 +0200 Subject: selftests/bpf: Add set test to resolve_btfids Adding test to for sets resolve_btfids. We're checking that testing set gets properly resolved and sorted. 
Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200825192124.710397-15-jolsa@kernel.org --- .../selftests/bpf/prog_tests/resolve_btfids.c | 39 +++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c index 3b127cab4864..8826c652adad 100644 --- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c +++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c @@ -47,6 +47,15 @@ BTF_ID(struct, S) BTF_ID(union, U) BTF_ID(func, func) +BTF_SET_START(test_set) +BTF_ID(typedef, S) +BTF_ID(typedef, T) +BTF_ID(typedef, U) +BTF_ID(struct, S) +BTF_ID(union, U) +BTF_ID(func, func) +BTF_SET_END(test_set) + static int __resolve_symbol(struct btf *btf, int type_id) { @@ -116,12 +125,40 @@ int test_resolve_btfids(void) */ for (j = 0; j < ARRAY_SIZE(test_lists); j++) { test_list = test_lists[j]; - for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) { + for (i = 0; i < ARRAY_SIZE(test_symbols); i++) { ret = CHECK(test_list[i] != test_symbols[i].id, "id_check", "wrong ID for %s (%d != %d)\n", test_symbols[i].name, test_list[i], test_symbols[i].id); + if (ret) + return ret; + } + } + + /* Check BTF_SET_START(test_set) IDs */ + for (i = 0; i < test_set.cnt; i++) { + bool found = false; + + for (j = 0; j < ARRAY_SIZE(test_symbols); j++) { + if (test_symbols[j].id != test_set.ids[i]) + continue; + found = true; + break; + } + + ret = CHECK(!found, "id_check", + "ID %d not found in test_symbols\n", + test_set.ids[i]); + if (ret) + break; + + if (i > 0) { + ret = CHECK(test_set.ids[i - 1] > test_set.ids[i], + "sort_check", + "test_set is not sorted\n"); + if (ret) + break; } } -- cgit v1.2.3 From d83971761fa278dcffa855110f024bf043ce6e6e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 26 Aug 2020 12:18:45 +0200 Subject: selftests/bpf: Fix open call in trigger_fstat_events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Alexei reported compile breakage on newer systems with following error: In file included from /usr/include/fcntl.h:290:0, 4814 from ./test_progs.h:29, 4815 from .../bpf-next/tools/testing/selftests/bpf/prog_tests/d_path.c:3: 4816In function ‘open’, 4817 inlined from ‘trigger_fstat_events’ at .../bpf-next/tools/testing/selftests/bpf/prog_tests/d_path.c:50:10, 4818 inlined from ‘test_d_path’ at .../bpf-next/tools/testing/selftests/bpf/prog_tests/d_path.c:119:6: 4819/usr/include/x86_64-linux-gnu/bits/fcntl2.h:50:4: error: call to ‘__open_missing_mode’ declared with attribute error: open with O_CREAT or O_TMPFILE in second argument needs 3 arguments 4820 __open_missing_mode (); 4821 ^~~~~~~~~~~~~~~~~~~~~~ We're missing permission bits as 3rd argument for open call with O_CREAT flag specified. 
Fixes: e4d1af4b16f8 ("selftests/bpf: Add test for d_path helper") Reported-by: Alexei Starovoitov Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200826101845.747617-1-jolsa@kernel.org --- tools/testing/selftests/bpf/prog_tests/d_path.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c index 058765da17e6..43ffbeacd680 100644 --- a/tools/testing/selftests/bpf/prog_tests/d_path.c +++ b/tools/testing/selftests/bpf/prog_tests/d_path.c @@ -47,7 +47,7 @@ static int trigger_fstat_events(pid_t pid) devfd = open("/dev/urandom", O_RDONLY); if (CHECK(devfd < 0, "trigger", "open /dev/urandom failed\n")) goto out_close; - localfd = open("/tmp/d_path_loadgen.txt", O_CREAT | O_RDONLY); + localfd = open("/tmp/d_path_loadgen.txt", O_CREAT | O_RDONLY, 0644); if (CHECK(localfd < 0, "trigger", "open /tmp/d_path_loadgen.txt failed\n")) goto out_close; /* bpf_d_path will return path with (deleted) */ -- cgit v1.2.3 From 7100ff7c62682c2332300ffde8706578e1098e13 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 26 Aug 2020 09:59:07 +0100 Subject: selftests/bpf: Fix spelling mistake "scoket" -> "socket" There is a spelling mistake in a check error message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200826085907.43095-1-colin.king@canonical.com --- tools/testing/selftests/bpf/prog_tests/d_path.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c index 43ffbeacd680..fc12e0d445ff 100644 --- a/tools/testing/selftests/bpf/prog_tests/d_path.c +++ b/tools/testing/selftests/bpf/prog_tests/d_path.c @@ -38,7 +38,7 @@ static int trigger_fstat_events(pid_t pid) return ret; /* unmountable pseudo-filesystems */ sockfd = socket(AF_INET, SOCK_STREAM, 0); - if (CHECK(sockfd < 0, "trigger", "scoket failed\n")) + if (CHECK(sockfd < 0, "trigger", "socket failed\n")) goto out_close; /* mountable pseudo-filesystems */ procfd = open("/proc/self/comm", O_RDONLY); -- cgit v1.2.3 From 6dc03dc71387e1dc65cf14efb49e5cf7062a2d46 Mon Sep 17 00:00:00 2001 From: Udip Pant Date: Tue, 25 Aug 2020 16:20:01 -0700 Subject: selftests/bpf: Add test for freplace program with write access This adds a selftest that tests the behavior when a freplace target program attempts to make a write access on a packet. The expectation is that the read or write access is granted based on the program type of the linked program and not itself (which is of type, for e.g., BPF_PROG_TYPE_EXT). This test fails without the associated patch on the verifier. 
Signed-off-by: Udip Pant Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200825232003.2877030-3-udippant@fb.com --- .../selftests/bpf/prog_tests/fexit_bpf2bpf.c | 1 + tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c | 27 ++++++++++++++++++++++ .../testing/selftests/bpf/progs/test_pkt_access.c | 20 ++++++++++++++++ 3 files changed, 48 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index 197d0d217b56..7c7168963d52 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -123,6 +123,7 @@ static void test_func_replace(void) "freplace/get_skb_len", "freplace/get_skb_ifindex", "freplace/get_constant", + "freplace/test_pkt_write_access_subprog", }; test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o", "./test_pkt_access.o", diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c index 98e1efe14549..49a84a3a2306 100644 --- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include +#include #include #include +#include #include #include #include @@ -151,4 +153,29 @@ int new_get_constant(long val) test_get_constant = 1; return test_get_constant; /* original get_constant() returns val - 122 */ } + +__u64 test_pkt_write_access_subprog = 0; +SEC("freplace/test_pkt_write_access_subprog") +int new_test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off) +{ + + void *data = (void *)(long)skb->data; + void *data_end = (void *)(long)skb->data_end; + struct tcphdr *tcp; + + if (off > sizeof(struct ethhdr) + sizeof(struct ipv6hdr)) + return -1; + + tcp = data + off; + if (tcp + 1 > data_end) + return -1; + + /* make modifications to the packet data */ + tcp->check++; + tcp->syn = 0; + + test_pkt_write_access_subprog = 1; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c index e72eba4a93d2..852051064507 100644 --- a/tools/testing/selftests/bpf/progs/test_pkt_access.c +++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c @@ -79,6 +79,24 @@ int get_skb_ifindex(int val, struct __sk_buff *skb, int var) return skb->ifindex * val * var; } +__attribute__ ((noinline)) +int test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off) +{ + void *data = (void *)(long)skb->data; + void *data_end = (void *)(long)skb->data_end; + struct tcphdr *tcp = NULL; + + if (off > sizeof(struct ethhdr) + sizeof(struct ipv6hdr)) + return -1; + + tcp = data + off; + if (tcp + 1 > data_end) + return -1; + /* make modification to the packet data */ + tcp->check++; + return 0; +} + SEC("classifier/test_pkt_access") int test_pkt_access(struct __sk_buff *skb) { @@ -117,6 +135,8 @@ int test_pkt_access(struct __sk_buff *skb) if (test_pkt_access_subprog3(3, skb) != skb->len * 3 * skb->ifindex) return TC_ACT_SHOT; if (tcp) { + if (test_pkt_write_access_subprog(skb, (void *)tcp - data)) + return TC_ACT_SHOT; if (((void *)(tcp) + 20) > data_end || proto != 6) return TC_ACT_SHOT; barrier(); /* to force ordering of checks */ -- cgit v1.2.3 From 50d19736aff497a4c25ec7e36375195bfd8570cd Mon Sep 17 00:00:00 2001 From: Udip Pant Date: Tue, 25 Aug 2020 16:20:02 -0700 Subject: selftests/bpf: Test for checking return 
code for the extended prog This adds test to enforce same check for the return code for the extended prog as it is enforced for the target program. It asserts failure for a return code, which is permitted without the patch in this series, while it is restricted after the application of this patch. Signed-off-by: Udip Pant Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200825232003.2877030-4-udippant@fb.com --- .../selftests/bpf/prog_tests/fexit_bpf2bpf.c | 40 ++++++++++++++++++++++ .../selftests/bpf/progs/freplace_connect_v4_prog.c | 19 ++++++++++ 2 files changed, 59 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index 7c7168963d52..d295ca9bbf96 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -142,10 +142,50 @@ static void test_func_replace_verify(void) prog_name, false); } +static void test_func_replace_return_code(void) +{ + /* + * standalone test that asserts failure to load freplace prog + * because of invalid return code. + */ + struct bpf_object *obj = NULL, *pkt_obj; + int err, pkt_fd; + __u32 duration = 0; + const char *target_obj_file = "./connect4_prog.o"; + const char *obj_file = "./freplace_connect_v4_prog.o"; + + err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, + &pkt_obj, &pkt_fd); + /* the target prog should load fine */ + if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", + target_obj_file, err, errno)) + return; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, + .attach_prog_fd = pkt_fd, + ); + + obj = bpf_object__open_file(obj_file, &opts); + if (CHECK(IS_ERR_OR_NULL(obj), "obj_open", + "failed to open %s: %ld\n", obj_file, + PTR_ERR(obj))) + goto close_prog; + + /* It should fail to load the program */ + err = bpf_object__load(obj); + if (CHECK(!err, "bpf_obj_load should fail", "err %d\n", err)) + goto close_prog; + +close_prog: + if (!IS_ERR_OR_NULL(obj)) + bpf_object__close(obj); + bpf_object__close(pkt_obj); +} + void test_fexit_bpf2bpf(void) { test_target_no_callees(); test_target_yes_callees(); test_func_replace(); test_func_replace_verify(); + test_func_replace_return_code(); } diff --git a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c new file mode 100644 index 000000000000..544e5ac90461 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include +#include +#include +#include +#include +#include +#include + +SEC("freplace/connect_v4_prog") +int new_connect_v4_prog(struct bpf_sock_addr *ctx) +{ + // return value thats in invalid range + return 255; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 1410620cf20e7e23cce17983e9a81af659b28583 Mon Sep 17 00:00:00 2001 From: Udip Pant Date: Tue, 25 Aug 2020 16:20:03 -0700 Subject: selftests/bpf: Test for map update access from within EXT programs This adds further tests to ensure access permissions and restrictions are applied properly for some map types such as sock-map. It also adds another negative tests to assert static functions cannot be replaced. 
In the 'unreliable' mode it still fails with error 'tracing progs cannot use bpf_spin_lock yet' with the change in the verifier Signed-off-by: Udip Pant Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200825232003.2877030-5-udippant@fb.com --- .../selftests/bpf/prog_tests/fexit_bpf2bpf.c | 33 ++++++++++++++++-- .../selftests/bpf/progs/freplace_attach_probe.c | 40 ++++++++++++++++++++++ .../selftests/bpf/progs/freplace_cls_redirect.c | 34 ++++++++++++++++++ 3 files changed, 104 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/freplace_attach_probe.c create mode 100644 tools/testing/selftests/bpf/progs/freplace_cls_redirect.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index d295ca9bbf96..a550dab9ba7a 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -142,7 +142,20 @@ static void test_func_replace_verify(void) prog_name, false); } -static void test_func_replace_return_code(void) +static void test_func_sockmap_update(void) +{ + const char *prog_name[] = { + "freplace/cls_redirect", + }; + test_fexit_bpf2bpf_common("./freplace_cls_redirect.o", + "./test_cls_redirect.o", + ARRAY_SIZE(prog_name), + prog_name, false); +} + +static void test_obj_load_failure_common(const char *obj_file, + const char *target_obj_file) + { /* * standalone test that asserts failure to load freplace prog @@ -151,8 +164,6 @@ static void test_func_replace_return_code(void) struct bpf_object *obj = NULL, *pkt_obj; int err, pkt_fd; __u32 duration = 0; - const char *target_obj_file = "./connect4_prog.o"; - const char *obj_file = "./freplace_connect_v4_prog.o"; err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, &pkt_obj, &pkt_fd); @@ -181,11 +192,27 @@ close_prog: bpf_object__close(pkt_obj); } +static void test_func_replace_return_code(void) +{ + /* test invalid return code in the replaced program */ + test_obj_load_failure_common("./freplace_connect_v4_prog.o", + "./connect4_prog.o"); +} + +static void test_func_map_prog_compatibility(void) +{ + /* test with spin lock map value in the replaced program */ + test_obj_load_failure_common("./freplace_attach_probe.o", + "./test_attach_probe.o"); +} + void test_fexit_bpf2bpf(void) { test_target_no_callees(); test_target_yes_callees(); test_func_replace(); test_func_replace_verify(); + test_func_sockmap_update(); test_func_replace_return_code(); + test_func_map_prog_compatibility(); } diff --git a/tools/testing/selftests/bpf/progs/freplace_attach_probe.c b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c new file mode 100644 index 000000000000..bb2a77c5b62b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include +#include +#include +#include + +#define VAR_NUM 2 + +struct hmap_elem { + struct bpf_spin_lock lock; + int var[VAR_NUM]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct hmap_elem); +} hash_map SEC(".maps"); + +SEC("freplace/handle_kprobe") +int new_handle_kprobe(struct pt_regs *ctx) +{ + struct hmap_elem zero = {}, *val; + int key = 0; + + val = bpf_map_lookup_elem(&hash_map, &key); + if (!val) + return 1; + /* spin_lock in hash map */ + bpf_spin_lock(&val->lock); + val->var[0] = 99; + bpf_spin_unlock(&val->lock); + + return 0; 
+} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c new file mode 100644 index 000000000000..68a5a9db928a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include +#include +#include +#include +#include + +struct bpf_map_def SEC("maps") sock_map = { + .type = BPF_MAP_TYPE_SOCKMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 2, +}; + +SEC("freplace/cls_redirect") +int freplace_cls_redirect_test(struct __sk_buff *skb) +{ + int ret = 0; + const int zero = 0; + struct bpf_sock *sk; + + sk = bpf_map_lookup_elem(&sock_map, &zero); + if (!sk) + return TC_ACT_SHOT; + + ret = bpf_map_update_elem(&sock_map, &zero, sk, 0); + bpf_sk_release(sk); + + return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From ef05afa66c59c2031a3798916ef3ff3778232129 Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Wed, 26 Aug 2020 00:55:49 -0700 Subject: libbpf: Fix unintentional success return code in bpf_object__load There are code paths where EINVAL is returned directly without setting errno. In that case, errno could be 0, which would mask the failure. For example, if a careless programmer set log_level to 10000 out of laziness, they would have to spend a long time trying to figure out why. Fixes: 4f33ddb4e3e2 ("libbpf: Propagate EPERM to caller on program load") Signed-off-by: Alex Gartrell Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200826075549.1858580-1-alexgartrell@gmail.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2e2523d8bb6d..8f9e7d281225 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6067,7 +6067,7 @@ retry_load: free(log_buf); goto retry_load; } - ret = -errno; + ret = errno ? -errno : -LIBBPF_ERRNO__LOAD; cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warn("load bpf program failed: %s\n", cp); pr_perm_msg(ret); -- cgit v1.2.3 From f5493c514cdbcd8c3f9f3ffca4f68b97f122e60a Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 24 Aug 2020 23:46:09 -0700 Subject: selftests/bpf: Add verifier tests for xor operation Added some test_verifier bounds check test cases for xor operations. $ ./test_verifier ... #78/u bounds check for reg = 0, reg xor 1 OK #78/p bounds check for reg = 0, reg xor 1 OK #79/u bounds check for reg32 = 0, reg32 xor 1 OK #79/p bounds check for reg32 = 0, reg32 xor 1 OK #80/u bounds check for reg = 2, reg xor 3 OK #80/p bounds check for reg = 2, reg xor 3 OK #81/u bounds check for reg = any, reg xor 3 OK #81/p bounds check for reg = any, reg xor 3 OK #82/u bounds check for reg32 = any, reg32 xor 3 OK #82/p bounds check for reg32 = any, reg32 xor 3 OK #83/u bounds check for reg > 0, reg xor 3 OK #83/p bounds check for reg > 0, reg xor 3 OK #84/u bounds check for reg32 > 0, reg32 xor 3 OK #84/p bounds check for reg32 > 0, reg32 xor 3 OK ... 
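
In plain C terms, the pattern the ACCEPT cases exercise looks like this
(an analogue only; the verifier performs this reasoning on abstract
register state at load time, not at run time):

#include <stdio.h>

int main(void)
{
	unsigned long long r1 = 2;

	/* mirrors "bounds check for reg = 2, reg xor 3": since
	 * 2 ^ 3 == 1 is a known constant, the r1 > 0 branch is
	 * provably taken and a guarded map access is in range.
	 */
	r1 ^= 3;
	if (r1 > 0)
		printf("known in range, r1 = %llu\n", r1);

	return 0;
}

When r1 starts out unknown ("reg = any"), xor-ing it with 3 yields no
usable bound, so a guarded access must be rejected; the "reg = any"
cases in the patch assert exactly that ("invalid access to map value").
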
Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Cc: John Fastabend Link: https://lore.kernel.org/bpf/20200825064609.2018077-1-yhs@fb.com --- tools/testing/selftests/bpf/verifier/bounds.c | 146 ++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c index 4d6645f2874c..dac40de3f868 100644 --- a/tools/testing/selftests/bpf/verifier/bounds.c +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -557,3 +557,149 @@ .result = ACCEPT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "bounds check for reg = 0, reg xor 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, +{ + "bounds check for reg32 = 0, reg32 xor 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 1), + BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, +{ + "bounds check for reg = 2, reg xor 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, +{ + "bounds check for reg = any, reg xor 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = REJECT, + .errstr = "invalid access to map value", + .errstr_unpriv = "invalid access to map value", +}, +{ + "bounds check for reg32 = any, reg32 xor 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + 
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 3), + BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = REJECT, + .errstr = "invalid access to map value", + .errstr_unpriv = "invalid access to map value", +}, +{ + "bounds check for reg > 0, reg xor 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JLE, BPF_REG_1, 0, 3), + BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, +{ + "bounds check for reg32 > 0, reg32 xor 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP32_IMM(BPF_JLE, BPF_REG_1, 0, 3), + BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 3), + BPF_JMP32_IMM(BPF_JGE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, -- cgit v1.2.3 From 2e80be60c465a4f8559327340eaf40845dd7797a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 26 Aug 2020 21:11:09 -0700 Subject: libbpf: Fix compilation warnings for 64-bit printf args Fix compilation warnings due to __u64 defined differently as `unsigned long` or `unsigned long long` on different architectures (e.g., ppc64le differs from x86-64). Also cast one argument to size_t to fix printf warning of similar nature. 
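
The portable idiom is to pin the argument's promoted type with a cast,
as in this generic illustration (not taken from the patch):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t imm = 1ULL << 40;	/* like __u64: may be unsigned
					 * long or unsigned long long */
	uint32_t sh_size = 4096;

	/* the casts make %llu and %zu correct on every architecture */
	printf("imm %llu, size %zu\n",
	       (unsigned long long)imm, (size_t)sh_size);
	return 0;
}
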
Fixes: eacaaed784e2 ("libbpf: Implement enum value-based CO-RE relocations") Fixes: 50e09460d9f8 ("libbpf: Skip well-known ELF sections when iterating ELF") Reported-by: Naresh Kamboju Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200827041109.3613090-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 8f9e7d281225..8cdb2528482e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2823,7 +2823,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) obj->efile.bss = data; obj->efile.bss_shndx = idx; } else { - pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name, sh.sh_size); + pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name, + (size_t)sh.sh_size); } } @@ -5244,7 +5245,8 @@ static int bpf_core_patch_insn(struct bpf_program *prog, if (res->validate && imm != orig_val) { pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n", bpf_program__title(prog, false), relo_idx, - insn_idx, imm, orig_val, new_val); + insn_idx, (unsigned long long)imm, + orig_val, new_val); return -EINVAL; } @@ -5252,7 +5254,7 @@ static int bpf_core_patch_insn(struct bpf_program *prog, insn[1].imm = 0; /* currently only 32-bit values are supported */ pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n", bpf_program__title(prog, false), relo_idx, insn_idx, - imm, new_val); + (unsigned long long)imm, new_val); break; } default: -- cgit v1.2.3 From 661b37cd437ef49cd28444f79b9b0c71ea76e8c8 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 27 Aug 2020 10:53:36 +0200 Subject: tools, bpf/build: Cleanup feature files on make clean The system for "Auto-detecting system features" located under tools/build/ are (currently) used by perf, libbpf and bpftool. It can contain stalled feature detection files, which are not cleaned up by libbpf and bpftool on make clean (side-note: perf tool is correct). Fix this by making the users invoke the make clean target. Some details about the changes. The libbpf Makefile already had a clean-config target (which seems to be copy-pasted from perf), but this target was not "connected" (a make dependency) to clean target. Choose not to rename target as someone might be using it. Did change the output from "CLEAN config" to "CLEAN feature-detect", to make it more clear what happens. 
This is related to the complaint and troubleshooting in the following link: https://lore.kernel.org/lkml/20200818122007.2d1cfe2d@carbon/ Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/lkml/20200818122007.2d1cfe2d@carbon/ Link: https://lore.kernel.org/bpf/159851841661.1072907.13770213104521805592.stgit@firesoul --- tools/bpf/bpftool/Makefile | 6 +++++- tools/build/Makefile | 2 ++ tools/lib/bpf/Makefile | 4 ++-- 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 8462690a039b..02c99bc95c69 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -176,7 +176,11 @@ $(OUTPUT)bpftool: $(OBJS) $(LIBBPF) $(OUTPUT)%.o: %.c $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< -clean: $(LIBBPF)-clean +feature-detect-clean: + $(call QUIET_CLEAN, feature-detect) + $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null + +clean: $(LIBBPF)-clean feature-detect-clean $(call QUIET_CLEAN, bpftool) $(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d $(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h diff --git a/tools/build/Makefile b/tools/build/Makefile index 727050c40f09..722f1700d96a 100644 --- a/tools/build/Makefile +++ b/tools/build/Makefile @@ -38,6 +38,8 @@ clean: $(call QUIET_CLEAN, fixdep) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)rm -f $(OUTPUT)fixdep + $(call QUIET_CLEAN, feature-detect) + $(Q)$(MAKE) -C feature/ clean >/dev/null $(OUTPUT)fixdep-in.o: FORCE $(Q)$(MAKE) $(build)=fixdep diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 66b2cfadf262..adbe994610f2 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -264,10 +264,10 @@ install: install_lib install_pkgconfig install_headers ### Cleaning rules config-clean: - $(call QUIET_CLEAN, config) + $(call QUIET_CLEAN, feature-detect) $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null -clean: +clean: config-clean $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \ *~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \ $(SHARED_OBJDIR) $(STATIC_OBJDIR) \ -- cgit v1.2.3 From b0c9eb37817943840a1a82dbc998c491609a0afd Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 27 Aug 2020 22:19:22 -0700 Subject: bpf: Make bpf_link_info.iter similar to bpf_iter_link_info bpf_link_info.iter is used by link_query to return bpf_iter_link_info to user space. Fields may be different, e.g., map_fd vs. map_id, so we cannot reuse the exact structure. But make them similar, e.g., struct bpf_link_info { /* common fields */ union { struct { ... } raw_tracepoint; struct { ... } tracing; ... struct { /* common fields for iter */ union { struct { __u32 map_id; } map; /* other structs for other targets */ }; }; }; }; so the structure is extensible the same way as bpf_iter_link_info. 
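
From userspace, the layout keeps a link query straightforward, as in
this sketch (it assumes link_fd refers to a BPF iterator link created
elsewhere):

#include <stdio.h>
#include <string.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

static void show_iter_link(int link_fd)
{
	struct bpf_link_info info;
	__u32 len = sizeof(info);
	char name[32] = {};

	memset(&info, 0, sizeof(info));
	info.iter.target_name = (__u64)(unsigned long)name;
	info.iter.target_name_len = sizeof(name);

	if (bpf_obj_get_info_by_fd(link_fd, &info, &len))
		return;

	/* the anonymous union keeps the access terse: iter.map.map_id */
	printf("target %s, map id %u\n", name, info.iter.map.map_id);
}
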
Fixes: 6b0a249a301e ("bpf: Implement link_query for bpf iterators") Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200828051922.758950-1-yhs@fb.com --- tools/include/uapi/linux/bpf.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0388bc0200b0..ef7af384f5ee 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4251,8 +4251,10 @@ struct bpf_link_info { __aligned_u64 target_name; /* in/out: target_name buffer ptr */ __u32 target_name_len; /* in/out: target_name buffer len */ union { - __u32 map_id; - } map; + struct { + __u32 map_id; + } map; + }; } iter; struct { __u32 netns_ino; -- cgit v1.2.3 From d557ea39a5f894630c403b78703ac92b08b7dd62 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 27 Aug 2020 18:18:19 -0700 Subject: bpf: selftests: Add test for different inner map size This patch tests the inner map size can be different for reuseport_sockarray but has to be the same for arraymap. A new subtest "diff_size" is added for this. The existing test is moved to a subtest "lookup_update". Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200828011819.1970825-1-kafai@fb.com --- .../selftests/bpf/prog_tests/btf_map_in_map.c | 35 +++++++++++++++++++++- .../selftests/bpf/progs/test_btf_map_in_map.c | 31 +++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c index 6ccecbd39476..540fea4c91a5 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c @@ -53,7 +53,7 @@ static int kern_sync_rcu(void) return err; } -void test_btf_map_in_map(void) +static void test_lookup_update(void) { int err, key = 0, val, i; struct test_btf_map_in_map *skel; @@ -143,3 +143,36 @@ void test_btf_map_in_map(void) cleanup: test_btf_map_in_map__destroy(skel); } + +static void test_diff_size(void) +{ + struct test_btf_map_in_map *skel; + int err, inner_map_fd, zero = 0; + + skel = test_btf_map_in_map__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n")) + return; + + inner_map_fd = bpf_map__fd(skel->maps.sockarr_sz2); + err = bpf_map_update_elem(bpf_map__fd(skel->maps.outer_sockarr), &zero, + &inner_map_fd, 0); + CHECK(err, "outer_sockarr inner map size check", + "cannot use a different size inner_map\n"); + + inner_map_fd = bpf_map__fd(skel->maps.inner_map_sz2); + err = bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &zero, + &inner_map_fd, 0); + CHECK(!err, "outer_arr inner map size check", + "incorrectly updated with a different size inner_map\n"); + + test_btf_map_in_map__destroy(skel); +} + +void test_btf_map_in_map(void) +{ + if (test__start_subtest("lookup_update")) + test_lookup_update(); + + if (test__start_subtest("diff_size")) + test_diff_size(); +} diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c index e5093796be97..193fe0198b21 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c @@ -11,6 +11,13 @@ struct inner_map { } inner_map1 SEC(".maps"), inner_map2 SEC(".maps"); +struct inner_map_sz2 { + __uint(type, 
BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 2); + __type(key, int); + __type(value, int); +} inner_map_sz2 SEC(".maps"); + struct outer_arr { __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); __uint(max_entries, 3); @@ -50,6 +57,30 @@ struct outer_hash { }, }; +struct sockarr_sz1 { + __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} sockarr_sz1 SEC(".maps"); + +struct sockarr_sz2 { + __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY); + __uint(max_entries, 2); + __type(key, int); + __type(value, int); +} sockarr_sz2 SEC(".maps"); + +struct outer_sockarr_sz1 { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __array(values, struct sockarr_sz1); +} outer_sockarr SEC(".maps") = { + .values = { (void *)&sockarr_sz1 }, +}; + int input = 0; SEC("raw_tp/sys_enter") -- cgit v1.2.3 From 1e6c62a8821557720a9b2ea9617359b264f2f67c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 27 Aug 2020 15:01:11 -0700 Subject: bpf: Introduce sleepable BPF programs Introduce sleepable BPF programs that can request such property for themselves via BPF_F_SLEEPABLE flag at program load time. In such case they will be able to use helpers like bpf_copy_from_user() that might sleep. At present only fentry/fexit/fmod_ret and lsm programs can request to be sleepable and only when they are attached to kernel functions that are known to allow sleeping. The non-sleepable programs are relying on implicit rcu_read_lock() and migrate_disable() to protect life time of programs, maps that they use and per-cpu kernel structures used to pass info between bpf programs and the kernel. The sleepable programs cannot be enclosed into rcu_read_lock(). migrate_disable() maps to preempt_disable() in non-RT kernels, so the progs should not be enclosed in migrate_disable() as well. Therefore rcu_read_lock_trace is used to protect the life time of sleepable progs. There are many networking and tracing program types. In many cases the 'struct bpf_prog *' pointer itself is rcu protected within some other kernel data structure and the kernel code is using rcu_dereference() to load that program pointer and call BPF_PROG_RUN() on it. All these cases are not touched. Instead sleepable bpf programs are allowed with bpf trampoline only. The program pointers are hard-coded into generated assembly of bpf trampoline and synchronize_rcu_tasks_trace() is used to protect the life time of the program. The same trampoline can hold both sleepable and non-sleepable progs. When rcu_read_lock_trace is held it means that some sleepable bpf program is running from bpf trampoline. Those programs can use bpf arrays and preallocated hash/lru maps. These map types are waiting on programs to complete via synchronize_rcu_tasks_trace(); Updates to trampoline now has to do synchronize_rcu_tasks_trace() and synchronize_rcu_tasks() to wait for sleepable progs to finish and for trampoline assembly to finish. This is the first step of introducing sleepable progs. Eventually dynamically allocated hash maps can be allowed and networking program types can become sleepable too. 
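A minimal loader-side sketch of requesting the new property via the raw syscall, assuming the caller already built insns and resolved attach_btf_id for a kernel function known to allow sleeping (error handling elided; real users would go through libbpf, wired up in the next patches):

	#include <unistd.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <linux/bpf.h>

	static __u64 ptr_to_u64(const void *ptr)
	{
		return (__u64)(unsigned long)ptr;
	}

	static int load_sleepable_fentry(const struct bpf_insn *insns,
					 __u32 insn_cnt, __u32 attach_btf_id)
	{
		union bpf_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.prog_type = BPF_PROG_TYPE_TRACING;
		attr.expected_attach_type = BPF_TRACE_FENTRY;
		attr.attach_btf_id = attach_btf_id;
		attr.prog_flags = BPF_F_SLEEPABLE;	/* the new flag */
		attr.insns = ptr_to_u64(insns);
		attr.insn_cnt = insn_cnt;
		attr.license = ptr_to_u64("GPL");

		return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
	}
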
Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Reviewed-by: Josef Bacik Acked-by: Andrii Nakryiko Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20200827220114.69225-3-alexei.starovoitov@gmail.com --- tools/include/uapi/linux/bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index ef7af384f5ee..6e8b706aeb05 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -346,6 +346,14 @@ enum bpf_link_type { /* The verifier internal test flag. Behavior is undefined */ #define BPF_F_TEST_STATE_FREQ (1U << 3) +/* If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will + * restrict map and helper usage for such programs. Sleepable BPF programs can + * only be attached to hooks where kernel execution context allows sleeping. + * Such programs are allowed to use helpers that may sleep like + * bpf_copy_from_user(). + */ +#define BPF_F_SLEEPABLE (1U << 4) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * two extensions: * -- cgit v1.2.3 From 07be4c4a3e7a0db148e44b16c5190e753d1c8569 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 27 Aug 2020 15:01:12 -0700 Subject: bpf: Add bpf_copy_from_user() helper. Sleepable BPF programs can now use copy_from_user() to access user memory. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20200827220114.69225-4-alexei.starovoitov@gmail.com --- tools/include/uapi/linux/bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6e8b706aeb05..a613750d5515 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3569,6 +3569,13 @@ union bpf_attr { * On success, the strictly positive length of the string, * including the trailing NUL character. On error, a negative * value. + * + * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr) + * Description + * Read *size* bytes from user space address *user_ptr* and store + * the data in *dst*. This is a wrapper of copy_from_user(). + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3719,6 +3726,7 @@ union bpf_attr { FN(inode_storage_get), \ FN(inode_storage_delete), \ FN(d_path), \ + FN(copy_from_user), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 2b288740a1072235166b2706e139f2725b5ea26f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 27 Aug 2020 15:01:13 -0700 Subject: libbpf: Support sleepable progs Pass request to load program as sleepable via ".s" suffix in the section name. If it happens in the future that all map types and helpers are allowed with BPF_F_SLEEPABLE flag "fmod_ret/" and "lsm/" can be aliased to "fmod_ret.s/" and "lsm.s/" to make all lsm and fmod_ret programs sleepable by default. The fentry and fexit programs would always need to have sleepable vs non-sleepable distinction, since not all fentry/fexit progs will be attached to sleepable kernel functions. 
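A minimal BPF-side sketch, assuming an x86-64 kernel and an illustrative attach point (__x64_sys_nanosleep); the ".s" suffix alone is what makes libbpf set BPF_F_SLEEPABLE:

	#include <linux/bpf.h>
	#include <linux/ptrace.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	char _license[] SEC("license") = "GPL";

	SEC("fentry.s/__x64_sys_nanosleep")
	int BPF_PROG(sleepable_nanosleep, struct pt_regs *regs)
	{
		char buf[16];

		/* reading user memory is only allowed because ".s"
		 * requested BPF_F_SLEEPABLE at load time
		 */
		bpf_copy_from_user(buf, sizeof(buf),
				   (void *)PT_REGS_PARM1(regs));
		return 0;
	}
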
Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: KP Singh Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200827220114.69225-5-alexei.starovoitov@gmail.com --- tools/lib/bpf/libbpf.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 8cdb2528482e..b688aadf09c5 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -208,6 +208,7 @@ struct bpf_sec_def { bool is_exp_attach_type_optional; bool is_attachable; bool is_attach_btf; + bool is_sleepable; attach_fn_t attach_fn; }; @@ -6291,6 +6292,8 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, /* couldn't guess, but user might manually specify */ continue; + if (prog->sec_def->is_sleepable) + prog->prog_flags |= BPF_F_SLEEPABLE; bpf_program__set_type(prog, prog->sec_def->prog_type); bpf_program__set_expected_attach_type(prog, prog->sec_def->expected_attach_type); @@ -7559,6 +7562,21 @@ static const struct bpf_sec_def section_defs[] = { .expected_attach_type = BPF_TRACE_FEXIT, .is_attach_btf = true, .attach_fn = attach_trace), + SEC_DEF("fentry.s/", TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .is_attach_btf = true, + .is_sleepable = true, + .attach_fn = attach_trace), + SEC_DEF("fmod_ret.s/", TRACING, + .expected_attach_type = BPF_MODIFY_RETURN, + .is_attach_btf = true, + .is_sleepable = true, + .attach_fn = attach_trace), + SEC_DEF("fexit.s/", TRACING, + .expected_attach_type = BPF_TRACE_FEXIT, + .is_attach_btf = true, + .is_sleepable = true, + .attach_fn = attach_trace), SEC_DEF("freplace/", EXT, .is_attach_btf = true, .attach_fn = attach_trace), @@ -7566,6 +7584,11 @@ static const struct bpf_sec_def section_defs[] = { .is_attach_btf = true, .expected_attach_type = BPF_LSM_MAC, .attach_fn = attach_lsm), + SEC_DEF("lsm.s/", LSM, + .is_attach_btf = true, + .is_sleepable = true, + .expected_attach_type = BPF_LSM_MAC, + .attach_fn = attach_lsm), SEC_DEF("iter/", TRACING, .expected_attach_type = BPF_TRACE_ITER, .is_attach_btf = true, @@ -8288,7 +8311,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, prog->prog_ifindex = attr->ifindex; prog->log_level = attr->log_level; - prog->prog_flags = attr->prog_flags; + prog->prog_flags |= attr->prog_flags; if (!first_prog) first_prog = prog; } -- cgit v1.2.3 From e68a144547fc7a956952260539cb7b8bb9afbcc0 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 27 Aug 2020 15:01:14 -0700 Subject: selftests/bpf: Add sleepable tests Modify few tests to sanity test sleepable bpf functionality. 
Running 'bench trig-fentry-sleep' vs 'bench trig-fentry' and 'perf report': sleepable with SRCU: 3.86% bench [k] __srcu_read_unlock 3.22% bench [k] __srcu_read_lock 0.92% bench [k] bpf_prog_740d4210cdcd99a3_bench_trigger_fentry_sleep 0.50% bench [k] bpf_trampoline_10297 0.26% bench [k] __bpf_prog_exit_sleepable 0.21% bench [k] __bpf_prog_enter_sleepable sleepable with RCU_TRACE: 0.79% bench [k] bpf_prog_740d4210cdcd99a3_bench_trigger_fentry_sleep 0.72% bench [k] bpf_trampoline_10381 0.31% bench [k] __bpf_prog_exit_sleepable 0.29% bench [k] __bpf_prog_enter_sleepable non-sleepable with RCU: 0.88% bench [k] bpf_prog_740d4210cdcd99a3_bench_trigger_fentry 0.84% bench [k] bpf_trampoline_10297 0.13% bench [k] __bpf_prog_enter 0.12% bench [k] __bpf_prog_exit Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20200827220114.69225-6-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/bench.c | 2 + tools/testing/selftests/bpf/benchs/bench_trigger.c | 17 ++++++ tools/testing/selftests/bpf/prog_tests/test_lsm.c | 9 +++ tools/testing/selftests/bpf/progs/lsm.c | 66 +++++++++++++++++++++- tools/testing/selftests/bpf/progs/trigger_bench.c | 7 +++ 5 files changed, 99 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 944ad4721c83..1a427685a8a8 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -317,6 +317,7 @@ extern const struct bench bench_trig_tp; extern const struct bench bench_trig_rawtp; extern const struct bench bench_trig_kprobe; extern const struct bench bench_trig_fentry; +extern const struct bench bench_trig_fentry_sleep; extern const struct bench bench_trig_fmodret; extern const struct bench bench_rb_libbpf; extern const struct bench bench_rb_custom; @@ -338,6 +339,7 @@ static const struct bench *benchs[] = { &bench_trig_rawtp, &bench_trig_kprobe, &bench_trig_fentry, + &bench_trig_fentry_sleep, &bench_trig_fmodret, &bench_rb_libbpf, &bench_rb_custom, diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 49c22832f216..2a0b6c9885a4 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -90,6 +90,12 @@ static void trigger_fentry_setup() attach_bpf(ctx.skel->progs.bench_trigger_fentry); } +static void trigger_fentry_sleep_setup() +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep); +} + static void trigger_fmodret_setup() { setup_ctx(); @@ -155,6 +161,17 @@ const struct bench bench_trig_fentry = { .report_final = hits_drops_report_final, }; +const struct bench bench_trig_fentry_sleep = { + .name = "trig-fentry-sleep", + .validate = trigger_validate, + .setup = trigger_fentry_sleep_setup, + .producer_thread = trigger_producer, + .consumer_thread = trigger_consumer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + const struct bench bench_trig_fmodret = { .name = "trig-fmodret", .validate = trigger_validate, diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c index b17eb2045c1d..6ab29226c99b 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c +++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "lsm.skel.h" @@ -55,6 
+56,7 @@ void test_test_lsm(void) { struct lsm *skel = NULL; int err, duration = 0; + int buf = 1234; skel = lsm__open_and_load(); if (CHECK(!skel, "skel_load", "lsm skeleton failed\n")) @@ -81,6 +83,13 @@ void test_test_lsm(void) CHECK(skel->bss->mprotect_count != 1, "mprotect_count", "mprotect_count = %d\n", skel->bss->mprotect_count); + syscall(__NR_setdomainname, &buf, -2L); + syscall(__NR_setdomainname, 0, -3L); + syscall(__NR_setdomainname, ~0L, -4L); + + CHECK(skel->bss->copy_test != 3, "copy_test", + "copy_test = %d\n", skel->bss->copy_test); + close_prog: lsm__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c index b4598d4bc4f7..49fa6ca99755 100644 --- a/tools/testing/selftests/bpf/progs/lsm.c +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -9,16 +9,41 @@ #include #include +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} array SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} hash SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} lru_hash SEC(".maps"); + char _license[] SEC("license") = "GPL"; int monitored_pid = 0; int mprotect_count = 0; int bprm_count = 0; -SEC("lsm/file_mprotect") +SEC("lsm.s/file_mprotect") int BPF_PROG(test_int_hook, struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot, int ret) { + char args[64]; + __u32 key = 0; + __u64 *value; + if (ret != 0) return ret; @@ -28,6 +53,18 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma, is_stack = (vma->vm_start <= vma->vm_mm->start_stack && vma->vm_end >= vma->vm_mm->start_stack); + bpf_copy_from_user(args, sizeof(args), (void *)vma->vm_mm->arg_start); + + value = bpf_map_lookup_elem(&array, &key); + if (value) + *value = 0; + value = bpf_map_lookup_elem(&hash, &key); + if (value) + *value = 0; + value = bpf_map_lookup_elem(&lru_hash, &key); + if (value) + *value = 0; + if (is_stack && monitored_pid == pid) { mprotect_count++; ret = -EPERM; @@ -36,7 +73,7 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma, return ret; } -SEC("lsm/bprm_committed_creds") +SEC("lsm.s/bprm_committed_creds") int BPF_PROG(test_void_hook, struct linux_binprm *bprm) { __u32 pid = bpf_get_current_pid_tgid() >> 32; @@ -46,3 +83,28 @@ int BPF_PROG(test_void_hook, struct linux_binprm *bprm) return 0; } +SEC("lsm/task_free") /* lsm/ is ok, lsm.s/ fails */ +int BPF_PROG(test_task_free, struct task_struct *task) +{ + return 0; +} + +int copy_test = 0; + +SEC("fentry.s/__x64_sys_setdomainname") +int BPF_PROG(test_sys_setdomainname, struct pt_regs *regs) +{ + void *ptr = (void *)PT_REGS_PARM1(regs); + int len = PT_REGS_PARM2(regs); + int buf = 0; + long ret; + + ret = bpf_copy_from_user(&buf, sizeof(buf), ptr); + if (len == -2 && ret == 0 && buf == 1234) + copy_test++; + if (len == -3 && ret == -EFAULT) + copy_test++; + if (len == -4 && ret == -EFAULT) + copy_test++; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index 8b36b6640e7e..9a4d09590b3d 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -39,6 +39,13 @@ int bench_trigger_fentry(void *ctx) return 0; } +SEC("fentry.s/__x64_sys_getpgid") +int bench_trigger_fentry_sleep(void *ctx) +{ + __sync_add_and_fetch(&hits, 
1); + return 0; +} + SEC("fmod_ret/__x64_sys_getpgid") int bench_trigger_fmodret(void *ctx) { -- cgit v1.2.3 From 2f6324a3937f8517967d94daef2ba0bdceceece1 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 28 Aug 2020 10:26:27 +0200 Subject: libbpf: Support shared umems between queues and devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for shared umems between hardware queues and devices to the AF_XDP part of libbpf. This so that zero-copy can be achieved in applications that want to send and receive packets between HW queues on one device or between different devices/netdevs. In order to create sockets that share a umem between hardware queues and devices, a new function has been added called xsk_socket__create_shared(). It takes the same arguments as xsk_socket_create() plus references to a fill ring and a completion ring. So for every socket that share a umem, you need to have one more set of fill and completion rings. This in order to maintain the single-producer single-consumer semantics of the rings. You can create all the sockets via the new xsk_socket__create_shared() call, or create the first one with xsk_socket__create() and the rest with xsk_socket__create_shared(). Both methods work. Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/1598603189-32145-14-git-send-email-magnus.karlsson@intel.com --- tools/lib/bpf/libbpf.map | 1 + tools/lib/bpf/xsk.c | 376 ++++++++++++++++++++++++++++++----------------- tools/lib/bpf/xsk.h | 9 ++ 3 files changed, 254 insertions(+), 132 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 66a6286d0716..3fedcdc4ae2f 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -306,4 +306,5 @@ LIBBPF_0.2.0 { perf_buffer__buffer_fd; perf_buffer__epoll_fd; perf_buffer__consume_buffer; + xsk_socket__create_shared; } LIBBPF_0.1.0; diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index a9b02103767b..49c324594792 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -45,26 +46,35 @@ #endif struct xsk_umem { - struct xsk_ring_prod *fill; - struct xsk_ring_cons *comp; + struct xsk_ring_prod *fill_save; + struct xsk_ring_cons *comp_save; char *umem_area; struct xsk_umem_config config; int fd; int refcount; + struct list_head ctx_list; +}; + +struct xsk_ctx { + struct xsk_ring_prod *fill; + struct xsk_ring_cons *comp; + __u32 queue_id; + struct xsk_umem *umem; + int refcount; + int ifindex; + struct list_head list; + int prog_fd; + int xsks_map_fd; + char ifname[IFNAMSIZ]; }; struct xsk_socket { struct xsk_ring_cons *rx; struct xsk_ring_prod *tx; __u64 outstanding_tx; - struct xsk_umem *umem; + struct xsk_ctx *ctx; struct xsk_socket_config config; int fd; - int ifindex; - int prog_fd; - int xsks_map_fd; - __u32 queue_id; - char ifname[IFNAMSIZ]; }; struct xsk_nl_info { @@ -200,15 +210,73 @@ static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off) return -EINVAL; } +static int xsk_create_umem_rings(struct xsk_umem *umem, int fd, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp) +{ + struct xdp_mmap_offsets off; + void *map; + int err; + + err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, + &umem->config.fill_size, + sizeof(umem->config.fill_size)); + if (err) + return -errno; + + err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, + 
&umem->config.comp_size, + sizeof(umem->config.comp_size)); + if (err) + return -errno; + + err = xsk_get_mmap_offsets(fd, &off); + if (err) + return -errno; + + map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, + XDP_UMEM_PGOFF_FILL_RING); + if (map == MAP_FAILED) + return -errno; + + fill->mask = umem->config.fill_size - 1; + fill->size = umem->config.fill_size; + fill->producer = map + off.fr.producer; + fill->consumer = map + off.fr.consumer; + fill->flags = map + off.fr.flags; + fill->ring = map + off.fr.desc; + fill->cached_cons = umem->config.fill_size; + + map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, + XDP_UMEM_PGOFF_COMPLETION_RING); + if (map == MAP_FAILED) { + err = -errno; + goto out_mmap; + } + + comp->mask = umem->config.comp_size - 1; + comp->size = umem->config.comp_size; + comp->producer = map + off.cr.producer; + comp->consumer = map + off.cr.consumer; + comp->flags = map + off.cr.flags; + comp->ring = map + off.cr.desc; + + return 0; + +out_mmap: + munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64)); + return err; +} + int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, struct xsk_ring_prod *fill, struct xsk_ring_cons *comp, const struct xsk_umem_config *usr_config) { - struct xdp_mmap_offsets off; struct xdp_umem_reg mr; struct xsk_umem *umem; - void *map; int err; if (!umem_area || !umem_ptr || !fill || !comp) @@ -227,6 +295,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, } umem->umem_area = umem_area; + INIT_LIST_HEAD(&umem->ctx_list); xsk_set_umem_config(&umem->config, usr_config); memset(&mr, 0, sizeof(mr)); @@ -241,71 +310,16 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, err = -errno; goto out_socket; } - err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_FILL_RING, - &umem->config.fill_size, - sizeof(umem->config.fill_size)); - if (err) { - err = -errno; - goto out_socket; - } - err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, - &umem->config.comp_size, - sizeof(umem->config.comp_size)); - if (err) { - err = -errno; - goto out_socket; - } - err = xsk_get_mmap_offsets(umem->fd, &off); - if (err) { - err = -errno; - goto out_socket; - } - - map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd, - XDP_UMEM_PGOFF_FILL_RING); - if (map == MAP_FAILED) { - err = -errno; + err = xsk_create_umem_rings(umem, umem->fd, fill, comp); + if (err) goto out_socket; - } - - umem->fill = fill; - fill->mask = umem->config.fill_size - 1; - fill->size = umem->config.fill_size; - fill->producer = map + off.fr.producer; - fill->consumer = map + off.fr.consumer; - fill->flags = map + off.fr.flags; - fill->ring = map + off.fr.desc; - fill->cached_prod = *fill->producer; - /* cached_cons is "size" bigger than the real consumer pointer - * See xsk_prod_nb_free - */ - fill->cached_cons = *fill->consumer + umem->config.fill_size; - - map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd, - XDP_UMEM_PGOFF_COMPLETION_RING); - if (map == MAP_FAILED) { - err = -errno; - goto out_mmap; - } - - umem->comp = comp; - comp->mask = umem->config.comp_size - 1; - comp->size = umem->config.comp_size; - comp->producer = map + off.cr.producer; - comp->consumer = map + 
off.cr.consumer; - comp->flags = map + off.cr.flags; - comp->ring = map + off.cr.desc; - comp->cached_prod = *comp->producer; - comp->cached_cons = *comp->consumer; + umem->fill_save = fill; + umem->comp_save = comp; *umem_ptr = umem; return 0; -out_mmap: - munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64)); out_socket: close(umem->fd); out_umem_alloc: @@ -339,6 +353,7 @@ DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4) static int xsk_load_xdp_prog(struct xsk_socket *xsk) { static const int log_buf_size = 16 * 1024; + struct xsk_ctx *ctx = xsk->ctx; char log_buf[log_buf_size]; int err, prog_fd; @@ -366,7 +381,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) /* *(u32 *)(r10 - 4) = r2 */ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4), /* r1 = xskmap[] */ - BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), /* r3 = XDP_PASS */ BPF_MOV64_IMM(BPF_REG_3, 2), /* call bpf_redirect_map */ @@ -378,7 +393,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) /* r2 += -4 */ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r1 = xskmap[] */ - BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), /* call bpf_map_lookup_elem */ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), /* r1 = r0 */ @@ -390,7 +405,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) /* r2 = *(u32 *)(r10 - 4) */ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4), /* r1 = xskmap[] */ - BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), /* r3 = 0 */ BPF_MOV64_IMM(BPF_REG_3, 0), /* call bpf_redirect_map */ @@ -408,19 +423,21 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) return prog_fd; } - err = bpf_set_link_xdp_fd(xsk->ifindex, prog_fd, xsk->config.xdp_flags); + err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, prog_fd, + xsk->config.xdp_flags); if (err) { close(prog_fd); return err; } - xsk->prog_fd = prog_fd; + ctx->prog_fd = prog_fd; return 0; } static int xsk_get_max_queues(struct xsk_socket *xsk) { struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; + struct xsk_ctx *ctx = xsk->ctx; struct ifreq ifr = {}; int fd, err, ret; @@ -429,7 +446,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk) return -errno; ifr.ifr_data = (void *)&channels; - memcpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1); + memcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ - 1); ifr.ifr_name[IFNAMSIZ - 1] = '\0'; err = ioctl(fd, SIOCETHTOOL, &ifr); if (err && errno != EOPNOTSUPP) { @@ -457,6 +474,7 @@ out: static int xsk_create_bpf_maps(struct xsk_socket *xsk) { + struct xsk_ctx *ctx = xsk->ctx; int max_queues; int fd; @@ -469,15 +487,17 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk) if (fd < 0) return fd; - xsk->xsks_map_fd = fd; + ctx->xsks_map_fd = fd; return 0; } static void xsk_delete_bpf_maps(struct xsk_socket *xsk) { - bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id); - close(xsk->xsks_map_fd); + struct xsk_ctx *ctx = xsk->ctx; + + bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id); + close(ctx->xsks_map_fd); } static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) @@ -485,10 +505,11 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info); __u32 map_len = sizeof(struct bpf_map_info); struct bpf_prog_info prog_info = {}; + struct xsk_ctx *ctx = xsk->ctx; struct bpf_map_info map_info; int fd, err; - err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); + err = 
bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len); if (err) return err; @@ -502,11 +523,11 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) prog_info.nr_map_ids = num_maps; prog_info.map_ids = (__u64)(unsigned long)map_ids; - err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); + err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len); if (err) goto out_map_ids; - xsk->xsks_map_fd = -1; + ctx->xsks_map_fd = -1; for (i = 0; i < prog_info.nr_map_ids; i++) { fd = bpf_map_get_fd_by_id(map_ids[i]); @@ -520,7 +541,7 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) } if (!strcmp(map_info.name, "xsks_map")) { - xsk->xsks_map_fd = fd; + ctx->xsks_map_fd = fd; continue; } @@ -528,7 +549,7 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) } err = 0; - if (xsk->xsks_map_fd == -1) + if (ctx->xsks_map_fd == -1) err = -ENOENT; out_map_ids: @@ -538,16 +559,19 @@ out_map_ids: static int xsk_set_bpf_maps(struct xsk_socket *xsk) { - return bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, + struct xsk_ctx *ctx = xsk->ctx; + + return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id, &xsk->fd, 0); } static int xsk_setup_xdp_prog(struct xsk_socket *xsk) { + struct xsk_ctx *ctx = xsk->ctx; __u32 prog_id = 0; int err; - err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id, + err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags); if (err) return err; @@ -563,12 +587,12 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) return err; } } else { - xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id); - if (xsk->prog_fd < 0) + ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id); + if (ctx->prog_fd < 0) return -errno; err = xsk_lookup_bpf_maps(xsk); if (err) { - close(xsk->prog_fd); + close(ctx->prog_fd); return err; } } @@ -577,25 +601,110 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) err = xsk_set_bpf_maps(xsk); if (err) { xsk_delete_bpf_maps(xsk); - close(xsk->prog_fd); + close(ctx->prog_fd); return err; } return 0; } -int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, - __u32 queue_id, struct xsk_umem *umem, - struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, - const struct xsk_socket_config *usr_config) +static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex, + __u32 queue_id) +{ + struct xsk_ctx *ctx; + + if (list_empty(&umem->ctx_list)) + return NULL; + + list_for_each_entry(ctx, &umem->ctx_list, list) { + if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) { + ctx->refcount++; + return ctx; + } + } + + return NULL; +} + +static void xsk_put_ctx(struct xsk_ctx *ctx) +{ + struct xsk_umem *umem = ctx->umem; + struct xdp_mmap_offsets off; + int err; + + if (--ctx->refcount == 0) { + err = xsk_get_mmap_offsets(umem->fd, &off); + if (!err) { + munmap(ctx->fill->ring - off.fr.desc, + off.fr.desc + umem->config.fill_size * + sizeof(__u64)); + munmap(ctx->comp->ring - off.cr.desc, + off.cr.desc + umem->config.comp_size * + sizeof(__u64)); + } + + list_del(&ctx->list); + free(ctx); + } +} + +static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, + struct xsk_umem *umem, int ifindex, + const char *ifname, __u32 queue_id, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp) +{ + struct xsk_ctx *ctx; + int err; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) + return NULL; + + if (!umem->fill_save) { + err = xsk_create_umem_rings(umem, xsk->fd, fill, comp); + if (err) { + free(ctx); + return NULL; + } + } else if (umem->fill_save != fill || umem->comp_save != comp) { + /* Copy over 
rings to new structs. */ + memcpy(fill, umem->fill_save, sizeof(*fill)); + memcpy(comp, umem->comp_save, sizeof(*comp)); + } + + ctx->ifindex = ifindex; + ctx->refcount = 1; + ctx->umem = umem; + ctx->queue_id = queue_id; + memcpy(ctx->ifname, ifname, IFNAMSIZ - 1); + ctx->ifname[IFNAMSIZ - 1] = '\0'; + + umem->fill_save = NULL; + umem->comp_save = NULL; + ctx->fill = fill; + ctx->comp = comp; + list_add(&ctx->list, &umem->ctx_list); + return ctx; +} + +int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, + const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_socket_config *usr_config) { void *rx_map = NULL, *tx_map = NULL; struct sockaddr_xdp sxdp = {}; struct xdp_mmap_offsets off; struct xsk_socket *xsk; - int err; + struct xsk_ctx *ctx; + int err, ifindex; - if (!umem || !xsk_ptr || !(rx || tx)) + if (!umem || !xsk_ptr || !(rx || tx) || !fill || !comp) return -EFAULT; xsk = calloc(1, sizeof(*xsk)); @@ -606,10 +715,10 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, if (err) goto out_xsk_alloc; - if (umem->refcount && - !(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { - pr_warn("Error: shared umems not supported by libbpf supplied XDP program.\n"); - err = -EBUSY; + xsk->outstanding_tx = 0; + ifindex = if_nametoindex(ifname); + if (!ifindex) { + err = -errno; goto out_xsk_alloc; } @@ -623,16 +732,16 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, xsk->fd = umem->fd; } - xsk->outstanding_tx = 0; - xsk->queue_id = queue_id; - xsk->umem = umem; - xsk->ifindex = if_nametoindex(ifname); - if (!xsk->ifindex) { - err = -errno; - goto out_socket; + ctx = xsk_get_ctx(umem, ifindex, queue_id); + if (!ctx) { + ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id, + fill, comp); + if (!ctx) { + err = -ENOMEM; + goto out_socket; + } } - memcpy(xsk->ifname, ifname, IFNAMSIZ - 1); - xsk->ifname[IFNAMSIZ - 1] = '\0'; + xsk->ctx = ctx; if (rx) { err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, @@ -640,7 +749,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, sizeof(xsk->config.rx_size)); if (err) { err = -errno; - goto out_socket; + goto out_put_ctx; } } if (tx) { @@ -649,14 +758,14 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, sizeof(xsk->config.tx_size)); if (err) { err = -errno; - goto out_socket; + goto out_put_ctx; } } err = xsk_get_mmap_offsets(xsk->fd, &off); if (err) { err = -errno; - goto out_socket; + goto out_put_ctx; } if (rx) { @@ -666,7 +775,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, xsk->fd, XDP_PGOFF_RX_RING); if (rx_map == MAP_FAILED) { err = -errno; - goto out_socket; + goto out_put_ctx; } rx->mask = xsk->config.rx_size - 1; @@ -705,10 +814,10 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, xsk->tx = tx; sxdp.sxdp_family = PF_XDP; - sxdp.sxdp_ifindex = xsk->ifindex; - sxdp.sxdp_queue_id = xsk->queue_id; + sxdp.sxdp_ifindex = ctx->ifindex; + sxdp.sxdp_queue_id = ctx->queue_id; if (umem->refcount > 1) { - sxdp.sxdp_flags = XDP_SHARED_UMEM; + sxdp.sxdp_flags |= XDP_SHARED_UMEM; sxdp.sxdp_shared_umem_fd = umem->fd; } else { sxdp.sxdp_flags = xsk->config.bind_flags; @@ -720,7 +829,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, goto out_mmap_tx; } - xsk->prog_fd = -1; + ctx->prog_fd = -1; if (!(xsk->config.libbpf_flags & 
XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { err = xsk_setup_xdp_prog(xsk); @@ -739,6 +848,8 @@ out_mmap_rx: if (rx) munmap(rx_map, off.rx.desc + xsk->config.rx_size * sizeof(struct xdp_desc)); +out_put_ctx: + xsk_put_ctx(ctx); out_socket: if (--umem->refcount) close(xsk->fd); @@ -747,25 +858,24 @@ out_xsk_alloc: return err; } -int xsk_umem__delete(struct xsk_umem *umem) +int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, + const struct xsk_socket_config *usr_config) { - struct xdp_mmap_offsets off; - int err; + return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem, + rx, tx, umem->fill_save, + umem->comp_save, usr_config); +} +int xsk_umem__delete(struct xsk_umem *umem) +{ if (!umem) return 0; if (umem->refcount) return -EBUSY; - err = xsk_get_mmap_offsets(umem->fd, &off); - if (!err) { - munmap(umem->fill->ring - off.fr.desc, - off.fr.desc + umem->config.fill_size * sizeof(__u64)); - munmap(umem->comp->ring - off.cr.desc, - off.cr.desc + umem->config.comp_size * sizeof(__u64)); - } - close(umem->fd); free(umem); @@ -775,15 +885,16 @@ int xsk_umem__delete(struct xsk_umem *umem) void xsk_socket__delete(struct xsk_socket *xsk) { size_t desc_sz = sizeof(struct xdp_desc); + struct xsk_ctx *ctx = xsk->ctx; struct xdp_mmap_offsets off; int err; if (!xsk) return; - if (xsk->prog_fd != -1) { + if (ctx->prog_fd != -1) { xsk_delete_bpf_maps(xsk); - close(xsk->prog_fd); + close(ctx->prog_fd); } err = xsk_get_mmap_offsets(xsk->fd, &off); @@ -796,14 +907,15 @@ void xsk_socket__delete(struct xsk_socket *xsk) munmap(xsk->tx->ring - off.tx.desc, off.tx.desc + xsk->config.tx_size * desc_sz); } - } - xsk->umem->refcount--; + xsk_put_ctx(ctx); + + ctx->umem->refcount--; /* Do not close an fd that also has an associated umem connected * to it. */ - if (xsk->fd != xsk->umem->fd) + if (xsk->fd != ctx->umem->fd) close(xsk->fd); free(xsk); } diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index 584f6820a639..1069c46364ff 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -234,6 +234,15 @@ LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk, struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, const struct xsk_socket_config *config); +LIBBPF_API int +xsk_socket__create_shared(struct xsk_socket **xsk_ptr, + const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_socket_config *config); /* Returns 0 for success and -EBUSY if the umem is still in use. */ LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem); -- cgit v1.2.3 From f56407fa6e69499a06bf1e0543fa93be6922acba Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 31 Aug 2020 13:16:51 -0700 Subject: bpf: Remove bpf_lsm_file_mprotect from sleepable list. Technically the bpf programs can sleep while attached to bpf_lsm_file_mprotect, but such programs need to access user memory. So they're in might_fault() category. Which means they cannot be called from file_mprotect lsm hook that takes write lock on mm->mmap_lock. Adjust the test accordingly. Also add might_fault() to __bpf_prog_enter_sleepable() to catch such deadlocks early. 
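The kernel side of this change is outside this tools/-limited diffstat; a rough sketch of the described annotation, assuming the trampoline entry helper introduced earlier in this series, is:

	/* kernel/bpf/trampoline.c, sketch only */
	void notrace __bpf_prog_enter_sleepable(void)
	{
		rcu_read_lock_trace();
		might_fault();	/* lets lockdep flag callers already
				 * holding mm->mmap_lock */
	}
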
Fixes: 1e6c62a88215 ("bpf: Introduce sleepable BPF programs") Fixes: e68a144547fc ("selftests/bpf: Add sleepable tests") Reported-by: Yonghong Song Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200831201651.82447-1-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/progs/lsm.c | 34 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c index 49fa6ca99755..ff4d343b94b5 100644 --- a/tools/testing/selftests/bpf/progs/lsm.c +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -36,14 +36,10 @@ int monitored_pid = 0; int mprotect_count = 0; int bprm_count = 0; -SEC("lsm.s/file_mprotect") +SEC("lsm/file_mprotect") int BPF_PROG(test_int_hook, struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot, int ret) { - char args[64]; - __u32 key = 0; - __u64 *value; - if (ret != 0) return ret; @@ -53,18 +49,6 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma, is_stack = (vma->vm_start <= vma->vm_mm->start_stack && vma->vm_end >= vma->vm_mm->start_stack); - bpf_copy_from_user(args, sizeof(args), (void *)vma->vm_mm->arg_start); - - value = bpf_map_lookup_elem(&array, &key); - if (value) - *value = 0; - value = bpf_map_lookup_elem(&hash, &key); - if (value) - *value = 0; - value = bpf_map_lookup_elem(&lru_hash, &key); - if (value) - *value = 0; - if (is_stack && monitored_pid == pid) { mprotect_count++; ret = -EPERM; @@ -77,10 +61,26 @@ SEC("lsm.s/bprm_committed_creds") int BPF_PROG(test_void_hook, struct linux_binprm *bprm) { __u32 pid = bpf_get_current_pid_tgid() >> 32; + char args[64]; + __u32 key = 0; + __u64 *value; if (monitored_pid == pid) bprm_count++; + bpf_copy_from_user(args, sizeof(args), (void *)bprm->vma->vm_mm->arg_start); + bpf_copy_from_user(args, sizeof(args), (void *)bprm->mm->arg_start); + + value = bpf_map_lookup_elem(&array, &key); + if (value) + *value = 0; + value = bpf_map_lookup_elem(&hash, &key); + if (value) + *value = 0; + value = bpf_map_lookup_elem(&lru_hash, &key); + if (value) + *value = 0; + return 0; } SEC("lsm/task_free") /* lsm/ is ok, lsm.s/ fails */ -- cgit v1.2.3