summary refs log tree commit diff
path: root/net
diff options
context:
space:
mode:
author Alexei Starovoitov <ast@kernel.org> 2020-05-14 21:21:57 -0700
committer Alexei Starovoitov <ast@kernel.org> 2020-05-14 21:50:03 -0700
commit 5cc5924d8315a53e03e7dbfa7a3067cde7a81ded (patch)
tree 0c842b02f62a8b7e2e7bd6f0a91053e652b45f7a /net
parent d00f26b623333f2419f4c3b95ff11c8b1bb96f56 (diff)
parent 7ae2e00e8fc23f10169079fadd388317d81012be (diff)
download linux-5cc5924d8315a53e03e7dbfa7a3067cde7a81ded.tar.gz
linux-5cc5924d8315a53e03e7dbfa7a3067cde7a81ded.tar.bz2
linux-5cc5924d8315a53e03e7dbfa7a3067cde7a81ded.zip
Merge branch 'xdp-grow-tail'
Jesper Dangaard Brouer says:

====================
V4:
- Fixup checkpatch.pl issues
- Collected more ACKs

V3:
- Fix issue on virtio_net patch spotted by Jason Wang
- Adjust name for variable in mlx5 patch
- Collected more ACKs

V2:
- Fix bug in mlx5 for XDP_PASS case
- Collected nitpicks and ACKs from mailing list

V1:
- Fix bug in dpaa2

XDP has evolved to support several frame sizes, but xdp_buff was not updated with this information. This has caused the side effect that the XDP frame data hard end is unknown. This has limited the BPF helper bpf_xdp_adjust_tail to only shrinking the packet. This patchset addresses this and adds packet tail extend/grow.

The purpose of the patchset is ALSO to reserve a memory area that can be used for storing extra information, specifically for extending XDP with multi-buffer support. One proposal is to use the same layout as skb_shared_info, which is why this area is currently 320 bytes.

When converting xdp_frame to SKB (veth and cpumap), the full tailroom area can now be used and SKB truesize is now correct. For most drivers this results in a much larger tailroom in the SKB "head" data area. The network stack can now take advantage of this when doing SKB coalescing. Thus, a good driver test is to use xdp_redirect_cpu from samples/bpf/ and do some TCP stream testing.

Use-cases for tail grow/extend:
(1) IPsec / XFRM needs a tail extend[1][2].
(2) DNS-cache responses in XDP.
(3) HAProxy ALOHA would need it to convert to XDP.
(4) Add tail info e.g. timestamp and collect via tcpdump

[1] http://vger.kernel.org/netconf2019_files/xfrm_xdp.pdf
[2] http://vger.kernel.org/netconf2019.html

Examples of how to access the tail area of an XDP packet are shown in the XDP-tutorial example[3].

[3] https://github.com/xdp-project/xdp-tutorial/blob/master/experiment01-tailgrow/
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'net')
-rw-r--r-- net/bpf/test_run.c 16
-rw-r--r-- net/core/dev.c 14
-rw-r--r-- net/core/filter.c 15
-rw-r--r-- net/core/xdp.c 8
4 files changed, 41 insertions, 12 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 29dbdd4c29f6..30ba7d38941d 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -470,25 +470,34 @@ out:
int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
+ u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ u32 headroom = XDP_PACKET_HEADROOM;
u32 size = kattr->test.data_size_in;
u32 repeat = kattr->test.repeat;
struct netdev_rx_queue *rxqueue;
struct xdp_buff xdp = {};
u32 retval, duration;
+ u32 max_data_sz;
void *data;
int ret;
if (kattr->test.ctx_in || kattr->test.ctx_out)
return -EINVAL;
- data = bpf_test_init(kattr, size, XDP_PACKET_HEADROOM + NET_IP_ALIGN, 0);
+ /* XDP have extra tailroom as (most) drivers use full page */
+ max_data_sz = 4096 - headroom - tailroom;
+ if (size > max_data_sz)
+ return -EINVAL;
+
+ data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
if (IS_ERR(data))
return PTR_ERR(data);
xdp.data_hard_start = data;
- xdp.data = data + XDP_PACKET_HEADROOM + NET_IP_ALIGN;
+ xdp.data = data + headroom;
xdp.data_meta = xdp.data;
xdp.data_end = xdp.data + size;
+ xdp.frame_sz = headroom + max_data_sz + tailroom;
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
xdp.rxq = &rxqueue->xdp_rxq;
@@ -496,8 +505,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
if (ret)
goto out;
- if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN ||
- xdp.data_end != xdp.data + size)
+ if (xdp.data != data + headroom || xdp.data_end != xdp.data + size)
size = xdp.data_end - xdp.data;
ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
out:
diff --git a/net/core/dev.c b/net/core/dev.c
index 4c91de39890a..f937a3ff668d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4617,6 +4617,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
xdp->data_meta = xdp->data;
xdp->data_end = xdp->data + hlen;
xdp->data_hard_start = skb->data - skb_headroom(skb);
+
+ /* SKB "head" area always have tailroom for skb_shared_info */
+ xdp->frame_sz = (void *)skb_end_pointer(skb) - xdp->data_hard_start;
+ xdp->frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
orig_data_end = xdp->data_end;
orig_data = xdp->data;
eth = (struct ethhdr *)xdp->data;
@@ -4640,14 +4645,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
skb_reset_network_header(skb);
}
- /* check if bpf_xdp_adjust_tail was used. it can only "shrink"
- * pckt.
- */
- off = orig_data_end - xdp->data_end;
+ /* check if bpf_xdp_adjust_tail was used */
+ off = xdp->data_end - orig_data_end;
if (off != 0) {
skb_set_tail_pointer(skb, xdp->data_end - xdp->data);
- skb->len -= off;
-
+ skb->len += off; /* positive on grow, negative on shrink */
}
/* check if XDP changed eth hdr such SKB needs update */
diff --git a/net/core/filter.c b/net/core/filter.c
index 5815902bb617..a85eb538d4d6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3411,15 +3411,26 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
{
+ void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
void *data_end = xdp->data_end + offset;
- /* only shrinking is allowed for now. */
- if (unlikely(offset >= 0))
+ /* Notice that xdp_data_hard_end have reserved some tailroom */
+ if (unlikely(data_end > data_hard_end))
return -EINVAL;
+ /* ALL drivers MUST init xdp->frame_sz, chicken check below */
+ if (unlikely(xdp->frame_sz > PAGE_SIZE)) {
+ WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz);
+ return -EINVAL;
+ }
+
if (unlikely(data_end < xdp->data + ETH_HLEN))
return -EINVAL;
+ /* Clear memory area on grow, can contain uninit kernel memory */
+ if (offset > 0)
+ memset(xdp->data_end, 0, offset);
+
xdp->data_end = data_end;
return 0;
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 4c7ea85486af..490b8f5fa8ee 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
+#include <linux/bug.h>
#include <net/page_pool.h>
#include <net/xdp.h>
@@ -496,3 +497,10 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
return xdpf;
}
EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame);
+
+/* Used by XDP_WARN macro, to avoid inlining WARN() in fast-path */
+void xdp_warn(const char *msg, const char *func, const int line)
+{
+ WARN(1, "XDP_WARN: %s(line:%d): %s\n", func, line, msg);
+};
+EXPORT_SYMBOL_GPL(xdp_warn);