summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2020-05-14 21:21:57 -0700
committerAlexei Starovoitov <ast@kernel.org>2020-05-14 21:50:03 -0700
commit5cc5924d8315a53e03e7dbfa7a3067cde7a81ded (patch)
tree0c842b02f62a8b7e2e7bd6f0a91053e652b45f7a /net/core
parentd00f26b623333f2419f4c3b95ff11c8b1bb96f56 (diff)
parent7ae2e00e8fc23f10169079fadd388317d81012be (diff)
downloadlinux-5cc5924d8315a53e03e7dbfa7a3067cde7a81ded.tar.gz
linux-5cc5924d8315a53e03e7dbfa7a3067cde7a81ded.tar.bz2
linux-5cc5924d8315a53e03e7dbfa7a3067cde7a81ded.zip
Merge branch 'xdp-grow-tail'
Jesper Dangaard Brouer says: ==================== V4: - Fixup checkpatch.pl issues - Collected more ACKs V3: - Fix issue on virtio_net patch spotted by Jason Wang - Adjust name for variable in mlx5 patch - Collected more ACKs V2: - Fix bug in mlx5 for XDP_PASS case - Collected nitpicks and ACKs from mailing list V1: - Fix bug in dpaa2 XDP have evolved to support several frame sizes, but xdp_buff was not updated with this information. This have caused the side-effect that XDP frame data hard end is unknown. This have limited the BPF-helper bpf_xdp_adjust_tail to only shrink the packet. This patchset address this and add packet tail extend/grow. The purpose of the patchset is ALSO to reserve a memory area that can be used for storing extra information, specifically for extending XDP with multi-buffer support. One proposal is to use same layout as skb_shared_info, which is why this area is currently 320 bytes. When converting xdp_frame to SKB (veth and cpumap), the full tailroom area can now be used and SKB truesize is now correct. For most drivers this result in a much larger tailroom in SKB "head" data area. The network stack can now take advantage of this when doing SKB coalescing. Thus, a good driver test is to use xdp_redirect_cpu from samples/bpf/ and do some TCP stream testing. Use-cases for tail grow/extend: (1) IPsec / XFRM needs a tail extend[1][2]. (2) DNS-cache responses in XDP. (3) HAProxy ALOHA would need it to convert to XDP. (4) Add tail info e.g. timestamp and collect via tcpdump [1] http://vger.kernel.org/netconf2019_files/xfrm_xdp.pdf [2] http://vger.kernel.org/netconf2019.html Examples on howto access the tail area of an XDP packet is shown in the XDP-tutorial example[3]. [3] https://github.com/xdp-project/xdp-tutorial/blob/master/experiment01-tailgrow/ ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c14
-rw-r--r--net/core/filter.c15
-rw-r--r--net/core/xdp.c8
3 files changed, 29 insertions, 8 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 4c91de39890a..f937a3ff668d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4617,6 +4617,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
xdp->data_meta = xdp->data;
xdp->data_end = xdp->data + hlen;
xdp->data_hard_start = skb->data - skb_headroom(skb);
+
+ /* SKB "head" area always have tailroom for skb_shared_info */
+ xdp->frame_sz = (void *)skb_end_pointer(skb) - xdp->data_hard_start;
+ xdp->frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
orig_data_end = xdp->data_end;
orig_data = xdp->data;
eth = (struct ethhdr *)xdp->data;
@@ -4640,14 +4645,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
skb_reset_network_header(skb);
}
- /* check if bpf_xdp_adjust_tail was used. it can only "shrink"
- * pckt.
- */
- off = orig_data_end - xdp->data_end;
+ /* check if bpf_xdp_adjust_tail was used */
+ off = xdp->data_end - orig_data_end;
if (off != 0) {
skb_set_tail_pointer(skb, xdp->data_end - xdp->data);
- skb->len -= off;
-
+ skb->len += off; /* positive on grow, negative on shrink */
}
/* check if XDP changed eth hdr such SKB needs update */
diff --git a/net/core/filter.c b/net/core/filter.c
index 5815902bb617..a85eb538d4d6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3411,15 +3411,26 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
{
+ void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
void *data_end = xdp->data_end + offset;
- /* only shrinking is allowed for now. */
- if (unlikely(offset >= 0))
+ /* Notice that xdp_data_hard_end have reserved some tailroom */
+ if (unlikely(data_end > data_hard_end))
return -EINVAL;
+ /* ALL drivers MUST init xdp->frame_sz, chicken check below */
+ if (unlikely(xdp->frame_sz > PAGE_SIZE)) {
+ WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz);
+ return -EINVAL;
+ }
+
if (unlikely(data_end < xdp->data + ETH_HLEN))
return -EINVAL;
+ /* Clear memory area on grow, can contain uninit kernel memory */
+ if (offset > 0)
+ memset(xdp->data_end, 0, offset);
+
xdp->data_end = data_end;
return 0;
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 4c7ea85486af..490b8f5fa8ee 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
+#include <linux/bug.h>
#include <net/page_pool.h>
#include <net/xdp.h>
@@ -496,3 +497,10 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
return xdpf;
}
EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame);
+
+/* Used by XDP_WARN macro, to avoid inlining WARN() in fast-path */
+void xdp_warn(const char *msg, const char *func, const int line)
+{
+ WARN(1, "XDP_WARN: %s(line:%d): %s\n", func, line, msg);
+};
+EXPORT_SYMBOL_GPL(xdp_warn);