diff options
author | Alexei Starovoitov <ast@kernel.org> | 2020-05-14 21:21:57 -0700 |
---|---|---|
committer | Alexei Starovoitov <ast@kernel.org> | 2020-05-14 21:50:03 -0700 |
commit | 5cc5924d8315a53e03e7dbfa7a3067cde7a81ded (patch) | |
tree | 0c842b02f62a8b7e2e7bd6f0a91053e652b45f7a /net/core | |
parent | d00f26b623333f2419f4c3b95ff11c8b1bb96f56 (diff) | |
parent | 7ae2e00e8fc23f10169079fadd388317d81012be (diff) | |
download | linux-5cc5924d8315a53e03e7dbfa7a3067cde7a81ded.tar.gz linux-5cc5924d8315a53e03e7dbfa7a3067cde7a81ded.tar.bz2 linux-5cc5924d8315a53e03e7dbfa7a3067cde7a81ded.zip |
Merge branch 'xdp-grow-tail'
Jesper Dangaard Brouer says:
====================
V4:
- Fixup checkpatch.pl issues
- Collected more ACKs
V3:
- Fix issue on virtio_net patch spotted by Jason Wang
- Adjust name for variable in mlx5 patch
- Collected more ACKs
V2:
- Fix bug in mlx5 for XDP_PASS case
- Collected nitpicks and ACKs from mailing list
V1:
- Fix bug in dpaa2
XDP has evolved to support several frame sizes, but xdp_buff was not
updated with this information. This has caused the side-effect that
the XDP frame data hard end is unknown. This has limited the BPF-helper
bpf_xdp_adjust_tail to only shrinking the packet. This patchset addresses
this and adds packet tail extend/grow.
The purpose of the patchset is ALSO to reserve a memory area that can be
used for storing extra information, specifically for extending XDP with
multi-buffer support. One proposal is to use the same layout as
skb_shared_info, which is why this area is currently 320 bytes.
When converting xdp_frame to SKB (veth and cpumap), the full tailroom
area can now be used and SKB truesize is now correct. For most
drivers this results in a much larger tailroom in the SKB "head" data
area. The network stack can now take advantage of this when doing SKB
coalescing. Thus, a good driver test is to use xdp_redirect_cpu from
samples/bpf/ and do some TCP stream testing.
Use-cases for tail grow/extend:
(1) IPsec / XFRM needs a tail extend[1][2].
(2) DNS-cache responses in XDP.
(3) HAProxy ALOHA would need it to convert to XDP.
(4) Add tail info e.g. timestamp and collect via tcpdump
[1] http://vger.kernel.org/netconf2019_files/xfrm_xdp.pdf
[2] http://vger.kernel.org/netconf2019.html
Examples of how to access the tail area of an XDP packet are shown in the
XDP-tutorial example[3].
[3] https://github.com/xdp-project/xdp-tutorial/blob/master/experiment01-tailgrow/
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 14 | ||||
-rw-r--r-- | net/core/filter.c | 15 | ||||
-rw-r--r-- | net/core/xdp.c | 8 |
3 files changed, 29 insertions, 8 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 4c91de39890a..f937a3ff668d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4617,6 +4617,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, xdp->data_meta = xdp->data; xdp->data_end = xdp->data + hlen; xdp->data_hard_start = skb->data - skb_headroom(skb); + + /* SKB "head" area always have tailroom for skb_shared_info */ + xdp->frame_sz = (void *)skb_end_pointer(skb) - xdp->data_hard_start; + xdp->frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + orig_data_end = xdp->data_end; orig_data = xdp->data; eth = (struct ethhdr *)xdp->data; @@ -4640,14 +4645,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, skb_reset_network_header(skb); } - /* check if bpf_xdp_adjust_tail was used. it can only "shrink" - * pckt. - */ - off = orig_data_end - xdp->data_end; + /* check if bpf_xdp_adjust_tail was used */ + off = xdp->data_end - orig_data_end; if (off != 0) { skb_set_tail_pointer(skb, xdp->data_end - xdp->data); - skb->len -= off; - + skb->len += off; /* positive on grow, negative on shrink */ } /* check if XDP changed eth hdr such SKB needs update */ diff --git a/net/core/filter.c b/net/core/filter.c index 5815902bb617..a85eb538d4d6 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3411,15 +3411,26 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset) { + void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */ void *data_end = xdp->data_end + offset; - /* only shrinking is allowed for now. 
*/ - if (unlikely(offset >= 0)) + /* Notice that xdp_data_hard_end have reserved some tailroom */ + if (unlikely(data_end > data_hard_end)) return -EINVAL; + /* ALL drivers MUST init xdp->frame_sz, chicken check below */ + if (unlikely(xdp->frame_sz > PAGE_SIZE)) { + WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz); + return -EINVAL; + } + if (unlikely(data_end < xdp->data + ETH_HLEN)) return -EINVAL; + /* Clear memory area on grow, can contain uninit kernel memory */ + if (offset > 0) + memset(xdp->data_end, 0, offset); + xdp->data_end = data_end; return 0; diff --git a/net/core/xdp.c b/net/core/xdp.c index 4c7ea85486af..490b8f5fa8ee 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -11,6 +11,7 @@ #include <linux/slab.h> #include <linux/idr.h> #include <linux/rhashtable.h> +#include <linux/bug.h> #include <net/page_pool.h> #include <net/xdp.h> @@ -496,3 +497,10 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp) return xdpf; } EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame); + +/* Used by XDP_WARN macro, to avoid inlining WARN() in fast-path */ +void xdp_warn(const char *msg, const char *func, const int line) +{ + WARN(1, "XDP_WARN: %s(line:%d): %s\n", func, line, msg); +}; +EXPORT_SYMBOL_GPL(xdp_warn); |