summaryrefslogtreecommitdiff
path: root/net/ipv6/reassembly.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-03-05 08:26:13 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2019-03-05 08:26:13 -0800
commit6456300356433873309a1cae6aa05e77d6b59153 (patch)
tree3158f04f2ca63a48e4d3021aba31aee8f18221cf /net/ipv6/reassembly.c
parentcd2a3bf02625ffad02a6b9f7df758ee36cf12769 (diff)
parent18a4d8bf250a33c015955f0dec27259780ef6448 (diff)
downloadlinux-6456300356433873309a1cae6aa05e77d6b59153.tar.gz
linux-6456300356433873309a1cae6aa05e77d6b59153.tar.bz2
linux-6456300356433873309a1cae6aa05e77d6b59153.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Here we go, another merge window full of networking and #ebpf changes: 1) Snoop DHCPACKS in batman-adv to learn MAC/IP pairs in the DHCP range without dealing with floods of ARP traffic, from Linus Lüssing. 2) Throttle buffered multicast packet transmission in mt76, from Felix Fietkau. 3) Support adaptive interrupt moderation in ice, from Brett Creeley. 4) A lot of struct_size conversions, from Gustavo A. R. Silva. 5) Add peek/push/pop commands to bpftool, as well as bash completion, from Stanislav Fomichev. 6) Optimize sk_msg_clone(), from Vakul Garg. 7) Add SO_BINDTOIFINDEX, from David Herrmann. 8) Be more conservative with local resends due to local congestion, from Yuchung Cheng. 9) Allow vetoing of unsupported VXLAN FDBs, from Petr Machata. 10) Add health buffer support to devlink, from Eran Ben Elisha. 11) Add TXQ scheduling API to mac80211, from Toke Høiland-Jørgensen. 12) Add statistics to basic packet scheduler filter, from Cong Wang. 13) Add GRE tunnel support for mlxsw Spectrum-2, from Nir Dotan. 14) Lots of new IP tunneling forwarding tests, also from Nir Dotan. 15) Add 3ad stats to bonding, from Nikolay Aleksandrov. 16) Lots of probing improvements for bpftool, from Quentin Monnet. 17) Various nfp drive #ebpf JIT improvements from Jakub Kicinski. 18) Allow #ebpf programs to access gso_segs from skb shared info, from Eric Dumazet. 19) Add sock_diag support for AF_XDP sockets, from Björn Töpel. 20) Support 22260 iwlwifi devices, from Luca Coelho. 21) Use rbtree for ipv6 defragmentation, from Peter Oskolkov. 22) Add JMP32 instruction class support to #ebpf, from Jiong Wang. 23) Add spinlock support to #ebpf, from Alexei Starovoitov. 24) Support 256-bit keys and TLS 1.3 in ktls, from Dave Watson. 25) Add device infomation API to devlink, from Jakub Kicinski. 26) Add new timestamping socket options which are y2038 safe, from Deepa Dinamani. 27) Add RX checksum offloading for various sh_eth chips, from Sergei Shtylyov. 28) Flow offload infrastructure, from Pablo Neira Ayuso. 29) Numerous cleanups, improvements, and bug fixes to the PHY layer and many drivers from Heiner Kallweit. 30) Lots of changes to try and make packet scheduler classifiers run lockless as much as possible, from Vlad Buslov. 31) Support BCM957504 chip in bnxt_en driver, from Erik Burrows. 32) Add concurrency tests to tc-tests infrastructure, from Vlad Buslov. 33) Add hwmon support to aquantia, from Heiner Kallweit. 34) Allow 64-bit values for SO_MAX_PACING_RATE, from Eric Dumazet. And I would be remiss if I didn't thank the various major networking subsystem maintainers for integrating much of this work before I even saw it. Alexei Starovoitov, Daniel Borkmann, Pablo Neira Ayuso, Johannes Berg, Kalle Valo, and many others. Thank you!" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2207 commits) net/sched: avoid unused-label warning net: ignore sysctl_devconf_inherit_init_net without SYSCTL phy: mdio-mux: fix Kconfig dependencies net: phy: use phy_modify_mmd_changed in genphy_c45_an_config_aneg net: dsa: mv88e6xxx: add call to mv88e6xxx_ports_cmode_init to probe for new DSA framework selftest/net: Remove duplicate header sky2: Disable MSI on Dell Inspiron 1545 and Gateway P-79 net/mlx5e: Update tx reporter status in case channels were successfully opened devlink: Add support for direct reporter health state update devlink: Update reporter state to error even if recover aborted sctp: call iov_iter_revert() after sending ABORT team: Free BPF filter when unregistering netdev ip6mr: Do not call __IP6_INC_STATS() from preemptible context isdn: mISDN: Fix potential NULL pointer dereference of kzalloc net: dsa: mv88e6xxx: support in-band signalling on SGMII ports with external PHYs cxgb4/chtls: Prefix adapter flags with CXGB4 net-sysfs: Switch to bitmap_zalloc() mellanox: Switch to bitmap_zalloc() bpf: add test cases for non-pointer sanitiation logic mlxsw: i2c: Extend initialization by querying resources data ...
Diffstat (limited to 'net/ipv6/reassembly.c')
-rw-r--r--net/ipv6/reassembly.c234
1 files changed, 62 insertions, 172 deletions
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 36a3d8dc61f5..1a832f5e190b 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -69,8 +69,8 @@ static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
static struct inet_frags ip6_frags;
-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
- struct net_device *dev);
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *dev);
static void ip6_frag_expire(struct timer_list *t)
{
@@ -111,21 +111,26 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
struct frag_hdr *fhdr, int nhoff,
u32 *prob_offset)
{
- struct sk_buff *prev, *next;
- struct net_device *dev;
- int offset, end, fragsize;
struct net *net = dev_net(skb_dst(skb)->dev);
+ int offset, end, fragsize;
+ struct sk_buff *prev_tail;
+ struct net_device *dev;
+ int err = -ENOENT;
u8 ecn;
if (fq->q.flags & INET_FRAG_COMPLETE)
goto err;
+ err = -EINVAL;
offset = ntohs(fhdr->frag_off) & ~0x7;
end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
if ((unsigned int)end > IPV6_MAXPLEN) {
*prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb);
+ /* note that if prob_offset is set, the skb is freed elsewhere,
+ * we do not free it here.
+ */
return -1;
}
@@ -170,62 +175,27 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
if (end == offset)
goto discard_fq;
+ err = -ENOMEM;
/* Point into the IP datagram 'data' part. */
if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
goto discard_fq;
- if (pskb_trim_rcsum(skb, end - offset))
+ err = pskb_trim_rcsum(skb, end - offset);
+ if (err)
goto discard_fq;
- /* Find out which fragments are in front and at the back of us
- * in the chain of fragments so far. We must know where to put
- * this fragment, right?
- */
- prev = fq->q.fragments_tail;
- if (!prev || prev->ip_defrag_offset < offset) {
- next = NULL;
- goto found;
- }
- prev = NULL;
- for (next = fq->q.fragments; next != NULL; next = next->next) {
- if (next->ip_defrag_offset >= offset)
- break; /* bingo! */
- prev = next;
- }
-
-found:
- /* RFC5722, Section 4, amended by Errata ID : 3089
- * When reassembling an IPv6 datagram, if
- * one or more its constituent fragments is determined to be an
- * overlapping fragment, the entire datagram (and any constituent
- * fragments) MUST be silently discarded.
- */
-
- /* Check for overlap with preceding fragment. */
- if (prev &&
- (prev->ip_defrag_offset + prev->len) > offset)
- goto discard_fq;
-
- /* Look for overlap with succeeding segment. */
- if (next && next->ip_defrag_offset < end)
- goto discard_fq;
-
- /* Note : skb->ip_defrag_offset and skb->sk share the same location */
+ /* Note : skb->rbnode and skb->dev share the same location. */
dev = skb->dev;
- if (dev)
- fq->iif = dev->ifindex;
/* Makes sure compiler wont do silly aliasing games */
barrier();
- skb->ip_defrag_offset = offset;
- /* Insert this fragment in the chain of fragments. */
- skb->next = next;
- if (!next)
- fq->q.fragments_tail = skb;
- if (prev)
- prev->next = skb;
- else
- fq->q.fragments = skb;
+ prev_tail = fq->q.fragments_tail;
+ err = inet_frag_queue_insert(&fq->q, skb, offset, end);
+ if (err)
+ goto insert_error;
+
+ if (dev)
+ fq->iif = dev->ifindex;
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
@@ -246,44 +216,48 @@ found:
if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) {
- int res;
unsigned long orefdst = skb->_skb_refdst;
skb->_skb_refdst = 0UL;
- res = ip6_frag_reasm(fq, prev, dev);
+ err = ip6_frag_reasm(fq, skb, prev_tail, dev);
skb->_skb_refdst = orefdst;
- return res;
+ return err;
}
skb_dst_drop(skb);
- return -1;
+ return -EINPROGRESS;
+insert_error:
+ if (err == IPFRAG_DUP) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+ err = -EINVAL;
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_REASM_OVERLAPS);
discard_fq:
inet_frag_kill(&fq->q);
-err:
__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_REASMFAILS);
+err:
kfree_skb(skb);
- return -1;
+ return err;
}
/*
* Check if this packet is complete.
- * Returns NULL on failure by any reason, and pointer
- * to current nexthdr field in reassembled frame.
*
* It is called with locked fq, and caller must check that
* queue is eligible for reassembly i.e. it is not COMPLETE,
* the last and the first frames arrived and all the bits are here.
*/
-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
- struct net_device *dev)
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *dev)
{
struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
- struct sk_buff *fp, *head = fq->q.fragments;
- int payload_len, delta;
unsigned int nhoff;
- int sum_truesize;
+ void *reasm_data;
+ int payload_len;
u8 ecn;
inet_frag_kill(&fq->q);
@@ -292,128 +266,47 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
if (unlikely(ecn == 0xff))
goto out_fail;
- /* Make the one we just received the head. */
- if (prev) {
- head = prev->next;
- fp = skb_clone(head, GFP_ATOMIC);
-
- if (!fp)
- goto out_oom;
-
- fp->next = head->next;
- if (!fp->next)
- fq->q.fragments_tail = fp;
- prev->next = fp;
-
- skb_morph(head, fq->q.fragments);
- head->next = fq->q.fragments->next;
-
- consume_skb(fq->q.fragments);
- fq->q.fragments = head;
- }
-
- WARN_ON(head == NULL);
- WARN_ON(head->ip_defrag_offset != 0);
+ reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
+ if (!reasm_data)
+ goto out_oom;
- /* Unfragmented part is taken from the first segment. */
- payload_len = ((head->data - skb_network_header(head)) -
+ payload_len = ((skb->data - skb_network_header(skb)) -
sizeof(struct ipv6hdr) + fq->q.len -
sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN)
goto out_oversize;
- delta = - head->truesize;
-
- /* Head of list must not be cloned. */
- if (skb_unclone(head, GFP_ATOMIC))
- goto out_oom;
-
- delta += head->truesize;
- if (delta)
- add_frag_mem_limit(fq->q.net, delta);
-
- /* If the first fragment is fragmented itself, we split
- * it to two chunks: the first with data and paged part
- * and the second, holding only fragments. */
- if (skb_has_frag_list(head)) {
- struct sk_buff *clone;
- int i, plen = 0;
-
- clone = alloc_skb(0, GFP_ATOMIC);
- if (!clone)
- goto out_oom;
- clone->next = head->next;
- head->next = clone;
- skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
- skb_frag_list_init(head);
- for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
- plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
- clone->len = clone->data_len = head->data_len - plen;
- head->data_len -= clone->len;
- head->len -= clone->len;
- clone->csum = 0;
- clone->ip_summed = head->ip_summed;
- add_frag_mem_limit(fq->q.net, clone->truesize);
- }
-
/* We have to remove fragment header from datagram and to relocate
* header in order to calculate ICV correctly. */
nhoff = fq->nhoffset;
- skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
- memmove(head->head + sizeof(struct frag_hdr), head->head,
- (head->data - head->head) - sizeof(struct frag_hdr));
- if (skb_mac_header_was_set(head))
- head->mac_header += sizeof(struct frag_hdr);
- head->network_header += sizeof(struct frag_hdr);
-
- skb_reset_transport_header(head);
- skb_push(head, head->data - skb_network_header(head));
-
- sum_truesize = head->truesize;
- for (fp = head->next; fp;) {
- bool headstolen;
- int delta;
- struct sk_buff *next = fp->next;
-
- sum_truesize += fp->truesize;
- if (head->ip_summed != fp->ip_summed)
- head->ip_summed = CHECKSUM_NONE;
- else if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_add(head->csum, fp->csum);
-
- if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
- kfree_skb_partial(fp, headstolen);
- } else {
- fp->sk = NULL;
- if (!skb_shinfo(head)->frag_list)
- skb_shinfo(head)->frag_list = fp;
- head->data_len += fp->len;
- head->len += fp->len;
- head->truesize += fp->truesize;
- }
- fp = next;
- }
- sub_frag_mem_limit(fq->q.net, sum_truesize);
+ skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0];
+ memmove(skb->head + sizeof(struct frag_hdr), skb->head,
+ (skb->data - skb->head) - sizeof(struct frag_hdr));
+ if (skb_mac_header_was_set(skb))
+ skb->mac_header += sizeof(struct frag_hdr);
+ skb->network_header += sizeof(struct frag_hdr);
+
+ skb_reset_transport_header(skb);
+
+ inet_frag_reasm_finish(&fq->q, skb, reasm_data);
- skb_mark_not_on_list(head);
- head->dev = dev;
- head->tstamp = fq->q.stamp;
- ipv6_hdr(head)->payload_len = htons(payload_len);
- ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
- IP6CB(head)->nhoff = nhoff;
- IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
- IP6CB(head)->frag_max_size = fq->q.max_size;
+ skb->dev = dev;
+ ipv6_hdr(skb)->payload_len = htons(payload_len);
+ ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
+ IP6CB(skb)->nhoff = nhoff;
+ IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
+ IP6CB(skb)->frag_max_size = fq->q.max_size;
/* Yes, and fold redundant checksum back. 8) */
- skb_postpush_rcsum(head, skb_network_header(head),
- skb_network_header_len(head));
+ skb_postpush_rcsum(skb, skb_network_header(skb),
+ skb_network_header_len(skb));
rcu_read_lock();
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
rcu_read_unlock();
- fq->q.fragments = NULL;
fq->q.rb_fragments = RB_ROOT;
fq->q.fragments_tail = NULL;
+ fq->q.last_run_head = NULL;
return 1;
out_oversize:
@@ -464,10 +357,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return 1;
}
- if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
- fhdr->frag_off & htons(IP6_MF))
- goto fail_hdr;
-
iif = skb->dev ? skb->dev->ifindex : 0;
fq = fq_find(net, fhdr->identification, hdr, iif);
if (fq) {
@@ -485,6 +374,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
if (prob_offset) {
__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
IPSTATS_MIB_INHDRERRORS);
+ /* icmpv6_param_prob() calls kfree_skb(skb) */
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset);
}
return ret;