From 7143dfac692cd25d48a24dbe8323bc17af95b4ec Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 28 Dec 2012 16:07:16 +0800 Subject: ah4/esp4: set transport header correctly for IPsec tunnel mode. IPsec tunnel does not set ECN field to CE in inner header when the ECN field in the outer header is CE, and the ECN field in the inner header is ECT(0) or ECT(1). The cause is ipip_hdr() does not return the correct address of inner header since skb->transport-header is not the inner header after esp_input_done2(), or ah_input(). Signed-off-by: Li RongQing Signed-off-by: Steffen Klassert --- net/ipv4/ah4.c | 11 +++++++++-- net/ipv4/esp4.c | 5 ++++- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index a0d8392491c3..a154d0a08c79 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -269,7 +269,11 @@ static void ah_input_done(struct crypto_async_request *base, int err) skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, ihl); __skb_pull(skb, ah_hlen + ihl); - skb_set_transport_header(skb, -ihl); + + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -ihl); out: kfree(AH_SKB_CB(skb)->tmp); xfrm_input_resume(skb, err); @@ -381,7 +385,10 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, ihl); __skb_pull(skb, ah_hlen + ihl); - skb_set_transport_header(skb, -ihl); + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -ihl); err = nexthdr; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b61e9deb7c7e..fd26ff4f3eac 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -346,7 +346,10 @@ static int esp_input_done2(struct sk_buff *skb, int err) pskb_trim(skb, skb->len - alen - padlen - 2); __skb_pull(skb, hlen); - skb_set_transport_header(skb, -ihl); + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -ihl); err = nexthdr[1]; -- cgit v1.2.3 From a9403f8aeb3e7dba6988d6cbe436e6521894e427 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 8 Jan 2013 15:41:12 +0800 Subject: ah6/esp6: set transport header correctly for IPsec tunnel mode. IPsec tunnel does not set ECN field to CE in inner header when the ECN field in the outer header is CE, and the ECN field in the inner header is ECT(0) or ECT(1). The cause is ipip6_hdr() does not return the correct address of inner header since skb->transport-header is not the inner header after esp6_input_done2(), or ah6_input(). Signed-off-by: Li RongQing Signed-off-by: Steffen Klassert --- net/ipv6/ah6.c | 11 +++++++++-- net/ipv6/esp6.c | 5 ++++- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index ecc35b93314b..384233188ac1 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -472,7 +472,10 @@ static void ah6_input_done(struct crypto_async_request *base, int err) skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, hdr_len); __skb_pull(skb, ah_hlen + hdr_len); - skb_set_transport_header(skb, -hdr_len); + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -hdr_len); out: kfree(AH_SKB_CB(skb)->tmp); xfrm_input_resume(skb, err); @@ -593,9 +596,13 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, hdr_len); - skb->transport_header = skb->network_header; __skb_pull(skb, ah_hlen + hdr_len); + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -hdr_len); + err = nexthdr; out_free: diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 282f3723ee19..40ffd72243a4 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -300,7 +300,10 @@ static int esp_input_done2(struct sk_buff *skb, int err) pskb_trim(skb, skb->len - alen - padlen - 2); __skb_pull(skb, hlen); - skb_set_transport_header(skb, -hdr_len); + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -hdr_len); err = nexthdr[1]; -- cgit v1.2.3 From 7efdba5bd9a2f3e2059beeb45c9fa55eefe1bced Mon Sep 17 00:00:00 2001 From: Romain KUNTZ Date: Wed, 16 Jan 2013 12:47:40 +0000 Subject: ipv6: fix header length calculation in ip6_append_data() Commit 299b0767 (ipv6: Fix IPsec slowpath fragmentation problem) has introduced a error in the header length calculation that provokes corrupted packets when non-fragmentable extensions headers (Destination Option or Routing Header Type 2) are used. rt->rt6i_nfheader_len is the length of the non-fragmentable extension header, and it should be substracted to rt->dst.header_len, and not to exthdrlen, as it was done before commit 299b0767. This patch reverts to the original and correct behavior. It has been successfully tested with and without IPsec on packets that include non-fragmentable extensions headers. Signed-off-by: Romain Kuntz Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5552d13ae92f..0c7c03d50dc0 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1213,10 +1213,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, if (dst_allfrag(rt->dst.path)) cork->flags |= IPCORK_ALLFRAG; cork->length = 0; - exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len; + exthdrlen = (opt ? opt->opt_flen : 0); length += exthdrlen; transhdrlen += exthdrlen; - dst_exthdrlen = rt->dst.header_len; + dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; } else { rt = (struct rt6_info *)cork->dst; fl6 = &inet->cork.fl.u.ip6; -- cgit v1.2.3 From 38d523e2948162776903349c89d65f7b9370dadb Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 16 Jan 2013 20:55:01 +0000 Subject: ipv4: Remove output route check in ipv4_mtu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The output route check was introduced with git commit 261663b0 (ipv4: Don't use the cached pmtu informations for input routes) during times when we cached the pmtu informations on the inetpeer. Now the pmtu informations are back in the routes, so this check is obsolete. It also had some unwanted side effects, as reported by Timo Teras and Lukas Tribus. Signed-off-by: Steffen Klassert Acked-by: Timo Teräs Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 844a9ef60dbd..6e4a89c5e27e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1120,7 +1120,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) if (!mtu || time_after_eq(jiffies, rt->dst.expires)) mtu = dst_metric_raw(dst, RTAX_MTU); - if (mtu && rt_is_output_route(rt)) + if (mtu) return mtu; mtu = dst->dev->mtu; -- cgit v1.2.3 From fa1e492aa3cbafba9f8fc6d05e5b08a3091daf4a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 16 Jan 2013 20:58:10 +0000 Subject: ipv4: Don't update the pmtu on mtu locked routes Routes with locked mtu should not use learned pmtu informations, so do not update the pmtu on these routes. Reported-by: Julian Anastasov Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/route.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6e4a89c5e27e..259cbeee9a8b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -912,6 +912,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) struct dst_entry *dst = &rt->dst; struct fib_result res; + if (dst_metric_locked(dst, RTAX_MTU)) + return; + if (dst->dev->mtu < mtu) return; -- cgit v1.2.3 From 2f94aabd9f6c925d77aecb3ff020f1cc12ed8f86 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Thu, 17 Jan 2013 11:15:08 +0000 Subject: sctp: refactor sctp_outq_teardown to insure proper re-initalization Jamie Parsons reported a problem recently, in which the re-initalization of an association (The duplicate init case), resulted in a loss of receive window space. He tracked down the root cause to sctp_outq_teardown, which discarded all the data on an outq during a re-initalization of the corresponding association, but never reset the outq->outstanding_data field to zero. I wrote, and he tested this fix, which does a proper full re-initalization of the outq, fixing this problem, and hopefully future proofing us from simmilar issues down the road. Signed-off-by: Neil Horman Reported-by: Jamie Parsons Tested-by: Jamie Parsons CC: Jamie Parsons CC: Vlad Yasevich CC: "David S. Miller" CC: netdev@vger.kernel.org Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 379c81dee9d1..9bcdbd02d777 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -224,7 +224,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) /* Free the outqueue structure and any related pending chunks. */ -void sctp_outq_teardown(struct sctp_outq *q) +static void __sctp_outq_teardown(struct sctp_outq *q) { struct sctp_transport *transport; struct list_head *lchunk, *temp; @@ -277,8 +277,6 @@ void sctp_outq_teardown(struct sctp_outq *q) sctp_chunk_free(chunk); } - q->error = 0; - /* Throw away any leftover control chunks. */ list_for_each_entry_safe(chunk, tmp, &q->control_chunk_list, list) { list_del_init(&chunk->list); @@ -286,11 +284,17 @@ void sctp_outq_teardown(struct sctp_outq *q) } } +void sctp_outq_teardown(struct sctp_outq *q) +{ + __sctp_outq_teardown(q); + sctp_outq_init(q->asoc, q); +} + /* Free the outqueue structure and any related pending chunks. */ void sctp_outq_free(struct sctp_outq *q) { /* Throw away leftover chunks. */ - sctp_outq_teardown(q); + __sctp_outq_teardown(q); /* If we were kmalloc()'d, free the memory. */ if (q->malloced) -- cgit v1.2.3 From e2f6725917ed525f4111c33c31ab53397b70f9d2 Mon Sep 17 00:00:00 2001 From: Nickolai Zeldovich Date: Thu, 17 Jan 2013 13:58:28 -0500 Subject: net/xfrm/xfrm_replay: avoid division by zero All of the xfrm_replay->advance functions in xfrm_replay.c check if x->replay_esn->replay_window is zero (and return if so). However, one of them, xfrm_replay_advance_bmp(), divides by that value (in the '%' operator) before doing the check, which can potentially trigger a divide-by-zero exception. Some compilers will also assume that the earlier division means the value cannot be zero later, and thus will eliminate the subsequent zero check as dead code. This patch moves the division to after the check. Signed-off-by: Nickolai Zeldovich Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_replay.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 765f6fe951eb..35754cc8a9e5 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -242,11 +242,13 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) u32 diff; struct xfrm_replay_state_esn *replay_esn = x->replay_esn; u32 seq = ntohl(net_seq); - u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window; + u32 pos; if (!replay_esn->replay_window) return; + pos = (replay_esn->seq - 1) % replay_esn->replay_window; + if (seq > replay_esn->seq) { diff = seq - replay_esn->seq; -- cgit v1.2.3 From 6f809da27c94425e07be4a64d5093e1df95188e9 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 16 Jan 2013 22:09:49 +0000 Subject: ipv6: Add an error handler for icmp6 pmtu and redirect events are now handled in the protocols error handler, so add an error handler for icmp6 to do this. It is needed in the case when we have no socket context. Based on a patch by Duan Jiong. Reported-by: Duan Jiong Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b4a9fd51dae7..fff5bdd8b680 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -81,10 +81,22 @@ static inline struct sock *icmpv6_sk(struct net *net) return net->ipv6.icmp_sk[smp_processor_id()]; } +static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + struct net *net = dev_net(skb->dev); + + if (type == ICMPV6_PKT_TOOBIG) + ip6_update_pmtu(skb, net, info, 0, 0); + else if (type == NDISC_REDIRECT) + ip6_redirect(skb, net, 0, 0); +} + static int icmpv6_rcv(struct sk_buff *skb); static const struct inet6_protocol icmpv6_protocol = { .handler = icmpv6_rcv, + .err_handler = icmpv6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -- cgit v1.2.3 From b74aa930ef49a3c0d8e4c1987f89decac768fb2c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 19 Jan 2013 16:10:37 +0000 Subject: tcp: fix incorrect LOCKDROPPEDICMPS counter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 563d34d057 (tcp: dont drop MTU reduction indications) added an error leading to incorrect accounting of LINUX_MIB_LOCKDROPPEDICMPS If socket is owned by the user, we want to increment this SNMP counter, unless the message is a (ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED) one. Reported-by: Maciej Żenczykowski Signed-off-by: Eric Dumazet Cc: Neal Cardwell Signed-off-by: Maciej Żenczykowski Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 54139fa514e6..70b09ef2463b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -369,11 +369,10 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) * We do take care of PMTU discovery (RFC1191) special case : * we can receive locally generated ICMP messages while socket is held. */ - if (sock_owned_by_user(sk) && - type != ICMP_DEST_UNREACH && - code != ICMP_FRAG_NEEDED) - NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); - + if (sock_owned_by_user(sk)) { + if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)) + NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); + } if (sk->sk_state == TCP_CLOSE) goto out; -- cgit v1.2.3 From 82bda6195615891181115f579a480aa5001ce7e9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 5 Jan 2013 21:31:18 +0000 Subject: net: splice: avoid high order page splitting splice() can handle pages of any order, but network code tries hard to split them in PAGE_SIZE units. Not quite successfully anyway, as __splice_segment() assumed poff < PAGE_SIZE. This is true for the skb->data part, not necessarily for the fragments. This patch removes this logic to give the pages as they are in the skb. Signed-off-by: Eric Dumazet Cc: Willy Tarreau Signed-off-by: David S. Miller --- net/core/skbuff.c | 38 +++++++++----------------------------- 1 file changed, 9 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3ab989b0de42..f5dfdf7727c4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1706,20 +1706,6 @@ static bool spd_fill_page(struct splice_pipe_desc *spd, return false; } -static inline void __segment_seek(struct page **page, unsigned int *poff, - unsigned int *plen, unsigned int off) -{ - unsigned long n; - - *poff += off; - n = *poff / PAGE_SIZE; - if (n) - *page = nth_page(*page, n); - - *poff = *poff % PAGE_SIZE; - *plen -= off; -} - static bool __splice_segment(struct page *page, unsigned int poff, unsigned int plen, unsigned int *off, unsigned int *len, struct sk_buff *skb, @@ -1727,6 +1713,8 @@ static bool __splice_segment(struct page *page, unsigned int poff, struct sock *sk, struct pipe_inode_info *pipe) { + unsigned int flen; + if (!*len) return true; @@ -1737,24 +1725,16 @@ static bool __splice_segment(struct page *page, unsigned int poff, } /* ignore any bits we already processed */ - if (*off) { - __segment_seek(&page, &poff, &plen, *off); - *off = 0; - } - - do { - unsigned int flen = min(*len, plen); + poff += *off; + plen -= *off; + *off = 0; - /* the linear region may spread across several pages */ - flen = min_t(unsigned int, flen, PAGE_SIZE - poff); + flen = min(*len, plen); - if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk)) - return true; - - __segment_seek(&page, &poff, &plen, flen); - *len -= flen; + if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk)) + return true; - } while (*len && plen); + *len -= flen; return false; } -- cgit v1.2.3 From bc9540c637c3d8712ccbf9dcf28621f380ed5e64 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 Jan 2013 14:46:37 +0000 Subject: net: splice: fix __splice_segment() commit 9ca1b22d6d2 (net: splice: avoid high order page splitting) forgot that skb->head could need a copy into several page frags. This could be the case for loopback traffic mostly. Also remove now useless skb argument from linear_to_page() and __splice_segment() prototypes. Signed-off-by: Eric Dumazet Cc: Willy Tarreau Signed-off-by: David S. Miller --- net/core/skbuff.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f5dfdf7727c4..a9a2ae3e2213 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1649,7 +1649,7 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) static struct page *linear_to_page(struct page *page, unsigned int *len, unsigned int *offset, - struct sk_buff *skb, struct sock *sk) + struct sock *sk) { struct page_frag *pfrag = sk_page_frag(sk); @@ -1682,14 +1682,14 @@ static bool spd_can_coalesce(const struct splice_pipe_desc *spd, static bool spd_fill_page(struct splice_pipe_desc *spd, struct pipe_inode_info *pipe, struct page *page, unsigned int *len, unsigned int offset, - struct sk_buff *skb, bool linear, + bool linear, struct sock *sk) { if (unlikely(spd->nr_pages == MAX_SKB_FRAGS)) return true; if (linear) { - page = linear_to_page(page, len, &offset, skb, sk); + page = linear_to_page(page, len, &offset, sk); if (!page) return true; } @@ -1708,13 +1708,11 @@ static bool spd_fill_page(struct splice_pipe_desc *spd, static bool __splice_segment(struct page *page, unsigned int poff, unsigned int plen, unsigned int *off, - unsigned int *len, struct sk_buff *skb, + unsigned int *len, struct splice_pipe_desc *spd, bool linear, struct sock *sk, struct pipe_inode_info *pipe) { - unsigned int flen; - if (!*len) return true; @@ -1729,12 +1727,16 @@ static bool __splice_segment(struct page *page, unsigned int poff, plen -= *off; *off = 0; - flen = min(*len, plen); - - if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk)) - return true; + do { + unsigned int flen = min(*len, plen); - *len -= flen; + if (spd_fill_page(spd, pipe, page, &flen, poff, + linear, sk)) + return true; + poff += flen; + plen -= flen; + *len -= flen; + } while (*len && plen); return false; } @@ -1757,7 +1759,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, if (__splice_segment(virt_to_page(skb->data), (unsigned long) skb->data & (PAGE_SIZE - 1), skb_headlen(skb), - offset, len, skb, spd, + offset, len, spd, skb_head_is_locked(skb), sk, pipe)) return true; @@ -1770,7 +1772,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, if (__splice_segment(skb_frag_page(f), f->page_offset, skb_frag_size(f), - offset, len, skb, spd, false, sk, pipe)) + offset, len, spd, false, sk, pipe)) return true; } -- cgit v1.2.3 From 5b653b2a1c3b5634368fde2df958a1398481e580 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Fri, 18 Jan 2013 16:03:48 +0100 Subject: xfrm: fix freed block size calculation in xfrm_policy_fini() Missing multiplication of block size by sizeof(struct hlist_head) can cause xfrm_hash_free() to be called with wrong second argument so that kfree() is called on a block allocated with vzalloc() or __get_free_pages() or free_pages() is called with wrong order when a namespace with enough policies is removed. Bug introduced by commit a35f6c5d, i.e. versions >= 2.6.29 are affected. Signed-off-by: Michal Kubecek Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 41eabc46f110..07c585756d2a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2656,7 +2656,7 @@ static void xfrm_policy_fini(struct net *net) WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir])); htab = &net->xfrm.policy_bydst[dir]; - sz = (htab->hmask + 1); + sz = (htab->hmask + 1) * sizeof(struct hlist_head); WARN_ON(!hlist_empty(htab->table)); xfrm_hash_free(htab->table, sz); } -- cgit v1.2.3 From 05ab86c55683410593720003442dde629782aaac Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 15 Jan 2013 13:38:53 +0100 Subject: xfrm4: Invalidate all ipv4 routes on IPsec pmtu events On IPsec pmtu events we can't access the transport headers of the original packet, so we can't find the socket that sent the packet. The only chance to notify the socket about the pmtu change is to force a relookup for all routes. This patch implenents this for the IPsec protocols. Signed-off-by: Steffen Klassert --- net/ipv4/ah4.c | 7 +++++-- net/ipv4/esp4.c | 7 +++++-- net/ipv4/ipcomp.c | 7 +++++-- 3 files changed, 15 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index a154d0a08c79..a69b4e4a02b5 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -420,9 +420,12 @@ static void ah4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { + atomic_inc(&flow_cache_genid); + rt_genid_bump(net); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0); - else + } else ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0); xfrm_state_put(x); } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index fd26ff4f3eac..3b4f0cd2e63e 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -502,9 +502,12 @@ static void esp4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { + atomic_inc(&flow_cache_genid); + rt_genid_bump(net); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0); - else + } else ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index d3ab47e19a89..9a46daed2f3c 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -47,9 +47,12 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { + atomic_inc(&flow_cache_genid); + rt_genid_bump(net); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0); - else + } else ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0); xfrm_state_put(x); } -- cgit v1.2.3 From 9cb3a50c5f63ed745702972f66eaee8767659acd Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 21 Jan 2013 01:59:11 +0000 Subject: ipv4: Invalidate the socket cached route on pmtu events if possible The route lookup in ipv4_sk_update_pmtu() might return a route different from the route we cached at the socket. This is because standart routes are per cpu, so each cpu has it's own struct rtable. This means that we do not invalidate the socket cached route if the NET_RX_SOFTIRQ is not served by the same cpu that the sending socket uses. As a result, the cached route reused until we disconnect. With this patch we invalidate the socket cached route if possible. If the socket is owened by the user, we can't update the cached route directly. A followup patch will implement socket release callback functions for datagram sockets to handle this case. Reported-by: Yurij M. Plotnikov Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/route.c | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 259cbeee9a8b..132737a7c83a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -965,7 +965,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, } EXPORT_SYMBOL_GPL(ipv4_update_pmtu); -void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) +static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) { const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; @@ -978,6 +978,46 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) ip_rt_put(rt); } } + +void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) +{ + const struct iphdr *iph = (const struct iphdr *) skb->data; + struct flowi4 fl4; + struct rtable *rt; + struct dst_entry *dst; + + bh_lock_sock(sk); + rt = (struct rtable *) __sk_dst_get(sk); + + if (sock_owned_by_user(sk) || !rt) { + __ipv4_sk_update_pmtu(skb, sk, mtu); + goto out; + } + + __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + + if (!__sk_dst_check(sk, 0)) { + rt = ip_route_output_flow(sock_net(sk), &fl4, sk); + if (IS_ERR(rt)) + goto out; + } + + __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu); + + dst = dst_check(&rt->dst, 0); + if (!dst) { + rt = ip_route_output_flow(sock_net(sk), &fl4, sk); + if (IS_ERR(rt)) + goto out; + + dst = &rt->dst; + } + + __sk_dst_set(sk, dst); + +out: + bh_unlock_sock(sk); +} EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); void ipv4_redirect(struct sk_buff *skb, struct net *net, -- cgit v1.2.3 From 8141ed9fcedb278f4a3a78680591bef1e55f75fb Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 21 Jan 2013 02:00:03 +0000 Subject: ipv4: Add a socket release callback for datagram sockets This implements a socket release callback function to check if the socket cached route got invalid during the time we owned the socket. The function is used from udp, raw and ping sockets. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/datagram.c | 25 +++++++++++++++++++++++++ net/ipv4/ping.c | 1 + net/ipv4/raw.c | 1 + net/ipv4/udp.c | 1 + 4 files changed, 28 insertions(+) (limited to 'net') diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 424fafbc8cb0..b28e863fe0a7 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -85,3 +85,28 @@ out: return err; } EXPORT_SYMBOL(ip4_datagram_connect); + +void ip4_datagram_release_cb(struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct ip_options_rcu *inet_opt; + __be32 daddr = inet->inet_daddr; + struct flowi4 fl4; + struct rtable *rt; + + if (! __sk_dst_get(sk) || __sk_dst_check(sk, 0)) + return; + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; + rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr, + inet->inet_saddr, inet->inet_dport, + inet->inet_sport, sk->sk_protocol, + RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); + if (!IS_ERR(rt)) + __sk_dst_set(sk, &rt->dst); + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(ip4_datagram_release_cb); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 8f3d05424a3e..6f9c07268cf6 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -738,6 +738,7 @@ struct proto ping_prot = { .recvmsg = ping_recvmsg, .bind = ping_bind, .backlog_rcv = ping_queue_rcv_skb, + .release_cb = ip4_datagram_release_cb, .hash = ping_v4_hash, .unhash = ping_v4_unhash, .get_port = ping_v4_get_port, diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 73d1e4df4bf6..6f08991409c3 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -894,6 +894,7 @@ struct proto raw_prot = { .recvmsg = raw_recvmsg, .bind = raw_bind, .backlog_rcv = raw_rcv_skb, + .release_cb = ip4_datagram_release_cb, .hash = raw_hash_sk, .unhash = raw_unhash_sk, .obj_size = sizeof(struct raw_sock), diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 79c8dbe59b54..1f4d405eafba 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1952,6 +1952,7 @@ struct proto udp_prot = { .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, .backlog_rcv = __udp_queue_rcv_skb, + .release_cb = ip4_datagram_release_cb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .rehash = udp_v4_rehash, -- cgit v1.2.3 From d84295067fc7e95660d84c014aa528f4409c070d Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 21 Jan 2013 21:09:00 +0000 Subject: net: net_cls: fd passed in SCM_RIGHTS datagram not set correctly Commit 6a328d8c6f03501657ad580f6f98bf9a42583ff7 changed the update logic for the socket but it does not update the SCM_RIGHTS update as well. This patch is based on the net_prio fix commit 48a87cc26c13b68f6cce4e9d769fcb17a6b3e4b8 net: netprio: fd passed in SCM_RIGHTS datagram not set correctly A socket fd passed in a SCM_RIGHTS datagram was not getting updated with the new tasks cgrp prioidx. This leaves IO on the socket tagged with the old tasks priority. To fix this add a check in the scm recvmsg path to update the sock cgrp prioidx with the new tasks value. Let's apply the same fix for net_cls. Signed-off-by: Daniel Wagner Reported-by: Li Zefan Cc: "David S. Miller" Cc: "Eric W. Biederman" Cc: Al Viro Cc: John Fastabend Cc: Neil Horman Cc: netdev@vger.kernel.org Cc: cgroups@vger.kernel.org Signed-off-by: David S. Miller --- net/core/scm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/scm.c b/net/core/scm.c index 57fb1ee6649f..905dcc6ad1e3 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -35,6 +35,7 @@ #include #include #include +#include /* @@ -302,8 +303,10 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) } /* Bump the usage count and install the file. */ sock = sock_from_file(fp[i], &err); - if (sock) + if (sock) { sock_update_netprioidx(sock->sk, current); + sock_update_classid(sock->sk, current); + } fd_install(new_fd, get_file(fp[i])); } -- cgit v1.2.3 From b44108dbdbaa07c609bb5755e8dd6c2035236251 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 22 Jan 2013 00:01:28 +0000 Subject: ipv4: Fix route refcount on pmtu discovery git commit 9cb3a50c (ipv4: Invalidate the socket cached route on pmtu events if possible) introduced a refcount problem. We don't get a refcount on the route if we get it from__sk_dst_get(), but we need one if we want to reuse this route because __sk_dst_set() releases the refcount of the old route. This patch adds proper refcount handling for that case. We introduce a 'new' flag to indicate that we are going to use a new route and we release the old route only if we replace it by a new one. Reported-by: Julian Anastasov Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/route.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 132737a7c83a..a0fcc47fee73 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -985,6 +985,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) struct flowi4 fl4; struct rtable *rt; struct dst_entry *dst; + bool new = false; bh_lock_sock(sk); rt = (struct rtable *) __sk_dst_get(sk); @@ -1000,20 +1001,26 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) goto out; + + new = true; } __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu); dst = dst_check(&rt->dst, 0); if (!dst) { + if (new) + dst_release(&rt->dst); + rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) goto out; - dst = &rt->dst; + new = true; } - __sk_dst_set(sk, dst); + if (new) + __sk_dst_set(sk, &rt->dst); out: bh_unlock_sock(sk); -- cgit v1.2.3 From 0d15becee56fdfc2e9a4374c46ea7cf7562a6f32 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Wed, 23 Jan 2013 18:11:53 +0100 Subject: batman-adv: fix skb leak in batadv_dat_snoop_incoming_arp_reply() The callers of batadv_dat_snoop_incoming_arp_reply() assume the skb has been freed when it returns true; fix this by calling kfree_skb before returning as it is done in batadv_dat_snoop_incoming_arp_request(). Signed-off-by: Matthias Schiffer Signed-off-by: Marek Lindner Acked-by: Antonio Quartulli Signed-off-by: Antonio Quartulli --- net/batman-adv/distributed-arp-table.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 8e1d89d2b1c1..ce0d2992381a 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -1012,6 +1012,8 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, */ ret = !batadv_is_my_client(bat_priv, hw_dst); out: + if (ret) + kfree_skb(skb); /* if ret == false -> packet has to be delivered to the interface */ return ret; } -- cgit v1.2.3 From 757dd82ea7008ddaccfecff3397bec3e3203a89e Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Thu, 24 Jan 2013 18:18:26 +0100 Subject: batman-adv: check for more types of invalid IP addresses in DAT There are more types of IP addresses that may appear in ARP packets that we don't want to process. While some of these should never appear in sane ARP packets, a 0.0.0.0 source is used for duplicate address detection and thus seen quite often. Signed-off-by: Matthias Schiffer Acked-by: Antonio Quartulli Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/distributed-arp-table.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index ce0d2992381a..ccb3c6c96ba7 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -777,7 +777,9 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, ip_src = batadv_arp_ip_src(skb, hdr_size); ip_dst = batadv_arp_ip_dst(skb, hdr_size); if (ipv4_is_loopback(ip_src) || ipv4_is_multicast(ip_src) || - ipv4_is_loopback(ip_dst) || ipv4_is_multicast(ip_dst)) + ipv4_is_loopback(ip_dst) || ipv4_is_multicast(ip_dst) || + ipv4_is_zeronet(ip_src) || ipv4_is_lbcast(ip_src) || + ipv4_is_zeronet(ip_dst) || ipv4_is_lbcast(ip_dst)) goto out; type = ntohs(arphdr->ar_op); -- cgit v1.2.3 From b618ad1103c9ea0c4a69b44f42fc3c7b4e231e22 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Thu, 24 Jan 2013 18:18:27 +0100 Subject: batman-adv: filter ARP packets with invalid MAC addresses in DAT We never want multicast MAC addresses in the Distributed ARP Table, so it's best to completely ignore ARP packets containing them where we expect unicast addresses. Signed-off-by: Matthias Schiffer Acked-by: Antonio Quartulli Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/distributed-arp-table.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'net') diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index ccb3c6c96ba7..183f97a86bb2 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -738,6 +738,7 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, struct arphdr *arphdr; struct ethhdr *ethhdr; __be32 ip_src, ip_dst; + uint8_t *hw_src, *hw_dst; uint16_t type = 0; /* pull the ethernet header */ @@ -782,6 +783,18 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, ipv4_is_zeronet(ip_dst) || ipv4_is_lbcast(ip_dst)) goto out; + hw_src = batadv_arp_hw_src(skb, hdr_size); + if (is_zero_ether_addr(hw_src) || is_multicast_ether_addr(hw_src)) + goto out; + + /* we don't care about the destination MAC address in ARP requests */ + if (arphdr->ar_op != htons(ARPOP_REQUEST)) { + hw_dst = batadv_arp_hw_dst(skb, hdr_size); + if (is_zero_ether_addr(hw_dst) || + is_multicast_ether_addr(hw_dst)) + goto out; + } + type = ntohs(arphdr->ar_op); out: return type; -- cgit v1.2.3 From 75356a8143426a1301bc66c4fb920dcb1bd5e934 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 23 Jan 2013 20:38:34 +0000 Subject: ip6mr: limit IPv6 MRT_TABLE identifiers We did this for IPv4 in b49d3c1e1c "net: ipmr: limit MRT_TABLE identifiers" but we need to do it for IPv6 as well. On IPv6 the name is "pim6reg" instead of "pimreg" so there is one less digit allowed. The strcpy() is in ip6mr_reg_vif(). Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 26dcdec9e3a5..8fd154e5f079 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1710,6 +1710,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns return -EINVAL; if (get_user(v, (u32 __user *)optval)) return -EFAULT; + /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */ + if (v != RT_TABLE_DEFAULT && v >= 100000000) + return -EINVAL; if (sk == mrt->mroute6_sk) return -EBUSY; -- cgit v1.2.3 From 9839ff0dead906e85e4d17490aeff87a5859a157 Mon Sep 17 00:00:00 2001 From: Xufeng Zhang Date: Wed, 23 Jan 2013 16:44:34 +0000 Subject: sctp: set association state to established in dupcook_a handler While sctp handling a duplicate COOKIE-ECHO and the action is 'Association restart', sctp_sf_do_dupcook_a() will processing the unexpected COOKIE-ECHO for peer restart, but it does not set the association state to SCTP_STATE_ESTABLISHED, so the association could stuck in SCTP_STATE_SHUTDOWN_PENDING state forever. This violates the sctp specification: RFC 4960 5.2.4. Handle a COOKIE ECHO when a TCB Exists Action A) In this case, the peer may have restarted. ..... After this, the endpoint shall enter the ESTABLISHED state. To resolve this problem, adding a SCTP_CMD_NEW_STATE cmd to the command list before SCTP_CMD_REPLY cmd, this will set the restart association to SCTP_STATE_ESTABLISHED state properly and also avoid I-bit being set in the DATA chunk header when COOKIE_ACK is bundled with DATA chunks. Signed-off-by: Xufeng Zhang Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_statefuns.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 618ec7e216ca..5131fcfedb03 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1779,8 +1779,10 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net, /* Update the content of current association. */ sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, + SCTP_STATE(SCTP_STATE_ESTABLISHED)); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); return SCTP_DISPOSITION_CONSUME; nomem_ev: -- cgit v1.2.3 From 5465740ace36f179de5bb0ccb5d46ddeb945e309 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Wed, 23 Jan 2013 11:45:42 +0000 Subject: IP_GRE: Fix kernel panic in IP_GRE with GRE csum. Due to IP_GRE GSO support, GRE can recieve non linear skb which results in panic in case of GRE_CSUM. Following patch fixes it by using correct csum API. Bug introduced in commit 6b78f16e4bdde3936b (gre: add GSO support) Signed-off-by: Pravin B Shelar Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 303012adf9e6..e81b1caf2ea2 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -963,8 +963,12 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev ptr--; } if (tunnel->parms.o_flags&GRE_CSUM) { + int offset = skb_transport_offset(skb); + *ptr = 0; - *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr)); + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset, + skb->len - offset, + 0)); } } -- cgit v1.2.3 From 5f19d1219a5b96c7b00ad5c3f889030093a8d1a3 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 24 Jan 2013 11:02:47 -0500 Subject: SCTP: Free the per-net sysctl table on net exit. v2 Per-net sysctl table needs to be explicitly freed at net exit. Otherwise we see the following with kmemleak: unreferenced object 0xffff880402d08000 (size 2048): comm "chrome_sandbox", pid 18437, jiffies 4310887172 (age 9097.630s) hex dump (first 32 bytes): b2 68 89 81 ff ff ff ff 20 04 04 f8 01 88 ff ff .h...... ....... 04 00 00 00 a4 01 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x21/0x3e [] slab_post_alloc_hook+0x28/0x2a [] __kmalloc_track_caller+0xf1/0x104 [] kmemdup+0x1b/0x30 [] sctp_sysctl_net_register+0x1f/0x72 [] sctp_net_init+0x100/0x39f [] ops_init+0xc6/0xf5 [] setup_net+0x4c/0xd0 [] copy_net_ns+0x6d/0xd6 [] create_new_namespaces+0xd7/0x147 [] copy_namespaces+0x63/0x99 [] copy_process+0xa65/0x1233 [] do_fork+0x10b/0x271 [] sys_clone+0x23/0x25 [] stub_clone+0x13/0x20 [] 0xffffffffffffffff I fixed the spelling of sysctl_header so the code actually compiles. -- EWB. Reported-by: Martin Mokrejs Signed-off-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/sctp/sysctl.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 043889ac86c0..bf3c6e8fc401 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -366,7 +366,11 @@ int sctp_sysctl_net_register(struct net *net) void sctp_sysctl_net_unregister(struct net *net) { + struct ctl_table *table; + + table = net->sctp.sysctl_header->ctl_table_arg; unregister_net_sysctl_table(net->sctp.sysctl_header); + kfree(table); } static struct ctl_table_header * sctp_sysctl_header; -- cgit v1.2.3