From 496611d7b5eaf59c03440c8f2def1d9988ad2459 Mon Sep 17 00:00:00 2001
From: Craig Gallek <kraig@google.com>
Date: Wed, 10 Feb 2016 11:50:36 -0500
Subject: inet: create IPv6-equivalent inet_hash function

In order to support fast lookups for TCP sockets with SO_REUSEPORT,
the function that adds sockets to the listening hash set needs
to be able to check receive address equality.  Since this equality
check is different for IPv4 and IPv6, we will need two different
socket hashing functions.

This patch adds inet6_hash identical to the existing inet_hash function
and updates the appropriate references.  A following patch will
differentiate the two by passing different comparison functions to
__inet_hash.

Additionally, in order to use the IPv6 address equality function from
inet6_hashtables (which is compiled as a built-in object when IPv6 is
enabled) it also needs to be in a built-in object file as well.  This
moves ipv6_rcv_saddr_equal into inet_hashtables to accomplish this.

Signed-off-by: Craig Gallek <kraig@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6/tcp_ipv6.c')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 006396e31cb0..d72bcfb326d8 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1865,7 +1865,7 @@ struct proto tcpv6_prot = {
 	.sendpage		= tcp_sendpage,
 	.backlog_rcv		= tcp_v6_do_rcv,
 	.release_cb		= tcp_release_cb,
-	.hash			= inet_hash,
+	.hash			= inet6_hash,
 	.unhash			= inet_unhash,
 	.get_port		= inet_csk_get_port,
 	.enter_memory_pressure	= tcp_enter_memory_pressure,
-- 
cgit v1.2.3


From a583636a83ea383fd07517e5a7a2eedbc5d90fb1 Mon Sep 17 00:00:00 2001
From: Craig Gallek <kraig@google.com>
Date: Wed, 10 Feb 2016 11:50:38 -0500
Subject: inet: refactor inet[6]_lookup functions to take skb

This is a preliminary step to allow fast socket lookup of SO_REUSEPORT
groups.  Doing so with a BPF filter will require access to the
skb in question.  This change plumbs the skb (and offset to payload
data) through the call stack to the listening socket lookup
implementations where it will be used in a following patch.

Signed-off-by: Craig Gallek <kraig@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net/ipv6/tcp_ipv6.c')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d72bcfb326d8..9977b6f19f2a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -866,7 +866,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
 		 * no RST generated if md5 hash doesn't match.
 		 */
 		sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
-					   &tcp_hashinfo, &ipv6h->saddr,
+					   &tcp_hashinfo, NULL, 0,
+					   &ipv6h->saddr,
 					   th->source, &ipv6h->daddr,
 					   ntohs(th->source), tcp_v6_iif(skb));
 		if (!sk1)
@@ -1375,8 +1376,8 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 	hdr = ipv6_hdr(skb);
 
 lookup:
-	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest,
-				inet6_iif(skb));
+	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
+				th->source, th->dest, inet6_iif(skb));
 	if (!sk)
 		goto no_tcp_socket;
 
@@ -1500,6 +1501,7 @@ do_time_wait:
 		struct sock *sk2;
 
 		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
+					    skb, __tcp_hdrlen(th),
 					    &ipv6_hdr(skb)->saddr, th->source,
 					    &ipv6_hdr(skb)->daddr,
 					    ntohs(th->dest), tcp_v6_iif(skb));
-- 
cgit v1.2.3


From a44d6eacdaf56f74fad699af7f4925a5f5ac0e7f Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 14 Mar 2016 10:52:15 -0700
Subject: tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In

Per RFC4898, they count segments sent/received
containing a positive length data segment (that includes
retransmission segments carrying data).  Unlike
tcpi_segs_out/in, tcpi_data_segs_out/in excludes segments
carrying no data (e.g. pure ack).

The patch also updates the segs_in in tcp_fastopen_add_skb()
so that segs_in >= data_segs_in property is kept.

Together with retransmission data, tcpi_data_segs_out
gives a better signal on the rxmit rate.

v6: Rebase on the latest net-next

v5: Eric pointed out that checking skb->len is still needed in
tcp_fastopen_add_skb() because skb can carry a FIN without data.
Hence, instead of open coding segs_in and data_segs_in, tcp_segs_in()
helper is used.  Comment is added to the fastopen case to explain why
segs_in has to be reset and tcp_segs_in() has to be called before
__skb_pull().

v4: Add comment to the changes in tcp_fastopen_add_skb()
and also add remark on this case in the commit message.

v3: Add const modifier to the skb parameter in tcp_segs_in()

v2: Rework based on recent fix by Eric:
commit a9d99ce28ed3 ("tcp: fix tcpi_segs_in after connection establishment")

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Cc: Chris Rapier <rapier@psc.edu>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Marcelo Ricardo Leitner <mleitner@redhat.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6/tcp_ipv6.c')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 33f2820181f9..9c16565b70cc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1443,7 +1443,7 @@ process:
 	sk_incoming_cpu_update(sk);
 
 	bh_lock_sock_nested(sk);
-	tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+	tcp_segs_in(tcp_sk(sk), skb);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
 		if (!tcp_prequeue(sk, skb))
-- 
cgit v1.2.3