summaryrefslogtreecommitdiff
path: root/net/mptcp
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp')
-rw-r--r--net/mptcp/pm_netlink.c14
-rw-r--r--net/mptcp/protocol.c93
-rw-r--r--net/mptcp/protocol.h4
-rw-r--r--net/mptcp/sockopt.c238
-rw-r--r--net/mptcp/subflow.c7
5 files changed, 343 insertions, 13 deletions
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 7b96be1e9f14..4ff8d55cbe82 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1702,22 +1702,28 @@ next:
static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
{
+ struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry;
struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
- struct mptcp_pm_addr_entry addr, *entry;
struct net *net = sock_net(skb->sk);
- u8 bkup = 0;
+ u8 bkup = 0, lookup_by_id = 0;
int ret;
- ret = mptcp_pm_parse_addr(attr, info, true, &addr);
+ ret = mptcp_pm_parse_addr(attr, info, false, &addr);
if (ret < 0)
return ret;
if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
bkup = 1;
+ if (addr.addr.family == AF_UNSPEC) {
+ lookup_by_id = 1;
+ if (!addr.addr.id)
+ return -EOPNOTSUPP;
+ }
list_for_each_entry(entry, &pernet->local_addr_list, list) {
- if (addresses_equal(&entry->addr, &addr.addr, true)) {
+ if ((!lookup_by_id && addresses_equal(&entry->addr, &addr.addr, true)) ||
+ (lookup_by_id && entry->addr.id == addr.addr.id)) {
mptcp_nl_addr_backup(net, &entry->addr, bkup);
if (bkup)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index c82a76d2d0bf..f124cca125d2 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -22,6 +22,7 @@
#endif
#include <net/mptcp.h>
#include <net/xfrm.h>
+#include <asm/ioctls.h>
#include "protocol.h"
#include "mib.h"
@@ -46,9 +47,10 @@ struct mptcp_skb_cb {
enum {
MPTCP_CMSG_TS = BIT(0),
+ MPTCP_CMSG_INQ = BIT(1),
};
-static struct percpu_counter mptcp_sockets_allocated;
+static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_smp;
static void __mptcp_destroy_sock(struct sock *sk);
static void __mptcp_check_send_data_fin(struct sock *sk);
@@ -738,6 +740,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq,
delta);
MPTCP_SKB_CB(skb)->offset += delta;
+ MPTCP_SKB_CB(skb)->map_seq += delta;
__skb_queue_tail(&sk->sk_receive_queue, skb);
}
msk->ack_seq = end_seq;
@@ -1499,7 +1502,7 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
msk->snd_nxt = snd_nxt_new;
}
-static void mptcp_check_and_set_pending(struct sock *sk)
+void mptcp_check_and_set_pending(struct sock *sk)
{
if (mptcp_send_head(sk) &&
!test_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
@@ -1784,8 +1787,10 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
copied += count;
if (count < data_len) {
- if (!(flags & MSG_PEEK))
+ if (!(flags & MSG_PEEK)) {
MPTCP_SKB_CB(skb)->offset += count;
+ MPTCP_SKB_CB(skb)->map_seq += count;
+ }
break;
}
@@ -1965,6 +1970,27 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
return !skb_queue_empty(&msk->receive_queue);
}
+static unsigned int mptcp_inq_hint(const struct sock *sk)
+{
+ const struct mptcp_sock *msk = mptcp_sk(sk);
+ const struct sk_buff *skb;
+
+ skb = skb_peek(&msk->receive_queue);
+ if (skb) {
+ u64 hint_val = msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq;
+
+ if (hint_val >= INT_MAX)
+ return INT_MAX;
+
+ return (unsigned int)hint_val;
+ }
+
+ if (sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN))
+ return 1;
+
+ return 0;
+}
+
static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len)
{
@@ -1989,6 +2015,9 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
len = min_t(size_t, len, INT_MAX);
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
+ if (unlikely(msk->recvmsg_inq))
+ cmsg_flags = MPTCP_CMSG_INQ;
+
while (copied < len) {
int bytes_read;
@@ -2062,6 +2091,12 @@ out_err:
if (cmsg_flags && copied >= 0) {
if (cmsg_flags & MPTCP_CMSG_TS)
tcp_recv_timestamp(msg, sk, &tss);
+
+ if (cmsg_flags & MPTCP_CMSG_INQ) {
+ unsigned int inq = mptcp_inq_hint(sk);
+
+ put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
+ }
}
pr_debug("msk=%p rx queue empty=%d:%d copied=%d",
@@ -3177,6 +3212,57 @@ static int mptcp_forward_alloc_get(const struct sock *sk)
return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc;
}
+static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
+{
+ const struct sock *sk = (void *)msk;
+ u64 delta;
+
+ if (sk->sk_state == TCP_LISTEN)
+ return -EINVAL;
+
+ if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
+ return 0;
+
+ delta = msk->write_seq - v;
+ if (delta > INT_MAX)
+ delta = INT_MAX;
+
+ return (int)delta;
+}
+
+static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ bool slow;
+ int answ;
+
+ switch (cmd) {
+ case SIOCINQ:
+ if (sk->sk_state == TCP_LISTEN)
+ return -EINVAL;
+
+ lock_sock(sk);
+ __mptcp_move_skbs(msk);
+ answ = mptcp_inq_hint(sk);
+ release_sock(sk);
+ break;
+ case SIOCOUTQ:
+ slow = lock_sock_fast(sk);
+ answ = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una));
+ unlock_sock_fast(sk, slow);
+ break;
+ case SIOCOUTQNSD:
+ slow = lock_sock_fast(sk);
+ answ = mptcp_ioctl_outq(msk, msk->snd_nxt);
+ unlock_sock_fast(sk, slow);
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+
+ return put_user(answ, (int __user *)arg);
+}
+
static struct proto mptcp_prot = {
.name = "MPTCP",
.owner = THIS_MODULE,
@@ -3189,6 +3275,7 @@ static struct proto mptcp_prot = {
.shutdown = mptcp_shutdown,
.destroy = mptcp_destroy,
.sendmsg = mptcp_sendmsg,
+ .ioctl = mptcp_ioctl,
.recvmsg = mptcp_recvmsg,
.release_cb = mptcp_release_cb,
.hash = mptcp_hash,
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d87cc040352e..e1469155fb15 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -249,6 +249,9 @@ struct mptcp_sock {
bool rcv_fastclose;
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
bool csum_enabled;
+ u8 recvmsg_inq:1,
+ cork:1,
+ nodelay:1;
spinlock_t join_list_lock;
struct work_struct work;
struct sk_buff *ooo_last_skb;
@@ -554,6 +557,7 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
+void mptcp_check_and_set_pending(struct sock *sk);
void __mptcp_push_pending(struct sock *sk, unsigned int flags);
bool mptcp_subflow_data_available(struct sock *sk);
void __init mptcp_subflow_init(void);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 0f1e661c2032..3c3db22fd36a 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -390,6 +390,8 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
switch (optname) {
case IPV6_V6ONLY:
+ case IPV6_TRANSPARENT:
+ case IPV6_FREEBIND:
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
if (!ssock) {
@@ -398,8 +400,24 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
}
ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
- if (ret == 0)
+ if (ret != 0) {
+ release_sock(sk);
+ return ret;
+ }
+
+ sockopt_seq_inc(msk);
+
+ switch (optname) {
+ case IPV6_V6ONLY:
sk->sk_ipv6only = ssock->sk->sk_ipv6only;
+ break;
+ case IPV6_TRANSPARENT:
+ inet_sk(sk)->transparent = inet_sk(ssock->sk)->transparent;
+ break;
+ case IPV6_FREEBIND:
+ inet_sk(sk)->freebind = inet_sk(ssock->sk)->freebind;
+ break;
+ }
release_sock(sk);
break;
@@ -539,6 +557,7 @@ static bool mptcp_supported_sockopt(int level, int optname)
case TCP_TIMESTAMP:
case TCP_NOTSENT_LOWAT:
case TCP_TX_DELAY:
+ case TCP_INQ:
return true;
}
@@ -550,7 +569,6 @@ static bool mptcp_supported_sockopt(int level, int optname)
/* TCP_FASTOPEN_KEY, TCP_FASTOPEN TCP_FASTOPEN_CONNECT, TCP_FASTOPEN_NO_COOKIE,
* are not supported fastopen is currently unsupported
*/
- /* TCP_INQ is currently unsupported, needs some recvmsg work */
}
return false;
}
@@ -598,14 +616,171 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t
return ret;
}
+static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optval,
+ unsigned int optlen)
+{
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ int val;
+
+ if (optlen < sizeof(int))
+ return -EINVAL;
+
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
+ return -EFAULT;
+
+ lock_sock(sk);
+ sockopt_seq_inc(msk);
+ msk->cork = !!val;
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ lock_sock(ssk);
+ __tcp_sock_set_cork(ssk, !!val);
+ release_sock(ssk);
+ }
+ if (!val)
+ mptcp_check_and_set_pending(sk);
+ release_sock(sk);
+
+ return 0;
+}
+
+static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t optval,
+ unsigned int optlen)
+{
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ int val;
+
+ if (optlen < sizeof(int))
+ return -EINVAL;
+
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
+ return -EFAULT;
+
+ lock_sock(sk);
+ sockopt_seq_inc(msk);
+ msk->nodelay = !!val;
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ lock_sock(ssk);
+ __tcp_sock_set_nodelay(ssk, !!val);
+ release_sock(ssk);
+ }
+ if (val)
+ mptcp_check_and_set_pending(sk);
+ release_sock(sk);
+
+ return 0;
+}
+
+static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname,
+ sockptr_t optval, unsigned int optlen)
+{
+ struct sock *sk = (struct sock *)msk;
+ struct inet_sock *issk;
+ struct socket *ssock;
+ int err;
+
+ err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);
+ if (err != 0)
+ return err;
+
+ lock_sock(sk);
+
+ ssock = __mptcp_nmpc_socket(msk);
+ if (!ssock) {
+ release_sock(sk);
+ return -EINVAL;
+ }
+
+ issk = inet_sk(ssock->sk);
+
+ switch (optname) {
+ case IP_FREEBIND:
+ issk->freebind = inet_sk(sk)->freebind;
+ break;
+ case IP_TRANSPARENT:
+ issk->transparent = inet_sk(sk)->transparent;
+ break;
+ default:
+ release_sock(sk);
+ WARN_ON_ONCE(1);
+ return -EOPNOTSUPP;
+ }
+
+ sockopt_seq_inc(msk);
+ release_sock(sk);
+ return 0;
+}
+
+static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname,
+ sockptr_t optval, unsigned int optlen)
+{
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ int err, val;
+
+ err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);
+
+ if (err != 0)
+ return err;
+
+ lock_sock(sk);
+ sockopt_seq_inc(msk);
+ val = inet_sk(sk)->tos;
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ __ip_sock_set_tos(ssk, val);
+ }
+ release_sock(sk);
+
+ return err;
+}
+
+static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
+ sockptr_t optval, unsigned int optlen)
+{
+ switch (optname) {
+ case IP_FREEBIND:
+ case IP_TRANSPARENT:
+ return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen);
+ case IP_TOS:
+ return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen);
+ }
+
+ return -EOPNOTSUPP;
+}
+
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
sockptr_t optval, unsigned int optlen)
{
+ struct sock *sk = (void *)msk;
+ int ret, val;
+
switch (optname) {
+ case TCP_INQ:
+ ret = mptcp_get_int_option(msk, optval, optlen, &val);
+ if (ret)
+ return ret;
+ if (val < 0 || val > 1)
+ return -EINVAL;
+
+ lock_sock(sk);
+ msk->recvmsg_inq = !!val;
+ release_sock(sk);
+ return 0;
case TCP_ULP:
return -EOPNOTSUPP;
case TCP_CONGESTION:
return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen);
+ case TCP_CORK:
+ return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen);
+ case TCP_NODELAY:
+ return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen);
}
return -EOPNOTSUPP;
@@ -637,6 +812,9 @@ int mptcp_setsockopt(struct sock *sk, int level, int optname,
if (ssk)
return tcp_setsockopt(ssk, level, optname, optval, optlen);
+ if (level == SOL_IP)
+ return mptcp_setsockopt_v4(msk, optname, optval, optlen);
+
if (level == SOL_IPV6)
return mptcp_setsockopt_v6(msk, optname, optval, optlen);
@@ -932,6 +1110,35 @@ static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *o
return 0;
}
+static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval,
+ int __user *optlen, int val)
+{
+ int len;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+ if (len < 0)
+ return -EINVAL;
+
+ if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
+ unsigned char ucval = (unsigned char)val;
+
+ len = 1;
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &ucval, 1))
+ return -EFAULT;
+ } else {
+ len = min_t(unsigned int, len, sizeof(int));
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &val, len))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
char __user *optval, int __user *optlen)
{
@@ -942,10 +1149,29 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
case TCP_CC_INFO:
return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
optval, optlen);
+ case TCP_INQ:
+ return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq);
+ case TCP_CORK:
+ return mptcp_put_int_option(msk, optval, optlen, msk->cork);
+ case TCP_NODELAY:
+ return mptcp_put_int_option(msk, optval, optlen, msk->nodelay);
}
return -EOPNOTSUPP;
}
+static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname,
+ char __user *optval, int __user *optlen)
+{
+ struct sock *sk = (void *)msk;
+
+ switch (optname) {
+ case IP_TOS:
+ return mptcp_put_int_option(msk, optval, optlen, inet_sk(sk)->tos);
+ }
+
+ return -EOPNOTSUPP;
+}
+
static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname,
char __user *optval, int __user *optlen)
{
@@ -981,6 +1207,8 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname,
if (ssk)
return tcp_getsockopt(ssk, level, optname, optval, option);
+ if (level == SOL_IP)
+ return mptcp_getsockopt_v4(msk, optname, optval, option);
if (level == SOL_TCP)
return mptcp_getsockopt_sol_tcp(msk, optname, optval, option);
if (level == SOL_MPTCP)
@@ -1003,6 +1231,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
ssk->sk_priority = sk->sk_priority;
ssk->sk_bound_dev_if = sk->sk_bound_dev_if;
ssk->sk_incoming_cpu = sk->sk_incoming_cpu;
+ __ip_sock_set_tos(ssk, inet_sk(sk)->tos);
if (sk->sk_userlocks & tx_rx_locks) {
ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks;
@@ -1028,6 +1257,11 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops)
tcp_set_congestion_control(ssk, msk->ca_name, false, true);
+ __tcp_sock_set_cork(ssk, !!msk->cork);
+ __tcp_sock_set_nodelay(ssk, !!msk->nodelay);
+
+ inet_sk(ssk)->transparent = inet_sk(sk)->transparent;
+ inet_sk(ssk)->freebind = inet_sk(sk)->freebind;
}
static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 6172f380dfb7..b8dd3441f7d0 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1425,6 +1425,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
if (addr.ss_family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
+ mptcp_sockopt_sync(msk, ssk);
+
ssk->sk_bound_dev_if = ifindex;
err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
if (err)
@@ -1441,7 +1443,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
mptcp_add_pending_subflow(msk, subflow);
- mptcp_sockopt_sync(msk, ssk);
err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
if (err && err != -EINPROGRESS)
goto failed_unlink;
@@ -1534,9 +1535,7 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
*/
sf->sk->sk_net_refcnt = 1;
get_net(net);
-#ifdef CONFIG_PROC_FS
- this_cpu_add(*net->core.sock_inuse, 1);
-#endif
+ sock_inuse_add(net, 1);
err = tcp_set_ulp(sf->sk, "mptcp");
release_sock(sf->sk);