Diffstat (limited to 'net/mptcp/subflow.c')
 net/mptcp/subflow.c | 184 ++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 146 insertions(+), 38 deletions(-)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index aba260f547da..af28f3b60389 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -62,7 +62,9 @@ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
{
return mptcp_is_fully_established((void *)msk) &&
- READ_ONCE(msk->pm.accept_subflow);
+ ((mptcp_pm_is_userspace(msk) &&
+ mptcp_userspace_pm_active(msk)) ||
+ READ_ONCE(msk->pm.accept_subflow));
}
/* validate received token and create truncated hmac and nonce for SYN-ACK */
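With a userspace path manager in charge, new subflows can now be accepted while it is active even if the in-kernel accept_subflow flag is not set. A minimal userspace sketch of the predicate, using stand-in fields for mptcp_pm_is_userspace(), mptcp_userspace_pm_active() and msk->pm.accept_subflow rather than the kernel API:

#include <stdbool.h>

struct pm_state {
	bool fully_established;
	bool userspace_pm;	/* stand-in for mptcp_pm_is_userspace() */
	bool userspace_active;	/* stand-in for mptcp_userspace_pm_active() */
	bool accept_subflow;	/* stand-in for READ_ONCE(msk->pm.accept_subflow) */
};

static bool can_accept_new_subflow(const struct pm_state *pm)
{
	/* an active userspace PM may accept new subflows even when the
	 * in-kernel accept_subflow flag is not set
	 */
	return pm->fully_established &&
	       ((pm->userspace_pm && pm->userspace_active) ||
		pm->accept_subflow);
}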
@@ -441,6 +443,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->backup = mp_opt.backup;
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
+ subflow->remote_id = mp_opt.join_id;
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
subflow, subflow->thmac, subflow->remote_nonce,
subflow->backup);
@@ -840,7 +843,8 @@ enum mapping_status {
MAPPING_INVALID,
MAPPING_EMPTY,
MAPPING_DATA_FIN,
- MAPPING_DUMMY
+ MAPPING_DUMMY,
+ MAPPING_BAD_CSUM
};
static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
@@ -888,7 +892,7 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
u32 offset, seq, delta;
- u16 csum;
+ __sum16 csum;
int len;
if (!csum_reqd)
@@ -955,11 +959,10 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
subflow->map_data_csum);
if (unlikely(csum)) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
- subflow->send_mp_fail = 1;
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
- return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
+ return MAPPING_BAD_CSUM;
}
+ subflow->valid_csum_seen = 1;
return MAPPING_OK;
}
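With this change a bad checksum no longer triggers MP_FAIL from inside validate_data_csum(); the function just reports MAPPING_BAD_CSUM and leaves the policy to the caller, while valid_csum_seen records that at least one mapping checksummed correctly. The switch from u16 to __sum16 is a sparse type-annotation fix: csum_fold() returns __sum16, and a correct checksum folds to zero. Below is a minimal userspace sketch of that one's-complement fold, with illustrative helpers standing in for csum_partial()/csum_fold():

#include <stdint.h>
#include <stddef.h>

/* fold a 32-bit one's-complement accumulator to 16 bits and invert,
 * as csum_fold() does; a correct checksum folds to zero
 */
static uint16_t fold_csum(uint32_t sum)
{
	sum = (sum & 0xffff) + (sum >> 16);	/* fold the carries once */
	sum = (sum & 0xffff) + (sum >> 16);	/* and any remaining carry */
	return (uint16_t)~sum;
}

/* accumulate 16-bit words, a simplified stand-in for csum_partial() */
static uint32_t sum_bytes(const uint8_t *data, size_t len, uint32_t sum)
{
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += ((uint32_t)data[i] << 8) | data[i + 1];
	if (len & 1)
		sum += (uint32_t)data[len - 1] << 8;
	return sum;
}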
@@ -1006,7 +1009,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
data_len = mpext->data_len;
if (data_len == 0) {
+ pr_debug("infinite mapping received");
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
+ subflow->map_data_len = 0;
return MAPPING_INVALID;
}
@@ -1141,6 +1146,45 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss
}
}
+static bool subflow_can_fallback(struct mptcp_subflow_context *subflow)
+{
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ if (subflow->mp_join)
+ return false;
+ else if (READ_ONCE(msk->csum_enabled))
+ return !subflow->valid_csum_seen;
+ else
+ return !subflow->fully_established;
+}
+
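subflow_can_fallback() centralizes the eligibility rules: join subflows never fall back; with DSS checksums enabled, fallback is allowed only while no valid checksum has been seen; otherwise only before the connection is fully established. A userspace sketch enumerating the same rules, with illustrative field names:

#include <stdbool.h>
#include <stdio.h>

struct sf_state {
	bool mp_join, csum_enabled, valid_csum_seen, fully_established;
};

static bool can_fallback(const struct sf_state *s)
{
	if (s->mp_join)
		return false;			/* joins never fall back */
	if (s->csum_enabled)
		return !s->valid_csum_seen;	/* no valid DSS csum yet */
	return !s->fully_established;		/* still in the handshake */
}

int main(void)
{
	struct sf_state s = { .csum_enabled = true, .valid_csum_seen = true };

	/* prints 0: once a checksum has validated, a later bad one points
	 * at a mangling middlebox, not a fallback-capable peer
	 */
	printf("%d\n", can_fallback(&s));
	return 0;
}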
+static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ unsigned long fail_tout;
+
+ /* graceful failure can happen only on the MPC subflow */
+ if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first)))
+ return;
+
+ /* since the close timeout takes precedence over the fail one,
+ * there is no need to start the latter when the former is already set
+ */
+ if (sock_flag((struct sock *)msk, SOCK_DEAD))
+ return;
+
+ /* we don't need extreme accuracy here; a zero fail_tout is the
+ * special value meaning no fail timeout at all
+ */
+ fail_tout = jiffies + TCP_RTO_MAX;
+ if (!fail_tout)
+ fail_tout = 1;
+ WRITE_ONCE(subflow->fail_tout, fail_tout);
+ tcp_send_ack(ssk);
+
+ mptcp_reset_timeout(msk, subflow->fail_tout);
+}
+
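mptcp_subflow_fail() arms a soft failure timeout on the MPC subflow and acks the peer so the pending MP_FAIL option goes out. Since a fail_tout of zero means "no timeout", the computed deadline is bumped to 1 when the jiffies arithmetic wraps to zero. A userspace sketch of the sentinel pattern, with stand-ins for jiffies and TCP_RTO_MAX:

#include <stdint.h>

/* stand-in for jiffies + TCP_RTO_MAX; a 32-bit add may wrap to 0 */
static uint32_t arm_fail_timeout(uint32_t now, uint32_t rto_max)
{
	uint32_t fail_tout = now + rto_max;

	/* 0 is reserved to mean "no fail timeout", so never return it */
	return fail_tout ? fail_tout : 1;
}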
static bool subflow_check_data_avail(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
@@ -1160,10 +1204,8 @@ static bool subflow_check_data_avail(struct sock *ssk)
status = get_mapping_status(ssk, msk);
trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
- if (unlikely(status == MAPPING_INVALID))
- goto fallback;
-
- if (unlikely(status == MAPPING_DUMMY))
+ if (unlikely(status == MAPPING_INVALID || status == MAPPING_DUMMY ||
+ status == MAPPING_BAD_CSUM))
goto fallback;
if (status != MAPPING_OK)
@@ -1203,35 +1245,42 @@ no_data:
return false;
fallback:
- /* RFC 8684 section 3.7. */
- if (subflow->send_mp_fail) {
- if (mptcp_has_another_subflow(ssk)) {
+ if (!__mptcp_check_fallback(msk)) {
+ /* RFC 8684 section 3.7. */
+ if (status == MAPPING_BAD_CSUM &&
+ (subflow->mp_join || subflow->valid_csum_seen)) {
+ subflow->send_mp_fail = 1;
+
+ if (!READ_ONCE(msk->allow_infinite_fallback)) {
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
+ goto reset;
+ }
+ mptcp_subflow_fail(msk, ssk);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+ return true;
+ }
+
+ if (!subflow_can_fallback(subflow) && subflow->map_data_len) {
+ /* fatal protocol error, close the socket.
+ * subflow_error_report() will introduce the appropriate barriers
+ */
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_RST_EMPTCP;
+
+reset:
+ ssk->sk_err = EBADMSG;
+ tcp_set_state(ssk, TCP_CLOSE);
while ((skb = skb_peek(&ssk->sk_receive_queue)))
sk_eat_skb(ssk, skb);
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ return false;
}
- ssk->sk_err = EBADMSG;
- tcp_set_state(ssk, TCP_CLOSE);
- subflow->reset_transient = 0;
- subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
- tcp_send_active_reset(ssk, GFP_ATOMIC);
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
- return true;
- }
- if (subflow->mp_join || subflow->fully_established) {
- /* fatal protocol error, close the socket.
- * subflow_error_report() will introduce the appropriate barriers
- */
- ssk->sk_err = EBADMSG;
- tcp_set_state(ssk, TCP_CLOSE);
- subflow->reset_transient = 0;
- subflow->reset_reason = MPTCP_RST_EMPTCP;
- tcp_send_active_reset(ssk, GFP_ATOMIC);
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
- return false;
+ mptcp_do_fallback(ssk);
}
- __mptcp_do_fallback(msk);
skb = skb_peek(&ssk->sk_receive_queue);
subflow->map_valid = 1;
subflow->map_seq = READ_ONCE(msk->ack_seq);
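The rewritten fallback path only runs when the connection has not already fallen back (__mptcp_check_fallback()), and it picks between sending MP_FAIL, a transient middlebox reset, a fatal protocol reset, and fallback proper. A userspace sketch of the decision tree, using illustrative names rather than the kernel structures:

enum action {
	ACT_MP_FAIL,		/* send MP_FAIL, keep the subflow alive */
	ACT_RESET_MIDDLEBOX,	/* reset with MPTCP_RST_EMIDDLEBOX */
	ACT_RESET_MPTCP,	/* reset with MPTCP_RST_EMPTCP */
	ACT_FALLBACK		/* fall back to plain TCP semantics */
};

struct map_result {
	int bad_csum;			/* status == MAPPING_BAD_CSUM */
	int mp_join;
	int valid_csum_seen;
	int allow_infinite_fallback;
	int can_fallback;		/* subflow_can_fallback() result */
	int map_data_len;
};

/* mirrors the branch structure above; assumes no fallback happened yet */
static enum action on_invalid_mapping(const struct map_result *r)
{
	if (r->bad_csum && (r->mp_join || r->valid_csum_seen))
		return r->allow_infinite_fallback ? ACT_MP_FAIL
						  : ACT_RESET_MIDDLEBOX;
	if (!r->can_fallback && r->map_data_len)
		return ACT_RESET_MPTCP;
	return ACT_FALLBACK;
}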
@@ -1422,20 +1471,20 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
struct sockaddr_storage addr;
int remote_id = remote->id;
int local_id = loc->id;
+ int err = -ENOTCONN;
struct socket *sf;
struct sock *ssk;
u32 remote_token;
int addrlen;
int ifindex;
u8 flags;
- int err;
if (!mptcp_is_fully_established(sk))
- return -ENOTCONN;
+ goto err_out;
err = mptcp_subflow_create_socket(sk, &sf);
if (err)
- return err;
+ goto err_out;
ssk = sf->sk;
subflow = mptcp_subflow_ctx(ssk);
@@ -1446,7 +1495,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
if (local_id)
subflow_set_local_id(subflow, local_id);
- mptcp_pm_get_flags_and_ifindex_by_id(sock_net(sk), local_id,
+ mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id,
&flags, &ifindex);
subflow->remote_key = msk->remote_key;
subflow->local_key = msk->local_key;
@@ -1483,7 +1532,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
/* discard the subflow socket */
mptcp_sock_graft(ssk, sk->sk_socket);
iput(SOCK_INODE(sf));
- return err;
+ WRITE_ONCE(msk->allow_infinite_fallback, false);
+ return 0;
failed_unlink:
list_del(&subflow->node);
@@ -1492,6 +1542,12 @@ failed_unlink:
failed:
subflow->disposable = 1;
sock_release(sf);
+
+err_out:
+ /* we account subflows before the creation, and these failures will not
+ * be caught by sk_state_change()
+ */
+ mptcp_pm_close_subflow(msk);
return err;
}
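The consolidated err_out label matters because the path manager accounts the subflow before creating it, so every early failure has to drop that reference. A sketch of the pattern under illustrative stand-ins (struct mgr and do_create() are invented; the decrement mirrors mptcp_pm_close_subflow()):

struct mgr {
	int ready;
	int slots;	/* accounted before creation, like the PM subflow count */
};

static int do_create(struct mgr *m) { (void)m; return 0; }

static int create_with_accounting(struct mgr *m)
{
	int err = -1;	/* stand-in for -ENOTCONN */

	if (!m->ready)
		goto err_out;	/* early exit: the slot is still held */

	err = do_create(m);
	if (err)
		goto err_out;

	return 0;

err_out:
	m->slots--;	/* mirrors mptcp_pm_close_subflow() */
	return err;
}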
@@ -1667,6 +1723,58 @@ static void subflow_state_change(struct sock *sk)
}
}
+void mptcp_subflow_queue_clean(struct sock *listener_ssk)
+{
+ struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
+ struct mptcp_sock *msk, *next, *head = NULL;
+ struct request_sock *req;
+
+ /* build a list of all unaccepted mptcp sockets */
+ spin_lock_bh(&queue->rskq_lock);
+ for (req = queue->rskq_accept_head; req; req = req->dl_next) {
+ struct mptcp_subflow_context *subflow;
+ struct sock *ssk = req->sk;
+ struct mptcp_sock *msk;
+
+ if (!sk_is_mptcp(ssk))
+ continue;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ if (!subflow || !subflow->conn)
+ continue;
+
+ /* skip if already in list */
+ msk = mptcp_sk(subflow->conn);
+ if (msk->dl_next || msk == head)
+ continue;
+
+ msk->dl_next = head;
+ head = msk;
+ }
+ spin_unlock_bh(&queue->rskq_lock);
+ if (!head)
+ return;
+
+ /* we can't acquire the msk socket lock under the subflow one,
+ * or it will cause an ABBA deadlock
+ */
+ release_sock(listener_ssk);
+
+ for (msk = head; msk; msk = next) {
+ struct sock *sk = (struct sock *)msk;
+ bool slow;
+
+ slow = lock_sock_fast_nested(sk);
+ next = msk->dl_next;
+ msk->first = NULL;
+ msk->dl_next = NULL;
+ unlock_sock_fast(sk, slow);
+ }
+
+ /* we are still under the listener msk socket lock */
+ lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
+}
+
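mptcp_subflow_queue_clean() avoids the ABBA deadlock by building the msk list under the accept-queue lock, then dropping the listener lock before taking each msk lock, and re-acquiring it afterwards. A userspace pthread sketch of the same lock-ordering discipline, with illustrative lock names:

#include <pthread.h>

static pthread_mutex_t listener_lock = PTHREAD_MUTEX_INITIALIZER;	/* "A" */
static pthread_mutex_t msk_lock = PTHREAD_MUTEX_INITIALIZER;		/* "B" */

/* caller holds listener_lock; other paths take these locks in the
 * opposite order, so taking msk_lock here directly would be ABBA
 */
static void clean_one_msk(void)
{
	pthread_mutex_unlock(&listener_lock);	/* drop A before taking B */

	pthread_mutex_lock(&msk_lock);
	/* ... detach the unaccepted socket from the listener ... */
	pthread_mutex_unlock(&msk_lock);

	pthread_mutex_lock(&listener_lock);	/* restore the caller's lock */
}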
static int subflow_ulp_init(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);