Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit ec3edaa

Browse files
Peter Krystad authored and davem330 committed
mptcp: Add handling of outgoing MP_JOIN requests
Subflow creation may be initiated by the path manager when the primary connection is fully established and a remote address has been received via ADD_ADDR. Create an in-kernel sock and use kernel_connect() to initiate connection.

Passive sockets can't acquire the mptcp socket lock at subflow creation time, so an additional list protected by a new spinlock is used to track the MPJ subflows. Such list is spliced into conn_list tail every time the msk socket lock is acquired, so that it will not interfere with data flow on the original connection.

Data flow and connection failover not addressed by this commit.

Co-developed-by: Florian Westphal <[email protected]>
Signed-off-by: Florian Westphal <[email protected]>
Co-developed-by: Paolo Abeni <[email protected]>
Signed-off-by: Paolo Abeni <[email protected]>
Co-developed-by: Matthieu Baerts <[email protected]>
Signed-off-by: Matthieu Baerts <[email protected]>
Signed-off-by: Peter Krystad <[email protected]>
Signed-off-by: Mat Martineau <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent f296234 commit ec3edaa

File tree

5 files changed

+287
-17
lines changed

5 files changed

+287
-17
lines changed

include/net/mptcp.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ struct mptcp_out_options {
4646
u8 backup;
4747
u32 nonce;
4848
u64 thmac;
49+
u32 token;
50+
u8 hmac[20];
4951
struct mptcp_ext ext_copy;
5052
#endif
5153
};

net/mptcp/options.c

Lines changed: 94 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,16 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
328328
opts->sndr_key = subflow->local_key;
329329
*size = TCPOLEN_MPTCP_MPC_SYN;
330330
return true;
331+
} else if (subflow->request_join) {
332+
pr_debug("remote_token=%u, nonce=%u", subflow->remote_token,
333+
subflow->local_nonce);
334+
opts->suboptions = OPTION_MPTCP_MPJ_SYN;
335+
opts->join_id = subflow->local_id;
336+
opts->token = subflow->remote_token;
337+
opts->nonce = subflow->local_nonce;
338+
opts->backup = subflow->request_bkup;
339+
*size = TCPOLEN_MPTCP_MPJ_SYN;
340+
return true;
331341
}
332342
return false;
333343
}
@@ -337,16 +347,55 @@ void mptcp_rcv_synsent(struct sock *sk)
337347
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
338348
struct tcp_sock *tp = tcp_sk(sk);
339349

340-
pr_debug("subflow=%p", subflow);
341350
if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) {
342351
subflow->mp_capable = 1;
343352
subflow->can_ack = 1;
344353
subflow->remote_key = tp->rx_opt.mptcp.sndr_key;
345-
} else {
354+
pr_debug("subflow=%p, remote_key=%llu", subflow,
355+
subflow->remote_key);
356+
} else if (subflow->request_join && tp->rx_opt.mptcp.mp_join) {
357+
subflow->mp_join = 1;
358+
subflow->thmac = tp->rx_opt.mptcp.thmac;
359+
subflow->remote_nonce = tp->rx_opt.mptcp.nonce;
360+
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
361+
subflow->thmac, subflow->remote_nonce);
362+
} else if (subflow->request_mptcp) {
346363
tcp_sk(sk)->is_mptcp = 0;
347364
}
348365
}
349366

367+
/* MP_JOIN client subflow must wait for 4th ack before sending any data:
368+
* TCP can't schedule delack timer before the subflow is fully established.
369+
* MPTCP uses the delack timer to do 3rd ack retransmissions
370+
*/
371+
static void schedule_3rdack_retransmission(struct sock *sk)
372+
{
373+
struct inet_connection_sock *icsk = inet_csk(sk);
374+
struct tcp_sock *tp = tcp_sk(sk);
375+
unsigned long timeout;
376+
377+
/* reschedule with a timeout above RTT, as we must look only for drop */
378+
if (tp->srtt_us)
379+
timeout = tp->srtt_us << 1;
380+
else
381+
timeout = TCP_TIMEOUT_INIT;
382+
383+
WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
384+
icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
385+
icsk->icsk_ack.timeout = timeout;
386+
sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
387+
}
388+
389+
static void clear_3rdack_retransmission(struct sock *sk)
390+
{
391+
struct inet_connection_sock *icsk = inet_csk(sk);
392+
393+
sk_stop_timer(sk, &icsk->icsk_delack_timer);
394+
icsk->icsk_ack.timeout = 0;
395+
icsk->icsk_ack.ato = 0;
396+
icsk->icsk_ack.pending &= ~(ICSK_ACK_SCHED | ICSK_ACK_TIMER);
397+
}
398+
350399
static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
351400
unsigned int *size,
352401
unsigned int remaining,
@@ -356,17 +405,21 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
356405
struct mptcp_ext *mpext;
357406
unsigned int data_len;
358407

359-
pr_debug("subflow=%p fully established=%d seq=%x:%x remaining=%d",
360-
subflow, subflow->fully_established, subflow->snd_isn,
361-
skb ? TCP_SKB_CB(skb)->seq : 0, remaining);
408+
/* When skb is not available, we better over-estimate the emitted
409+
* options len. A full DSS option (28 bytes) is longer than
410+
* TCPOLEN_MPTCP_MPC_ACK_DATA(22) or TCPOLEN_MPTCP_MPJ_ACK(24), so
411+
* tell the caller to defer the estimate to
412+
* mptcp_established_options_dss(), which will reserve enough space.
413+
*/
414+
if (!skb)
415+
return false;
362416

363-
if (subflow->mp_capable && !subflow->fully_established && skb &&
364-
subflow->snd_isn == TCP_SKB_CB(skb)->seq) {
365-
/* When skb is not available, we better over-estimate the
366-
* emitted options len. A full DSS option is longer than
367-
* TCPOLEN_MPTCP_MPC_ACK_DATA, so let's the caller try to fit
368-
* that.
369-
*/
417+
/* MPC/MPJ needed only on 3rd ack packet */
418+
if (subflow->fully_established ||
419+
subflow->snd_isn != TCP_SKB_CB(skb)->seq)
420+
return false;
421+
422+
if (subflow->mp_capable) {
370423
mpext = mptcp_get_ext(skb);
371424
data_len = mpext ? mpext->data_len : 0;
372425

@@ -394,6 +447,14 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
394447
data_len);
395448

396449
return true;
450+
} else if (subflow->mp_join) {
451+
opts->suboptions = OPTION_MPTCP_MPJ_ACK;
452+
memcpy(opts->hmac, subflow->hmac, MPTCPOPT_HMAC_LEN);
453+
*size = TCPOLEN_MPTCP_MPJ_ACK;
454+
pr_debug("subflow=%p", subflow);
455+
456+
schedule_3rdack_retransmission(sk);
457+
return true;
397458
}
398459
return false;
399460
}
@@ -674,10 +735,12 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
674735
return true;
675736

676737
subflow->pm_notified = 1;
677-
if (subflow->mp_join)
738+
if (subflow->mp_join) {
739+
clear_3rdack_retransmission(sk);
678740
mptcp_pm_subflow_established(msk, subflow);
679-
else
741+
} else {
680742
mptcp_pm_fully_established(msk);
743+
}
681744
return true;
682745
}
683746

@@ -860,6 +923,16 @@ void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
860923
0, opts->rm_id);
861924
}
862925

926+
if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
927+
*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
928+
TCPOLEN_MPTCP_MPJ_SYN,
929+
opts->backup, opts->join_id);
930+
put_unaligned_be32(opts->token, ptr);
931+
ptr += 1;
932+
put_unaligned_be32(opts->nonce, ptr);
933+
ptr += 1;
934+
}
935+
863936
if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
864937
*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
865938
TCPOLEN_MPTCP_MPJ_SYNACK,
@@ -870,6 +943,13 @@ void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
870943
ptr += 1;
871944
}
872945

946+
if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) {
947+
*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
948+
TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
949+
memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
950+
ptr += 5;
951+
}
952+
873953
if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
874954
struct mptcp_ext *mpext = &opts->ext_copy;
875955
u8 len = TCPOLEN_MPTCP_DSS_BASE;

net/mptcp/protocol.c

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,16 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
241241
sk->sk_data_ready(sk);
242242
}
243243

244+
static void __mptcp_flush_join_list(struct mptcp_sock *msk)
245+
{
246+
if (likely(list_empty(&msk->join_list)))
247+
return;
248+
249+
spin_lock_bh(&msk->join_list_lock);
250+
list_splice_tail_init(&msk->join_list, &msk->conn_list);
251+
spin_unlock_bh(&msk->join_list_lock);
252+
}
253+
244254
static bool mptcp_ext_cache_refill(struct mptcp_sock *msk)
245255
{
246256
if (!msk->cached_ext)
@@ -462,6 +472,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
462472
return ret >= 0 ? ret + copied : (copied ? copied : ret);
463473
}
464474

475+
__mptcp_flush_join_list(msk);
465476
ssk = mptcp_subflow_get_send(msk);
466477
while (!sk_stream_memory_free(sk) || !ssk) {
467478
ret = sk_stream_wait_memory(sk, &timeo);
@@ -603,6 +614,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
603614

604615
len = min_t(size_t, len, INT_MAX);
605616
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
617+
__mptcp_flush_join_list(msk);
606618

607619
while (len > (size_t)copied) {
608620
int bytes_read;
@@ -718,6 +730,7 @@ static void mptcp_worker(struct work_struct *work)
718730
struct sock *sk = &msk->sk.icsk_inet.sk;
719731

720732
lock_sock(sk);
733+
__mptcp_flush_join_list(msk);
721734
__mptcp_move_skbs(msk);
722735
release_sock(sk);
723736
sock_put(sk);
@@ -727,7 +740,10 @@ static int __mptcp_init_sock(struct sock *sk)
727740
{
728741
struct mptcp_sock *msk = mptcp_sk(sk);
729742

743+
spin_lock_init(&msk->join_list_lock);
744+
730745
INIT_LIST_HEAD(&msk->conn_list);
746+
INIT_LIST_HEAD(&msk->join_list);
731747
__set_bit(MPTCP_SEND_SPACE, &msk->flags);
732748
INIT_WORK(&msk->work, mptcp_worker);
733749

@@ -800,6 +816,8 @@ static void mptcp_close(struct sock *sk, long timeout)
800816
mptcp_token_destroy(msk->token);
801817
inet_sk_state_store(sk, TCP_CLOSE);
802818

819+
__mptcp_flush_join_list(msk);
820+
803821
list_splice_init(&msk->conn_list, &conn_list);
804822

805823
data_fin_tx_seq = msk->write_seq;
@@ -1107,6 +1125,7 @@ bool mptcp_finish_join(struct sock *sk)
11071125
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
11081126
struct sock *parent = (void *)msk;
11091127
struct socket *parent_sock;
1128+
bool ret;
11101129

11111130
pr_debug("msk=%p, subflow=%p", msk, subflow);
11121131

@@ -1122,7 +1141,15 @@ bool mptcp_finish_join(struct sock *sk)
11221141
if (parent_sock && !sk->sk_socket)
11231142
mptcp_sock_graft(sk, parent_sock);
11241143

1125-
return mptcp_pm_allow_new_subflow(msk);
1144+
ret = mptcp_pm_allow_new_subflow(msk);
1145+
if (ret) {
1146+
/* active connections are already on conn_list */
1147+
spin_lock_bh(&msk->join_list_lock);
1148+
if (!WARN_ON_ONCE(!list_empty(&subflow->node)))
1149+
list_add_tail(&subflow->node, &msk->join_list);
1150+
spin_unlock_bh(&msk->join_list_lock);
1151+
}
1152+
return ret;
11261153
}
11271154

11281155
bool mptcp_sk_is_subflow(const struct sock *sk)
@@ -1311,6 +1338,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
13111338
/* set ssk->sk_socket of accept()ed flows to mptcp socket.
13121339
* This is needed so NOSPACE flag can be set from tcp stack.
13131340
*/
1341+
__mptcp_flush_join_list(msk);
13141342
list_for_each_entry(subflow, &msk->conn_list, node) {
13151343
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
13161344

@@ -1392,6 +1420,7 @@ static int mptcp_shutdown(struct socket *sock, int how)
13921420
sock->state = SS_CONNECTED;
13931421
}
13941422

1423+
__mptcp_flush_join_list(msk);
13951424
mptcp_for_each_subflow(msk, subflow) {
13961425
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
13971426

net/mptcp/protocol.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,10 @@
5959
#define TCPOLEN_MPTCP_PORT_LEN 2
6060
#define TCPOLEN_MPTCP_RM_ADDR_BASE 4
6161

62+
/* MPTCP MP_JOIN flags */
6263
#define MPTCPOPT_BACKUP BIT(0)
6364
#define MPTCPOPT_HMAC_LEN 20
65+
#define MPTCPOPT_THMAC_LEN 8
6466

6567
/* MPTCP MP_CAPABLE flags */
6668
#define MPTCP_VERSION_MASK (0x0F)
@@ -148,8 +150,10 @@ struct mptcp_sock {
148150
u32 token;
149151
unsigned long flags;
150152
bool can_ack;
153+
spinlock_t join_list_lock;
151154
struct work_struct work;
152155
struct list_head conn_list;
156+
struct list_head join_list;
153157
struct skb_ext *cached_ext; /* for the next sendmsg */
154158
struct socket *subflow; /* outgoing connect/listener/!mp_capable */
155159
struct sock *first;
@@ -202,6 +206,8 @@ struct mptcp_subflow_context {
202206
u32 ssn_offset;
203207
u32 map_data_len;
204208
u32 request_mptcp : 1, /* send MP_CAPABLE */
209+
request_join : 1, /* send MP_JOIN */
210+
request_bkup : 1,
205211
mp_capable : 1, /* remote is MPTCP capable */
206212
mp_join : 1, /* remote is JOINing */
207213
fully_established : 1, /* path validated */
@@ -218,6 +224,8 @@ struct mptcp_subflow_context {
218224
u32 remote_nonce;
219225
u64 thmac;
220226
u32 local_nonce;
227+
u32 remote_token;
228+
u8 hmac[MPTCPOPT_HMAC_LEN];
221229
u8 local_id;
222230
u8 remote_id;
223231

@@ -263,6 +271,11 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
263271
int mptcp_is_enabled(struct net *net);
264272
bool mptcp_subflow_data_available(struct sock *sk);
265273
void mptcp_subflow_init(void);
274+
275+
/* called with sk socket lock held */
276+
int __mptcp_subflow_connect(struct sock *sk, int ifindex,
277+
const struct mptcp_addr_info *loc,
278+
const struct mptcp_addr_info *remote);
266279
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
267280

268281
static inline void mptcp_subflow_tcp_fallback(struct sock *sk,

0 commit comments

Comments
 (0)