// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
#include <net/tcp.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/ip6_route.h>
#endif
#include <net/mptcp.h>
#include "protocol.h"

static int subflow_rebuild_header(struct sock *sk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        int err = 0;

        if (subflow->request_mptcp && !subflow->token) {
                pr_debug("subflow=%p", sk);
                err = mptcp_token_new_connect(sk);
        }

        if (err)
                return err;

        return subflow->icsk_af_ops->rebuild_header(sk);
}

static void subflow_req_destructor(struct request_sock *req)
{
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

        pr_debug("subflow_req=%p", subflow_req);

        if (subflow_req->mp_capable)
                mptcp_token_destroy_request(subflow_req->token);
        tcp_request_sock_ops.destructor(req);
}

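/* Initialize an MPTCP request socket: parse the MPTCP options carried by
 * the incoming SYN and, if both the listener and the peer are MPTCP
 * capable, mark the request as MP_CAPABLE and allocate a token for it.
 */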
static void subflow_init_req(struct request_sock *req,
                             const struct sock *sk_listener,
                             struct sk_buff *skb)
{
        struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
        struct tcp_options_received rx_opt;

        pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);

        memset(&rx_opt.mptcp, 0, sizeof(rx_opt.mptcp));
        mptcp_get_options(skb, &rx_opt);

        subflow_req->mp_capable = 0;
        subflow_req->remote_key_valid = 0;

#ifdef CONFIG_TCP_MD5SIG
        /* no MPTCP if MD5SIG is enabled on this socket, otherwise we may
         * run out of TCP option space.
         */
        if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
                return;
#endif

        if (rx_opt.mptcp.mp_capable && listener->request_mptcp) {
                int err;

                err = mptcp_token_new_request(req);
                if (err == 0)
                        subflow_req->mp_capable = 1;

                subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
        }
}

static void subflow_v4_init_req(struct request_sock *req,
                                const struct sock *sk_listener,
                                struct sk_buff *skb)
{
        tcp_rsk(req)->is_mptcp = 1;

        tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);

        subflow_init_req(req, sk_listener, skb);
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static void subflow_v6_init_req(struct request_sock *req,
                                const struct sock *sk_listener,
                                struct sk_buff *skb)
{
        tcp_rsk(req)->is_mptcp = 1;

        tcp_request_sock_ipv6_ops.init_req(req, sk_listener, skb);

        subflow_init_req(req, sk_listener, skb);
}
#endif

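/* Invoked through the sk_rx_dst_set hook; on the first call after the
 * handshake it completes the MPTCP-level connect on the parent socket
 * and records the subflow sequence number offset from the SYN/ACK.
 */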
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

        subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);

        if (subflow->conn && !subflow->conn_finished) {
                pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
                         subflow->remote_key);
                mptcp_finish_connect(sk);
                subflow->conn_finished = 1;

                if (skb) {
                        pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq);
                        subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
                }
        }
}

static struct request_sock_ops subflow_request_sock_ops;
static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;

static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

        pr_debug("subflow=%p", subflow);

        /* Never answer SYNs sent to broadcast or multicast addresses */
        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
                goto drop;

        return tcp_conn_request(&subflow_request_sock_ops,
                                &subflow_request_sock_ipv4_ops,
                                sk, skb);
drop:
        tcp_listendrop(sk);
        return 0;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
static struct inet_connection_sock_af_ops subflow_v6_specific;
static struct inet_connection_sock_af_ops subflow_v6m_specific;

static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

        pr_debug("subflow=%p", subflow);

        if (skb->protocol == htons(ETH_P_IP))
                return subflow_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        return tcp_conn_request(&subflow_request_sock_ops,
                                &subflow_request_sock_ipv6_ops, sk, skb);

drop:
        tcp_listendrop(sk);
        return 0; /* don't send reset */
}
#endif

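/* Build the child socket for an incoming connection. For MP_CAPABLE
 * requests, try to fetch the client key from the ACK completing the
 * handshake; if that fails the child falls back to plain TCP, while a
 * failed token allocation closes the child with a reset.
 */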
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct request_sock *req,
                                          struct dst_entry *dst,
                                          struct request_sock *req_unhash,
                                          bool *own_req)
{
        struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
        struct mptcp_subflow_request_sock *subflow_req;
        struct tcp_options_received opt_rx;
        struct sock *child;

        pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);

        if (tcp_rsk(req)->is_mptcp == 0)
                goto create_child;

        /* if the request is MP_CAPABLE, try to fetch the client key */
        subflow_req = mptcp_subflow_rsk(req);
        if (subflow_req->mp_capable) {
                if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
                        /* here we can receive and accept an in-window,
                         * out-of-order packet, which will not carry the
                         * MP_CAPABLE option even on MPTCP-enabled paths
                         */
                        goto create_child;
                }

                opt_rx.mptcp.mp_capable = 0;
                mptcp_get_options(skb, &opt_rx);
                if (opt_rx.mptcp.mp_capable) {
                        subflow_req->remote_key = opt_rx.mptcp.sndr_key;
                        subflow_req->remote_key_valid = 1;
                } else {
                        subflow_req->mp_capable = 0;
                }
        }

create_child:
        child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
                                                     req_unhash, own_req);

        if (child && *own_req) {
                struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);

                /* ctx is NULL on TCP fallback; that is not fatal for the
                 * MPC handshake
                 */
                if (!ctx)
                        return child;

                if (ctx->mp_capable) {
                        if (mptcp_token_new_accept(ctx->token))
                                goto close_child;
                }
        }

        return child;

close_child:
        pr_debug("closing child socket");
        tcp_send_active_reset(child, GFP_ATOMIC);
        inet_csk_prepare_forced_close(child);
        tcp_done(child);
        return NULL;
}

static struct inet_connection_sock_af_ops subflow_specific;

enum mapping_status {
        MAPPING_OK,
        MAPPING_INVALID,
        MAPPING_EMPTY,
        MAPPING_DATA_FIN
};

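/* Expand a 32-bit data sequence number from the DSS option to 64 bits,
 * assuming the new mapping follows the previous one. Illustrative
 * example: with old_seq = 0x00000001fffffff0, old_data_len = 0x20 and
 * seq = 0x30, old_seq + old_data_len + 1 = 0x0000000200000011, whose
 * upper 32 bits combined with seq give 0x0000000200000030.
 */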
static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
{
        if ((u32)seq == (u32)old_seq)
                return old_seq;

        /* Assume map covers data not mapped yet. */
        return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
}

static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
        WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
                  ssn, subflow->map_subflow_seq, subflow->map_data_len);
}

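/* Return true when the not yet consumed part of the skb lies entirely
 * within the data still covered by the current mapping.
 */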
static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        unsigned int skb_consumed;

        skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
        if (WARN_ON_ONCE(skb_consumed >= skb->len))
                return true;

        return skb->len - skb_consumed <= subflow->map_data_len -
                                          mptcp_subflow_get_map_offset(subflow);
}

static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        u32 ssn = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;

        if (unlikely(before(ssn, subflow->map_subflow_seq))) {
                /* Mapping covers data later in the subflow stream,
                 * currently unsupported.
                 */
                warn_bad_map(subflow, ssn);
                return false;
        }
        if (unlikely(!before(ssn, subflow->map_subflow_seq +
                                  subflow->map_data_len))) {
                /* Mapping covers only data earlier in the subflow stream,
                 * invalid
                 */
                warn_bad_map(subflow, ssn + skb->len);
                return false;
        }
        return true;
}

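/* Inspect the skb at the head of the subflow receive queue and validate
 * its DSS mapping: install a new mapping when none is in use, allow only
 * identical replacements otherwise, and report DATA_FIN, an empty queue
 * or protocol violations to the caller.
 */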
static enum mapping_status get_mapping_status(struct sock *ssk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        struct mptcp_ext *mpext;
        struct sk_buff *skb;
        u16 data_len;
        u64 map_seq;

        skb = skb_peek(&ssk->sk_receive_queue);
        if (!skb)
                return MAPPING_EMPTY;

        mpext = mptcp_get_ext(skb);
        if (!mpext || !mpext->use_map) {
                if (!subflow->map_valid && !skb->len) {
                        /* the TCP stack delivers 0-length FIN packets to the
                         * receive queue; those are the only 0-length packets
                         * ever expected here, and the only ones allowed to
                         * carry no mapping
                         */
                        if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
                                WARN_ONCE(1, "0len seq %d:%d flags %x",
                                          TCP_SKB_CB(skb)->seq,
                                          TCP_SKB_CB(skb)->end_seq,
                                          TCP_SKB_CB(skb)->tcp_flags);
                        sk_eat_skb(ssk, skb);
                        return MAPPING_EMPTY;
                }

                if (!subflow->map_valid)
                        return MAPPING_INVALID;

                goto validate_seq;
        }

        pr_debug("seq=%llu is64=%d ssn=%u data_len=%u data_fin=%d",
                 mpext->data_seq, mpext->dsn64, mpext->subflow_seq,
                 mpext->data_len, mpext->data_fin);

        data_len = mpext->data_len;
        if (data_len == 0) {
                pr_err("Infinite mapping not handled");
                return MAPPING_INVALID;
        }

        if (mpext->data_fin == 1) {
                if (data_len == 1) {
                        pr_debug("DATA_FIN with no payload");
                        if (subflow->map_valid) {
                                /* A DATA_FIN might arrive in a DSS
                                 * option before the previous mapping
                                 * has been fully consumed. Continue
                                 * handling the existing mapping.
                                 */
                                skb_ext_del(skb, SKB_EXT_MPTCP);
                                return MAPPING_OK;
                        } else {
                                return MAPPING_DATA_FIN;
                        }
                }

                /* Adjust for DATA_FIN using 1 byte of sequence space */
                data_len--;
        }

        if (!mpext->dsn64) {
                map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
                                     mpext->data_seq);
                pr_debug("expanded seq=%llu", map_seq);
        } else {
                map_seq = mpext->data_seq;
        }

        if (subflow->map_valid) {
                /* Allow replacing only with an identical map */
                if (subflow->map_seq == map_seq &&
                    subflow->map_subflow_seq == mpext->subflow_seq &&
                    subflow->map_data_len == data_len) {
                        skb_ext_del(skb, SKB_EXT_MPTCP);
                        return MAPPING_OK;
                }

                /* If this skb's data is fully covered by the current mapping,
                 * the new map would need caching, which is not supported
                 */
                if (skb_is_fully_mapped(ssk, skb))
                        return MAPPING_INVALID;

                /* will validate the next map after consuming the current one */
                return MAPPING_OK;
        }

        subflow->map_seq = map_seq;
        subflow->map_subflow_seq = mpext->subflow_seq;
        subflow->map_data_len = data_len;
        subflow->map_valid = 1;
        subflow->mpc_map = mpext->mpc_map;
        pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
                 subflow->map_seq, subflow->map_subflow_seq,
                 subflow->map_data_len);

validate_seq:
        /* we revalidate the existing mapping on each new skb, because we must
         * ensure the current skb is completely covered by it
         */
        if (!validate_mapping(ssk, skb))
                return MAPPING_INVALID;

        skb_ext_del(skb, SKB_EXT_MPTCP);
        return MAPPING_OK;
}

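/* Check whether the subflow receive queue holds in-sequence data at the
 * MPTCP level. Data already acknowledged at the MPTCP level (e.g. from
 * spurious retransmissions) is silently discarded; mapping errors reset
 * the subflow.
 */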
static bool subflow_check_data_avail(struct sock *ssk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        enum mapping_status status;
        struct mptcp_sock *msk;
        struct sk_buff *skb;

        pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
                 subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
        if (subflow->data_avail)
                return true;

        if (!subflow->conn)
                return false;

        msk = mptcp_sk(subflow->conn);
        for (;;) {
                u32 map_remaining;
                size_t delta;
                u64 ack_seq;
                u64 old_ack;

                status = get_mapping_status(ssk);
                pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status);
                if (status == MAPPING_INVALID) {
                        ssk->sk_err = EBADMSG;
                        goto fatal;
                }

                if (status != MAPPING_OK)
                        return false;

                skb = skb_peek(&ssk->sk_receive_queue);
                if (WARN_ON_ONCE(!skb))
                        return false;

                /* if msk lacks the remote key, this subflow must provide an
                 * MP_CAPABLE-based mapping
                 */
                if (unlikely(!READ_ONCE(msk->can_ack))) {
                        if (!subflow->mpc_map) {
                                ssk->sk_err = EBADMSG;
                                goto fatal;
                        }
                        WRITE_ONCE(msk->remote_key, subflow->remote_key);
                        WRITE_ONCE(msk->ack_seq, subflow->map_seq);
                        WRITE_ONCE(msk->can_ack, true);
                }

                old_ack = READ_ONCE(msk->ack_seq);
                ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
                pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
                         ack_seq);
                if (ack_seq == old_ack)
                        break;

                /* Only accept in-sequence mappings. Old values are spurious
                 * retransmissions; we can hit "future" values on an
                 * active/backup subflow switch, and we rely on
                 * retransmissions to get in-sequence data.
                 * Concurrent subflow support will require subflow data
                 * reordering
                 */
                map_remaining = subflow->map_data_len -
                                mptcp_subflow_get_map_offset(subflow);
                if (before64(ack_seq, old_ack))
                        delta = min_t(size_t, old_ack - ack_seq, map_remaining);
                else
                        delta = min_t(size_t, ack_seq - old_ack, map_remaining);

                /* discard mapped data */
                pr_debug("discarding %zu bytes, current map len=%d", delta,
                         map_remaining);
                if (delta) {
                        struct mptcp_read_arg arg = {
                                .msg = NULL,
                        };
                        read_descriptor_t desc = {
                                .count = delta,
                                .arg.data = &arg,
                        };
                        int ret;

                        ret = tcp_read_sock(ssk, &desc, mptcp_read_actor);
                        if (ret < 0) {
                                ssk->sk_err = -ret;
                                goto fatal;
                        }
                        if (ret < delta)
                                return false;
                        if (delta == map_remaining)
                                subflow->map_valid = 0;
                }
        }
        return true;

fatal:
        /* fatal protocol error, close the socket */
        /* This barrier is coupled with smp_rmb() in tcp_poll() */
        smp_wmb();
        ssk->sk_error_report(ssk);
        tcp_set_state(ssk, TCP_CLOSE);
        tcp_send_active_reset(ssk, GFP_ATOMIC);
        return false;
}

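/* Refresh subflow->data_avail: drop a fully consumed mapping and report
 * whether the next in-sequence MPTCP data is ready to be read from this
 * subflow.
 */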
bool mptcp_subflow_data_available(struct sock *sk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct sk_buff *skb;

        /* check if current mapping is still valid */
        if (subflow->map_valid &&
            mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
                subflow->map_valid = 0;
                subflow->data_avail = 0;

                pr_debug("Done with mapping: seq=%u data_len=%u",
                         subflow->map_subflow_seq,
                         subflow->map_data_len);
        }

        if (!subflow_check_data_avail(sk)) {
                subflow->data_avail = 0;
                return false;
        }

        skb = skb_peek(&sk->sk_receive_queue);
        subflow->data_avail = skb &&
                       before(tcp_sk(sk)->copied_seq, TCP_SKB_CB(skb)->end_seq);
        return subflow->data_avail;
}

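/* sk_data_ready replacement installed by the ULP: on TCP fallback defer
 * to the original callback, otherwise wake the MPTCP parent socket only
 * when in-sequence data is available.
 */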
static void subflow_data_ready(struct sock *sk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct sock *parent = subflow->conn;

        if (!parent || !subflow->mp_capable) {
                subflow->tcp_data_ready(sk);

                if (parent)
                        parent->sk_data_ready(parent);
                return;
        }

        if (mptcp_subflow_data_available(sk)) {
                set_bit(MPTCP_DATA_READY, &mptcp_sk(parent)->flags);

                parent->sk_data_ready(parent);
        }
}

static void subflow_write_space(struct sock *sk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct sock *parent = subflow->conn;

        sk_stream_write_space(sk);
        if (parent && sk_stream_is_writeable(sk)) {
                set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
                smp_mb__after_atomic();
                /* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
                sk_stream_write_space(parent);
        }
}

static struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
        if (sk->sk_family == AF_INET6)
                return &subflow_v6_specific;
#endif
        return &subflow_specific;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
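/* Switch an IPv6 subflow between the native IPv6 af_ops and the
 * IPv4-mapped variant; the previously installed af_ops are saved in the
 * subflow context for later use.
 */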
void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_connection_sock_af_ops *target;

        target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);

        pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d",
                 subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);

        if (likely(icsk->icsk_af_ops == target))
                return;

        subflow->icsk_af_ops = icsk->icsk_af_ops;
        icsk->icsk_af_ops = target;
}
#endif

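/* Create a kernel TCP socket acting as an additional subflow for @sk,
 * attach the "mptcp" ULP to it and link it back to the parent MPTCP
 * socket.
 */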
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
{
        struct mptcp_subflow_context *subflow;
        struct net *net = sock_net(sk);
        struct socket *sf;
        int err;

        err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
                               &sf);
        if (err)
                return err;

        lock_sock(sf->sk);

        /* kernel sockets do not acquire a net ref by default, but the TCP
         * timer needs one.
         */
        sf->sk->sk_net_refcnt = 1;
        get_net(net);
#ifdef CONFIG_PROC_FS
        this_cpu_add(*net->core.sock_inuse, 1);
#endif
        err = tcp_set_ulp(sf->sk, "mptcp");
        release_sock(sf->sk);

        if (err)
                return err;

        subflow = mptcp_subflow_ctx(sf->sk);
        pr_debug("subflow=%p", subflow);

        *new_sock = sf;
        sock_hold(sk);
        subflow->conn = sk;

        return 0;
}

static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
                                                        gfp_t priority)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct mptcp_subflow_context *ctx;

        ctx = kzalloc(sizeof(*ctx), priority);
        if (!ctx)
                return NULL;

        rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
        INIT_LIST_HEAD(&ctx->node);

        pr_debug("subflow=%p", ctx);

        ctx->tcp_sock = sk;

        return ctx;
}

static void __subflow_state_change(struct sock *sk)
{
        struct socket_wq *wq;

        rcu_read_lock();
        wq = rcu_dereference(sk->sk_wq);
        if (skwq_has_sleeper(wq))
                wake_up_interruptible_all(&wq->wait);
        rcu_read_unlock();
}

static bool subflow_is_done(const struct sock *sk)
{
        return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
}

static void subflow_state_change(struct sock *sk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct sock *parent = READ_ONCE(subflow->conn);

        __subflow_state_change(sk);

        /* Since recvmsg() does not acquire the subflow socket for ssk
         * selection, a FIN packet carrying a DSS mapping could go unnoticed
         * if we do not trigger the data-available machinery here.
         */
        if (parent && subflow->mp_capable && mptcp_subflow_data_available(sk)) {
                set_bit(MPTCP_DATA_READY, &mptcp_sk(parent)->flags);

                parent->sk_data_ready(parent);
        }

        if (parent && !(parent->sk_shutdown & RCV_SHUTDOWN) &&
            !subflow->rx_eof && subflow_is_done(sk)) {
                subflow->rx_eof = 1;
                parent->sk_shutdown |= RCV_SHUTDOWN;
                __subflow_state_change(parent);
        }
}

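/* ULP init hook: allocate the subflow context and divert the socket
 * callbacks (data ready, write space, state change) and af_ops to the
 * MPTCP-aware variants, keeping the originals in the context.
 */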
static int subflow_ulp_init(struct sock *sk)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct mptcp_subflow_context *ctx;
        struct tcp_sock *tp = tcp_sk(sk);
        int err = 0;

        /* disallow attaching ULP to a socket unless it has been
         * created with sock_create_kern()
         */
        if (!sk->sk_kern_sock) {
                err = -EOPNOTSUPP;
                goto out;
        }

        ctx = subflow_create_ctx(sk, GFP_KERNEL);
        if (!ctx) {
                err = -ENOMEM;
                goto out;
        }

        pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);

        tp->is_mptcp = 1;
        ctx->icsk_af_ops = icsk->icsk_af_ops;
        icsk->icsk_af_ops = subflow_default_af_ops(sk);
        ctx->tcp_data_ready = sk->sk_data_ready;
        ctx->tcp_state_change = sk->sk_state_change;
        ctx->tcp_write_space = sk->sk_write_space;
        sk->sk_data_ready = subflow_data_ready;
        sk->sk_write_space = subflow_write_space;
        sk->sk_state_change = subflow_state_change;
out:
        return err;
}

static void subflow_ulp_release(struct sock *sk)
{
        struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);

        if (!ctx)
                return;

        if (ctx->conn)
                sock_put(ctx->conn);

        kfree_rcu(ctx, rcu);
}

static void subflow_ulp_fallback(struct sock *sk,
                                 struct mptcp_subflow_context *old_ctx)
{
        struct inet_connection_sock *icsk = inet_csk(sk);

        mptcp_subflow_tcp_fallback(sk, old_ctx);
        icsk->icsk_ulp_ops = NULL;
        rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
        tcp_sk(sk)->is_mptcp = 0;
}

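/* ULP clone hook for sockets created from a request socket: fall back to
 * plain TCP when the request was not MP_CAPABLE (or no memory is
 * available), otherwise inherit the MPTCP state gathered during the
 * handshake.
 */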
static void subflow_ulp_clone(const struct request_sock *req,
                              struct sock *newsk,
                              const gfp_t priority)
{
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
        struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
        struct mptcp_subflow_context *new_ctx;

        if (!tcp_rsk(req)->is_mptcp || !subflow_req->mp_capable) {
                subflow_ulp_fallback(newsk, old_ctx);
                return;
        }

        new_ctx = subflow_create_ctx(newsk, priority);
        if (!new_ctx) {
                subflow_ulp_fallback(newsk, old_ctx);
                return;
        }

        /* see the comments in subflow_syn_recv_sock(): the MPTCP connection
         * is fully established only after we receive the remote key
         */
        new_ctx->conn_finished = 1;
        new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
        new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
        new_ctx->tcp_state_change = old_ctx->tcp_state_change;
        new_ctx->tcp_write_space = old_ctx->tcp_write_space;
        new_ctx->mp_capable = 1;
        new_ctx->fourth_ack = subflow_req->remote_key_valid;
        new_ctx->can_ack = subflow_req->remote_key_valid;
        new_ctx->remote_key = subflow_req->remote_key;
        new_ctx->local_key = subflow_req->local_key;
        new_ctx->token = subflow_req->token;
        new_ctx->ssn_offset = subflow_req->ssn_offset;
        new_ctx->idsn = subflow_req->idsn;
}

static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
        .name           = "mptcp",
        .owner          = THIS_MODULE,
        .init           = subflow_ulp_init,
        .release        = subflow_ulp_release,
        .clone          = subflow_ulp_clone,
};

static int subflow_ops_init(struct request_sock_ops *subflow_ops)
{
        subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
        subflow_ops->slab_name = "request_sock_subflow";

        subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
                                              subflow_ops->obj_size, 0,
                                              SLAB_ACCOUNT |
                                              SLAB_TYPESAFE_BY_RCU,
                                              NULL);
        if (!subflow_ops->slab)
                return -ENOMEM;

        subflow_ops->destructor = subflow_req_destructor;

        return 0;
}

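/* Register the subflow request_sock operations and the "mptcp" ULP.
 * The MPTCP-specific af_ops are built as copies of the plain TCP ones
 * with only the connection setup hooks overridden; the v6-mapped variant
 * additionally reuses the IPv4 transmit paths.
 */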
void mptcp_subflow_init(void)
{
        subflow_request_sock_ops = tcp_request_sock_ops;
        if (subflow_ops_init(&subflow_request_sock_ops) != 0)
                panic("MPTCP: failed to init subflow request sock ops\n");

        subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
        subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;

        subflow_specific = ipv4_specific;
        subflow_specific.conn_request = subflow_v4_conn_request;
        subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
        subflow_specific.sk_rx_dst_set = subflow_finish_connect;
        subflow_specific.rebuild_header = subflow_rebuild_header;

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
        subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
        subflow_request_sock_ipv6_ops.init_req = subflow_v6_init_req;

        subflow_v6_specific = ipv6_specific;
        subflow_v6_specific.conn_request = subflow_v6_conn_request;
        subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
        subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;
        subflow_v6_specific.rebuild_header = subflow_rebuild_header;

        subflow_v6m_specific = subflow_v6_specific;
        subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
        subflow_v6m_specific.send_check = ipv4_specific.send_check;
        subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
        subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
        subflow_v6m_specific.net_frag_header_len = 0;
#endif

        if (tcp_register_ulp(&subflow_ulp_ops) != 0)
                panic("MPTCP: failed to register subflows to ULP\n");
}