1 // SPDX-License-Identifier: GPL-2.0-or-later
4 * Linux INET6 implementation
7 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp.c
11 * linux/net/ipv4/tcp_input.c
12 * linux/net/ipv4/tcp_output.c
15 * Hideaki YOSHIFUJI : sin6_scope_id support
16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
18 * a single port at the same time.
19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
69 #include <trace/events/tcp.h>
71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 struct request_sock *req);
75 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 static const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 const struct in6_addr *addr)
90 /* Helper returning the inet6 address from a given tcp socket.
91 * It can be used in TCP stack instead of inet6_sk(sk).
92 * This avoids a dereference and allow compiler optimizations.
93 * It is a specialized version of inet6_sk_generic().
95 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99 return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
102 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 struct dst_entry *dst = skb_dst(skb);
106 if (dst && dst_hold_safe(dst)) {
107 const struct rt6_info *rt = (const struct rt6_info *)dst;
110 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
111 tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
115 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
118 ipv6_hdr(skb)->saddr.s6_addr32,
120 tcp_hdr(skb)->source);
123 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
126 ipv6_hdr(skb)->saddr.s6_addr32);
129 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
132 /* This check is replicated from tcp_v6_connect() and intended to
133 * prevent BPF program called below from accessing bytes that are out
134 * of the bound specified by user in addr_len.
136 if (addr_len < SIN6_LEN_RFC2133)
139 sock_owned_by_me(sk);
141 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
144 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
147 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
148 struct inet_sock *inet = inet_sk(sk);
149 struct inet_connection_sock *icsk = inet_csk(sk);
150 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
151 struct tcp_sock *tp = tcp_sk(sk);
152 struct in6_addr *saddr = NULL, *final_p, final;
153 struct ipv6_txoptions *opt;
155 struct dst_entry *dst;
158 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
160 if (addr_len < SIN6_LEN_RFC2133)
163 if (usin->sin6_family != AF_INET6)
164 return -EAFNOSUPPORT;
166 memset(&fl6, 0, sizeof(fl6));
169 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
170 IP6_ECN_flow_init(fl6.flowlabel);
171 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
172 struct ip6_flowlabel *flowlabel;
173 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
174 if (IS_ERR(flowlabel))
176 fl6_sock_release(flowlabel);
181 * connect() to INADDR_ANY means loopback (BSD'ism).
184 if (ipv6_addr_any(&usin->sin6_addr)) {
185 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
186 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
189 usin->sin6_addr = in6addr_loopback;
192 addr_type = ipv6_addr_type(&usin->sin6_addr);
194 if (addr_type & IPV6_ADDR_MULTICAST)
197 if (addr_type&IPV6_ADDR_LINKLOCAL) {
198 if (addr_len >= sizeof(struct sockaddr_in6) &&
199 usin->sin6_scope_id) {
200 /* If interface is set while binding, indices
203 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
206 sk->sk_bound_dev_if = usin->sin6_scope_id;
209 /* Connect to link-local address requires an interface */
210 if (!sk->sk_bound_dev_if)
214 if (tp->rx_opt.ts_recent_stamp &&
215 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
216 tp->rx_opt.ts_recent = 0;
217 tp->rx_opt.ts_recent_stamp = 0;
221 sk->sk_v6_daddr = usin->sin6_addr;
222 np->flow_label = fl6.flowlabel;
228 if (addr_type & IPV6_ADDR_MAPPED) {
229 u32 exthdrlen = icsk->icsk_ext_hdr_len;
230 struct sockaddr_in sin;
232 if (__ipv6_only_sock(sk))
235 sin.sin_family = AF_INET;
236 sin.sin_port = usin->sin6_port;
237 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239 icsk->icsk_af_ops = &ipv6_mapped;
240 sk->sk_backlog_rcv = tcp_v4_do_rcv;
241 #ifdef CONFIG_TCP_MD5SIG
242 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
245 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
248 icsk->icsk_ext_hdr_len = exthdrlen;
249 icsk->icsk_af_ops = &ipv6_specific;
250 sk->sk_backlog_rcv = tcp_v6_do_rcv;
251 #ifdef CONFIG_TCP_MD5SIG
252 tp->af_specific = &tcp_sock_ipv6_specific;
256 np->saddr = sk->sk_v6_rcv_saddr;
261 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
262 saddr = &sk->sk_v6_rcv_saddr;
264 fl6.flowi6_proto = IPPROTO_TCP;
265 fl6.daddr = sk->sk_v6_daddr;
266 fl6.saddr = saddr ? *saddr : np->saddr;
267 fl6.flowi6_oif = sk->sk_bound_dev_if;
268 fl6.flowi6_mark = sk->sk_mark;
269 fl6.fl6_dport = usin->sin6_port;
270 fl6.fl6_sport = inet->inet_sport;
271 fl6.flowi6_uid = sk->sk_uid;
273 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
274 final_p = fl6_update_dst(&fl6, opt, &final);
276 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
278 dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
286 sk->sk_v6_rcv_saddr = *saddr;
289 /* set the source address */
291 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
293 sk->sk_gso_type = SKB_GSO_TCPV6;
294 ip6_dst_store(sk, dst, NULL, NULL);
296 icsk->icsk_ext_hdr_len = 0;
298 icsk->icsk_ext_hdr_len = opt->opt_flen +
301 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
303 inet->inet_dport = usin->sin6_port;
305 tcp_set_state(sk, TCP_SYN_SENT);
306 err = inet6_hash_connect(tcp_death_row, sk);
312 if (likely(!tp->repair)) {
314 tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32,
315 sk->sk_v6_daddr.s6_addr32,
318 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
320 sk->sk_v6_daddr.s6_addr32);
323 if (tcp_fastopen_defer_connect(sk, &err))
328 err = tcp_connect(sk);
335 tcp_set_state(sk, TCP_CLOSE);
337 inet->inet_dport = 0;
338 sk->sk_route_caps = 0;
342 static void tcp_v6_mtu_reduced(struct sock *sk)
344 struct dst_entry *dst;
346 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
349 dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
353 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
354 tcp_sync_mss(sk, dst_mtu(dst));
355 tcp_simple_retransmit(sk);
359 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
360 u8 type, u8 code, int offset, __be32 info)
362 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
363 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
364 struct net *net = dev_net(skb->dev);
365 struct request_sock *fastopen;
366 struct ipv6_pinfo *np;
373 sk = __inet6_lookup_established(net, &tcp_hashinfo,
374 &hdr->daddr, th->dest,
375 &hdr->saddr, ntohs(th->source),
376 skb->dev->ifindex, inet6_sdif(skb));
379 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
384 if (sk->sk_state == TCP_TIME_WAIT) {
385 inet_twsk_put(inet_twsk(sk));
388 seq = ntohl(th->seq);
389 fatal = icmpv6_err_convert(type, code, &err);
390 if (sk->sk_state == TCP_NEW_SYN_RECV) {
391 tcp_req_err(sk, seq, fatal);
396 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
397 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
399 if (sk->sk_state == TCP_CLOSE)
402 if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
403 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
408 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
409 fastopen = tp->fastopen_rsk;
410 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
411 if (sk->sk_state != TCP_LISTEN &&
412 !between(seq, snd_una, tp->snd_nxt)) {
413 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
417 np = tcp_inet6_sk(sk);
419 if (type == NDISC_REDIRECT) {
420 if (!sock_owned_by_user(sk)) {
421 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
424 dst->ops->redirect(dst, sk, skb);
429 if (type == ICMPV6_PKT_TOOBIG) {
430 /* We are not interested in TCP_LISTEN and open_requests
431 * (SYN-ACKs send out by Linux are always <576bytes so
432 * they should go through unfragmented).
434 if (sk->sk_state == TCP_LISTEN)
437 if (!ip6_sk_accept_pmtu(sk))
440 tp->mtu_info = ntohl(info);
441 if (!sock_owned_by_user(sk))
442 tcp_v6_mtu_reduced(sk);
443 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
450 /* Might be for an request_sock */
451 switch (sk->sk_state) {
454 /* Only in fast or simultaneous open. If a fast open socket is
455 * is already accepted it is treated as a connected one below.
457 if (fastopen && !fastopen->sk)
460 if (!sock_owned_by_user(sk)) {
462 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
466 sk->sk_err_soft = err;
470 if (!sock_owned_by_user(sk) && np->recverr) {
472 sk->sk_error_report(sk);
474 sk->sk_err_soft = err;
483 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
485 struct request_sock *req,
486 struct tcp_fastopen_cookie *foc,
487 enum tcp_synack_type synack_type)
489 struct inet_request_sock *ireq = inet_rsk(req);
490 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
491 struct ipv6_txoptions *opt;
492 struct flowi6 *fl6 = &fl->u.ip6;
496 /* First, grab a route. */
497 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
498 IPPROTO_TCP)) == NULL)
501 skb = tcp_make_synack(sk, dst, req, foc, synack_type);
504 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
505 &ireq->ir_v6_rmt_addr);
507 fl6->daddr = ireq->ir_v6_rmt_addr;
508 if (np->repflow && ireq->pktopts)
509 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
512 opt = ireq->ipv6_opt;
514 opt = rcu_dereference(np->opt);
515 err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass,
518 err = net_xmit_eval(err);
526 static void tcp_v6_reqsk_destructor(struct request_sock *req)
528 kfree(inet_rsk(req)->ipv6_opt);
529 kfree_skb(inet_rsk(req)->pktopts);
532 #ifdef CONFIG_TCP_MD5SIG
533 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
534 const struct in6_addr *addr)
536 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
539 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
540 const struct sock *addr_sk)
542 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
545 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
546 char __user *optval, int optlen)
548 struct tcp_md5sig cmd;
549 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
552 if (optlen < sizeof(cmd))
555 if (copy_from_user(&cmd, optval, sizeof(cmd)))
558 if (sin6->sin6_family != AF_INET6)
561 if (optname == TCP_MD5SIG_EXT &&
562 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
563 prefixlen = cmd.tcpm_prefixlen;
564 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
568 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
571 if (!cmd.tcpm_keylen) {
572 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
573 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
575 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
576 AF_INET6, prefixlen);
579 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
582 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
583 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
584 AF_INET, prefixlen, cmd.tcpm_key,
585 cmd.tcpm_keylen, GFP_KERNEL);
587 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
588 AF_INET6, prefixlen, cmd.tcpm_key,
589 cmd.tcpm_keylen, GFP_KERNEL);
592 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
593 const struct in6_addr *daddr,
594 const struct in6_addr *saddr,
595 const struct tcphdr *th, int nbytes)
597 struct tcp6_pseudohdr *bp;
598 struct scatterlist sg;
602 /* 1. TCP pseudo-header (RFC2460) */
605 bp->protocol = cpu_to_be32(IPPROTO_TCP);
606 bp->len = cpu_to_be32(nbytes);
608 _th = (struct tcphdr *)(bp + 1);
609 memcpy(_th, th, sizeof(*th));
612 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
613 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
614 sizeof(*bp) + sizeof(*th));
615 return crypto_ahash_update(hp->md5_req);
618 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
619 const struct in6_addr *daddr, struct in6_addr *saddr,
620 const struct tcphdr *th)
622 struct tcp_md5sig_pool *hp;
623 struct ahash_request *req;
625 hp = tcp_get_md5sig_pool();
627 goto clear_hash_noput;
630 if (crypto_ahash_init(req))
632 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
634 if (tcp_md5_hash_key(hp, key))
636 ahash_request_set_crypt(req, NULL, md5_hash, 0);
637 if (crypto_ahash_final(req))
640 tcp_put_md5sig_pool();
644 tcp_put_md5sig_pool();
646 memset(md5_hash, 0, 16);
650 static int tcp_v6_md5_hash_skb(char *md5_hash,
651 const struct tcp_md5sig_key *key,
652 const struct sock *sk,
653 const struct sk_buff *skb)
655 const struct in6_addr *saddr, *daddr;
656 struct tcp_md5sig_pool *hp;
657 struct ahash_request *req;
658 const struct tcphdr *th = tcp_hdr(skb);
660 if (sk) { /* valid for establish/request sockets */
661 saddr = &sk->sk_v6_rcv_saddr;
662 daddr = &sk->sk_v6_daddr;
664 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
665 saddr = &ip6h->saddr;
666 daddr = &ip6h->daddr;
669 hp = tcp_get_md5sig_pool();
671 goto clear_hash_noput;
674 if (crypto_ahash_init(req))
677 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
679 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
681 if (tcp_md5_hash_key(hp, key))
683 ahash_request_set_crypt(req, NULL, md5_hash, 0);
684 if (crypto_ahash_final(req))
687 tcp_put_md5sig_pool();
691 tcp_put_md5sig_pool();
693 memset(md5_hash, 0, 16);
699 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
700 const struct sk_buff *skb)
702 #ifdef CONFIG_TCP_MD5SIG
703 const __u8 *hash_location = NULL;
704 struct tcp_md5sig_key *hash_expected;
705 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
706 const struct tcphdr *th = tcp_hdr(skb);
710 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
711 hash_location = tcp_parse_md5sig_option(th);
713 /* We've parsed the options - do we have a hash? */
714 if (!hash_expected && !hash_location)
717 if (hash_expected && !hash_location) {
718 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
722 if (!hash_expected && hash_location) {
723 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
727 /* check the signature */
728 genhash = tcp_v6_md5_hash_skb(newhash,
732 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
733 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
734 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
735 genhash ? "failed" : "mismatch",
736 &ip6h->saddr, ntohs(th->source),
737 &ip6h->daddr, ntohs(th->dest));
744 static void tcp_v6_init_req(struct request_sock *req,
745 const struct sock *sk_listener,
748 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
749 struct inet_request_sock *ireq = inet_rsk(req);
750 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
752 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
753 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
755 /* So that link locals have meaning */
756 if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
757 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
758 ireq->ir_iif = tcp_v6_iif(skb);
760 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
761 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
762 np->rxopt.bits.rxinfo ||
763 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
764 np->rxopt.bits.rxohlim || np->repflow)) {
765 refcount_inc(&skb->users);
770 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
772 const struct request_sock *req)
774 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
777 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
779 .obj_size = sizeof(struct tcp6_request_sock),
780 .rtx_syn_ack = tcp_rtx_synack,
781 .send_ack = tcp_v6_reqsk_send_ack,
782 .destructor = tcp_v6_reqsk_destructor,
783 .send_reset = tcp_v6_send_reset,
784 .syn_ack_timeout = tcp_syn_ack_timeout,
787 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
788 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
789 sizeof(struct ipv6hdr),
790 #ifdef CONFIG_TCP_MD5SIG
791 .req_md5_lookup = tcp_v6_md5_lookup,
792 .calc_md5_hash = tcp_v6_md5_hash_skb,
794 .init_req = tcp_v6_init_req,
795 #ifdef CONFIG_SYN_COOKIES
796 .cookie_init_seq = cookie_v6_init_sequence,
798 .route_req = tcp_v6_route_req,
799 .init_seq = tcp_v6_init_seq,
800 .init_ts_off = tcp_v6_init_ts_off,
801 .send_synack = tcp_v6_send_synack,
804 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
805 u32 ack, u32 win, u32 tsval, u32 tsecr,
806 int oif, struct tcp_md5sig_key *key, int rst,
807 u8 tclass, __be32 label, u32 priority)
809 const struct tcphdr *th = tcp_hdr(skb);
811 struct sk_buff *buff;
813 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
814 struct sock *ctl_sk = net->ipv6.tcp_sk;
815 unsigned int tot_len = sizeof(struct tcphdr);
816 struct dst_entry *dst;
821 tot_len += TCPOLEN_TSTAMP_ALIGNED;
822 #ifdef CONFIG_TCP_MD5SIG
824 tot_len += TCPOLEN_MD5SIG_ALIGNED;
827 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
832 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
834 t1 = skb_push(buff, tot_len);
835 skb_reset_transport_header(buff);
837 /* Swap the send and the receive. */
838 memset(t1, 0, sizeof(*t1));
839 t1->dest = th->source;
840 t1->source = th->dest;
841 t1->doff = tot_len / 4;
842 t1->seq = htonl(seq);
843 t1->ack_seq = htonl(ack);
844 t1->ack = !rst || !th->ack;
846 t1->window = htons(win);
848 topt = (__be32 *)(t1 + 1);
851 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
852 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
853 *topt++ = htonl(tsval);
854 *topt++ = htonl(tsecr);
857 #ifdef CONFIG_TCP_MD5SIG
859 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
860 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
861 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
862 &ipv6_hdr(skb)->saddr,
863 &ipv6_hdr(skb)->daddr, t1);
867 memset(&fl6, 0, sizeof(fl6));
868 fl6.daddr = ipv6_hdr(skb)->saddr;
869 fl6.saddr = ipv6_hdr(skb)->daddr;
870 fl6.flowlabel = label;
872 buff->ip_summed = CHECKSUM_PARTIAL;
875 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
877 fl6.flowi6_proto = IPPROTO_TCP;
878 if (rt6_need_strict(&fl6.daddr) && !oif)
879 fl6.flowi6_oif = tcp_v6_iif(skb);
881 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
884 fl6.flowi6_oif = oif;
888 if (sk->sk_state == TCP_TIME_WAIT) {
889 mark = inet_twsk(sk)->tw_mark;
890 /* autoflowlabel relies on buff->hash */
891 skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
896 buff->tstamp = tcp_transmit_time(sk);
898 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
899 fl6.fl6_dport = t1->dest;
900 fl6.fl6_sport = t1->source;
901 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
902 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
904 /* Pass a socket to ip6_dst_lookup either it is for RST
905 * Underlying function will use this to retrieve the network
908 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
910 skb_dst_set(buff, dst);
911 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
913 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
915 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
922 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
924 const struct tcphdr *th = tcp_hdr(skb);
925 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
926 u32 seq = 0, ack_seq = 0;
927 struct tcp_md5sig_key *key = NULL;
928 #ifdef CONFIG_TCP_MD5SIG
929 const __u8 *hash_location = NULL;
930 unsigned char newhash[16];
932 struct sock *sk1 = NULL;
942 /* If sk not NULL, it means we did a successful lookup and incoming
943 * route had to be correct. prequeue might have dropped our dst.
945 if (!sk && !ipv6_unicast_destination(skb))
948 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
949 #ifdef CONFIG_TCP_MD5SIG
951 hash_location = tcp_parse_md5sig_option(th);
952 if (sk && sk_fullsock(sk)) {
953 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
954 } else if (hash_location) {
956 * active side is lost. Try to find listening socket through
957 * source port, and then find md5 key through listening socket.
958 * we are not loose security here:
959 * Incoming packet is checked with md5 hash with finding key,
960 * no RST generated if md5 hash doesn't match.
962 sk1 = inet6_lookup_listener(net,
963 &tcp_hashinfo, NULL, 0,
965 th->source, &ipv6h->daddr,
967 tcp_v6_iif_l3_slave(skb),
972 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
976 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
977 if (genhash || memcmp(hash_location, newhash, 16) != 0)
983 seq = ntohl(th->ack_seq);
985 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
989 oif = sk->sk_bound_dev_if;
990 if (sk_fullsock(sk)) {
991 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
993 trace_tcp_send_reset(sk, skb);
995 label = ip6_flowlabel(ipv6h);
996 priority = sk->sk_priority;
998 if (sk->sk_state == TCP_TIME_WAIT) {
999 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1000 priority = inet_twsk(sk)->tw_priority;
1003 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1004 label = ip6_flowlabel(ipv6h);
1007 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
1010 #ifdef CONFIG_TCP_MD5SIG
1016 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1017 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1018 struct tcp_md5sig_key *key, u8 tclass,
1019 __be32 label, u32 priority)
1021 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1022 tclass, label, priority);
1025 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1027 struct inet_timewait_sock *tw = inet_twsk(sk);
1028 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1030 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1031 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1032 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1033 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1034 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1039 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1040 struct request_sock *req)
1042 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1043 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1046 * The window field (SEG.WND) of every outgoing segment, with the
1047 * exception of <SYN> segments, MUST be right-shifted by
1048 * Rcv.Wind.Shift bits:
1050 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1051 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1052 tcp_rsk(req)->rcv_nxt,
1053 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1054 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1055 req->ts_recent, sk->sk_bound_dev_if,
1056 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
1057 0, 0, sk->sk_priority);
1061 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1063 #ifdef CONFIG_SYN_COOKIES
1064 const struct tcphdr *th = tcp_hdr(skb);
1067 sk = cookie_v6_check(sk, skb);
1072 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1073 struct tcphdr *th, u32 *cookie)
1076 #ifdef CONFIG_SYN_COOKIES
1077 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1078 &tcp_request_sock_ipv6_ops, sk, th);
1080 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1081 tcp_synq_overflow(sk);
1087 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1089 if (skb->protocol == htons(ETH_P_IP))
1090 return tcp_v4_conn_request(sk, skb);
1092 if (!ipv6_unicast_destination(skb))
1095 return tcp_conn_request(&tcp6_request_sock_ops,
1096 &tcp_request_sock_ipv6_ops, sk, skb);
1100 return 0; /* don't send reset */
1103 static void tcp_v6_restore_cb(struct sk_buff *skb)
1105 /* We need to move header back to the beginning if xfrm6_policy_check()
1106 * and tcp_v6_fill_cb() are going to be called again.
1107 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1109 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1110 sizeof(struct inet6_skb_parm));
1113 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1114 struct request_sock *req,
1115 struct dst_entry *dst,
1116 struct request_sock *req_unhash,
1119 struct inet_request_sock *ireq;
1120 struct ipv6_pinfo *newnp;
1121 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1122 struct ipv6_txoptions *opt;
1123 struct inet_sock *newinet;
1124 struct tcp_sock *newtp;
1126 #ifdef CONFIG_TCP_MD5SIG
1127 struct tcp_md5sig_key *key;
1131 if (skb->protocol == htons(ETH_P_IP)) {
1136 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1137 req_unhash, own_req);
1142 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1144 newinet = inet_sk(newsk);
1145 newnp = tcp_inet6_sk(newsk);
1146 newtp = tcp_sk(newsk);
1148 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1150 newnp->saddr = newsk->sk_v6_rcv_saddr;
1152 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1153 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1154 #ifdef CONFIG_TCP_MD5SIG
1155 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1158 newnp->ipv6_mc_list = NULL;
1159 newnp->ipv6_ac_list = NULL;
1160 newnp->ipv6_fl_list = NULL;
1161 newnp->pktoptions = NULL;
1163 newnp->mcast_oif = inet_iif(skb);
1164 newnp->mcast_hops = ip_hdr(skb)->ttl;
1165 newnp->rcv_flowinfo = 0;
1167 newnp->flow_label = 0;
1170 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1171 * here, tcp_create_openreq_child now does this for us, see the comment in
1172 * that function for the gory details. -acme
1175 /* It is tricky place. Until this moment IPv4 tcp
1176 worked with IPv6 icsk.icsk_af_ops.
1179 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1184 ireq = inet_rsk(req);
1186 if (sk_acceptq_is_full(sk))
1190 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1195 newsk = tcp_create_openreq_child(sk, req, skb);
1200 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1201 * count here, tcp_create_openreq_child now does this for us, see the
1202 * comment in that function for the gory details. -acme
1205 newsk->sk_gso_type = SKB_GSO_TCPV6;
1206 ip6_dst_store(newsk, dst, NULL, NULL);
1207 inet6_sk_rx_dst_set(newsk, skb);
1209 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1211 newtp = tcp_sk(newsk);
1212 newinet = inet_sk(newsk);
1213 newnp = tcp_inet6_sk(newsk);
1215 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1217 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1218 newnp->saddr = ireq->ir_v6_loc_addr;
1219 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1220 newsk->sk_bound_dev_if = ireq->ir_iif;
1222 /* Now IPv6 options...
1224 First: no IPv4 options.
1226 newinet->inet_opt = NULL;
1227 newnp->ipv6_mc_list = NULL;
1228 newnp->ipv6_ac_list = NULL;
1229 newnp->ipv6_fl_list = NULL;
1232 newnp->rxopt.all = np->rxopt.all;
1234 newnp->pktoptions = NULL;
1236 newnp->mcast_oif = tcp_v6_iif(skb);
1237 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1238 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1240 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1242 /* Clone native IPv6 options from listening socket (if any)
1244 Yes, keeping reference count would be much more clever,
1245 but we make one more one thing there: reattach optmem
1248 opt = ireq->ipv6_opt;
1250 opt = rcu_dereference(np->opt);
1252 opt = ipv6_dup_options(newsk, opt);
1253 RCU_INIT_POINTER(newnp->opt, opt);
1255 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1257 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1260 tcp_ca_openreq_child(newsk, dst);
1262 tcp_sync_mss(newsk, dst_mtu(dst));
1263 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1265 tcp_initialize_rcv_mss(newsk);
1267 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1268 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1270 #ifdef CONFIG_TCP_MD5SIG
1271 /* Copy over the MD5 key from the original socket */
1272 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
1274 /* We're using one, so create a matching key
1275 * on the newsk structure. If we fail to get
1276 * memory, then we end up not copying the key
1279 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1280 AF_INET6, 128, key->key, key->keylen,
1281 sk_gfp_mask(sk, GFP_ATOMIC));
1285 if (__inet_inherit_port(sk, newsk) < 0) {
1286 inet_csk_prepare_forced_close(newsk);
1290 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1292 tcp_move_syn(newtp, req);
1294 /* Clone pktoptions received with SYN, if we own the req */
1295 if (ireq->pktopts) {
1296 newnp->pktoptions = skb_clone(ireq->pktopts,
1297 sk_gfp_mask(sk, GFP_ATOMIC));
1298 consume_skb(ireq->pktopts);
1299 ireq->pktopts = NULL;
1300 if (newnp->pktoptions) {
1301 tcp_v6_restore_cb(newnp->pktoptions);
1302 skb_set_owner_r(newnp->pktoptions, newsk);
1310 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1318 /* The socket must have it's spinlock held when we get
1319 * here, unless it is a TCP_LISTEN socket.
1321 * We have a potential double-lock case here, so even when
1322 * doing backlog processing we use the BH locking scheme.
1323 * This is because we cannot sleep with the original spinlock
1326 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1328 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1329 struct sk_buff *opt_skb = NULL;
1330 struct tcp_sock *tp;
1332 /* Imagine: socket is IPv6. IPv4 packet arrives,
1333 goes to IPv4 receive handler and backlogged.
1334 From backlog it always goes here. Kerboom...
1335 Fortunately, tcp_rcv_established and rcv_established
1336 handle them correctly, but it is not case with
1337 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1340 if (skb->protocol == htons(ETH_P_IP))
1341 return tcp_v4_do_rcv(sk, skb);
1344 * socket locking is here for SMP purposes as backlog rcv
1345 * is currently called with bh processing disabled.
1348 /* Do Stevens' IPV6_PKTOPTIONS.
1350 Yes, guys, it is the only place in our code, where we
1351 may make it not affecting IPv4.
1352 The rest of code is protocol independent,
1353 and I do not like idea to uglify IPv4.
1355 Actually, all the idea behind IPV6_PKTOPTIONS
1356 looks not very well thought. For now we latch
1357 options, received in the last packet, enqueued
1358 by tcp. Feel free to propose better solution.
1362 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1364 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1365 struct dst_entry *dst = sk->sk_rx_dst;
1367 sock_rps_save_rxhash(sk, skb);
1368 sk_mark_napi_id(sk, skb);
1370 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1371 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1373 sk->sk_rx_dst = NULL;
1377 tcp_rcv_established(sk, skb);
1379 goto ipv6_pktoptions;
1383 if (tcp_checksum_complete(skb))
1386 if (sk->sk_state == TCP_LISTEN) {
1387 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1393 if (tcp_child_process(sk, nsk, skb))
1396 __kfree_skb(opt_skb);
1400 sock_rps_save_rxhash(sk, skb);
1402 if (tcp_rcv_state_process(sk, skb))
1405 goto ipv6_pktoptions;
1409 tcp_v6_send_reset(sk, skb);
1412 __kfree_skb(opt_skb);
1416 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1417 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1422 /* Do you ask, what is it?
1424 1. skb was enqueued by tcp.
1425 2. skb is added to tail of read queue, rather than out of order.
1426 3. socket is not in passive state.
1427 4. Finally, it really contains options, which user wants to receive.
1430 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1431 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1432 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1433 np->mcast_oif = tcp_v6_iif(opt_skb);
1434 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1435 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1436 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1437 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1439 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1440 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1441 skb_set_owner_r(opt_skb, sk);
1442 tcp_v6_restore_cb(opt_skb);
1443 opt_skb = xchg(&np->pktoptions, opt_skb);
1445 __kfree_skb(opt_skb);
1446 opt_skb = xchg(&np->pktoptions, NULL);
1454 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1455 const struct tcphdr *th)
1457 /* This is tricky: we move IP6CB at its correct location into
1458 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1459 * _decode_session6() uses IP6CB().
1460 * barrier() makes sure compiler won't play aliasing games.
1462 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1463 sizeof(struct inet6_skb_parm));
1466 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1467 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1468 skb->len - th->doff*4);
1469 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1470 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1471 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1472 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1473 TCP_SKB_CB(skb)->sacked = 0;
1474 TCP_SKB_CB(skb)->has_rxtstamp =
1475 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1478 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1480 struct sk_buff *skb_to_free;
1481 int sdif = inet6_sdif(skb);
1482 const struct tcphdr *th;
1483 const struct ipv6hdr *hdr;
1487 struct net *net = dev_net(skb->dev);
1489 if (skb->pkt_type != PACKET_HOST)
1493 * Count it even if it's bad.
1495 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1497 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1500 th = (const struct tcphdr *)skb->data;
1502 if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1504 if (!pskb_may_pull(skb, th->doff*4))
1507 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1510 th = (const struct tcphdr *)skb->data;
1511 hdr = ipv6_hdr(skb);
1514 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1515 th->source, th->dest, inet6_iif(skb), sdif,
1521 if (sk->sk_state == TCP_TIME_WAIT)
1524 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1525 struct request_sock *req = inet_reqsk(sk);
1526 bool req_stolen = false;
1529 sk = req->rsk_listener;
1530 if (tcp_v6_inbound_md5_hash(sk, skb)) {
1531 sk_drops_add(sk, skb);
1535 if (tcp_checksum_complete(skb)) {
1539 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1540 inet_csk_reqsk_queue_drop_and_put(sk, req);
1546 if (!tcp_filter(sk, skb)) {
1547 th = (const struct tcphdr *)skb->data;
1548 hdr = ipv6_hdr(skb);
1549 tcp_v6_fill_cb(skb, hdr, th);
1550 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1555 /* Another cpu got exclusive access to req
1556 * and created a full blown socket.
1557 * Try to feed this packet to this socket
1558 * instead of discarding it.
1560 tcp_v6_restore_cb(skb);
1564 goto discard_and_relse;
1568 tcp_v6_restore_cb(skb);
1569 } else if (tcp_child_process(sk, nsk, skb)) {
1570 tcp_v6_send_reset(nsk, skb);
1571 goto discard_and_relse;
1577 if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1578 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1579 goto discard_and_relse;
1582 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1583 goto discard_and_relse;
1585 if (tcp_v6_inbound_md5_hash(sk, skb))
1586 goto discard_and_relse;
1588 if (tcp_filter(sk, skb))
1589 goto discard_and_relse;
1590 th = (const struct tcphdr *)skb->data;
1591 hdr = ipv6_hdr(skb);
1592 tcp_v6_fill_cb(skb, hdr, th);
1596 if (sk->sk_state == TCP_LISTEN) {
1597 ret = tcp_v6_do_rcv(sk, skb);
1598 goto put_and_return;
1601 sk_incoming_cpu_update(sk);
1603 bh_lock_sock_nested(sk);
1604 tcp_segs_in(tcp_sk(sk), skb);
1606 if (!sock_owned_by_user(sk)) {
1607 skb_to_free = sk->sk_rx_skb_cache;
1608 sk->sk_rx_skb_cache = NULL;
1609 ret = tcp_v6_do_rcv(sk, skb);
1611 if (tcp_add_backlog(sk, skb))
1612 goto discard_and_relse;
1617 __kfree_skb(skb_to_free);
1621 return ret ? -1 : 0;
1624 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1627 tcp_v6_fill_cb(skb, hdr, th);
1629 if (tcp_checksum_complete(skb)) {
1631 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1633 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1635 tcp_v6_send_reset(NULL, skb);
1643 sk_drops_add(sk, skb);
1649 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1650 inet_twsk_put(inet_twsk(sk));
1654 tcp_v6_fill_cb(skb, hdr, th);
1656 if (tcp_checksum_complete(skb)) {
1657 inet_twsk_put(inet_twsk(sk));
1661 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1666 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1667 skb, __tcp_hdrlen(th),
1668 &ipv6_hdr(skb)->saddr, th->source,
1669 &ipv6_hdr(skb)->daddr,
1671 tcp_v6_iif_l3_slave(skb),
1674 struct inet_timewait_sock *tw = inet_twsk(sk);
1675 inet_twsk_deschedule_put(tw);
1677 tcp_v6_restore_cb(skb);
1685 tcp_v6_timewait_ack(sk, skb);
1688 tcp_v6_send_reset(sk, skb);
1689 inet_twsk_deschedule_put(inet_twsk(sk));
1691 case TCP_TW_SUCCESS:
1697 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1699 const struct ipv6hdr *hdr;
1700 const struct tcphdr *th;
1703 if (skb->pkt_type != PACKET_HOST)
1706 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1709 hdr = ipv6_hdr(skb);
1712 if (th->doff < sizeof(struct tcphdr) / 4)
1715 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1716 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1717 &hdr->saddr, th->source,
1718 &hdr->daddr, ntohs(th->dest),
1719 inet6_iif(skb), inet6_sdif(skb));
1722 skb->destructor = sock_edemux;
1723 if (sk_fullsock(sk)) {
1724 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1727 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1729 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1730 skb_dst_set_noref(skb, dst);
1735 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1736 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1737 .twsk_unique = tcp_twsk_unique,
1738 .twsk_destructor = tcp_twsk_destructor,
1741 static const struct inet_connection_sock_af_ops ipv6_specific = {
1742 .queue_xmit = inet6_csk_xmit,
1743 .send_check = tcp_v6_send_check,
1744 .rebuild_header = inet6_sk_rebuild_header,
1745 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1746 .conn_request = tcp_v6_conn_request,
1747 .syn_recv_sock = tcp_v6_syn_recv_sock,
1748 .net_header_len = sizeof(struct ipv6hdr),
1749 .net_frag_header_len = sizeof(struct frag_hdr),
1750 .setsockopt = ipv6_setsockopt,
1751 .getsockopt = ipv6_getsockopt,
1752 .addr2sockaddr = inet6_csk_addr2sockaddr,
1753 .sockaddr_len = sizeof(struct sockaddr_in6),
1754 #ifdef CONFIG_COMPAT
1755 .compat_setsockopt = compat_ipv6_setsockopt,
1756 .compat_getsockopt = compat_ipv6_getsockopt,
1758 .mtu_reduced = tcp_v6_mtu_reduced,
1761 #ifdef CONFIG_TCP_MD5SIG
1762 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1763 .md5_lookup = tcp_v6_md5_lookup,
1764 .calc_md5_hash = tcp_v6_md5_hash_skb,
1765 .md5_parse = tcp_v6_parse_md5_keys,
1770 * TCP over IPv4 via INET6 API
1772 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1773 .queue_xmit = ip_queue_xmit,
1774 .send_check = tcp_v4_send_check,
1775 .rebuild_header = inet_sk_rebuild_header,
1776 .sk_rx_dst_set = inet_sk_rx_dst_set,
1777 .conn_request = tcp_v6_conn_request,
1778 .syn_recv_sock = tcp_v6_syn_recv_sock,
1779 .net_header_len = sizeof(struct iphdr),
1780 .setsockopt = ipv6_setsockopt,
1781 .getsockopt = ipv6_getsockopt,
1782 .addr2sockaddr = inet6_csk_addr2sockaddr,
1783 .sockaddr_len = sizeof(struct sockaddr_in6),
1784 #ifdef CONFIG_COMPAT
1785 .compat_setsockopt = compat_ipv6_setsockopt,
1786 .compat_getsockopt = compat_ipv6_getsockopt,
1788 .mtu_reduced = tcp_v4_mtu_reduced,
1791 #ifdef CONFIG_TCP_MD5SIG
1792 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1793 .md5_lookup = tcp_v4_md5_lookup,
1794 .calc_md5_hash = tcp_v4_md5_hash_skb,
1795 .md5_parse = tcp_v6_parse_md5_keys,
1799 /* NOTE: A lot of things set to zero explicitly by call to
1800 * sk_alloc() so need not be done here.
1802 static int tcp_v6_init_sock(struct sock *sk)
1804 struct inet_connection_sock *icsk = inet_csk(sk);
1808 icsk->icsk_af_ops = &ipv6_specific;
1810 #ifdef CONFIG_TCP_MD5SIG
1811 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1817 static void tcp_v6_destroy_sock(struct sock *sk)
1819 tcp_v4_destroy_sock(sk);
1820 inet6_destroy_sock(sk);
1823 #ifdef CONFIG_PROC_FS
1824 /* Proc filesystem TCPv6 sock list dumping. */
1825 static void get_openreq6(struct seq_file *seq,
1826 const struct request_sock *req, int i)
1828 long ttd = req->rsk_timer.expires - jiffies;
1829 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1830 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1836 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1837 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1839 src->s6_addr32[0], src->s6_addr32[1],
1840 src->s6_addr32[2], src->s6_addr32[3],
1841 inet_rsk(req)->ir_num,
1842 dest->s6_addr32[0], dest->s6_addr32[1],
1843 dest->s6_addr32[2], dest->s6_addr32[3],
1844 ntohs(inet_rsk(req)->ir_rmt_port),
1846 0, 0, /* could print option size, but that is af dependent. */
1847 1, /* timers active (only the expire timer) */
1848 jiffies_to_clock_t(ttd),
1850 from_kuid_munged(seq_user_ns(seq),
1851 sock_i_uid(req->rsk_listener)),
1852 0, /* non standard timer */
1853 0, /* open_requests have no inode */
1857 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1859 const struct in6_addr *dest, *src;
1862 unsigned long timer_expires;
1863 const struct inet_sock *inet = inet_sk(sp);
1864 const struct tcp_sock *tp = tcp_sk(sp);
1865 const struct inet_connection_sock *icsk = inet_csk(sp);
1866 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1870 dest = &sp->sk_v6_daddr;
1871 src = &sp->sk_v6_rcv_saddr;
1872 destp = ntohs(inet->inet_dport);
1873 srcp = ntohs(inet->inet_sport);
1875 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1876 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1877 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1879 timer_expires = icsk->icsk_timeout;
1880 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1882 timer_expires = icsk->icsk_timeout;
1883 } else if (timer_pending(&sp->sk_timer)) {
1885 timer_expires = sp->sk_timer.expires;
1888 timer_expires = jiffies;
1891 state = inet_sk_state_load(sp);
1892 if (state == TCP_LISTEN)
1893 rx_queue = sp->sk_ack_backlog;
1895 /* Because we don't lock the socket,
1896 * we might find a transient negative value.
1898 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
1901 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1902 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1904 src->s6_addr32[0], src->s6_addr32[1],
1905 src->s6_addr32[2], src->s6_addr32[3], srcp,
1906 dest->s6_addr32[0], dest->s6_addr32[1],
1907 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1909 tp->write_seq - tp->snd_una,
1912 jiffies_delta_to_clock_t(timer_expires - jiffies),
1913 icsk->icsk_retransmits,
1914 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1915 icsk->icsk_probes_out,
1917 refcount_read(&sp->sk_refcnt), sp,
1918 jiffies_to_clock_t(icsk->icsk_rto),
1919 jiffies_to_clock_t(icsk->icsk_ack.ato),
1920 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
1922 state == TCP_LISTEN ?
1923 fastopenq->max_qlen :
1924 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
1928 static void get_timewait6_sock(struct seq_file *seq,
1929 struct inet_timewait_sock *tw, int i)
1931 long delta = tw->tw_timer.expires - jiffies;
1932 const struct in6_addr *dest, *src;
1935 dest = &tw->tw_v6_daddr;
1936 src = &tw->tw_v6_rcv_saddr;
1937 destp = ntohs(tw->tw_dport);
1938 srcp = ntohs(tw->tw_sport);
1941 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1942 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1944 src->s6_addr32[0], src->s6_addr32[1],
1945 src->s6_addr32[2], src->s6_addr32[3], srcp,
1946 dest->s6_addr32[0], dest->s6_addr32[1],
1947 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1948 tw->tw_substate, 0, 0,
1949 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1950 refcount_read(&tw->tw_refcnt), tw);
1953 static int tcp6_seq_show(struct seq_file *seq, void *v)
1955 struct tcp_iter_state *st;
1956 struct sock *sk = v;
1958 if (v == SEQ_START_TOKEN) {
1963 "st tx_queue rx_queue tr tm->when retrnsmt"
1964 " uid timeout inode\n");
1969 if (sk->sk_state == TCP_TIME_WAIT)
1970 get_timewait6_sock(seq, v, st->num);
1971 else if (sk->sk_state == TCP_NEW_SYN_RECV)
1972 get_openreq6(seq, v, st->num);
1974 get_tcp6_sock(seq, v, st->num);
1979 static const struct seq_operations tcp6_seq_ops = {
1980 .show = tcp6_seq_show,
1981 .start = tcp_seq_start,
1982 .next = tcp_seq_next,
1983 .stop = tcp_seq_stop,
1986 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1990 int __net_init tcp6_proc_init(struct net *net)
1992 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
1993 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
1998 void tcp6_proc_exit(struct net *net)
2000 remove_proc_entry("tcp6", net->proc_net);
2004 struct proto tcpv6_prot = {
2006 .owner = THIS_MODULE,
2008 .pre_connect = tcp_v6_pre_connect,
2009 .connect = tcp_v6_connect,
2010 .disconnect = tcp_disconnect,
2011 .accept = inet_csk_accept,
2013 .init = tcp_v6_init_sock,
2014 .destroy = tcp_v6_destroy_sock,
2015 .shutdown = tcp_shutdown,
2016 .setsockopt = tcp_setsockopt,
2017 .getsockopt = tcp_getsockopt,
2018 .keepalive = tcp_set_keepalive,
2019 .recvmsg = tcp_recvmsg,
2020 .sendmsg = tcp_sendmsg,
2021 .sendpage = tcp_sendpage,
2022 .backlog_rcv = tcp_v6_do_rcv,
2023 .release_cb = tcp_release_cb,
2025 .unhash = inet_unhash,
2026 .get_port = inet_csk_get_port,
2027 .enter_memory_pressure = tcp_enter_memory_pressure,
2028 .leave_memory_pressure = tcp_leave_memory_pressure,
2029 .stream_memory_free = tcp_stream_memory_free,
2030 .sockets_allocated = &tcp_sockets_allocated,
2031 .memory_allocated = &tcp_memory_allocated,
2032 .memory_pressure = &tcp_memory_pressure,
2033 .orphan_count = &tcp_orphan_count,
2034 .sysctl_mem = sysctl_tcp_mem,
2035 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2036 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2037 .max_header = MAX_TCP_HEADER,
2038 .obj_size = sizeof(struct tcp6_sock),
2039 .slab_flags = SLAB_TYPESAFE_BY_RCU,
2040 .twsk_prot = &tcp6_timewait_sock_ops,
2041 .rsk_prot = &tcp6_request_sock_ops,
2042 .h.hashinfo = &tcp_hashinfo,
2043 .no_autobind = true,
2044 #ifdef CONFIG_COMPAT
2045 .compat_setsockopt = compat_tcp_setsockopt,
2046 .compat_getsockopt = compat_tcp_getsockopt,
2048 .diag_destroy = tcp_abort,
2051 /* thinking of making this const? Don't.
2052 * early_demux can change based on sysctl.
2054 static struct inet6_protocol tcpv6_protocol = {
2055 .early_demux = tcp_v6_early_demux,
2056 .early_demux_handler = tcp_v6_early_demux,
2057 .handler = tcp_v6_rcv,
2058 .err_handler = tcp_v6_err,
2059 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2062 static struct inet_protosw tcpv6_protosw = {
2063 .type = SOCK_STREAM,
2064 .protocol = IPPROTO_TCP,
2065 .prot = &tcpv6_prot,
2066 .ops = &inet6_stream_ops,
2067 .flags = INET_PROTOSW_PERMANENT |
2071 static int __net_init tcpv6_net_init(struct net *net)
2073 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2074 SOCK_RAW, IPPROTO_TCP, net);
2077 static void __net_exit tcpv6_net_exit(struct net *net)
2079 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2082 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2084 inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2087 static struct pernet_operations tcpv6_net_ops = {
2088 .init = tcpv6_net_init,
2089 .exit = tcpv6_net_exit,
2090 .exit_batch = tcpv6_net_exit_batch,
2093 int __init tcpv6_init(void)
2097 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2101 /* register inet6 protocol */
2102 ret = inet6_register_protosw(&tcpv6_protosw);
2104 goto out_tcpv6_protocol;
2106 ret = register_pernet_subsys(&tcpv6_net_ops);
2108 goto out_tcpv6_protosw;
2113 inet6_unregister_protosw(&tcpv6_protosw);
2115 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2119 void tcpv6_exit(void)
2121 unregister_pernet_subsys(&tcpv6_net_ops);
2122 inet6_unregister_protosw(&tcpv6_protosw);
2123 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);