3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
17 * a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
42 #include <linux/uaccess.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/inet_common.h>
63 #include <net/secure_seq.h>
64 #include <net/busy_poll.h>
66 #include <linux/proc_fs.h>
67 #include <linux/seq_file.h>
69 #include <crypto/hash.h>
70 #include <linux/scatterlist.h>
72 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
73 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
74 struct request_sock *req);
76 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
78 static const struct inet_connection_sock_af_ops ipv6_mapped;
79 static const struct inet_connection_sock_af_ops ipv6_specific;
80 #ifdef CONFIG_TCP_MD5SIG
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
82 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
84 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
85 const struct in6_addr *addr)
/* Cache the validated input route (dst) on an established socket so the
 * receive fast path can skip a route lookup.  Takes its own reference with
 * dst_hold_safe() and records the incoming ifindex plus a routing cookie
 * used later to detect a stale dst.
 * NOTE(review): several source lines are elided from this view (closing
 * braces and possibly other statements) - do not edit logic from here.
 */
91 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
93 struct dst_entry *dst = skb_dst(skb);
95 if (dst && dst_hold_safe(dst)) {
96 const struct rt6_info *rt = (const struct rt6_info *)dst;
99 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
100 inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
/* Derive the initial sequence number and timestamp offset for an incoming
 * IPv6 SYN from the address/port 4-tuple (note daddr/saddr are swapped:
 * the ISN is computed from the responder's perspective).
 * NOTE(review): a line carrying the dest-port argument appears elided here.
 */
104 static u32 tcp_v6_init_seq_and_tsoff(const struct sk_buff *skb, u32 *tsoff)
106 return secure_tcpv6_seq_and_tsoff(ipv6_hdr(skb)->daddr.s6_addr32,
107 ipv6_hdr(skb)->saddr.s6_addr32,
109 tcp_hdr(skb)->source, tsoff);
/* Active open (connect()) for an AF_INET6 TCP socket.
 *
 * Validates the sockaddr, resolves flow labels and scope ids, handles the
 * BSD "connect to :: means loopback" convention, and - when the peer is a
 * v4-mapped address - re-points the socket at the IPv4 ops (ipv6_mapped)
 * and delegates to tcp_v4_connect().  Otherwise performs an IPv6 route
 * lookup, binds a source address, picks an ephemeral port via
 * inet6_hash_connect(), chooses the ISN, and kicks off the SYN with
 * tcp_connect().
 *
 * Returns 0 on success or a negative errno; on failure the socket is
 * moved back to TCP_CLOSE and dport/route caps are cleared.
 *
 * NOTE(review): this view is heavily elided (error labels, several
 * assignments and all closing braces are missing) - comments below only
 * annotate the lines actually visible.
 */
112 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
115 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
116 struct inet_sock *inet = inet_sk(sk);
117 struct inet_connection_sock *icsk = inet_csk(sk);
118 struct ipv6_pinfo *np = inet6_sk(sk);
119 struct tcp_sock *tp = tcp_sk(sk);
120 struct in6_addr *saddr = NULL, *final_p, final;
121 struct ipv6_txoptions *opt;
123 struct dst_entry *dst;
127 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
	/* Reject addresses shorter than the RFC 2133 minimum. */
129 if (addr_len < SIN6_LEN_RFC2133)
132 if (usin->sin6_family != AF_INET6)
133 return -EAFNOSUPPORT;
135 memset(&fl6, 0, sizeof(fl6));
	/* Honour a caller-supplied flow label; ECN bits are initialised
	 * separately via IP6_ECN_flow_init().
	 */
138 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
139 IP6_ECN_flow_init(fl6.flowlabel);
140 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
141 struct ip6_flowlabel *flowlabel;
142 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
145 fl6_sock_release(flowlabel);
150 * connect() to INADDR_ANY means loopback (BSD'ism).
153 if (ipv6_addr_any(&usin->sin6_addr)) {
154 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
155 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
158 usin->sin6_addr = in6addr_loopback;
161 addr_type = ipv6_addr_type(&usin->sin6_addr);
163 if (addr_type & IPV6_ADDR_MULTICAST)
	/* Link-local destinations need a well-defined interface: either a
	 * scope id matching any existing binding, or an already bound dev.
	 */
166 if (addr_type&IPV6_ADDR_LINKLOCAL) {
167 if (addr_len >= sizeof(struct sockaddr_in6) &&
168 usin->sin6_scope_id) {
169 /* If interface is set while binding, indices
172 if (sk->sk_bound_dev_if &&
173 sk->sk_bound_dev_if != usin->sin6_scope_id)
176 sk->sk_bound_dev_if = usin->sin6_scope_id;
179 /* Connect to link-local address requires an interface */
180 if (!sk->sk_bound_dev_if)
	/* Reconnecting to a different peer: stale TS_RECENT must not leak. */
184 if (tp->rx_opt.ts_recent_stamp &&
185 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
186 tp->rx_opt.ts_recent = 0;
187 tp->rx_opt.ts_recent_stamp = 0;
191 sk->sk_v6_daddr = usin->sin6_addr;
192 np->flow_label = fl6.flowlabel;
	/* v4-mapped peer: hand the whole connection over to the IPv4 path,
	 * restoring the IPv6 ops if tcp_v4_connect() fails.
	 */
198 if (addr_type & IPV6_ADDR_MAPPED) {
199 u32 exthdrlen = icsk->icsk_ext_hdr_len;
200 struct sockaddr_in sin;
202 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
204 if (__ipv6_only_sock(sk))
207 sin.sin_family = AF_INET;
208 sin.sin_port = usin->sin6_port;
209 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
211 icsk->icsk_af_ops = &ipv6_mapped;
212 sk->sk_backlog_rcv = tcp_v4_do_rcv;
213 #ifdef CONFIG_TCP_MD5SIG
214 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
217 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
	/* Failure path (label elided in this view): undo the AF switch. */
220 icsk->icsk_ext_hdr_len = exthdrlen;
221 icsk->icsk_af_ops = &ipv6_specific;
222 sk->sk_backlog_rcv = tcp_v6_do_rcv;
223 #ifdef CONFIG_TCP_MD5SIG
224 tp->af_specific = &tcp_sock_ipv6_specific;
228 np->saddr = sk->sk_v6_rcv_saddr;
233 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
234 saddr = &sk->sk_v6_rcv_saddr;
	/* Build the flow descriptor for the route lookup. */
236 fl6.flowi6_proto = IPPROTO_TCP;
237 fl6.daddr = sk->sk_v6_daddr;
238 fl6.saddr = saddr ? *saddr : np->saddr;
239 fl6.flowi6_oif = sk->sk_bound_dev_if;
240 fl6.flowi6_mark = sk->sk_mark;
241 fl6.fl6_dport = usin->sin6_port;
242 fl6.fl6_sport = inet->inet_sport;
243 fl6.flowi6_uid = sk->sk_uid;
245 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
246 final_p = fl6_update_dst(&fl6, opt, &final);
248 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
250 dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
258 sk->sk_v6_rcv_saddr = *saddr;
261 /* set the source address */
263 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
265 sk->sk_gso_type = SKB_GSO_TCPV6;
266 ip6_dst_store(sk, dst, NULL, NULL);
268 icsk->icsk_ext_hdr_len = 0;
270 icsk->icsk_ext_hdr_len = opt->opt_flen +
	/* Conservative MSS clamp until the route MTU is known. */
273 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
275 inet->inet_dport = usin->sin6_port;
277 tcp_set_state(sk, TCP_SYN_SENT);
278 err = inet6_hash_connect(tcp_death_row, sk);
284 if (likely(!tp->repair)) {
285 seq = secure_tcpv6_seq_and_tsoff(np->saddr.s6_addr32,
286 sk->sk_v6_daddr.s6_addr32,
294 if (tcp_fastopen_defer_connect(sk, &err))
299 err = tcp_connect(sk);
	/* Common failure tail (labels elided): tear the socket back down. */
306 tcp_set_state(sk, TCP_CLOSE);
308 inet->inet_dport = 0;
309 sk->sk_route_caps = 0;
/* React to a Packet Too Big notification: update the cached PMTU via
 * inet6_csk_update_pmtu() and, if our cached MSS is now too large,
 * re-sync the MSS and retransmit outstanding data.  No-op for sockets
 * in LISTEN or CLOSE state.
 * NOTE(review): early-return lines and braces are elided from this view.
 */
313 static void tcp_v6_mtu_reduced(struct sock *sk)
315 struct dst_entry *dst;
317 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
320 dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
324 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
325 tcp_sync_mss(sk, dst_mtu(dst));
326 tcp_simple_retransmit(sk);
/* ICMPv6 error handler for TCP.
 *
 * Locates the socket for the offending segment, filters out stale or
 * out-of-window notifications, and dispatches by ICMP type: NDISC
 * redirects update the route, PKT_TOOBIG triggers PMTU handling (deferred
 * via TCP_MTU_REDUCED_DEFERRED if the socket is owned by user context),
 * and hard errors are delivered to the socket as sk_err/sk_err_soft.
 *
 * NOTE(review): many lines (locking, gotos, closing braces) are elided
 * from this view; annotations cover only the visible statements.
 */
330 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
331 u8 type, u8 code, int offset, __be32 info)
333 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
334 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
335 struct net *net = dev_net(skb->dev);
336 struct request_sock *fastopen;
337 struct ipv6_pinfo *np;
	/* Find the established (or timewait/new-syn-recv) socket. */
344 sk = __inet6_lookup_established(net, &tcp_hashinfo,
345 &hdr->daddr, th->dest,
346 &hdr->saddr, ntohs(th->source),
	/* No socket found: count the dropped ICMP. */
350 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
355 if (sk->sk_state == TCP_TIME_WAIT) {
356 inet_twsk_put(inet_twsk(sk));
359 seq = ntohl(th->seq);
360 fatal = icmpv6_err_convert(type, code, &err);
361 if (sk->sk_state == TCP_NEW_SYN_RECV)
362 return tcp_req_err(sk, seq, fatal);
	/* Socket busy in user context: only PKT_TOOBIG may be deferred. */
365 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
366 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
368 if (sk->sk_state == TCP_CLOSE)
	/* min_hopcount (IPV6_MINHOPCOUNT) spoofing defence. */
371 if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
372 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
377 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
378 fastopen = tp->fastopen_rsk;
379 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
380 if (sk->sk_state != TCP_LISTEN &&
381 !between(seq, snd_una, tp->snd_nxt)) {
382 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
388 if (type == NDISC_REDIRECT) {
389 if (!sock_owned_by_user(sk)) {
390 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
393 dst->ops->redirect(dst, sk, skb);
398 if (type == ICMPV6_PKT_TOOBIG) {
399 /* We are not interested in TCP_LISTEN and open_requests
400 * (SYN-ACKs send out by Linux are always <576bytes so
401 * they should go through unfragmented).
403 if (sk->sk_state == TCP_LISTEN)
406 if (!ip6_sk_accept_pmtu(sk))
409 tp->mtu_info = ntohl(info);
410 if (!sock_owned_by_user(sk))
411 tcp_v6_mtu_reduced(sk);
412 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
419 /* Might be for an request_sock */
420 switch (sk->sk_state) {
423 /* Only in fast or simultaneous open. If a fast open socket is
424 * is already accepted it is treated as a connected one below.
426 if (fastopen && !fastopen->sk)
429 if (!sock_owned_by_user(sk)) {
431 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
435 sk->sk_err_soft = err;
439 if (!sock_owned_by_user(sk) && np->recverr) {
441 sk->sk_error_report(sk);
443 sk->sk_err_soft = err;
/* Build and transmit a SYN-ACK for a pending request socket.
 * Grabs a route if the caller did not supply one, builds the segment with
 * tcp_make_synack(), fills in the checksum and flow addresses, attaches
 * the listener's (or request's) IPv6 tx options, and sends via ip6_xmit().
 * Returns the net_xmit_eval()-normalised transmit status.
 * NOTE(review): error labels and braces are elided from this view.
 */
451 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
453 struct request_sock *req,
454 struct tcp_fastopen_cookie *foc,
455 enum tcp_synack_type synack_type)
457 struct inet_request_sock *ireq = inet_rsk(req);
458 struct ipv6_pinfo *np = inet6_sk(sk);
459 struct ipv6_txoptions *opt;
460 struct flowi6 *fl6 = &fl->u.ip6;
464 /* First, grab a route. */
465 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
466 IPPROTO_TCP)) == NULL)
469 skb = tcp_make_synack(sk, dst, req, foc, synack_type);
472 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
473 &ireq->ir_v6_rmt_addr);
475 fl6->daddr = ireq->ir_v6_rmt_addr;
	/* Reflect the client's flow label when IPV6_FLOWINFO repflow is on. */
476 if (np->repflow && ireq->pktopts)
477 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
480 opt = ireq->ipv6_opt;
482 opt = rcu_dereference(np->opt);
483 err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass);
485 err = net_xmit_eval(err);
/* Free per-request IPv6 state: the duplicated tx options and the pktopts
 * skb saved from the SYN.  Called when a request_sock is destroyed.
 */
493 static void tcp_v6_reqsk_destructor(struct request_sock *req)
495 kfree(inet_rsk(req)->ipv6_opt);
496 kfree_skb(inet_rsk(req)->pktopts);
499 #ifdef CONFIG_TCP_MD5SIG
/* Look up the TCP-MD5 (RFC 2385) key configured for a given IPv6 peer
 * address on this socket; returns NULL when none is set.
 */
500 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
501 const struct in6_addr *addr)
503 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
/* tcp_sock_af_ops hook: find the MD5 key for the peer of @addr_sk
 * (keyed by its IPv6 destination address).
 */
506 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
507 const struct sock *addr_sk)
509 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
/* setsockopt(TCP_MD5SIG) handler: copy the tcp_md5sig request from user
 * space and add or delete the key.  A zero key length means delete.
 * v4-mapped addresses are stored as AF_INET keys (last 32 bits of the
 * mapped address) so they match traffic on the IPv4 path.
 * NOTE(review): the -EINVAL return lines are elided from this view.
 */
512 static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
515 struct tcp_md5sig cmd;
516 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
518 if (optlen < sizeof(cmd))
521 if (copy_from_user(&cmd, optval, sizeof(cmd)))
524 if (sin6->sin6_family != AF_INET6)
527 if (!cmd.tcpm_keylen) {
528 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
529 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
531 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
535 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
538 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
539 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
540 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
542 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
543 AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
/* Feed the TCPv6 pseudo-header plus a checksum-zeroed copy of the TCP
 * header into the per-cpu MD5 hash request.  @nbytes is the TCP length
 * used in the pseudo-header.  Returns the crypto_ahash_update() result.
 * NOTE(review): the scratch-buffer setup and the line zeroing _th->check
 * appear elided from this view.
 */
546 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
547 const struct in6_addr *daddr,
548 const struct in6_addr *saddr,
549 const struct tcphdr *th, int nbytes)
551 struct tcp6_pseudohdr *bp;
552 struct scatterlist sg;
556 /* 1. TCP pseudo-header (RFC2460) */
559 bp->protocol = cpu_to_be32(IPPROTO_TCP);
560 bp->len = cpu_to_be32(nbytes);
	/* 2. TCP header copy placed right after the pseudo-header. */
562 _th = (struct tcphdr *)(bp + 1);
563 memcpy(_th, th, sizeof(*th));
566 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
567 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
568 sizeof(*bp) + sizeof(*th));
569 return crypto_ahash_update(hp->md5_req);
/* Compute the MD5 signature over pseudo-header + TCP header + key only
 * (no payload); used for RST/ACK replies built in tcp_v6_send_response().
 * On any crypto failure the output digest is zeroed and an error returned.
 * NOTE(review): goto labels (clear_hash/clear_hash_noput) and some return
 * statements are elided from this view.
 */
572 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
573 const struct in6_addr *daddr, struct in6_addr *saddr,
574 const struct tcphdr *th)
576 struct tcp_md5sig_pool *hp;
577 struct ahash_request *req;
579 hp = tcp_get_md5sig_pool();
581 goto clear_hash_noput;
584 if (crypto_ahash_init(req))
586 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
588 if (tcp_md5_hash_key(hp, key))
590 ahash_request_set_crypt(req, NULL, md5_hash, 0);
591 if (crypto_ahash_final(req))
594 tcp_put_md5sig_pool();
	/* Error path: release the pool and zero the digest. */
598 tcp_put_md5sig_pool();
600 memset(md5_hash, 0, 16);
/* Compute the MD5 signature over an entire segment: pseudo-header, TCP
 * header, payload data, then the key.  Addresses come from the socket
 * when one is available (established/request), otherwise from the skb's
 * IPv6 header.  Digest is zeroed on any crypto failure.
 * NOTE(review): goto labels and return statements are elided here.
 */
604 static int tcp_v6_md5_hash_skb(char *md5_hash,
605 const struct tcp_md5sig_key *key,
606 const struct sock *sk,
607 const struct sk_buff *skb)
609 const struct in6_addr *saddr, *daddr;
610 struct tcp_md5sig_pool *hp;
611 struct ahash_request *req;
612 const struct tcphdr *th = tcp_hdr(skb);
614 if (sk) { /* valid for establish/request sockets */
615 saddr = &sk->sk_v6_rcv_saddr;
616 daddr = &sk->sk_v6_daddr;
618 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
619 saddr = &ip6h->saddr;
620 daddr = &ip6h->daddr;
623 hp = tcp_get_md5sig_pool();
625 goto clear_hash_noput;
628 if (crypto_ahash_init(req))
631 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
633 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
635 if (tcp_md5_hash_key(hp, key))
637 ahash_request_set_crypt(req, NULL, md5_hash, 0);
638 if (crypto_ahash_final(req))
641 tcp_put_md5sig_pool();
	/* Error path: release the pool and zero the digest. */
645 tcp_put_md5sig_pool();
647 memset(md5_hash, 0, 16);
/* Verify the TCP-MD5 option on an inbound segment against the key
 * configured for the peer.  Returns true when the segment must be
 * dropped: option present without a key, key present without an option,
 * or digest mismatch (each case bumps its own MIB counter).
 * NOTE(review): the return statements and closing braces are elided.
 */
653 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
654 const struct sk_buff *skb)
656 #ifdef CONFIG_TCP_MD5SIG
657 const __u8 *hash_location = NULL;
658 struct tcp_md5sig_key *hash_expected;
659 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
660 const struct tcphdr *th = tcp_hdr(skb);
664 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
665 hash_location = tcp_parse_md5sig_option(th);
667 /* We've parsed the options - do we have a hash? */
668 if (!hash_expected && !hash_location)
671 if (hash_expected && !hash_location) {
672 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
676 if (!hash_expected && hash_location) {
677 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
681 /* check the signature */
682 genhash = tcp_v6_md5_hash_skb(newhash,
686 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
687 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
688 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
689 genhash ? "failed" : "mismatch",
690 &ip6h->saddr, ntohs(th->source),
691 &ip6h->daddr, ntohs(th->dest));
/* Initialise the IPv6 parts of a freshly-minted request_sock from the
 * incoming SYN: remote/local addresses, the incoming interface for
 * link-local peers, and (when the listener asked for any rx options)
 * a reference-held copy of the SYN skb in ireq->pktopts.
 * NOTE(review): the assignment storing skb into ireq->pktopts appears
 * elided after the atomic_inc() line.
 */
698 static void tcp_v6_init_req(struct request_sock *req,
699 const struct sock *sk_listener,
702 struct inet_request_sock *ireq = inet_rsk(req);
703 const struct ipv6_pinfo *np = inet6_sk(sk_listener);
705 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
706 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
708 /* So that link locals have meaning */
709 if (!sk_listener->sk_bound_dev_if &&
710 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
711 ireq->ir_iif = tcp_v6_iif(skb);
713 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
714 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
715 np->rxopt.bits.rxinfo ||
716 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
717 np->rxopt.bits.rxohlim || np->repflow)) {
718 atomic_inc(&skb->users);
/* route_req hook for tcp_request_sock_ops: resolve the route for a
 * pending request via the connection-sock route helper.
 */
723 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
725 const struct request_sock *req)
727 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
/* Generic request_sock operations for IPv6 TCP: SYN-ACK (re)transmit,
 * ACK/RST generation, destruction, and SYN-ACK timeout handling.
 */
730 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
732 .obj_size = sizeof(struct tcp6_request_sock),
733 .rtx_syn_ack = tcp_rtx_synack,
734 .send_ack = tcp_v6_reqsk_send_ack,
735 .destructor = tcp_v6_reqsk_destructor,
736 .send_reset = tcp_v6_send_reset,
737 .syn_ack_timeout = tcp_syn_ack_timeout,
/* AF-specific request-sock hooks for IPv6: MSS clamp for the minimum
 * IPv6 MTU, MD5 lookup/compute (when configured), request init, syncookie
 * sequence generation, routing, ISN/timestamp-offset choice, and SYN-ACK
 * transmission.
 */
740 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
741 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
742 sizeof(struct ipv6hdr),
743 #ifdef CONFIG_TCP_MD5SIG
744 .req_md5_lookup = tcp_v6_md5_lookup,
745 .calc_md5_hash = tcp_v6_md5_hash_skb,
747 .init_req = tcp_v6_init_req,
748 #ifdef CONFIG_SYN_COOKIES
749 .cookie_init_seq = cookie_v6_init_sequence,
751 .route_req = tcp_v6_route_req,
752 .init_seq_tsoff = tcp_v6_init_seq_and_tsoff,
753 .send_synack = tcp_v6_send_synack,
/* Build and send a bare control segment (RST when @rst, else ACK) in
 * reply to @skb, without any full socket for the flow.
 *
 * Allocates a minimal skb, writes a TCP header with source/dest swapped
 * relative to the incoming segment, optionally appends timestamp and
 * MD5-signature options, then routes and transmits it via the per-netns
 * control socket (net->ipv6.tcp_sk).  Counters: OUTSEGS always, OUTRSTS
 * additionally for resets.
 *
 * NOTE(review): allocation-failure checks, route-failure handling and
 * several closing braces are elided from this view.
 */
756 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
757 u32 ack, u32 win, u32 tsval, u32 tsecr,
758 int oif, struct tcp_md5sig_key *key, int rst,
759 u8 tclass, __be32 label)
761 const struct tcphdr *th = tcp_hdr(skb);
763 struct sk_buff *buff;
765 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
766 struct sock *ctl_sk = net->ipv6.tcp_sk;
767 unsigned int tot_len = sizeof(struct tcphdr);
768 struct dst_entry *dst;
	/* Reserve room for the options we will emit below. */
772 tot_len += TCPOLEN_TSTAMP_ALIGNED;
773 #ifdef CONFIG_TCP_MD5SIG
775 tot_len += TCPOLEN_MD5SIG_ALIGNED;
778 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
783 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
785 t1 = (struct tcphdr *) skb_push(buff, tot_len);
786 skb_reset_transport_header(buff);
788 /* Swap the send and the receive. */
789 memset(t1, 0, sizeof(*t1));
790 t1->dest = th->source;
791 t1->source = th->dest;
792 t1->doff = tot_len / 4;
793 t1->seq = htonl(seq);
794 t1->ack_seq = htonl(ack);
795 t1->ack = !rst || !th->ack;
797 t1->window = htons(win);
799 topt = (__be32 *)(t1 + 1);
	/* Timestamp option (NOP,NOP,TS) when tsecr is set. */
802 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
803 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
804 *topt++ = htonl(tsval);
805 *topt++ = htonl(tsecr);
808 #ifdef CONFIG_TCP_MD5SIG
810 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
811 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
812 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
813 &ipv6_hdr(skb)->saddr,
814 &ipv6_hdr(skb)->daddr, t1);
	/* Reply flow: swap addresses from the incoming header. */
818 memset(&fl6, 0, sizeof(fl6));
819 fl6.daddr = ipv6_hdr(skb)->saddr;
820 fl6.saddr = ipv6_hdr(skb)->daddr;
821 fl6.flowlabel = label;
823 buff->ip_summed = CHECKSUM_PARTIAL;
826 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
828 fl6.flowi6_proto = IPPROTO_TCP;
	/* Strict (link-local) destinations need an explicit oif. */
829 if (rt6_need_strict(&fl6.daddr) && !oif)
830 fl6.flowi6_oif = tcp_v6_iif(skb);
832 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
835 fl6.flowi6_oif = oif;
838 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
839 fl6.fl6_dport = t1->dest;
840 fl6.fl6_sport = t1->source;
841 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
842 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
844 /* Pass a socket to ip6_dst_lookup either it is for RST
845 * Underlying function will use this to retrieve the network
848 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
850 skb_dst_set(buff, dst);
851 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
852 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
854 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
/* Send a RST in reply to @skb.  When the segment carried an MD5 option
 * but we have no matching socket, the listener is looked up by source
 * port so the reset can be signed with the right key; an unverifiable
 * MD5 segment never produces a RST.  Sequence numbers follow RFC 793:
 * echo the ACK as seq when present, otherwise ACK the received data.
 * NOTE(review): several lines (rcu locking, gotos, the no-ACK branch
 * condition, closing braces) are elided from this view.
 */
861 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
863 const struct tcphdr *th = tcp_hdr(skb);
864 u32 seq = 0, ack_seq = 0;
865 struct tcp_md5sig_key *key = NULL;
866 #ifdef CONFIG_TCP_MD5SIG
867 const __u8 *hash_location = NULL;
868 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
869 unsigned char newhash[16];
871 struct sock *sk1 = NULL;
878 /* If sk not NULL, it means we did a successful lookup and incoming
879 * route had to be correct. prequeue might have dropped our dst.
881 if (!sk && !ipv6_unicast_destination(skb))
884 #ifdef CONFIG_TCP_MD5SIG
886 hash_location = tcp_parse_md5sig_option(th);
887 if (sk && sk_fullsock(sk)) {
888 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
889 } else if (hash_location) {
891 * active side is lost. Try to find listening socket through
892 * source port, and then find md5 key through listening socket.
893 * we are not loose security here:
894 * Incoming packet is checked with md5 hash with finding key,
895 * no RST generated if md5 hash doesn't match.
897 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
898 &tcp_hashinfo, NULL, 0,
900 th->source, &ipv6h->daddr,
901 ntohs(th->source), tcp_v6_iif(skb));
905 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
	/* Verify the incoming signature before answering with a RST. */
909 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
910 if (genhash || memcmp(hash_location, newhash, 16) != 0)
916 seq = ntohl(th->ack_seq);
918 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
921 oif = sk ? sk->sk_bound_dev_if : 0;
922 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
924 #ifdef CONFIG_TCP_MD5SIG
/* Thin wrapper around tcp_v6_send_response() with rst=0: emit a bare
 * ACK (used for timewait and request-sock replies).
 */
930 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
931 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
932 struct tcp_md5sig_key *key, u8 tclass,
935 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
/* Answer a segment that hit a TIME_WAIT socket: re-ACK with the state
 * (snd_nxt/rcv_nxt, scaled window, timestamps, MD5 key, tclass and flow
 * label) preserved in the timewait sock.
 * NOTE(review): the trailing inet_twsk_put() appears elided here.
 */
939 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
941 struct inet_timewait_sock *tw = inet_twsk(sk);
942 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
944 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
945 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
946 tcp_time_stamp + tcptw->tw_ts_offset,
947 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
948 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
/* ACK on behalf of a request socket (SYN_RECV / Fast Open).  The seq
 * differs by listener state as described in the inline comment, and the
 * advertised window is right-shifted per RFC 7323.
 */
953 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
954 struct request_sock *req)
956 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
957 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
960 * The window field (SEG.WND) of every outgoing segment, with the
961 * exception of <SYN> segments, MUST be right-shifted by
962 * Rcv.Wind.Shift bits:
964 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
965 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
966 tcp_rsk(req)->rcv_nxt,
967 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
968 tcp_time_stamp + tcp_rsk(req)->ts_off,
969 req->ts_recent, sk->sk_bound_dev_if,
970 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
/* When SYN cookies are enabled, validate an ACK that may carry a cookie
 * and convert it to a child socket via cookie_v6_check(); otherwise the
 * listener is returned unchanged.
 * NOTE(review): the condition guarding the check (no-SYN/ACK test) is
 * elided from this view.
 */
975 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
977 #ifdef CONFIG_SYN_COOKIES
978 const struct tcphdr *th = tcp_hdr(skb);
981 sk = cookie_v6_check(sk, skb);
/* Entry point for an incoming SYN on a listener.  IPv4 packets on a
 * mapped socket are rerouted to tcp_v4_conn_request(); multicast/anycast
 * destinations are refused; everything else goes through the generic
 * tcp_conn_request() with the IPv6 ops tables.  Returns 0 on drop so no
 * reset is sent.
 */
986 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
988 if (skb->protocol == htons(ETH_P_IP))
989 return tcp_v4_conn_request(sk, skb);
991 if (!ipv6_unicast_destination(skb))
994 return tcp_conn_request(&tcp6_request_sock_ops,
995 &tcp_request_sock_ipv6_ops, sk, skb);
999 return 0; /* don't send reset */
/* Undo tcp_v6_fill_cb(): copy the saved inet6_skb_parm from the TCP
 * control block back to IP6CB so xfrm/policy code can re-inspect it.
 */
1002 static void tcp_v6_restore_cb(struct sk_buff *skb)
1004 /* We need to move header back to the beginning if xfrm6_policy_check()
1005 * and tcp_v6_fill_cb() are going to be called again.
1006 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1008 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1009 sizeof(struct inet6_skb_parm));
/* Create the child socket that completes the 3-way handshake for a
 * request on an IPv6 listener.
 *
 * Two major paths:
 *  - IPv4 packet on a mapped socket: delegate to tcp_v4_syn_recv_sock()
 *    and then patch the child's inet6 state/ops for the mapped case;
 *  - native IPv6: route, tcp_create_openreq_child(), copy addresses and
 *    options from the request, clone tx options, set up congestion state
 *    and MSS, copy any MD5 key, inherit the port and hash the child.
 *
 * Returns the new socket or NULL on failure (accept-queue overflow,
 * routing or allocation failure).  *own_req tells the caller whether we
 * won the ehash insert race for the request.
 *
 * NOTE(review): error labels (overflow/drop/put_and_exit) and many
 * intermediate lines are elided from this view.
 */
1012 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1013 struct request_sock *req,
1014 struct dst_entry *dst,
1015 struct request_sock *req_unhash,
1018 struct inet_request_sock *ireq;
1019 struct ipv6_pinfo *newnp;
1020 const struct ipv6_pinfo *np = inet6_sk(sk);
1021 struct ipv6_txoptions *opt;
1022 struct tcp6_sock *newtcp6sk;
1023 struct inet_sock *newinet;
1024 struct tcp_sock *newtp;
1026 #ifdef CONFIG_TCP_MD5SIG
1027 struct tcp_md5sig_key *key;
	/* --- v4-mapped path: child built by the IPv4 code, fixed up here. */
1031 if (skb->protocol == htons(ETH_P_IP)) {
1036 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1037 req_unhash, own_req);
1042 newtcp6sk = (struct tcp6_sock *)newsk;
1043 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1045 newinet = inet_sk(newsk);
1046 newnp = inet6_sk(newsk);
1047 newtp = tcp_sk(newsk);
1049 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1051 newnp->saddr = newsk->sk_v6_rcv_saddr;
1053 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1054 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1055 #ifdef CONFIG_TCP_MD5SIG
1056 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1059 newnp->ipv6_ac_list = NULL;
1060 newnp->ipv6_fl_list = NULL;
1061 newnp->pktoptions = NULL;
1063 newnp->mcast_oif = tcp_v6_iif(skb);
1064 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1065 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1067 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1070 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1071 * here, tcp_create_openreq_child now does this for us, see the comment in
1072 * that function for the gory details. -acme
1075 /* It is tricky place. Until this moment IPv4 tcp
1076 worked with IPv6 icsk.icsk_af_ops.
1079 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
	/* --- native IPv6 path. */
1084 ireq = inet_rsk(req);
1086 if (sk_acceptq_is_full(sk))
1090 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1095 newsk = tcp_create_openreq_child(sk, req, skb);
1100 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1101 * count here, tcp_create_openreq_child now does this for us, see the
1102 * comment in that function for the gory details. -acme
1105 newsk->sk_gso_type = SKB_GSO_TCPV6;
1106 ip6_dst_store(newsk, dst, NULL, NULL);
1107 inet6_sk_rx_dst_set(newsk, skb);
1109 newtcp6sk = (struct tcp6_sock *)newsk;
1110 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1112 newtp = tcp_sk(newsk);
1113 newinet = inet_sk(newsk);
1114 newnp = inet6_sk(newsk);
1116 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1118 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1119 newnp->saddr = ireq->ir_v6_loc_addr;
1120 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1121 newsk->sk_bound_dev_if = ireq->ir_iif;
1123 /* Now IPv6 options...
1125 First: no IPv4 options.
1127 newinet->inet_opt = NULL;
1128 newnp->ipv6_ac_list = NULL;
1129 newnp->ipv6_fl_list = NULL;
1132 newnp->rxopt.all = np->rxopt.all;
1134 newnp->pktoptions = NULL;
1136 newnp->mcast_oif = tcp_v6_iif(skb);
1137 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1138 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1140 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1142 /* Clone native IPv6 options from listening socket (if any)
1144 Yes, keeping reference count would be much more clever,
1145 but we make one more one thing there: reattach optmem
1148 opt = ireq->ipv6_opt;
1150 opt = rcu_dereference(np->opt);
1152 opt = ipv6_dup_options(newsk, opt);
1153 RCU_INIT_POINTER(newnp->opt, opt);
1155 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1157 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1160 tcp_ca_openreq_child(newsk, dst);
1162 tcp_sync_mss(newsk, dst_mtu(dst));
1163 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1165 tcp_initialize_rcv_mss(newsk);
1167 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1168 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1170 #ifdef CONFIG_TCP_MD5SIG
1171 /* Copy over the MD5 key from the original socket */
1172 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
1174 /* We're using one, so create a matching key
1175 * on the newsk structure. If we fail to get
1176 * memory, then we end up not copying the key
1179 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1180 AF_INET6, key->key, key->keylen,
1181 sk_gfp_mask(sk, GFP_ATOMIC));
1185 if (__inet_inherit_port(sk, newsk) < 0) {
1186 inet_csk_prepare_forced_close(newsk);
1190 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1192 tcp_move_syn(newtp, req);
1194 /* Clone pktoptions received with SYN, if we own the req */
1195 if (ireq->pktopts) {
1196 newnp->pktoptions = skb_clone(ireq->pktopts,
1197 sk_gfp_mask(sk, GFP_ATOMIC));
1198 consume_skb(ireq->pktopts);
1199 ireq->pktopts = NULL;
1200 if (newnp->pktoptions) {
1201 tcp_v6_restore_cb(newnp->pktoptions);
1202 skb_set_owner_r(newnp->pktoptions, newsk);
	/* Overflow failure path (label elided). */
1210 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1218 /* The socket must have it's spinlock held when we get
1219 * here, unless it is a TCP_LISTEN socket.
1221 * We have a potential double-lock case here, so even when
1222 * doing backlog processing we use the BH locking scheme.
1223 * This is because we cannot sleep with the original spinlock
1226 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1228 struct ipv6_pinfo *np = inet6_sk(sk);
1229 struct tcp_sock *tp;
1230 struct sk_buff *opt_skb = NULL;
1232 /* Imagine: socket is IPv6. IPv4 packet arrives,
1233 goes to IPv4 receive handler and backlogged.
1234 From backlog it always goes here. Kerboom...
1235 Fortunately, tcp_rcv_established and rcv_established
1236 handle them correctly, but it is not case with
1237 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1240 if (skb->protocol == htons(ETH_P_IP))
1241 return tcp_v4_do_rcv(sk, skb);
1243 if (tcp_filter(sk, skb))
1247 * socket locking is here for SMP purposes as backlog rcv
1248 * is currently called with bh processing disabled.
1251 /* Do Stevens' IPV6_PKTOPTIONS.
1253 Yes, guys, it is the only place in our code, where we
1254 may make it not affecting IPv4.
1255 The rest of code is protocol independent,
1256 and I do not like idea to uglify IPv4.
1258 Actually, all the idea behind IPV6_PKTOPTIONS
1259 looks not very well thought. For now we latch
1260 options, received in the last packet, enqueued
1261 by tcp. Feel free to propose better solution.
	/* Clone the skb so options can be latched after processing
	 * (guard condition elided from this view).
	 */
1265 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1267 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1268 struct dst_entry *dst = sk->sk_rx_dst;
1270 sock_rps_save_rxhash(sk, skb);
1271 sk_mark_napi_id(sk, skb);
	/* Drop a cached rx dst that no longer matches or went stale. */
1273 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1274 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1276 sk->sk_rx_dst = NULL;
1280 tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1282 goto ipv6_pktoptions;
1286 if (tcp_checksum_complete(skb))
1289 if (sk->sk_state == TCP_LISTEN) {
1290 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
	/* New child from a syncookie: hand the segment to it. */
1296 if (tcp_child_process(sk, nsk, skb))
1299 __kfree_skb(opt_skb);
1303 sock_rps_save_rxhash(sk, skb);
1305 if (tcp_rcv_state_process(sk, skb))
1308 goto ipv6_pktoptions;
	/* reset/discard tail (labels elided in this view). */
1312 tcp_v6_send_reset(sk, skb);
1315 __kfree_skb(opt_skb);
1319 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1320 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1325 /* Do you ask, what is it?
1327 1. skb was enqueued by tcp.
1328 2. skb is added to tail of read queue, rather than out of order.
1329 3. socket is not in passive state.
1330 4. Finally, it really contains options, which user wants to receive.
1333 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1334 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1335 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1336 np->mcast_oif = tcp_v6_iif(opt_skb);
1337 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1338 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1339 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1340 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1342 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1343 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1344 skb_set_owner_r(opt_skb, sk);
1345 tcp_v6_restore_cb(opt_skb);
	/* Latch the newest pktoptions; free whatever was there before. */
1346 opt_skb = xchg(&np->pktoptions, opt_skb);
1348 __kfree_skb(opt_skb);
1349 opt_skb = xchg(&np->pktoptions, NULL);
/* Populate TCP_SKB_CB from the parsed IPv6/TCP headers: save IP6CB into
 * the header union (must happen after xfrm6_policy_check), then record
 * seq/end_seq/ack_seq, flags, the DS field and clear tw_isn/sacked.
 */
1357 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1358 const struct tcphdr *th)
1360 /* This is tricky: we move IP6CB at its correct location into
1361 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1362 * _decode_session6() uses IP6CB().
1363 * barrier() makes sure compiler won't play aliasing games.
1365 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1366 sizeof(struct inet6_skb_parm));
1369 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	/* end_seq counts SYN and FIN as one sequence unit each. */
1370 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1371 skb->len - th->doff*4);
1372 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1373 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1374 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1375 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1376 TCP_SKB_CB(skb)->sacked = 0;
/* tcp_v6_rcv(): main IPv6 TCP receive entry point, invoked from the inet6
 * protocol handler for every incoming TCP/IPv6 segment. Validates the
 * header, looks up the owning socket, and dispatches to the request-socket,
 * listener, established, or time-wait handling paths.
 * NOTE(review): many physical lines are elided in this extract (goto labels
 * such as no_tcp_socket/discard_it/do_time_wait, brace closures, several
 * statements) — confirm control flow against the complete file.
 */
1379 static int tcp_v6_rcv(struct sk_buff *skb)
1381 const struct tcphdr *th;
1382 const struct ipv6hdr *hdr;
1386 struct net *net = dev_net(skb->dev);
/* Only process packets addressed to this host. */
1388 if (skb->pkt_type != PACKET_HOST)
1392 * Count it even if it's bad.
1394 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
/* Header validation: linearize the minimal header, reject a data offset
 * smaller than the fixed header, then pull the full header incl. options.
 */
1396 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1399 th = (const struct tcphdr *)skb->data;
1401 if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1403 if (!pskb_may_pull(skb, th->doff*4))
1406 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
/* Re-read header pointers: pskb_may_pull() may have moved skb->data. */
1409 th = (const struct tcphdr *)skb->data;
1410 hdr = ipv6_hdr(skb);
/* Socket lookup by 4-tuple (plus interface) in the global TCP hash. */
1413 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1414 th->source, th->dest, inet6_iif(skb),
1420 if (sk->sk_state == TCP_TIME_WAIT)
/* SYN-ACK completion path: the lookup returned a request socket. */
1423 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1424 struct request_sock *req = inet_reqsk(sk);
1427 sk = req->rsk_listener;
1428 tcp_v6_fill_cb(skb, hdr, th);
1429 if (tcp_v6_inbound_md5_hash(sk, skb)) {
1430 sk_drops_add(sk, skb);
/* Listener left LISTEN underneath us: drop the request and retry lookup. */
1434 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1435 inet_csk_reqsk_queue_drop_and_put(sk, req);
1440 nsk = tcp_check_req(sk, skb, req, false);
1443 goto discard_and_relse;
1447 tcp_v6_restore_cb(skb);
1448 } else if (tcp_child_process(sk, nsk, skb)) {
1449 tcp_v6_send_reset(nsk, skb);
1450 goto discard_and_relse;
/* Enforce IPV6_MINHOPCOUNT (generalized-TTL security, cf. RFC 5082). */
1456 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1457 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1458 goto discard_and_relse;
1461 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1462 goto discard_and_relse;
1464 tcp_v6_fill_cb(skb, hdr, th);
1466 if (tcp_v6_inbound_md5_hash(sk, skb))
1467 goto discard_and_relse;
/* Socket filter (BPF) may trim the skb; re-read header pointers after. */
1469 if (tcp_filter(sk, skb))
1470 goto discard_and_relse;
1471 th = (const struct tcphdr *)skb->data;
1472 hdr = ipv6_hdr(skb);
1476 if (sk->sk_state == TCP_LISTEN) {
1477 ret = tcp_v6_do_rcv(sk, skb);
1478 goto put_and_return;
1481 sk_incoming_cpu_update(sk);
/* Established path: process directly if the socket is not owned by user
 * context, otherwise queue on the backlog (bounded by tcp_add_backlog()).
 */
1483 bh_lock_sock_nested(sk);
1484 tcp_segs_in(tcp_sk(sk), skb);
1486 if (!sock_owned_by_user(sk)) {
1487 if (!tcp_prequeue(sk, skb))
1488 ret = tcp_v6_do_rcv(sk, skb);
1489 } else if (tcp_add_backlog(sk, skb)) {
1490 goto discard_and_relse;
1497 return ret ? -1 : 0;
/* No-socket path: verify policy/checksum, then answer with a RST. */
1500 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1503 tcp_v6_fill_cb(skb, hdr, th);
1505 if (tcp_checksum_complete(skb)) {
1507 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1509 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1511 tcp_v6_send_reset(NULL, skb);
1519 sk_drops_add(sk, skb);
/* do_time_wait path (label elided in this extract). */
1525 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1526 inet_twsk_put(inet_twsk(sk));
1530 tcp_v6_fill_cb(skb, hdr, th);
1532 if (tcp_checksum_complete(skb)) {
1533 inet_twsk_put(inet_twsk(sk));
1537 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
/* TCP_TW_SYN (case label elided): a new SYN hit a time-wait socket —
 * try to hand it to a current listener on the same port.
 */
1542 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1543 skb, __tcp_hdrlen(th),
1544 &ipv6_hdr(skb)->saddr, th->source,
1545 &ipv6_hdr(skb)->daddr,
1546 ntohs(th->dest), tcp_v6_iif(skb));
1548 struct inet_timewait_sock *tw = inet_twsk(sk);
1549 inet_twsk_deschedule_put(tw);
1551 tcp_v6_restore_cb(skb);
1555 /* Fall through to ACK */
1558 tcp_v6_timewait_ack(sk, skb);
/* TCP_TW_RST (case label elided): reset and tear down the tw socket. */
1561 tcp_v6_restore_cb(skb);
1562 tcp_v6_send_reset(sk, skb);
1563 inet_twsk_deschedule_put(inet_twsk(sk));
1565 case TCP_TW_SUCCESS:
/* tcp_v6_early_demux(): best-effort early socket lookup performed before
 * routing. If an established socket matching the 4-tuple is found, attach
 * it (and, when still valid, its cached rx dst) to the skb so the rest of
 * the input path can skip the lookup and route resolution.
 * NOTE(review): several physical lines are elided here (th assignment,
 * brace closures, part of the dst validity check) — confirm against the
 * complete file.
 */
1571 static void tcp_v6_early_demux(struct sk_buff *skb)
1573 const struct ipv6hdr *hdr;
1574 const struct tcphdr *th;
1577 if (skb->pkt_type != PACKET_HOST)
1580 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1583 hdr = ipv6_hdr(skb);
1586 if (th->doff < sizeof(struct tcphdr) / 4)
1589 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1590 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1591 &hdr->saddr, th->source,
1592 &hdr->daddr, ntohs(th->dest),
/* Found a full socket: hand the reference to the skb and, if the cached
 * rx dst still checks out for this incoming interface, attach it norefed.
 */
1596 skb->destructor = sock_edemux;
1597 if (sk_fullsock(sk)) {
1598 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1601 dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
1603 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1604 skb_dst_set_noref(skb, dst);
/* Time-wait socket operations for TCP/IPv6: object size plus the shared
 * TCP uniqueness/destructor hooks.
 */
1609 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1610 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1611 .twsk_unique = tcp_twsk_unique,
1612 .twsk_destructor = tcp_twsk_destructor,
/* Address-family operations for native TCP-over-IPv6 sockets: IPv6
 * transmit/checksum/routing callbacks wired into the connection socket.
 */
1615 static const struct inet_connection_sock_af_ops ipv6_specific = {
1616 .queue_xmit = inet6_csk_xmit,
1617 .send_check = tcp_v6_send_check,
1618 .rebuild_header = inet6_sk_rebuild_header,
1619 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1620 .conn_request = tcp_v6_conn_request,
1621 .syn_recv_sock = tcp_v6_syn_recv_sock,
1622 .net_header_len = sizeof(struct ipv6hdr),
1623 .net_frag_header_len = sizeof(struct frag_hdr),
1624 .setsockopt = ipv6_setsockopt,
1625 .getsockopt = ipv6_getsockopt,
1626 .addr2sockaddr = inet6_csk_addr2sockaddr,
1627 .sockaddr_len = sizeof(struct sockaddr_in6),
1628 #ifdef CONFIG_COMPAT
1629 .compat_setsockopt = compat_ipv6_setsockopt,
1630 .compat_getsockopt = compat_ipv6_getsockopt,
1632 .mtu_reduced = tcp_v6_mtu_reduced,
1635 #ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 (RFC 2385) helpers for native IPv6 sockets. */
1636 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1637 .md5_lookup = tcp_v6_md5_lookup,
1638 .calc_md5_hash = tcp_v6_md5_hash_skb,
1639 .md5_parse = tcp_v6_parse_md5_keys,
1644 * TCP over IPv4 via INET6 API
/* Address-family operations for IPv4-mapped connections on an AF_INET6
 * socket: transmit-side callbacks come from the IPv4 stack, while
 * socket-option and sockaddr handling stay IPv6.
 */
1646 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1647 .queue_xmit = ip_queue_xmit,
1648 .send_check = tcp_v4_send_check,
1649 .rebuild_header = inet_sk_rebuild_header,
1650 .sk_rx_dst_set = inet_sk_rx_dst_set,
1651 .conn_request = tcp_v6_conn_request,
1652 .syn_recv_sock = tcp_v6_syn_recv_sock,
1653 .net_header_len = sizeof(struct iphdr),
1654 .setsockopt = ipv6_setsockopt,
1655 .getsockopt = ipv6_getsockopt,
1656 .addr2sockaddr = inet6_csk_addr2sockaddr,
1657 .sockaddr_len = sizeof(struct sockaddr_in6),
1658 #ifdef CONFIG_COMPAT
1659 .compat_setsockopt = compat_ipv6_setsockopt,
1660 .compat_getsockopt = compat_ipv6_getsockopt,
1662 .mtu_reduced = tcp_v4_mtu_reduced,
1665 #ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 helpers for IPv4-mapped sockets: hashing over the IPv4 pseudo
 * header, key parsing via the IPv6 sockopt path.
 */
1666 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1667 .md5_lookup = tcp_v4_md5_lookup,
1668 .calc_md5_hash = tcp_v4_md5_hash_skb,
1669 .md5_parse = tcp_v6_parse_md5_keys,
1673 /* NOTE: A lot of things set to zero explicitly by call to
1674 * sk_alloc() so need not be done here.
/* tcp_v6_init_sock(): per-socket init for AF_INET6 TCP — install the IPv6
 * af_ops (and MD5 ops when configured). Generic TCP init (elided in this
 * extract) is presumably also invoked here — confirm against the full file.
 */
1676 static int tcp_v6_init_sock(struct sock *sk)
1678 struct inet_connection_sock *icsk = inet_csk(sk);
1682 icsk->icsk_af_ops = &ipv6_specific;
1684 #ifdef CONFIG_TCP_MD5SIG
1685 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
/* tcp_v6_destroy_sock(): tear down the shared TCP state, then release the
 * IPv6-specific socket state.
 */
1691 static void tcp_v6_destroy_sock(struct sock *sk)
1693 tcp_v4_destroy_sock(sk);
1694 inet6_destroy_sock(sk);
1697 #ifdef CONFIG_PROC_FS
1698 /* Proc filesystem TCPv6 sock list dumping. */
/* get_openreq6(): emit one /proc/net/tcp6 row for a pending open request
 * (SYN_RECV), index i. Format mirrors the established-socket rows; fields
 * with no request-socket equivalent are printed as constants.
 */
1699 static void get_openreq6(struct seq_file *seq,
1700 const struct request_sock *req, int i)
/* ttd: ticks until the SYN-ACK retransmit timer fires (may be negative). */
1702 long ttd = req->rsk_timer.expires - jiffies;
1703 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1704 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1710 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1711 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1713 src->s6_addr32[0], src->s6_addr32[1],
1714 src->s6_addr32[2], src->s6_addr32[3],
1715 inet_rsk(req)->ir_num,
1716 dest->s6_addr32[0], dest->s6_addr32[1],
1717 dest->s6_addr32[2], dest->s6_addr32[3],
1718 ntohs(inet_rsk(req)->ir_rmt_port),
1720 0, 0, /* could print option size, but that is af dependent. */
1721 1, /* timers active (only the expire timer) */
1722 jiffies_to_clock_t(ttd),
/* UID of the listener owning this request, mapped into the reader's ns. */
1724 from_kuid_munged(seq_user_ns(seq),
1725 sock_i_uid(req->rsk_listener)),
1726 0, /* non standard timer */
1727 0, /* open_requests have no inode */
/* get_tcp6_sock(): emit one /proc/net/tcp6 row for a full TCP socket,
 * index i: addresses/ports, state, queue sizes, timer info, uid, refcount,
 * and congestion-related extras.
 * NOTE(review): several physical lines (timer-kind assignments, brace
 * closures, some printed fields) are elided in this extract — confirm the
 * exact format arguments against the complete file.
 */
1731 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1733 const struct in6_addr *dest, *src;
1736 unsigned long timer_expires;
1737 const struct inet_sock *inet = inet_sk(sp);
1738 const struct tcp_sock *tp = tcp_sk(sp);
1739 const struct inet_connection_sock *icsk = inet_csk(sp);
1740 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1744 dest = &sp->sk_v6_daddr;
1745 src = &sp->sk_v6_rcv_saddr;
1746 destp = ntohs(inet->inet_dport);
1747 srcp = ntohs(inet->inet_sport);
/* Classify the active timer: retransmit-class, zero-window probe, or
 * keepalive (sk_timer); otherwise report "no timer" with expiry = now.
 */
1749 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1750 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1751 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1753 timer_expires = icsk->icsk_timeout;
1754 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1756 timer_expires = icsk->icsk_timeout;
1757 } else if (timer_pending(&sp->sk_timer)) {
1759 timer_expires = sp->sk_timer.expires;
1762 timer_expires = jiffies;
/* Lockless state read; for listeners rx_queue is the accept backlog. */
1765 state = sk_state_load(sp);
1766 if (state == TCP_LISTEN)
1767 rx_queue = sp->sk_ack_backlog;
1769 /* Because we don't lock the socket,
1770 * we might find a transient negative value.
1772 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
1775 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1776 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1778 src->s6_addr32[0], src->s6_addr32[1],
1779 src->s6_addr32[2], src->s6_addr32[3], srcp,
1780 dest->s6_addr32[0], dest->s6_addr32[1],
1781 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1783 tp->write_seq - tp->snd_una,
1786 jiffies_delta_to_clock_t(timer_expires - jiffies),
1787 icsk->icsk_retransmits,
1788 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1789 icsk->icsk_probes_out,
1791 atomic_read(&sp->sk_refcnt), sp,
1792 jiffies_to_clock_t(icsk->icsk_rto),
1793 jiffies_to_clock_t(icsk->icsk_ack.ato),
1794 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
/* Last column: fastopen max_qlen for listeners, else -1 while in initial
 * slow start, else the slow-start threshold.
 */
1796 state == TCP_LISTEN ?
1797 fastopenq->max_qlen :
1798 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
/* get_timewait6_sock(): emit one /proc/net/tcp6 row for a time-wait
 * socket, index i. Queue/uid/inode fields are printed as zeros; the timer
 * column reports the remaining time-wait interval.
 */
1802 static void get_timewait6_sock(struct seq_file *seq,
1803 struct inet_timewait_sock *tw, int i)
/* delta: ticks until the time-wait timer expires (may be negative). */
1805 long delta = tw->tw_timer.expires - jiffies;
1806 const struct in6_addr *dest, *src;
1809 dest = &tw->tw_v6_daddr;
1810 src = &tw->tw_v6_rcv_saddr;
1811 destp = ntohs(tw->tw_dport);
1812 srcp = ntohs(tw->tw_sport);
1815 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1816 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1818 src->s6_addr32[0], src->s6_addr32[1],
1819 src->s6_addr32[2], src->s6_addr32[3], srcp,
1820 dest->s6_addr32[0], dest->s6_addr32[1],
1821 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1822 tw->tw_substate, 0, 0,
1823 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1824 atomic_read(&tw->tw_refcnt), tw);
/* tcp6_seq_show(): seq_file show callback for /proc/net/tcp6 — print the
 * header row for SEQ_START_TOKEN, otherwise dispatch on socket state to
 * the matching row formatter (time-wait, open request, or full socket).
 */
1827 static int tcp6_seq_show(struct seq_file *seq, void *v)
1829 struct tcp_iter_state *st;
1830 struct sock *sk = v;
1832 if (v == SEQ_START_TOKEN) {
1837 "st tx_queue rx_queue tr tm->when retrnsmt"
1838 " uid timeout inode\n");
1843 if (sk->sk_state == TCP_TIME_WAIT)
1844 get_timewait6_sock(seq, v, st->num);
1845 else if (sk->sk_state == TCP_NEW_SYN_RECV)
1846 get_openreq6(seq, v, st->num);
1848 get_tcp6_sock(seq, v, st->num);
/* File operations backing the /proc/net/tcp6 seq_file. */
1853 static const struct file_operations tcp6_afinfo_seq_fops = {
1854 .owner = THIS_MODULE,
1855 .open = tcp_seq_open,
1857 .llseek = seq_lseek,
1858 .release = seq_release_net
/* Address-family descriptor tying the tcp6 seq_file fops and show callback
 * into the shared TCP /proc iterator.
 */
1861 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1864 .seq_fops = &tcp6_afinfo_seq_fops,
1866 .show = tcp6_seq_show,
/* tcp6_proc_init(): register /proc/net/tcp6 for this network namespace. */
1870 int __net_init tcp6_proc_init(struct net *net)
1872 return tcp_proc_register(net, &tcp6_seq_afinfo);
/* tcp6_proc_exit(): unregister /proc/net/tcp6 for this network namespace. */
1875 void tcp6_proc_exit(struct net *net)
1877 tcp_proc_unregister(net, &tcp6_seq_afinfo);
/* struct proto for AF_INET6/SOCK_STREAM: wires the IPv6-specific entry
 * points (init/destroy/backlog_rcv) into the shared TCP implementation and
 * shares the global TCP hash tables, sysctls, and memory accounting.
 */
1881 struct proto tcpv6_prot = {
1883 .owner = THIS_MODULE,
1885 .connect = tcp_v6_connect,
1886 .disconnect = tcp_disconnect,
1887 .accept = inet_csk_accept,
1889 .init = tcp_v6_init_sock,
1890 .destroy = tcp_v6_destroy_sock,
1891 .shutdown = tcp_shutdown,
1892 .setsockopt = tcp_setsockopt,
1893 .getsockopt = tcp_getsockopt,
1894 .keepalive = tcp_set_keepalive,
1895 .recvmsg = tcp_recvmsg,
1896 .sendmsg = tcp_sendmsg,
1897 .sendpage = tcp_sendpage,
1898 .backlog_rcv = tcp_v6_do_rcv,
1899 .release_cb = tcp_release_cb,
1901 .unhash = inet_unhash,
1902 .get_port = inet_csk_get_port,
1903 .enter_memory_pressure = tcp_enter_memory_pressure,
1904 .stream_memory_free = tcp_stream_memory_free,
1905 .sockets_allocated = &tcp_sockets_allocated,
1906 .memory_allocated = &tcp_memory_allocated,
1907 .memory_pressure = &tcp_memory_pressure,
1908 .orphan_count = &tcp_orphan_count,
1909 .sysctl_mem = sysctl_tcp_mem,
1910 .sysctl_wmem = sysctl_tcp_wmem,
1911 .sysctl_rmem = sysctl_tcp_rmem,
1912 .max_header = MAX_TCP_HEADER,
1913 .obj_size = sizeof(struct tcp6_sock),
1914 .slab_flags = SLAB_DESTROY_BY_RCU,
1915 .twsk_prot = &tcp6_timewait_sock_ops,
1916 .rsk_prot = &tcp6_request_sock_ops,
1917 .h.hashinfo = &tcp_hashinfo,
1918 .no_autobind = true,
1919 #ifdef CONFIG_COMPAT
1920 .compat_setsockopt = compat_tcp_setsockopt,
1921 .compat_getsockopt = compat_tcp_getsockopt,
1923 .diag_destroy = tcp_abort,
/* inet6 protocol hooks for IPPROTO_TCP: receive handler, ICMPv6 error
 * handler, and the early-demux fast path.
 */
1926 static struct inet6_protocol tcpv6_protocol = {
1927 .early_demux = tcp_v6_early_demux,
1928 .early_demux_handler = tcp_v6_early_demux,
1929 .handler = tcp_v6_rcv,
1930 .err_handler = tcp_v6_err,
1931 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/* protosw entry mapping SOCK_STREAM/IPPROTO_TCP on AF_INET6 to tcpv6_prot
 * and the generic inet6 stream socket ops.
 */
1934 static struct inet_protosw tcpv6_protosw = {
1935 .type = SOCK_STREAM,
1936 .protocol = IPPROTO_TCP,
1937 .prot = &tcpv6_prot,
1938 .ops = &inet6_stream_ops,
1939 .flags = INET_PROTOSW_PERMANENT |
/* tcpv6_net_init(): per-netns setup — create the kernel control socket
 * used e.g. for sending resets.
 */
1943 static int __net_init tcpv6_net_init(struct net *net)
1945 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
1946 SOCK_RAW, IPPROTO_TCP, net);
/* tcpv6_net_exit(): per-netns teardown — destroy the control socket. */
1949 static void __net_exit tcpv6_net_exit(struct net *net)
1951 inet_ctl_sock_destroy(net->ipv6.tcp_sk)
/* tcpv6_net_exit_batch(): flush remaining IPv6 time-wait sockets when a
 * batch of network namespaces is dismantled.
 */
1954 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
1956 inet_twsk_purge(&tcp_hashinfo, AF_INET6);
/* Per-network-namespace lifecycle operations for TCP/IPv6. */
1959 static struct pernet_operations tcpv6_net_ops = {
1960 .init = tcpv6_net_init,
1961 .exit = tcpv6_net_exit,
1962 .exit_batch = tcpv6_net_exit_batch,
/* tcpv6_init(): module init — register the inet6 protocol handler, the
 * protosw entry, and the pernet operations, unwinding in reverse order on
 * failure (error labels partially elided in this extract).
 */
1965 int __init tcpv6_init(void)
1969 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
1973 /* register inet6 protocol */
1974 ret = inet6_register_protosw(&tcpv6_protosw);
1976 goto out_tcpv6_protocol;
1978 ret = register_pernet_subsys(&tcpv6_net_ops);
1980 goto out_tcpv6_protosw;
/* Error unwinding: undo registrations in reverse order. */
1985 inet6_unregister_protosw(&tcpv6_protosw);
1987 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
1991 void tcpv6_exit(void)
1993 unregister_pernet_subsys(&tcpv6_net_ops);
1994 inet6_unregister_protosw(&tcpv6_protosw);
1995 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);