3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
17 * a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
42 #include <linux/uaccess.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/inet_common.h>
63 #include <net/secure_seq.h>
64 #include <net/busy_poll.h>
66 #include <linux/proc_fs.h>
67 #include <linux/seq_file.h>
69 #include <crypto/hash.h>
70 #include <linux/scatterlist.h>
/* Forward declarations for functions and af_ops tables defined later in
 * this file; needed because the ops tables reference them before their
 * definitions. */
72 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
73 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
74 struct request_sock *req);
76 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
78 static const struct inet_connection_sock_af_ops ipv6_mapped;
79 static const struct inet_connection_sock_af_ops ipv6_specific;
80 #ifdef CONFIG_TCP_MD5SIG
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
82 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
/* MD5 key lookup by peer IPv6 address; real body appears further below.
 * NOTE(review): the matching #else/#endif of this CONFIG_TCP_MD5SIG
 * conditional is not visible in this chunk — lines appear elided. */
84 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
85 const struct in6_addr *addr)
/* Cache the skb's validated input route on the socket for the fast
 * receive path: grab a reference on the dst (dst_hold_safe() can fail if
 * the dst is being released) and record the ingress ifindex plus a route
 * cookie so later packets can cheaply re-validate the cached dst.
 * NOTE(review): interior lines (e.g. the sk->sk_rx_dst assignment and
 * closing braces) are elided from this chunk. */
91 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
93 struct dst_entry *dst = skb_dst(skb);
95 if (dst && dst_hold_safe(dst)) {
96 const struct rt6_info *rt = (const struct rt6_info *)dst;
99 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
100 inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
/* Derive the initial sequence number (and timestamp offset via *tsoff)
 * for an incoming SYN from the packet's address/port 4-tuple, using the
 * secure (keyed-hash) ISN generator to resist sequence prediction.
 * NOTE(review): the daddr/saddr swap relative to the header is
 * intentional — the ISN is computed from the responder's perspective. */
104 static u32 tcp_v6_init_sequence(const struct sk_buff *skb, u32 *tsoff)
106 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
107 ipv6_hdr(skb)->saddr.s6_addr32,
109 tcp_hdr(skb)->source, tsoff);
/* tcp_v6_connect() - active open of a TCP connection from an AF_INET6
 * socket.  Validates the destination sockaddr, resolves flow labels and
 * link-local scope, handles v4-mapped destinations by delegating to
 * tcp_v4_connect(), routes the flow, picks a source address, assigns a
 * secure initial sequence number and sends the SYN via tcp_connect().
 * Returns 0 or a negative errno.
 * NOTE(review): a significant number of interior lines (returns, labels,
 * closing braces) are elided from this chunk; comments below annotate
 * only the visible statements. */
112 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
115 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
116 struct inet_sock *inet = inet_sk(sk);
117 struct inet_connection_sock *icsk = inet_csk(sk);
118 struct ipv6_pinfo *np = inet6_sk(sk);
119 struct tcp_sock *tp = tcp_sk(sk);
120 struct in6_addr *saddr = NULL, *final_p, final;
121 struct ipv6_txoptions *opt;
123 struct dst_entry *dst;
/* Basic sockaddr validation: length and family. */
127 if (addr_len < SIN6_LEN_RFC2133)
130 if (usin->sin6_family != AF_INET6)
131 return -EAFNOSUPPORT;
133 memset(&fl6, 0, sizeof(fl6));
/* Adopt the caller-supplied flow label (masked) and, if a label is set,
 * require that the socket actually owns it via fl6_sock_lookup(). */
136 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
137 IP6_ECN_flow_init(fl6.flowlabel);
138 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
139 struct ip6_flowlabel *flowlabel;
140 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
143 fl6_sock_release(flowlabel);
148 * connect() to INADDR_ANY means loopback (BSD'ism).
151 if (ipv6_addr_any(&usin->sin6_addr)) {
152 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
153 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
156 usin->sin6_addr = in6addr_loopback;
159 addr_type = ipv6_addr_type(&usin->sin6_addr);
/* Multicast destinations can never be connected to. */
161 if (addr_type & IPV6_ADDR_MULTICAST)
/* Link-local destinations need a well-defined interface: either from
 * sin6_scope_id (which must agree with any existing binding) or from a
 * prior SO_BINDTODEVICE. */
164 if (addr_type&IPV6_ADDR_LINKLOCAL) {
165 if (addr_len >= sizeof(struct sockaddr_in6) &&
166 usin->sin6_scope_id) {
167 /* If interface is set while binding, indices
170 if (sk->sk_bound_dev_if &&
171 sk->sk_bound_dev_if != usin->sin6_scope_id)
174 sk->sk_bound_dev_if = usin->sin6_scope_id;
177 /* Connect to link-local address requires an interface */
178 if (!sk->sk_bound_dev_if)
/* Reconnecting to a different peer invalidates cached TS state. */
182 if (tp->rx_opt.ts_recent_stamp &&
183 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
184 tp->rx_opt.ts_recent = 0;
185 tp->rx_opt.ts_recent_stamp = 0;
189 sk->sk_v6_daddr = usin->sin6_addr;
190 np->flow_label = fl6.flowlabel;
/* v4-mapped destination: this is really an IPv4 connection.  Switch the
 * af_ops/backlog handler to the mapped variants and delegate to
 * tcp_v4_connect(); on failure the visible code restores the IPv6 ops. */
196 if (addr_type & IPV6_ADDR_MAPPED) {
197 u32 exthdrlen = icsk->icsk_ext_hdr_len;
198 struct sockaddr_in sin;
200 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
202 if (__ipv6_only_sock(sk))
205 sin.sin_family = AF_INET;
206 sin.sin_port = usin->sin6_port;
207 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
209 icsk->icsk_af_ops = &ipv6_mapped;
210 sk->sk_backlog_rcv = tcp_v4_do_rcv;
211 #ifdef CONFIG_TCP_MD5SIG
212 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
215 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
218 icsk->icsk_ext_hdr_len = exthdrlen;
219 icsk->icsk_af_ops = &ipv6_specific;
220 sk->sk_backlog_rcv = tcp_v6_do_rcv;
221 #ifdef CONFIG_TCP_MD5SIG
222 tp->af_specific = &tcp_sock_ipv6_specific;
226 np->saddr = sk->sk_v6_rcv_saddr;
/* Pure IPv6 path: build the flow, look up the route, and let the route
 * choose the source address when the socket has none bound yet. */
231 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
232 saddr = &sk->sk_v6_rcv_saddr;
234 fl6.flowi6_proto = IPPROTO_TCP;
235 fl6.daddr = sk->sk_v6_daddr;
236 fl6.saddr = saddr ? *saddr : np->saddr;
237 fl6.flowi6_oif = sk->sk_bound_dev_if;
238 fl6.flowi6_mark = sk->sk_mark;
239 fl6.fl6_dport = usin->sin6_port;
240 fl6.fl6_sport = inet->inet_sport;
241 fl6.flowi6_uid = sk->sk_uid;
/* np->opt is RCU-protected; safe to dereference under the socket lock. */
243 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
244 final_p = fl6_update_dst(&fl6, opt, &final);
246 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
248 dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
256 sk->sk_v6_rcv_saddr = *saddr;
259 /* set the source address */
261 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
263 sk->sk_gso_type = SKB_GSO_TCPV6;
264 ip6_dst_store(sk, dst, NULL, NULL);
/* Optionally recover timestamp state from a recent TIME-WAIT socket for
 * the same destination (tw_recycle). */
266 if (tcp_death_row.sysctl_tw_recycle &&
267 !tp->rx_opt.ts_recent_stamp &&
268 ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr))
269 tcp_fetch_timewait_stamp(sk, dst);
271 icsk->icsk_ext_hdr_len = 0;
273 icsk->icsk_ext_hdr_len = opt->opt_flen +
276 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
278 inet->inet_dport = usin->sin6_port;
/* Move to SYN-SENT, pick an ephemeral port/hash the socket, then
 * generate the secure ISN (only if not set and not a repair socket). */
280 tcp_set_state(sk, TCP_SYN_SENT);
281 err = inet6_hash_connect(&tcp_death_row, sk);
287 if (!tp->write_seq && likely(!tp->repair))
288 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
289 sk->sk_v6_daddr.s6_addr32,
294 err = tcp_connect(sk);
/* Failure path: undo connection state so the socket is reusable. */
301 tcp_set_state(sk, TCP_CLOSE);
304 inet->inet_dport = 0;
305 sk->sk_route_caps = 0;
/* React to a PMTU decrease (ICMPv6 Packet Too Big): update the cached
 * route's PMTU and, if our current MSS-derived cookie now exceeds the
 * path MTU, shrink the MSS and retransmit outstanding data.  No-op for
 * LISTEN/CLOSE sockets, which have no path state to fix. */
309 static void tcp_v6_mtu_reduced(struct sock *sk)
311 struct dst_entry *dst;
313 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
316 dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
320 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
321 tcp_sync_mss(sk, dst_mtu(dst));
322 tcp_simple_retransmit(sk);
/* ICMPv6 error handler for TCP.  Locates the socket referenced by the
 * embedded TCP header, filters out stale/out-of-window errors, and acts
 * on the error type: redirects update the route, Packet Too Big drives
 * the PMTU machinery, everything else is converted to an errno and
 * delivered to the socket (immediately if it is unlocked, deferred via
 * sk_err_soft otherwise).
 * NOTE(review): interior lines (lock/unlock, gotos, several closing
 * braces) are elided from this chunk. */
326 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
327 u8 type, u8 code, int offset, __be32 info)
329 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
330 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
331 struct net *net = dev_net(skb->dev);
332 struct request_sock *fastopen;
333 struct ipv6_pinfo *np;
/* Find the established (or timewait/new-syn-recv) socket for the
 * erroring 4-tuple. */
340 sk = __inet6_lookup_established(net, &tcp_hashinfo,
341 &hdr->daddr, th->dest,
342 &hdr->saddr, ntohs(th->source),
346 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
351 if (sk->sk_state == TCP_TIME_WAIT) {
352 inet_twsk_put(inet_twsk(sk));
355 seq = ntohl(th->seq);
356 fatal = icmpv6_err_convert(type, code, &err);
/* Request sockets get their own error path. */
357 if (sk->sk_state == TCP_NEW_SYN_RECV)
358 return tcp_req_err(sk, seq, fatal);
/* If userspace holds the lock we normally count a dropped ICMP —
 * except PKT_TOOBIG, which is deferred below instead. */
361 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
362 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS)
364 if (sk->sk_state == TCP_CLOSE)
/* min_hopcount filter (similar to IPv4 min TTL) guards against spoofed
 * ICMP from far away. */
367 if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
368 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
373 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
374 fastopen = tp->fastopen_rsk;
375 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
/* Ignore errors whose quoted sequence falls outside the send window. */
376 if (sk->sk_state != TCP_LISTEN &&
377 !between(seq, snd_una, tp->snd_nxt)) {
378 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
384 if (type == NDISC_REDIRECT) {
385 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
388 dst->ops->redirect(dst, sk, skb);
392 if (type == ICMPV6_PKT_TOOBIG) {
393 /* We are not interested in TCP_LISTEN and open_requests
394 * (SYN-ACKs send out by Linux are always <576bytes so
395 * they should go through unfragmented).
397 if (sk->sk_state == TCP_LISTEN)
400 if (!ip6_sk_accept_pmtu(sk))
/* Record the new MTU; apply now if unlocked, else defer to release. */
403 tp->mtu_info = ntohl(info);
404 if (!sock_owned_by_user(sk))
405 tcp_v6_mtu_reduced(sk);
406 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
413 /* Might be for an request_sock */
414 switch (sk->sk_state) {
417 /* Only in fast or simultaneous open. If a fast open socket is
418 * is already accepted it is treated as a connected one below.
420 if (fastopen && !fastopen->sk)
423 if (!sock_owned_by_user(sk)) {
425 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
429 sk->sk_err_soft = err;
/* Established sockets with IPV6_RECVERR get the error delivered;
 * otherwise it is only recorded softly. */
433 if (!sock_owned_by_user(sk) && np->recverr) {
435 sk->sk_error_report(sk);
437 sk->sk_err_soft = err;
/* Build and transmit a SYN-ACK for a pending request_sock.  Grabs a
 * route if the caller did not supply one, constructs the SYN-ACK skb,
 * computes its checksum against the request's addresses, applies any
 * per-request (or listener) IPv6 tx options, and sends via ip6_xmit().
 * Returns a net_xmit_* result (negative errno on routing failure).
 * NOTE(review): error-path braces/returns are elided in this chunk. */
445 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
447 struct request_sock *req,
448 struct tcp_fastopen_cookie *foc,
449 enum tcp_synack_type synack_type)
451 struct inet_request_sock *ireq = inet_rsk(req);
452 struct ipv6_pinfo *np = inet6_sk(sk);
453 struct ipv6_txoptions *opt;
454 struct flowi6 *fl6 = &fl->u.ip6;
458 /* First, grab a route. */
459 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
460 IPPROTO_TCP)) == NULL)
463 skb = tcp_make_synack(sk, dst, req, foc, synack_type);
466 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
467 &ireq->ir_v6_rmt_addr);
469 fl6->daddr = ireq->ir_v6_rmt_addr;
/* Reflect the client's flow label when IPV6_FLOWINFO reflection is on. */
470 if (np->repflow && ireq->pktopts)
471 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
/* Prefer per-request options saved at SYN time over the listener's. */
474 opt = ireq->ipv6_opt;
476 opt = rcu_dereference(np->opt);
477 err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass);
479 err = net_xmit_eval(err);
/* Free per-request IPv6 state when a request_sock dies: the duplicated
 * tx options and the saved SYN skb (pktopts). */
487 static void tcp_v6_reqsk_destructor(struct request_sock *req)
489 kfree(inet_rsk(req)->ipv6_opt);
490 kfree_skb(inet_rsk(req)->pktopts);
493 #ifdef CONFIG_TCP_MD5SIG
/* Look up the TCP-MD5 key configured for a given peer IPv6 address. */
494 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
495 const struct in6_addr *addr)
497 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
/* af_ops wrapper: key lookup keyed by the peer socket's daddr. */
500 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
501 const struct sock *addr_sk)
503 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
/* setsockopt(TCP_MD5SIG) handler: copy the tcp_md5sig request from
 * userspace and add or (when keylen == 0) delete the key for the given
 * peer.  v4-mapped peers are stored under AF_INET using the embedded
 * IPv4 address so they match packets received on the v4 path. */
506 static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
509 struct tcp_md5sig cmd;
510 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
512 if (optlen < sizeof(cmd))
515 if (copy_from_user(&cmd, optval, sizeof(cmd)))
518 if (sin6->sin6_family != AF_INET6)
/* Zero key length means "delete the key for this peer". */
521 if (!cmd.tcpm_keylen) {
522 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
523 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
525 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
529 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
532 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
533 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
534 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
536 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
537 AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
/* Feed the MD5 signature input prefix into the per-cpu hash request:
 * the IPv6 pseudo-header (addresses, length, protocol) followed by the
 * TCP header copied with its checksum field implicitly excluded per the
 * RFC 2385 computation.  Returns the crypto_ahash_update() result. */
540 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
541 const struct in6_addr *daddr,
542 const struct in6_addr *saddr,
543 const struct tcphdr *th, int nbytes)
545 struct tcp6_pseudohdr *bp;
546 struct scatterlist sg;
550 /* 1. TCP pseudo-header (RFC2460) */
553 bp->protocol = cpu_to_be32(IPPROTO_TCP);
554 bp->len = cpu_to_be32(nbytes);
/* Copy the TCP header right after the pseudo-header so both can be
 * hashed with a single scatterlist entry. */
556 _th = (struct tcphdr *)(bp + 1);
557 memcpy(_th, th, sizeof(*th));
560 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
561 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
562 sizeof(*bp) + sizeof(*th));
563 return crypto_ahash_update(hp->md5_req);
/* Compute the MD5 signature over headers + key only (no payload); used
 * for generated control segments such as RST/ACK replies.  On any
 * crypto failure the output digest is zeroed so comparison fails
 * safely.  NOTE(review): the clear_hash/clear_hash_noput labels are
 * elided from this chunk. */
566 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
567 const struct in6_addr *daddr, struct in6_addr *saddr,
568 const struct tcphdr *th)
570 struct tcp_md5sig_pool *hp;
571 struct ahash_request *req;
573 hp = tcp_get_md5sig_pool();
575 goto clear_hash_noput;
578 if (crypto_ahash_init(req))
580 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
582 if (tcp_md5_hash_key(hp, key))
584 ahash_request_set_crypt(req, NULL, md5_hash, 0);
585 if (crypto_ahash_final(req))
588 tcp_put_md5sig_pool();
592 tcp_put_md5sig_pool();
/* Error path: a zeroed digest can never validate. */
594 memset(md5_hash, 0, 16);
/* Compute the MD5 signature for a full segment (headers + payload + key)
 * as found in @skb.  Addresses come from the socket when one is given
 * (established/request sockets), otherwise from the packet's IPv6
 * header.  On crypto failure the digest is zeroed so comparison fails.
 * NOTE(review): error labels/braces are elided from this chunk. */
598 static int tcp_v6_md5_hash_skb(char *md5_hash,
599 const struct tcp_md5sig_key *key,
600 const struct sock *sk,
601 const struct sk_buff *skb)
603 const struct in6_addr *saddr, *daddr;
604 struct tcp_md5sig_pool *hp;
605 struct ahash_request *req;
606 const struct tcphdr *th = tcp_hdr(skb);
608 if (sk) { /* valid for establish/request sockets */
609 saddr = &sk->sk_v6_rcv_saddr;
610 daddr = &sk->sk_v6_daddr;
612 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
613 saddr = &ip6h->saddr;
614 daddr = &ip6h->daddr;
617 hp = tcp_get_md5sig_pool();
619 goto clear_hash_noput;
622 if (crypto_ahash_init(req))
/* Hash pseudo-header + TCP header, then the payload past the header,
 * then the shared key — matching the RFC 2385 input order. */
625 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
627 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
629 if (tcp_md5_hash_key(hp, key))
631 ahash_request_set_crypt(req, NULL, md5_hash, 0);
632 if (crypto_ahash_final(req))
635 tcp_put_md5sig_pool();
639 tcp_put_md5sig_pool();
/* Error path: a zeroed digest can never validate. */
641 memset(md5_hash, 0, 16);
/* Validate the TCP-MD5 option on an inbound segment against the key
 * configured for the sender.  Returns true when the packet must be
 * dropped: a key exists but the option is missing, the option is
 * present without a key, or the recomputed digest does not match.
 * Compiles to a no-op without CONFIG_TCP_MD5SIG.
 * NOTE(review): several returns/braces are elided from this chunk. */
647 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
648 const struct sk_buff *skb)
650 #ifdef CONFIG_TCP_MD5SIG
651 const __u8 *hash_location = NULL;
652 struct tcp_md5sig_key *hash_expected;
653 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
654 const struct tcphdr *th = tcp_hdr(skb);
658 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
659 hash_location = tcp_parse_md5sig_option(th);
661 /* We've parsed the options - do we have a hash? */
662 if (!hash_expected && !hash_location)
/* Key configured but segment unsigned: drop. */
665 if (hash_expected && !hash_location) {
666 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
/* Segment signed but no key configured: drop. */
670 if (!hash_expected && hash_location) {
671 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
675 /* check the signature */
676 genhash = tcp_v6_md5_hash_skb(newhash,
680 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
681 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
682 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
683 genhash ? "failed" : "mismatch",
684 &ip6h->saddr, ntohs(th->source),
685 &ip6h->daddr, ntohs(th->dest));
/* Populate the IPv6-specific fields of a new request_sock from the
 * incoming SYN: remote/local addresses, the ingress interface for
 * link-local peers, and — when the listener has any rx option or
 * flow-label reflection enabled — a reference to the SYN skb so its
 * options can be replayed to the child (pktopts). */
692 static void tcp_v6_init_req(struct request_sock *req,
693 const struct sock *sk_listener,
696 struct inet_request_sock *ireq = inet_rsk(req);
697 const struct ipv6_pinfo *np = inet6_sk(sk_listener);
699 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
700 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
702 /* So that link locals have meaning */
703 if (!sk_listener->sk_bound_dev_if &&
704 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
705 ireq->ir_iif = tcp_v6_iif(skb);
/* Keep the SYN skb alive (refcount bump) if its options may be needed;
 * skipped for TIME-WAIT recycled ISNs. */
707 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
708 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
709 np->rxopt.bits.rxinfo ||
710 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
711 np->rxopt.bits.rxohlim || np->repflow)) {
712 atomic_inc(&skb->users);
/* request_sock_ops route hook: resolve the route for a pending request
 * via the generic inet6 connection-sock helper. */
717 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
719 const struct request_sock *req,
724 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
/* Generic request_sock operations for IPv6 TCP: sizing, SYN-ACK
 * (re)transmission, ACK/RST generation and destruction. */
727 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
729 .obj_size = sizeof(struct tcp6_request_sock),
730 .rtx_syn_ack = tcp_rtx_synack,
731 .send_ack = tcp_v6_reqsk_send_ack,
732 .destructor = tcp_v6_reqsk_destructor,
733 .send_reset = tcp_v6_send_reset,
734 .syn_ack_timeout = tcp_syn_ack_timeout,
/* TCP-specific request_sock hooks for IPv6: MSS clamp derived from the
 * IPv6 minimum MTU, MD5 and syncookie hooks when configured, plus the
 * init/route/ISN/synack callbacks defined above. */
737 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
738 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
739 sizeof(struct ipv6hdr),
740 #ifdef CONFIG_TCP_MD5SIG
741 .req_md5_lookup = tcp_v6_md5_lookup,
742 .calc_md5_hash = tcp_v6_md5_hash_skb,
744 .init_req = tcp_v6_init_req,
745 #ifdef CONFIG_SYN_COOKIES
746 .cookie_init_seq = cookie_v6_init_sequence,
748 .route_req = tcp_v6_route_req,
749 .init_seq = tcp_v6_init_sequence,
750 .send_synack = tcp_v6_send_synack,
/* Build and send an unassociated TCP control segment (RST when @rst,
 * otherwise a bare ACK) in reply to @skb, using the per-netns control
 * socket.  Optional timestamp and MD5 options are appended; the reply's
 * addresses/ports are the incoming packet's, swapped.
 * NOTE(review): some allocation-failure and option-conditional lines
 * are elided from this chunk. */
753 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
754 u32 ack, u32 win, u32 tsval, u32 tsecr,
755 int oif, struct tcp_md5sig_key *key, int rst,
756 u8 tclass, __be32 label)
758 const struct tcphdr *th = tcp_hdr(skb);
760 struct sk_buff *buff;
762 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
763 struct sock *ctl_sk = net->ipv6.tcp_sk;
764 unsigned int tot_len = sizeof(struct tcphdr);
765 struct dst_entry *dst;
/* Grow the header for each option we will emit. */
769 tot_len += TCPOLEN_TSTAMP_ALIGNED;
770 #ifdef CONFIG_TCP_MD5SIG
772 tot_len += TCPOLEN_MD5SIG_ALIGNED;
775 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
780 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
782 t1 = (struct tcphdr *) skb_push(buff, tot_len);
783 skb_reset_transport_header(buff);
785 /* Swap the send and the receive. */
786 memset(t1, 0, sizeof(*t1));
787 t1->dest = th->source;
788 t1->source = th->dest;
789 t1->doff = tot_len / 4;
790 t1->seq = htonl(seq);
791 t1->ack_seq = htonl(ack);
/* A RST answering an un-ACKed segment carries no ACK flag; every other
 * reply (including ACK-answering RSTs) does. */
792 t1->ack = !rst || !th->ack;
794 t1->window = htons(win);
796 topt = (__be32 *)(t1 + 1);
/* Timestamp option: NOP,NOP,TS per convention. */
799 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
800 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
801 *topt++ = htonl(tsval);
802 *topt++ = htonl(tsecr);
805 #ifdef CONFIG_TCP_MD5SIG
/* MD5 option header, then the digest computed over this reply. */
807 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
808 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
809 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
810 &ipv6_hdr(skb)->saddr,
811 &ipv6_hdr(skb)->daddr, t1);
/* Route the reply back toward the sender (src/dst reversed). */
815 memset(&fl6, 0, sizeof(fl6));
816 fl6.daddr = ipv6_hdr(skb)->saddr;
817 fl6.saddr = ipv6_hdr(skb)->daddr;
818 fl6.flowlabel = label;
820 buff->ip_summed = CHECKSUM_PARTIAL;
823 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
825 fl6.flowi6_proto = IPPROTO_TCP;
/* Link-local (strict) destinations need an explicit output interface. */
826 if (rt6_need_strict(&fl6.daddr) && !oif)
827 fl6.flowi6_oif = tcp_v6_iif(skb);
829 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
832 fl6.flowi6_oif = oif;
835 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
836 fl6.fl6_dport = t1->dest;
837 fl6.fl6_sport = t1->source;
838 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
839 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
841 /* Pass a socket to ip6_dst_lookup either it is for RST
842 * Underlying function will use this to retrieve the network
845 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
847 skb_dst_set(buff, dst);
848 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
849 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
851 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
/* Send a RST in reply to @skb.  Chooses seq/ack so the RST is
 * acceptable to the peer, and — with MD5 enabled — signs the RST with
 * the key for the peer, looking the key up via a listener search when
 * no full socket is available (so signed segments to closed ports still
 * get signed RSTs).  NOTE(review): RST-suppression checks (th->rst) and
 * several gotos/labels are elided from this chunk. */
858 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
860 const struct tcphdr *th = tcp_hdr(skb);
861 u32 seq = 0, ack_seq = 0;
862 struct tcp_md5sig_key *key = NULL;
863 #ifdef CONFIG_TCP_MD5SIG
864 const __u8 *hash_location = NULL;
865 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
866 unsigned char newhash[16];
868 struct sock *sk1 = NULL;
875 /* If sk not NULL, it means we did a successful lookup and incoming
876 * route had to be correct. prequeue might have dropped our dst.
878 if (!sk && !ipv6_unicast_destination(skb))
881 #ifdef CONFIG_TCP_MD5SIG
883 hash_location = tcp_parse_md5sig_option(th);
884 if (sk && sk_fullsock(sk)) {
885 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
886 } else if (hash_location) {
888 * active side is lost. Try to find listening socket through
889 * source port, and then find md5 key through listening socket.
890 * we are not loose security here:
891 * Incoming packet is checked with md5 hash with finding key,
892 * no RST generated if md5 hash doesn't match.
894 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
895 &tcp_hashinfo, NULL, 0,
897 th->source, &ipv6h->daddr,
898 ntohs(th->source), tcp_v6_iif(skb));
902 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
/* Verify the inbound signature before answering: no RST if the
 * signature does not validate. */
906 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
907 if (genhash || memcmp(hash_location, newhash, 16) != 0)
/* RFC 793 reset semantics: ACK-bearing segments are reset at their
 * ack_seq; otherwise acknowledge everything the segment covered. */
913 seq = ntohl(th->ack_seq);
915 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
918 oif = sk ? sk->sk_bound_dev_if : 0;
919 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
921 #ifdef CONFIG_TCP_MD5SIG
/* Thin wrapper over tcp_v6_send_response() for non-RST (pure ACK)
 * replies — e.g. TIME-WAIT and request-socket ACKs. */
927 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
928 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
929 struct tcp_md5sig_key *key, u8 tclass,
932 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
/* Answer a segment hitting a TIME-WAIT socket with an ACK built from
 * the timewait state: window right-shifted by the negotiated scale,
 * offset timestamp, saved MD5 key and flow label. */
936 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
938 struct inet_timewait_sock *tw = inet_twsk(sk);
939 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
941 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
942 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
943 tcp_time_stamp + tcptw->tw_ts_offset,
944 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
945 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
/* ACK on behalf of a request_sock (SYN_RECV or Fast Open child).  The
 * sequence base differs: snt_isn+1 for a plain listener, snd_nxt for an
 * already-created Fast Open child.  Window obeys RFC 7323 scaling. */
950 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
951 struct request_sock *req)
953 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
954 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
957 * The window field (SEG.WND) of every outgoing segment, with the
958 * exception of <SYN> segments, MUST be right-shifted by
959 * Rcv.Wind.Shift bits:
961 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
962 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
963 tcp_rsk(req)->rcv_nxt,
964 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
965 tcp_time_stamp + tcp_rsk(req)->ts_off,
966 req->ts_recent, sk->sk_bound_dev_if,
967 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
/* Validate a possible syncookie ACK on a listener; with SYN_COOKIES
 * enabled this may return a freshly-minted child socket from
 * cookie_v6_check().  NOTE(review): the cookie-enable condition and
 * return are elided from this chunk. */
972 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
974 #ifdef CONFIG_SYN_COOKIES
975 const struct tcphdr *th = tcp_hdr(skb);
978 sk = cookie_v6_check(sk, skb);
/* Handle an incoming SYN on a listener.  v4-mapped traffic is handed to
 * the IPv4 handler; non-unicast destinations are silently dropped (no
 * reset); otherwise the generic tcp_conn_request() runs with our IPv6
 * ops tables. */
983 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
985 if (skb->protocol == htons(ETH_P_IP))
986 return tcp_v4_conn_request(sk, skb);
988 if (!ipv6_unicast_destination(skb))
991 return tcp_conn_request(&tcp6_request_sock_ops,
992 &tcp_request_sock_ipv6_ops, sk, skb);
996 return 0; /* don't send reset */
/* Undo tcp_v6_fill_cb(): copy the stashed inet6_skb_parm from
 * TCP_SKB_CB back to the front of skb->cb where IP6CB() expects it. */
999 static void tcp_v6_restore_cb(struct sk_buff *skb)
1001 /* We need to move header back to the beginning if xfrm6_policy_check()
1002 * and tcp_v6_fill_cb() are going to be called again.
1003 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1005 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1006 sizeof(struct inet6_skb_parm));
/* Create the child socket completing the three-way handshake.  Two
 * paths: (1) a v4-mapped connection delegates to tcp_v4_syn_recv_sock()
 * and then retargets the child's IPv6 glue to the mapped ops; (2) a
 * native IPv6 connection routes, clones the listener via
 * tcp_create_openreq_child(), fills in addresses/options/MSS, copies
 * any MD5 key, inherits the port and hashes the child.
 * NOTE(review): numerous interior lines (error labels, returns, braces)
 * are elided from this chunk; comments annotate visible code only. */
1009 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1010 struct request_sock *req,
1011 struct dst_entry *dst,
1012 struct request_sock *req_unhash,
1015 struct inet_request_sock *ireq;
1016 struct ipv6_pinfo *newnp;
1017 const struct ipv6_pinfo *np = inet6_sk(sk);
1018 struct ipv6_txoptions *opt;
1019 struct tcp6_sock *newtcp6sk;
1020 struct inet_sock *newinet;
1021 struct tcp_sock *newtp;
1023 #ifdef CONFIG_TCP_MD5SIG
1024 struct tcp_md5sig_key *key;
/* --- v4-mapped path: IPv4 does the work, we fix up the IPv6 view. --- */
1028 if (skb->protocol == htons(ETH_P_IP)) {
1033 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1034 req_unhash, own_req);
1039 newtcp6sk = (struct tcp6_sock *)newsk;
1040 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1042 newinet = inet_sk(newsk);
1043 newnp = inet6_sk(newsk);
1044 newtp = tcp_sk(newsk);
1046 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1048 newnp->saddr = newsk->sk_v6_rcv_saddr;
/* The child talks IPv4 on the wire: use the mapped ops tables. */
1050 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1051 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1052 #ifdef CONFIG_TCP_MD5SIG
1053 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1056 newnp->ipv6_ac_list = NULL;
1057 newnp->ipv6_fl_list = NULL;
1058 newnp->pktoptions = NULL;
1060 newnp->mcast_oif = tcp_v6_iif(skb);
1061 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1062 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1064 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1067 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1068 * here, tcp_create_openreq_child now does this for us, see the comment in
1069 * that function for the gory details. -acme
1072 /* It is tricky place. Until this moment IPv4 tcp
1073 worked with IPv6 icsk.icsk_af_ops.
1076 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
/* --- native IPv6 path. --- */
1081 ireq = inet_rsk(req);
1083 if (sk_acceptq_is_full(sk))
1087 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1092 newsk = tcp_create_openreq_child(sk, req, skb);
1097 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1098 * count here, tcp_create_openreq_child now does this for us, see the
1099 * comment in that function for the gory details. -acme
1102 newsk->sk_gso_type = SKB_GSO_TCPV6;
1103 ip6_dst_store(newsk, dst, NULL, NULL);
1104 inet6_sk_rx_dst_set(newsk, skb);
1106 newtcp6sk = (struct tcp6_sock *)newsk;
1107 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1109 newtp = tcp_sk(newsk);
1110 newinet = inet_sk(newsk);
1111 newnp = inet6_sk(newsk);
1113 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1115 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1116 newnp->saddr = ireq->ir_v6_loc_addr;
1117 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1118 newsk->sk_bound_dev_if = ireq->ir_iif;
1120 /* Now IPv6 options...
1122 First: no IPv4 options.
1124 newinet->inet_opt = NULL;
1125 newnp->ipv6_ac_list = NULL;
1126 newnp->ipv6_fl_list = NULL;
1129 newnp->rxopt.all = np->rxopt.all;
1131 newnp->pktoptions = NULL;
1133 newnp->mcast_oif = tcp_v6_iif(skb);
1134 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1135 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1137 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1139 /* Clone native IPv6 options from listening socket (if any)
1141 Yes, keeping reference count would be much more clever,
1142 but we make one more one thing there: reattach optmem
1145 opt = ireq->ipv6_opt;
1147 opt = rcu_dereference(np->opt);
1149 opt = ipv6_dup_options(newsk, opt);
1150 RCU_INIT_POINTER(newnp->opt, opt);
1152 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1154 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
/* Congestion control + MSS initialization, honoring a user MSS cap. */
1157 tcp_ca_openreq_child(newsk, dst);
1159 tcp_sync_mss(newsk, dst_mtu(dst));
1160 newtp->advmss = dst_metric_advmss(dst);
1161 if (tcp_sk(sk)->rx_opt.user_mss &&
1162 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1163 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1165 tcp_initialize_rcv_mss(newsk);
1167 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1168 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1170 #ifdef CONFIG_TCP_MD5SIG
1171 /* Copy over the MD5 key from the original socket */
1172 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
1174 /* We're using one, so create a matching key
1175 * on the newsk structure. If we fail to get
1176 * memory, then we end up not copying the key
1179 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1180 AF_INET6, key->key, key->keylen,
1181 sk_gfp_mask(sk, GFP_ATOMIC));
1185 if (__inet_inherit_port(sk, newsk) < 0) {
1186 inet_csk_prepare_forced_close(newsk);
1190 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1192 tcp_move_syn(newtp, req);
1194 /* Clone pktoptions received with SYN, if we own the req */
1195 if (ireq->pktopts) {
1196 newnp->pktoptions = skb_clone(ireq->pktopts,
1197 sk_gfp_mask(sk, GFP_ATOMIC));
1198 consume_skb(ireq->pktopts);
1199 ireq->pktopts = NULL;
1200 if (newnp->pktoptions) {
1201 tcp_v6_restore_cb(newnp->pktoptions);
1202 skb_set_owner_r(newnp->pktoptions, newsk);
/* Overflow accounting on the drop path. */
1210 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1218 /* The socket must have it's spinlock held when we get
1219 * here, unless it is a TCP_LISTEN socket.
1221 * We have a potential double-lock case here, so even when
1222 * doing backlog processing we use the BH locking scheme.
1223 * This is because we cannot sleep with the original spinlock
/* Main per-socket receive entry (backlog / locked path).  Dispatches on
 * socket state: fast path for ESTABLISHED (with cached-dst validation),
 * syncookie/child handling for LISTEN, generic state machine otherwise.
 * Also implements IPV6_PKTOPTIONS latching via opt_skb.
 * NOTE(review): several gotos, labels and closing braces are elided
 * from this chunk. */
1226 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1228 struct ipv6_pinfo *np = inet6_sk(sk);
1229 struct tcp_sock *tp;
1230 struct sk_buff *opt_skb = NULL;
1232 /* Imagine: socket is IPv6. IPv4 packet arrives,
1233 goes to IPv4 receive handler and backlogged.
1234 From backlog it always goes here. Kerboom...
1235 Fortunately, tcp_rcv_established and rcv_established
1236 handle them correctly, but it is not case with
1237 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1240 if (skb->protocol == htons(ETH_P_IP))
1241 return tcp_v4_do_rcv(sk, skb);
1243 if (tcp_filter(sk, skb))
1247 * socket locking is here for SMP purposes as backlog rcv
1248 * is currently called with bh processing disabled.
1251 /* Do Stevens' IPV6_PKTOPTIONS.
1253 Yes, guys, it is the only place in our code, where we
1254 may make it not affecting IPv4.
1255 The rest of code is protocol independent,
1256 and I do not like idea to uglify IPv4.
1258 Actually, all the idea behind IPV6_PKTOPTIONS
1259 looks not very well thought. For now we latch
1260 options, received in the last packet, enqueued
1261 by tcp. Feel free to propose better solution.
/* Clone the skb so its rx options survive the receive path. */
1265 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1267 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1268 struct dst_entry *dst = sk->sk_rx_dst;
1270 sock_rps_save_rxhash(sk, skb);
1271 sk_mark_napi_id(sk, skb);
/* Drop the cached rx dst if the ingress device changed or the route
 * cookie no longer validates. */
1273 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1274 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1276 sk->sk_rx_dst = NULL;
1280 tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1282 goto ipv6_pktoptions;
1286 if (tcp_checksum_complete(skb))
1289 if (sk->sk_state == TCP_LISTEN) {
1290 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
/* A new child socket was produced — run the segment through it. */
1296 sock_rps_save_rxhash(nsk, skb);
1297 sk_mark_napi_id(nsk, skb);
1298 if (tcp_child_process(sk, nsk, skb))
1301 __kfree_skb(opt_skb);
1305 sock_rps_save_rxhash(sk, skb);
1307 if (tcp_rcv_state_process(sk, skb))
1310 goto ipv6_pktoptions;
/* Reset path for segments the state machine rejected. */
1314 tcp_v6_send_reset(sk, skb);
1317 __kfree_skb(opt_skb);
1321 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1322 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1327 /* Do you ask, what is it?
1329 1. skb was enqueued by tcp.
1330 2. skb is added to tail of read queue, rather than out of order.
1331 3. socket is not in passive state.
1332 4. Finally, it really contains options, which user wants to receive.
1335 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1336 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1337 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1338 np->mcast_oif = tcp_v6_iif(opt_skb);
1339 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1340 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1341 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1342 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1344 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
/* Latch this packet's options into np->pktoptions, freeing whatever
 * was latched before (xchg keeps the swap atomic). */
1345 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1346 skb_set_owner_r(opt_skb, sk);
1347 tcp_v6_restore_cb(opt_skb);
1348 opt_skb = xchg(&np->pktoptions, opt_skb);
1350 __kfree_skb(opt_skb);
1351 opt_skb = xchg(&np->pktoptions, NULL);
/* Populate TCP_SKB_CB from the parsed headers: first stash the
 * inet6_skb_parm (IP6CB) into the cb's header union, then fill the TCP
 * sequence/ack/flags/dsfield fields used by the receive state machine. */
1359 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1360 const struct tcphdr *th)
1362 /* This is tricky: we move IP6CB at its correct location into
1363 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1364 * _decode_session6() uses IP6CB().
1365 * barrier() makes sure compiler won't play aliasing games.
1367 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1368 sizeof(struct inet6_skb_parm));
1371 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
/* end_seq counts SYN and FIN as one sequence unit each. */
1372 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1373 skb->len - th->doff*4);
1374 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1375 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1376 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1377 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1378 TCP_SKB_CB(skb)->sacked = 0;
/*
 * tcp_v6_rcv() - main IPv6 TCP receive entry point (inet6 protocol handler).
 *
 * Validates the TCP header and checksum, looks up the owning socket, and
 * dispatches the segment: request sockets (TCP_NEW_SYN_RECV) go through
 * tcp_check_req()/tcp_child_process(), established sockets through
 * tcp_v6_do_rcv() (directly, via prequeue, or via the socket backlog),
 * TIME_WAIT sockets through tcp_timewait_state_process().  Segments with
 * no matching socket get a RST.  Returns 0 on success, -1 if the segment
 * was consumed with an error disposition.
 */
1381 static int tcp_v6_rcv(struct sk_buff *skb)
1383 const struct tcphdr *th;
1384 const struct ipv6hdr *hdr;
1388 struct net *net = dev_net(skb->dev);
/* only handle packets addressed to this host */
1390 if (skb->pkt_type != PACKET_HOST)
1394 * Count it even if it's bad.
1396 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
/* make sure at least the fixed TCP header is linear before touching it */
1398 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1401 th = (const struct tcphdr *)skb->data;
/* data offset smaller than the minimum TCP header is malformed */
1403 if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1405 if (!pskb_may_pull(skb, th->doff*4))
/* initialize/verify checksum state using the IPv6 pseudo header */
1408 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
/* re-read pointers: pskb_may_pull() may have reallocated the head */
1411 th = (const struct tcphdr *)skb->data;
1412 hdr = ipv6_hdr(skb);
1415 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1416 th->source, th->dest, inet6_iif(skb),
1422 if (sk->sk_state == TCP_TIME_WAIT)
/* lookup returned a request socket: handle the 3WHS completion path */
1425 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1426 struct request_sock *req = inet_reqsk(sk);
1429 sk = req->rsk_listener;
1430 tcp_v6_fill_cb(skb, hdr, th);
/* drop segments failing the TCP MD5 signature check (RFC 2385) */
1431 if (tcp_v6_inbound_md5_hash(sk, skb)) {
1432 sk_drops_add(sk, skb);
/* listener went away (or changed state) under us: drop the request */
1436 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1437 inet_csk_reqsk_queue_drop_and_put(sk, req);
1442 nsk = tcp_check_req(sk, skb, req, false);
1445 goto discard_and_relse;
1449 tcp_v6_restore_cb(skb);
/* child socket created: let it process this segment */
1450 } else if (tcp_child_process(sk, nsk, skb)) {
1451 tcp_v6_send_reset(nsk, skb);
1452 goto discard_and_relse;
/* enforce IPV6_MINHOPCOUNT (generalized TTL security, RFC 5082 style) */
1458 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1459 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1460 goto discard_and_relse;
1463 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1464 goto discard_and_relse;
/* must run after xfrm6_policy_check(): fill_cb overwrites IP6CB() */
1466 tcp_v6_fill_cb(skb, hdr, th);
1468 if (tcp_v6_inbound_md5_hash(sk, skb))
1469 goto discard_and_relse;
/* socket filter (BPF) may trim the skb; re-read header pointers below */
1471 if (tcp_filter(sk, skb))
1472 goto discard_and_relse;
1473 th = (const struct tcphdr *)skb->data;
1474 hdr = ipv6_hdr(skb);
/* listening sockets are processed without the owner lock dance */
1478 if (sk->sk_state == TCP_LISTEN) {
1479 ret = tcp_v6_do_rcv(sk, skb);
1480 goto put_and_return;
1483 sk_incoming_cpu_update(sk);
1485 bh_lock_sock_nested(sk);
1486 tcp_segs_in(tcp_sk(sk), skb);
/* process now if the socket is not owned by a user context ... */
1488 if (!sock_owned_by_user(sk)) {
1489 if (!tcp_prequeue(sk, skb))
1490 ret = tcp_v6_do_rcv(sk, skb);
/* ... otherwise queue on the backlog; failure here means overflow */
1491 } else if (tcp_add_backlog(sk, skb)) {
1492 goto discard_and_relse;
1499 return ret ? -1 : 0;
/* no matching socket: policy-check, verify checksum, then send RST */
1502 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1505 tcp_v6_fill_cb(skb, hdr, th);
1507 if (tcp_checksum_complete(skb)) {
1509 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1511 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1513 tcp_v6_send_reset(NULL, skb);
1521 sk_drops_add(sk, skb);
/* TIME_WAIT handling: validate, then ask the timewait state machine */
1527 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1528 inet_twsk_put(inet_twsk(sk));
1532 tcp_v6_fill_cb(skb, hdr, th);
1534 if (tcp_checksum_complete(skb)) {
1535 inet_twsk_put(inet_twsk(sk));
1539 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
/* a SYN for a new connection: find a current listener to take it */
1544 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1545 skb, __tcp_hdrlen(th),
1546 &ipv6_hdr(skb)->saddr, th->source,
1547 &ipv6_hdr(skb)->daddr,
1548 ntohs(th->dest), tcp_v6_iif(skb));
1550 struct inet_timewait_sock *tw = inet_twsk(sk);
/* retire the TIME_WAIT socket so the listener can accept the SYN */
1551 inet_twsk_deschedule_put(tw);
1553 tcp_v6_restore_cb(skb);
1557 /* Fall through to ACK */
1560 tcp_v6_timewait_ack(sk, skb);
1563 tcp_v6_restore_cb(skb);
1564 tcp_v6_send_reset(sk, skb);
1565 inet_twsk_deschedule_put(inet_twsk(sk));
1567 case TCP_TW_SUCCESS:
/*
 * tcp_v6_early_demux() - early steal of established-socket lookup.
 *
 * Called before routing: if the segment belongs to an established socket,
 * attach the socket to the skb and, when the cached rx dst is still valid
 * for the incoming interface, reuse it via skb_dst_set_noref() to skip a
 * route lookup.
 */
1573 static void tcp_v6_early_demux(struct sk_buff *skb)
1575 const struct ipv6hdr *hdr;
1576 const struct tcphdr *th;
1579 if (skb->pkt_type != PACKET_HOST)
/* need the fixed TCP header linear to read ports and doff */
1582 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1585 hdr = ipv6_hdr(skb);
1588 if (th->doff < sizeof(struct tcphdr) / 4)
1591 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1592 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1593 &hdr->saddr, th->source,
1594 &hdr->daddr, ntohs(th->dest),
/* sock_edemux drops the socket reference when the skb is freed */
1598 skb->destructor = sock_edemux;
1599 if (sk_fullsock(sk)) {
1600 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
/* revalidate the cached dst against its cookie before reuse */
1603 dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
1605 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1606 skb_dst_set_noref(skb, dst);
/* TIME_WAIT minisock operations for IPv6: object size plus the shared
 * (v4/v6) uniqueness check and destructor.
 */
1611 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1612 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1613 .twsk_unique = tcp_twsk_unique,
1614 .twsk_destructor = tcp_twsk_destructor,
/* Address-family specific connection-socket callbacks for native IPv6
 * TCP sockets (transmit, header rebuild, connection setup, sockopts).
 */
1617 static const struct inet_connection_sock_af_ops ipv6_specific = {
1618 .queue_xmit = inet6_csk_xmit,
1619 .send_check = tcp_v6_send_check,
1620 .rebuild_header = inet6_sk_rebuild_header,
1621 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1622 .conn_request = tcp_v6_conn_request,
1623 .syn_recv_sock = tcp_v6_syn_recv_sock,
1624 .net_header_len = sizeof(struct ipv6hdr),
1625 .net_frag_header_len = sizeof(struct frag_hdr),
1626 .setsockopt = ipv6_setsockopt,
1627 .getsockopt = ipv6_getsockopt,
1628 .addr2sockaddr = inet6_csk_addr2sockaddr,
1629 .sockaddr_len = sizeof(struct sockaddr_in6),
1630 .bind_conflict = inet6_csk_bind_conflict,
1631 #ifdef CONFIG_COMPAT
1632 .compat_setsockopt = compat_ipv6_setsockopt,
1633 .compat_getsockopt = compat_ipv6_getsockopt,
1635 .mtu_reduced = tcp_v6_mtu_reduced,
1638 #ifdef CONFIG_TCP_MD5SIG
/* TCP MD5 signature (RFC 2385) helpers for native IPv6 sockets */
1639 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1640 .md5_lookup = tcp_v6_md5_lookup,
1641 .calc_md5_hash = tcp_v6_md5_hash_skb,
1642 .md5_parse = tcp_v6_parse_md5_keys,
1647 * TCP over IPv4 via INET6 API
/* Callbacks for v4-mapped sockets: IPv4 transmit paths combined with
 * IPv6 socket-option handling; note net_header_len is the IPv4 size.
 */
1649 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1650 .queue_xmit = ip_queue_xmit,
1651 .send_check = tcp_v4_send_check,
1652 .rebuild_header = inet_sk_rebuild_header,
1653 .sk_rx_dst_set = inet_sk_rx_dst_set,
1654 .conn_request = tcp_v6_conn_request,
1655 .syn_recv_sock = tcp_v6_syn_recv_sock,
1656 .net_header_len = sizeof(struct iphdr),
1657 .setsockopt = ipv6_setsockopt,
1658 .getsockopt = ipv6_getsockopt,
1659 .addr2sockaddr = inet6_csk_addr2sockaddr,
1660 .sockaddr_len = sizeof(struct sockaddr_in6),
1661 .bind_conflict = inet6_csk_bind_conflict,
1662 #ifdef CONFIG_COMPAT
1663 .compat_setsockopt = compat_ipv6_setsockopt,
1664 .compat_getsockopt = compat_ipv6_getsockopt,
1666 .mtu_reduced = tcp_v4_mtu_reduced,
1669 #ifdef CONFIG_TCP_MD5SIG
/* MD5 signature helpers for v4-mapped sockets: IPv4 lookup/hash with
 * IPv6-style key parsing.
 */
1670 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1671 .md5_lookup = tcp_v4_md5_lookup,
1672 .calc_md5_hash = tcp_v4_md5_hash_skb,
1673 .md5_parse = tcp_v6_parse_md5_keys,
1677 /* NOTE: A lot of things set to zero explicitly by call to
1678 * sk_alloc() so need not be done here.
/* Socket init for the IPv6 TCP proto: installs the IPv6-specific
 * af_ops (and, with MD5 support, the IPv6 MD5 callbacks).
 */
1680 static int tcp_v6_init_sock(struct sock *sk)
1682 struct inet_connection_sock *icsk = inet_csk(sk);
1686 icsk->icsk_af_ops = &ipv6_specific;
1688 #ifdef CONFIG_TCP_MD5SIG
1689 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
/* Destroy callback: run the shared (v4) TCP teardown, then release
 * IPv6-specific socket state.
 */
1695 static void tcp_v6_destroy_sock(struct sock *sk)
1697 tcp_v4_destroy_sock(sk);
1698 inet6_destroy_sock(sk);
1701 #ifdef CONFIG_PROC_FS
1702 /* Proc filesystem TCPv6 sock list dumping. */
/*
 * get_openreq6() - emit one /proc/net/tcp6 line for an open request
 * (SYN_RECV) socket: addresses/ports, remaining request-timer ticks,
 * listener uid, and placeholder fields kept for format compatibility.
 */
1703 static void get_openreq6(struct seq_file *seq,
1704 const struct request_sock *req, int i)
/* time-to-die: remaining jiffies on the request's retransmit timer */
1706 long ttd = req->rsk_timer.expires - jiffies;
1707 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1708 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1714 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1715 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1717 src->s6_addr32[0], src->s6_addr32[1],
1718 src->s6_addr32[2], src->s6_addr32[3],
1719 inet_rsk(req)->ir_num,
1720 dest->s6_addr32[0], dest->s6_addr32[1],
1721 dest->s6_addr32[2], dest->s6_addr32[3],
1722 ntohs(inet_rsk(req)->ir_rmt_port),
1724 0, 0, /* could print option size, but that is af dependent. */
1725 1, /* timers active (only the expire timer) */
1726 jiffies_to_clock_t(ttd),
/* uid shown is the listener's owner, translated to the reader's userns */
1728 from_kuid_munged(seq_user_ns(seq),
1729 sock_i_uid(req->rsk_listener)),
1730 0, /* non standard timer */
1731 0, /* open_requests have no inode */
/*
 * get_tcp6_sock() - emit one /proc/net/tcp6 line for a full socket:
 * addresses/ports, state, queue sizes, pending-timer kind and expiry,
 * retransmit/probe counters, uid, RTO/ATO, and congestion info.
 */
1735 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1737 const struct in6_addr *dest, *src;
1740 unsigned long timer_expires;
1741 const struct inet_sock *inet = inet_sk(sp);
1742 const struct tcp_sock *tp = tcp_sk(sp);
1743 const struct inet_connection_sock *icsk = inet_csk(sp);
1744 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1748 dest = &sp->sk_v6_daddr;
1749 src = &sp->sk_v6_rcv_saddr;
1750 destp = ntohs(inet->inet_dport);
1751 srcp = ntohs(inet->inet_sport);
/* classify the pending ICSK timer so the "tm->when" column is meaningful */
1753 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1754 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
1755 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1757 timer_expires = icsk->icsk_timeout;
1758 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1760 timer_expires = icsk->icsk_timeout;
1761 } else if (timer_pending(&sp->sk_timer)) {
1763 timer_expires = sp->sk_timer.expires;
1766 timer_expires = jiffies;
/* lockless state read; paired with sk_state_store() on the write side */
1769 state = sk_state_load(sp);
1770 if (state == TCP_LISTEN)
1771 rx_queue = sp->sk_ack_backlog;
1773 /* Because we don't lock the socket,
1774 * we might find a transient negative value.
1776 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
1779 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1780 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1782 src->s6_addr32[0], src->s6_addr32[1],
1783 src->s6_addr32[2], src->s6_addr32[3], srcp,
1784 dest->s6_addr32[0], dest->s6_addr32[1],
1785 dest->s6_addr32[2], dest->s6_addr32[3], destp,
/* tx_queue: bytes sent but not yet acknowledged */
1787 tp->write_seq - tp->snd_una,
1790 jiffies_delta_to_clock_t(timer_expires - jiffies),
1791 icsk->icsk_retransmits,
1792 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1793 icsk->icsk_probes_out,
1795 atomic_read(&sp->sk_refcnt), sp,
1796 jiffies_to_clock_t(icsk->icsk_rto),
1797 jiffies_to_clock_t(icsk->icsk_ack.ato),
1798 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
/* last column: fastopen queue limit for listeners, ssthresh otherwise
 * (-1 while still in initial slow start)
 */
1800 state == TCP_LISTEN ?
1801 fastopenq->max_qlen :
1802 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
/*
 * get_timewait6_sock() - emit one /proc/net/tcp6 line for a TIME_WAIT
 * minisock: addresses/ports, substate, and remaining timer delta; most
 * full-socket columns are printed as zeros.
 */
1806 static void get_timewait6_sock(struct seq_file *seq,
1807 struct inet_timewait_sock *tw, int i)
/* remaining jiffies until the timewait timer fires */
1809 long delta = tw->tw_timer.expires - jiffies;
1810 const struct in6_addr *dest, *src;
1813 dest = &tw->tw_v6_daddr;
1814 src = &tw->tw_v6_rcv_saddr;
1815 destp = ntohs(tw->tw_dport);
1816 srcp = ntohs(tw->tw_sport);
1819 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1820 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1822 src->s6_addr32[0], src->s6_addr32[1],
1823 src->s6_addr32[2], src->s6_addr32[3], srcp,
1824 dest->s6_addr32[0], dest->s6_addr32[1],
1825 dest->s6_addr32[2], dest->s6_addr32[3], destp,
/* "3" in the timer column marks the timewait timer */
1826 tw->tw_substate, 0, 0,
1827 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1828 atomic_read(&tw->tw_refcnt), tw);
/*
 * tcp6_seq_show() - seq_file show callback for /proc/net/tcp6.
 * Prints the header row for the start token, then dispatches on socket
 * state to the appropriate per-entry formatter.
 */
1831 static int tcp6_seq_show(struct seq_file *seq, void *v)
1833 struct tcp_iter_state *st;
1834 struct sock *sk = v;
1836 if (v == SEQ_START_TOKEN) {
1841 "st tx_queue rx_queue tr tm->when retrnsmt"
1842 " uid timeout inode\n");
1847 if (sk->sk_state == TCP_TIME_WAIT)
1848 get_timewait6_sock(seq, v, st->num);
/* TCP_NEW_SYN_RECV entries are request sockets, not full sockets */
1849 else if (sk->sk_state == TCP_NEW_SYN_RECV)
1850 get_openreq6(seq, v, st->num);
1852 get_tcp6_sock(seq, v, st->num);
/* file_operations backing the /proc/net/tcp6 seq_file */
1857 static const struct file_operations tcp6_afinfo_seq_fops = {
1858 .owner = THIS_MODULE,
1859 .open = tcp_seq_open,
1861 .llseek = seq_lseek,
1862 .release = seq_release_net
/* AF-specific descriptor tying the tcp6 seq_file fops and show callback
 * into the generic TCP proc machinery.
 */
1865 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1868 .seq_fops = &tcp6_afinfo_seq_fops,
1870 .show = tcp6_seq_show,
/* Per-netns registration of /proc/net/tcp6 */
1874 int __net_init tcp6_proc_init(struct net *net)
1876 return tcp_proc_register(net, &tcp6_seq_afinfo);
/* Per-netns removal of /proc/net/tcp6 */
1879 void tcp6_proc_exit(struct net *net)
1881 tcp_proc_unregister(net, &tcp6_seq_afinfo);
/*
 * struct proto for IPv6 TCP: wires the generic TCP operations together
 * with the IPv6-specific init/destroy/backlog handlers, shared sysctl
 * limits and memory accounting, and the v6 timewait/request sock ops.
 */
1885 struct proto tcpv6_prot = {
1887 .owner = THIS_MODULE,
1889 .connect = tcp_v6_connect,
1890 .disconnect = tcp_disconnect,
1891 .accept = inet_csk_accept,
1893 .init = tcp_v6_init_sock,
1894 .destroy = tcp_v6_destroy_sock,
1895 .shutdown = tcp_shutdown,
1896 .setsockopt = tcp_setsockopt,
1897 .getsockopt = tcp_getsockopt,
1898 .recvmsg = tcp_recvmsg,
1899 .sendmsg = tcp_sendmsg,
1900 .sendpage = tcp_sendpage,
1901 .backlog_rcv = tcp_v6_do_rcv,
1902 .release_cb = tcp_release_cb,
1904 .unhash = inet_unhash,
1905 .get_port = inet_csk_get_port,
1906 .enter_memory_pressure = tcp_enter_memory_pressure,
1907 .stream_memory_free = tcp_stream_memory_free,
/* memory accounting state is shared with IPv4 TCP */
1908 .sockets_allocated = &tcp_sockets_allocated,
1909 .memory_allocated = &tcp_memory_allocated,
1910 .memory_pressure = &tcp_memory_pressure,
1911 .orphan_count = &tcp_orphan_count,
1912 .sysctl_mem = sysctl_tcp_mem,
1913 .sysctl_wmem = sysctl_tcp_wmem,
1914 .sysctl_rmem = sysctl_tcp_rmem,
1915 .max_header = MAX_TCP_HEADER,
1916 .obj_size = sizeof(struct tcp6_sock),
/* RCU-safe slab: sockets may be looked up while being freed */
1917 .slab_flags = SLAB_DESTROY_BY_RCU,
1918 .twsk_prot = &tcp6_timewait_sock_ops,
1919 .rsk_prot = &tcp6_request_sock_ops,
1920 .h.hashinfo = &tcp_hashinfo,
1921 .no_autobind = true,
1922 #ifdef CONFIG_COMPAT
1923 .compat_setsockopt = compat_tcp_setsockopt,
1924 .compat_getsockopt = compat_tcp_getsockopt,
1926 .diag_destroy = tcp_abort,
/* inet6 protocol handler for IPPROTO_TCP: receive path, ICMPv6 error
 * handler, and the early-demux hook.
 */
1929 static const struct inet6_protocol tcpv6_protocol = {
1930 .early_demux = tcp_v6_early_demux,
1931 .handler = tcp_v6_rcv,
1932 .err_handler = tcp_v6_err,
1933 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/* protosw entry exposing SOCK_STREAM/IPPROTO_TCP over AF_INET6 */
1936 static struct inet_protosw tcpv6_protosw = {
1937 .type = SOCK_STREAM,
1938 .protocol = IPPROTO_TCP,
1939 .prot = &tcpv6_prot,
1940 .ops = &inet6_stream_ops,
1941 .flags = INET_PROTOSW_PERMANENT |
/* Per-netns init: create the control socket used for sending resets/acks */
1945 static int __net_init tcpv6_net_init(struct net *net)
1947 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
1948 SOCK_RAW, IPPROTO_TCP, net);
/* Per-netns teardown: destroy the control socket */
1951 static void __net_exit tcpv6_net_exit(struct net *net)
1953 inet_ctl_sock_destroy(net->ipv6.tcp_sk)<span></span>;
/* Batched netns exit: purge AF_INET6 timewait sockets for the dying netns */
1956 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
1958 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
/* pernet_operations hooking the per-netns init/exit handlers above */
1961 static struct pernet_operations tcpv6_net_ops = {
1962 .init = tcpv6_net_init,
1963 .exit = tcpv6_net_exit,
1964 .exit_batch = tcpv6_net_exit_batch,
/*
 * tcpv6_init() - module init: register the inet6 protocol handler, the
 * protosw entry, and the pernet operations; unwinds in reverse order
 * on failure (goto-based cleanup).
 */
1967 int __init tcpv6_init(void)
1971 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
1975 /* register inet6 protocol */
1976 ret = inet6_register_protosw(&tcpv6_protosw);
1978 goto out_tcpv6_protocol;
1980 ret = register_pernet_subsys(&tcpv6_net_ops);
1982 goto out_tcpv6_protosw;
/* error unwind labels below undo the registrations above */
1987 inet6_unregister_protosw(&tcpv6_protosw);
1989 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
/* Module exit: unregister in reverse order of tcpv6_init() */
1993 void tcpv6_exit(void)
1995 unregister_pernet_subsys(&tcpv6_net_ops);
1996 inet6_unregister_protosw(&tcpv6_protosw);
1997 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);