2 * IPv6 output functions
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * Based on linux/net/ipv4/ip_output.c
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
 * A.N.Kuznetsov : arithmetic in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
42 #include <linux/bpf-cgroup.h>
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
57 #include <net/checksum.h>
58 #include <linux/mroute6.h>
59 #include <net/l3mdev.h>
60 #include <net/lwtunnel.h>
/* Final transmit step: hand a routed IPv6 packet to the neighbour/L2 layer.
 * Handles multicast loopback cloning, lightweight-tunnel redirection and the
 * neighbour lookup for the unicast nexthop.
 *
 * NOTE(review): this extracted region is missing several original lines
 * (braces, labels, returns); visible statements are preserved as-is.
 */
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
    struct dst_entry *dst = skb_dst(skb);
    struct net_device *dev = dst->dev;
    struct neighbour *neigh;
    struct in6_addr *nexthop;

    if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

        /* Loop a copy back to local listeners when the socket asked for
         * multicast loopback and we are either a multicast-router socket
         * seeing a not-yet-forwarded packet or a member of the group on
         * this device.
         */
        if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
            ((mroute6_is_socket(net, skb) &&
            !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
            ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
            &ipv6_hdr(skb)->saddr))) {
            struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

            /* Do not check for IFF_ALLMULTI; multicast routing
             * is not supported in any case.
             */
            NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                net, sk, newskb, NULL, newskb->dev,

            /* A multicast packet with hop_limit 0 is discarded. */
            if (ipv6_hdr(skb)->hop_limit == 0) {
                IP6_INC_STATS(net, idev,
                    IPSTATS_MIB_OUTDISCARDS);

        IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

        /* Node-local-scope multicast must never leave the host. */
        if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
            IPV6_ADDR_SCOPE_NODELOCAL &&
            !(dev->flags & IFF_LOOPBACK)) {

    /* A lightweight tunnel may take over the transmission entirely. */
    if (lwtunnel_xmit_redirect(dst->lwtstate)) {
        int res = lwtunnel_xmit(skb);

        if (res < 0 || res == LWTUNNEL_XMIT_DONE)

    /* Resolve (or create) the neighbour entry for the nexthop and emit. */
    nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
    neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
    if (unlikely(!neigh))
        neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
    if (!IS_ERR(neigh)) {
        sock_confirm_neigh(skb, neigh);
        ret = neigh_output(neigh, skb);
        rcu_read_unlock_bh();
    rcu_read_unlock_bh();

    /* No usable neighbour: account the drop as "no route". */
    IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
/* Post-routing stage of output: run the cgroup egress BPF program,
 * re-route when an IPsec policy appeared after SNAT, then fragment the
 * packet if required before handing it to ip6_finish_output2().
 */
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
    ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
    /* Policy lookup after SNAT yielded a new policy.
     * NOTE(review): IPCB()/IPSKB_REROUTED are the IPv4 control-block
     * helpers; this looks copied from ip_output.c — confirm intended.
     */
    if (skb_dst(skb)->xfrm) {
        IPCB(skb)->flags |= IPSKB_REROUTED;
        return dst_output(net, sk, skb);

    /* Fragment when the packet exceeds the dst MTU (and is not GSO),
     * the dst demands per-packet fragmentation, or conntrack defrag
     * recorded a smaller maximum fragment size for this flow.
     */
    if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
        dst_allfrag(skb_dst(skb)) ||
        (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
        return ip6_fragment(net, sk, skb, ip6_finish_output2);

    return ip6_finish_output2(net, sk, skb);
/* Output entry point after routing for locally generated packets:
 * drop when IPv6 is administratively disabled on the egress device,
 * otherwise traverse NF_INET_POST_ROUTING (skipped for packets already
 * marked IP6SKB_REROUTED).
 */
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
    struct net_device *dev = skb_dst(skb)->dev;
    struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

    skb->protocol = htons(ETH_P_IPV6);

    if (unlikely(idev->cnf.disable_ipv6)) {
        IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);

    return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
        net, sk, skb, NULL, dev,
        !(IP6CB(skb)->flags & IP6SKB_REROUTED));
177 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
179 if (!np->autoflowlabel_set)
180 return ip6_default_np_autolabel(net);
182 return np->autoflowlabel;
/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 *
 * Builds the IPv6 header (and any extension headers from @opt) in front of
 * the payload, then delivers the packet through NF_INET_LOCAL_OUT.
 *
 * NOTE(review): extraction dropped several lines here (hdr/mtu declarations,
 * some braces and returns); visible statements are preserved as-is.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
    __u32 mark, struct ipv6_txoptions *opt, int tclass)
    struct net *net = sock_net(sk);
    const struct ipv6_pinfo *np = inet6_sk(sk);
    struct in6_addr *first_hop = &fl6->daddr;
    struct dst_entry *dst = skb_dst(skb);
    unsigned int head_room;
    u8 proto = fl6->flowi6_proto;
    int seg_len = skb->len;

    /* Room needed in front of the payload: IPv6 header + link layer,
     * plus any extension headers carried in @opt.
     */
    head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
    head_room += opt->opt_nflen + opt->opt_flen;

    /* Reallocate headroom if the caller's skb is too tight. */
    if (unlikely(skb_headroom(skb) < head_room)) {
        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);

        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
            IPSTATS_MIB_OUTDISCARDS);

        skb_set_owner_w(skb2, skb->sk);

    /* Push extension headers; fragmentable ones first, then the
     * non-fragmentable chain which may also rewrite the first hop
     * (e.g. for a routing header).
     */
    seg_len += opt->opt_nflen + opt->opt_flen;

    ipv6_push_frag_opts(skb, opt, &proto);

    ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,

    skb_push(skb, sizeof(struct ipv6hdr));
    skb_reset_network_header(skb);

    /*
     * Fill in the IPv6 header
     */
    hlimit = np->hop_limit;
    hlimit = ip6_dst_hoplimit(dst);

    ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
        ip6_autoflowlabel(net, np), fl6));

    hdr->payload_len = htons(seg_len);
    hdr->nexthdr = proto;
    hdr->hop_limit = hlimit;

    hdr->saddr = fl6->saddr;
    hdr->daddr = *first_hop;

    skb->protocol = htons(ETH_P_IPV6);
    skb->priority = sk->sk_priority;

    /* Within MTU, DF ignored, or GSO: hand to netfilter LOCAL_OUT. */
    if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
        IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
            IPSTATS_MIB_OUT, skb->len);

        /* if egress device is enslaved to an L3 master device pass the
         * skb to its handler for processing
         */
        skb = l3mdev_ip6_out((struct sock *)sk, skb);

        /* hooks should never assume socket lock is held.
         * we promote our socket to non const
         */
        return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
            net, (struct sock *)sk, skb, NULL, dst->dev,

    /* Oversized and not allowed to fragment: report EMSGSIZE locally. */
    /* ipv6_local_error() does not require socket lock,
     * we promote our socket to non const
     */
    ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

    IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
EXPORT_SYMBOL(ip6_xmit);
/* Deliver a Router Alert packet to every raw socket registered on the
 * ip6_ra_chain with a matching selector (and matching bound device, if
 * any). All but the last recipient get a clone; the last gets the
 * original skb.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
    struct ip6_ra_chain *ra;
    struct sock *last = NULL;

    read_lock(&ip6_ra_lock);
    for (ra = ip6_ra_chain; ra; ra = ra->next) {
        struct sock *sk = ra->sk;

        /* Match selector and (if the socket is bound) the ingress device. */
        if (sk && ra->sel == sel &&
            (!sk->sk_bound_dev_if ||
            sk->sk_bound_dev_if == skb->dev->ifindex)) {
            struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

            rawv6_rcv(last, skb2);

    /* Last subscriber consumes the original skb. */
    rawv6_rcv(last, skb);
    read_unlock(&ip6_ra_lock);

    read_unlock(&ip6_ra_lock);
/* Decide how to treat a packet whose destination we answer proxy-NDP for.
 * Walks the extension-header chain to the upper protocol; ND ICMPv6
 * messages are passed up locally, link-local destinations are rejected.
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
    struct ipv6hdr *hdr = ipv6_hdr(skb);
    u8 nexthdr = hdr->nexthdr;

    /* Skip over any extension headers to find the transport header. */
    if (ipv6_ext_hdr(nexthdr)) {
        offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);

    offset = sizeof(struct ipv6hdr);

    if (nexthdr == IPPROTO_ICMPV6) {
        struct icmp6hdr *icmp6;

        /* Make sure at least the ICMPv6 type byte is in linear data. */
        if (!pskb_may_pull(skb, (skb_network_header(skb) +
            offset + 1 - skb->data)))

        icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

        switch (icmp6->icmp6_type) {
        case NDISC_ROUTER_SOLICITATION:
        case NDISC_ROUTER_ADVERTISEMENT:
        case NDISC_NEIGHBOUR_SOLICITATION:
        case NDISC_NEIGHBOUR_ADVERTISEMENT:

            /* For reaction involving unicast neighbor discovery
             * message destined to the proxied address, pass it to
             */

    /*
     * The proxying router can't forward traffic sent to a link-local
     * address, so signal the sender and discard the packet. This
     * behavior is clarified by the MIPv6 specification.
     */
    if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
        dst_link_failure(skb);
/* Final step of forwarding: bump the forwarded-datagram/octet counters
 * and hand the packet to the output path via dst_output().
 */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
    struct dst_entry *dst = skb_dst(skb);

    __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
    __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

    return dst_output(net, sk, skb);
/* Return true when @skb cannot be forwarded with the given @mtu.
 * Conntrack-defragmented packets are judged by their recorded
 * frag_max_size; GSO packets by whether the segments would fit.
 */
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)

    /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
    if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)

    /* GSO: fits if the network-layer length of each segment fits. */
    if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
/* Forward a received IPv6 packet: run policy/sanity checks, handle Router
 * Alert and proxy-NDP special cases, decrement the hop limit, possibly
 * emit a Redirect, enforce the path MTU, then hand off to the
 * NF_INET_FORWARD hook -> ip6_forward_finish().
 *
 * NOTE(review): extraction dropped many lines (drop labels, hop-limit
 * decrement, returns); visible statements are preserved as-is.
 */
int ip6_forward(struct sk_buff *skb)
    struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
    struct dst_entry *dst = skb_dst(skb);
    struct ipv6hdr *hdr = ipv6_hdr(skb);
    struct inet6_skb_parm *opt = IP6CB(skb);
    struct net *net = dev_net(dst->dev);

    /* Forwarding disabled, non-host packets, socket-owned or LRO skbs
     * are not forwarded.
     */
    if (net->ipv6.devconf_all->forwarding == 0)

    if (skb->pkt_type != PACKET_HOST)

    if (unlikely(skb->sk))

    if (skb_warn_if_lro(skb))

    if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);

    skb_forward_csum(skb);

    /*
     * We DO NOT make any processing on
     * RA packets, pushing them to user level AS IS
     * without any WARRANTY that application will be able
     * to interpret them. The reason is that we
     * cannot make anything clever here.
     *
     * We are not end-node, so that if packet contains
     * AH/ESP, we cannot make anything.
     * Defragmentation also would be mistake, RA packets
     * cannot be fragmented, because there is no warranty
     * that different fragments will go along one path. --ANK
     */
    if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
        if (ip6_call_ra_chain(skb, ntohs(opt->ra)))

    /*
     * check and decrement ttl
     */
    if (hdr->hop_limit <= 1) {
        /* Force OUTPUT device used as source address */
        icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

    /* XXX: idev->cnf.proxy_ndp? */
    if (net->ipv6.devconf_all->proxy_ndp &&
        pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
        int proxied = ip6_forward_proxy_check(skb);

            return ip6_input(skb);
        else if (proxied < 0) {
            __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);

    if (!xfrm6_route_forward(skb)) {
        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);

    /* IPv6 specs say nothing about it, but it is clear that we cannot
     * send redirects to source routed frames.
     * We don't send redirects to frames decapsulated from IPsec.
     */
    if (IP6CB(skb)->iif == dst->dev->ifindex &&
        opt->srcrt == 0 && !skb_sec_path(skb)) {
        struct in6_addr *target = NULL;
        struct inet_peer *peer;

        /*
         * incoming and outgoing devices are the same
         */
        rt = (struct rt6_info *) dst;
        if (rt->rt6i_flags & RTF_GATEWAY)
            target = &rt->rt6i_gateway;

            target = &hdr->daddr;

        peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

        /* Limit redirects both by destination (here)
         * and by source (inside ndisc_send_redirect)
         */
        if (inet_peer_xrlim_allow(peer, 1*HZ))
            ndisc_send_redirect(skb, target);

        int addrtype = ipv6_addr_type(&hdr->saddr);

        /* This check is security critical. */
        if (addrtype == IPV6_ADDR_ANY ||
            addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))

        if (addrtype & IPV6_ADDR_LINKLOCAL) {
            icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                ICMPV6_NOT_NEIGHBOUR, 0);

    mtu = ip6_dst_mtu_forward(dst);
    if (mtu < IPV6_MIN_MTU)

    if (ip6_pkt_too_big(skb, mtu)) {
        /* Again, force OUTPUT device used as source address */
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
        __IP6_INC_STATS(net, ip6_dst_idev(dst),
            IPSTATS_MIB_FRAGFAILS);

    /* Writable copy needed before the hop-limit is decremented. */
    if (skb_cow(skb, dst->dev->hard_header_len)) {
        __IP6_INC_STATS(net, ip6_dst_idev(dst),
            IPSTATS_MIB_OUTDISCARDS);

    /* Mangling hops number delayed to point after skb COW */

    return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
        net, NULL, skb, skb->dev, dst->dev,

    __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
/* Copy per-packet metadata from @from to a freshly allocated fragment
 * @to: packet type, priority, protocol, dst reference, mark, hash,
 * traffic-control index and security mark.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
    to->pkt_type = from->pkt_type;
    to->priority = from->priority;
    to->protocol = from->protocol;

    /* Each fragment holds its own reference on the route. */
    skb_dst_set(to, dst_clone(skb_dst(from)));

    to->mark = from->mark;

    skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
    to->tc_index = from->tc_index;

    skb_copy_secmark(to, from);
/* Fragment @skb to fit the path MTU and emit each fragment via @output.
 * Takes the fast path (re-using the skb's existing frag_list as ready-made
 * fragments) when the geometry allows, otherwise falls back to the slow
 * path that allocates and copies each fragment.
 *
 * NOTE(review): extraction dropped many lines here (goto labels, loop
 * headers, declarations like frag_id/fh/hroom/troom); visible statements
 * are preserved as-is.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
    int (*output)(struct net *, struct sock *, struct sk_buff *))
    struct sk_buff *frag;
    struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
    /* Only honour the socket's frag_size for locally originated, non-nested
     * transmissions.
     */
    struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
        inet6_sk(skb->sk) : NULL;
    struct ipv6hdr *tmp_hdr;
    unsigned int mtu, hlen, left, len;
    int ptr, offset = 0, err = 0;
    u8 *prevhdr, nexthdr = 0;

    err = ip6_find_1stfragopt(skb, &prevhdr);

    mtu = ip6_skb_dst_mtu(skb);

    /* We must not fragment if the socket is set to force MTU discovery
     * or if the skb is not generated by a local socket.
     */
    if (unlikely(!skb->ignore_df && skb->len > mtu))

    if (IP6CB(skb)->frag_max_size) {
        if (IP6CB(skb)->frag_max_size > mtu)

        /* don't send fragments larger than what we received */
        mtu = IP6CB(skb)->frag_max_size;
        if (mtu < IPV6_MIN_MTU)

    if (np && np->frag_size < mtu) {

    if (mtu < hlen + sizeof(struct frag_hdr) + 8)

    /* Per-fragment payload budget, after unfragmentable part + frag hdr. */
    mtu -= hlen + sizeof(struct frag_hdr);

    frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
        &ipv6_hdr(skb)->saddr);

    /* Checksums must be finalized before the payload is split. */
    if (skb->ip_summed == CHECKSUM_PARTIAL &&
        (err = skb_checksum_help(skb)))

    hroom = LL_RESERVED_SPACE(rt->dst.dev);
    if (skb_has_frag_list(skb)) {
        unsigned int first_len = skb_pagelen(skb);
        struct sk_buff *frag2;

        /* Fast path only if the head and every list member already have
         * fragment-compatible geometry (size, 8-byte alignment, headroom).
         */
        if (first_len - hlen > mtu ||
            ((first_len - hlen) & 7) ||
            skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))

        skb_walk_frags(skb, frag) {
            /* Correct geometry. */
            if (frag->len > mtu ||
                ((frag->len & 7) && frag->next) ||
                skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
                goto slow_path_clean;

            /* Partially cloned skb? */
            if (skb_shared(frag))
                goto slow_path_clean;

            /* Transfer wmem accounting from head to the list member. */
            frag->destructor = sock_wfree;

            skb->truesize -= frag->truesize;

        /* Detach the frag list and turn the head into fragment #0. */
        *prevhdr = NEXTHDR_FRAGMENT;
        tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);

        frag = skb_shinfo(skb)->frag_list;
        skb_frag_list_init(skb);

        __skb_pull(skb, hlen);
        fh = __skb_push(skb, sizeof(struct frag_hdr));
        __skb_push(skb, hlen);
        skb_reset_network_header(skb);
        memcpy(skb_network_header(skb), tmp_hdr, hlen);

        fh->nexthdr = nexthdr;
        fh->frag_off = htons(IP6_MF);
        fh->identification = frag_id;

        first_len = skb_pagelen(skb);
        skb->data_len = first_len - skb_headlen(skb);
        skb->len = first_len;
        ipv6_hdr(skb)->payload_len = htons(first_len -
            sizeof(struct ipv6hdr));

            /* Prepare header of the next frame,
             * before previous one went down. */
                frag->ip_summed = CHECKSUM_NONE;
                skb_reset_transport_header(frag);
                fh = __skb_push(frag, sizeof(struct frag_hdr));
                __skb_push(frag, hlen);
                skb_reset_network_header(frag);
                memcpy(skb_network_header(frag), tmp_hdr,
                offset += skb->len - hlen - sizeof(struct frag_hdr);
                fh->nexthdr = nexthdr;
                fh->frag_off = htons(offset);
                    fh->frag_off |= htons(IP6_MF);
                fh->identification = frag_id;
                ipv6_hdr(frag)->payload_len =
                    sizeof(struct ipv6hdr));
                ip6_copy_metadata(frag, skb);

            err = output(net, sk, skb);

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                    IPSTATS_MIB_FRAGCREATES);

            skb_mark_not_on_list(skb);

            IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                IPSTATS_MIB_FRAGOKS);

        /* Fast path failed mid-stream: free remaining fragments. */
        kfree_skb_list(frag);

        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
            IPSTATS_MIB_FRAGFAILS);

slow_path_clean:
        /* Undo the wmem-accounting transfer done above. */
        skb_walk_frags(skb, frag2) {
            frag2->destructor = NULL;
            skb->truesize += frag2->truesize;

    left = skb->len - hlen; /* Space per frame */
    ptr = hlen; /* Where to start from */

    /*
     * Fragment the datagram.
     */
    troom = rt->dst.dev->needed_tailroom;

    /*
     * Keep copying data until we run out.
     */
        u8 *fragnexthdr_offset;

        /* IF: it doesn't fit, use 'mtu' - the data space left */
        /* IF: we are not sending up to and including the packet end
           then align the next start on an eight byte boundary */

        /* Allocate buffer */
        frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
            hroom + troom, GFP_ATOMIC);

        /*
         * Set up data on packet
         */
        ip6_copy_metadata(frag, skb);
        skb_reserve(frag, hroom);
        skb_put(frag, len + hlen + sizeof(struct frag_hdr));
        skb_reset_network_header(frag);
        fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
        frag->transport_header = (frag->network_header + hlen +
            sizeof(struct frag_hdr));

        /*
         * Charge the memory for the fragment to any owner
         */
        skb_set_owner_w(frag, skb->sk);

        /*
         * Copy the packet header into the new buffer.
         */
        skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

        /* Rewrite the next-header byte of the preceding header to point
         * at the fragment header.
         */
        fragnexthdr_offset = skb_network_header(frag);
        fragnexthdr_offset += prevhdr - skb_network_header(skb);
        *fragnexthdr_offset = NEXTHDR_FRAGMENT;

        /*
         * Build fragment header.
         */
        fh->nexthdr = nexthdr;
        fh->identification = frag_id;

        /*
         * Copy a block of the IP datagram.
         */
        BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),

        fh->frag_off = htons(offset);
            fh->frag_off |= htons(IP6_MF);
        ipv6_hdr(frag)->payload_len = htons(frag->len -
            sizeof(struct ipv6hdr));

        /*
         * Put this fragment into the sending queue.
         */
        err = output(net, sk, frag);

        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
            IPSTATS_MIB_FRAGCREATES);

    IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
        IPSTATS_MIB_FRAGOKS);

    /* Overfragmented dst: stop GSO on the socket before reporting. */
    if (skb->sk && dst_allfrag(skb_dst(skb)))
        sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

    icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);

    IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
        IPSTATS_MIB_FRAGFAILS);
870 static inline int ip6_rt_check(const struct rt6key *rt_key,
871 const struct in6_addr *fl_addr,
872 const struct in6_addr *addr_cache)
874 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
875 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
/* Validate a socket's cached dst against the flow @fl6. Returns the dst
 * if still usable for this flow, otherwise releases it (logic partially
 * outside the visible extract) so the caller performs a fresh lookup.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
    struct dst_entry *dst,
    const struct flowi6 *fl6)
    struct ipv6_pinfo *np = inet6_sk(sk);

    /* An IPv4-mapped dst (from a v6-mapped socket) can never be reused
     * for a native IPv6 flow.
     */
    if (dst->ops->family != AF_INET6) {

    rt = (struct rt6_info *)dst;
    /* Yes, checking route validity in not connected
     * case is not very simple. Take into account,
     * that we do not support routing by source, TOS,
     * and MSG_DONTROUTE --ANK (980726)
     *
     * 1. ip6_rt_check(): If route was host route,
     * check that cached destination is current.
     * If it is network route, we still may
     * check its validity using saved pointer
     * to the last used address: daddr_cache.
     * We do not want to save whole address now,
     * (because main consumer of this service
     * is tcp, which has not this problem),
     * so that the last trick works only on connected
     * 2. oif also should be the same.
     */
    if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
        ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
        (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
        (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
/* Core of the dst lookup: resolve a route for @fl6 into *@dst, choosing a
 * source address when the flow has none, retrying with RT6_LOOKUP_F_IFACE
 * for source-specific routing, and (with optimistic DAD) substituting the
 * default router's dst when the chosen source is still optimistic.
 *
 * NOTE(review): extraction dropped several lines (declarations, braces,
 * returns); visible statements are preserved as-is.
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
    struct dst_entry **dst, struct flowi6 *fl6)
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD

    /* The correct way to handle this would be to do
     * ip6_route_get_saddr, and then ip6_route_output; however,
     * the route-specific preferred source forces the
     * ip6_route_output call _before_ ip6_route_get_saddr.
     *
     * In source specific routing (no src=any default route),
     * ip6_route_output will fail given src=any saddr, though, so
     * that's why we try it again later.
     */
    if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
        struct fib6_info *from;

        bool had_dst = *dst != NULL;

        *dst = ip6_route_output(net, sk, fl6);
        rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

        from = rt ? rcu_dereference(rt->from) : NULL;
        err = ip6_route_get_saddr(net, from, &fl6->daddr,
            sk ? inet6_sk(sk)->srcprefs : 0,

            goto out_err_release;

        /* If we had an erroneous initial result, pretend it
         * never existed and let the SA-enabled version take
         */
        if (!had_dst && (*dst)->error) {

            flags |= RT6_LOOKUP_F_IFACE;

        *dst = ip6_route_output_flags(net, sk, fl6, flags);

        goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
    /*
     * Here if the dst entry we've looked up
     * has a neighbour entry that is in the INCOMPLETE
     * state and the src address from the flow is
     * marked as OPTIMISTIC, we release the found
     * dst entry and replace it instead with the
     * dst entry of the nexthop router
     */
    rt = (struct rt6_info *) *dst;

    n = __ipv6_neigh_lookup_noref(rt->dst.dev,
        rt6_nexthop(rt, &fl6->daddr));
    err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
    rcu_read_unlock_bh();

        struct inet6_ifaddr *ifp;
        struct flowi6 fl_gw6;

        ifp = ipv6_get_ifaddr(net, &fl6->saddr,

        redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);

            /*
             * We need to get the dst entry for the
             * default router instead
             */
            memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
            memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
            *dst = ip6_route_output(net, sk, &fl_gw6);
            err = (*dst)->error;

                goto out_err_release;

    /* v4-mapped source with a non-v4-mapped destination is unsupported. */
    if (ipv6_addr_v4mapped(&fl6->saddr) &&
        !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
        err = -EAFNOSUPPORT;
        goto out_err_release;

    if (err == -ENETUNREACH)
        IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
/**
 * ip6_dst_lookup - perform route lookup on flow
 * @net: network namespace to look the route up in
 * @sk: socket which provides route info
 * @dst: pointer to dst_entry * for result
 * @fl6: flow to lookup
 *
 * This function performs a route lookup on the given flow.
 *
 * It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
    /* Thin exported wrapper around the internal lookup helper. */
    return ip6_dst_lookup_tail(net, sk, dst, fl6);
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
/**
 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 * @sk: socket which provides route info
 * @fl6: flow to lookup
 * @final_dst: final destination address for ipsec lookup
 *
 * This function performs a route lookup on the given flow.
 *
 * It returns a valid dst pointer on success, or a pointer encoded
 * error (via ERR_PTR) on failure.
 */
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
    const struct in6_addr *final_dst)
    struct dst_entry *dst = NULL;

    err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);

        return ERR_PTR(err);

        fl6->daddr = *final_dst;

    /* Let xfrm apply any IPsec transformation to the found route. */
    return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
/**
 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 * @sk: socket which provides the dst cache and route info
 * @fl6: flow to lookup
 * @final_dst: final destination address for ipsec lookup
 * @connected: whether @sk is connected or not
 *
 * This function performs a route lookup on the given flow with the
 * possibility of using the cached route in the socket if it is valid.
 * It will take the socket dst lock when operating on the dst cache.
 * As a result, this function can only be used in process context.
 *
 * In addition, for a connected socket, cache the dst in the socket
 * if the current cache is not valid.
 *
 * It returns a valid dst pointer on success, or a pointer encoded
 * error (via ERR_PTR) on failure.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
    const struct in6_addr *final_dst,
    struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

    /* Reuse the socket's cached dst when it is still valid for @fl6. */
    dst = ip6_sk_dst_check(sk, dst, fl6);

    dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
    if (connected && !IS_ERR(dst))
        ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1124 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1127 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1130 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1133 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
/* Recompute *mtu (and the derived *maxfraglen) for the next fragment in
 * ip6_append_data: outside an XFRM tunnel, the first fragment reserves
 * the dst's header_len while later fragments treat that space as data.
 */
static void ip6_append_data_mtu(unsigned int *mtu,
    unsigned int fragheaderlen,
    struct sk_buff *skb,
    struct rt6_info *rt,
    unsigned int orig_mtu)
    if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {

        /* first fragment, reserve header_len */
        *mtu = orig_mtu - rt->dst.header_len;

        /*
         * this fragment is not first, the headers
         * space is regarded as data space.
         */

    /* Largest 8-byte-aligned fragment payload for the current mtu. */
    *maxfraglen = ((*mtu - fragheaderlen) & ~7)
        + fragheaderlen - sizeof(struct frag_hdr);
/* Initialize the cork (pending-data state) for ip6_append_data: deep-copy
 * the tx options from @ipc6, take over the route, and compute the mtu,
 * hop limit, tclass and timestamping flags used by subsequent appends.
 *
 * NOTE(review): extraction dropped several lines (error returns, braces);
 * visible statements are preserved as-is.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
    struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
    struct rt6_info *rt, struct flowi6 *fl6)
    struct ipv6_pinfo *np = inet6_sk(sk);

    struct ipv6_txoptions *opt = ipc6->opt;

    /* Options must be deep-copied: the caller's @opt may not outlive
     * the cork.
     */
    if (WARN_ON(v6_cork->opt))

    v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
    if (unlikely(!v6_cork->opt))

    v6_cork->opt->tot_len = sizeof(*opt);
    v6_cork->opt->opt_flen = opt->opt_flen;
    v6_cork->opt->opt_nflen = opt->opt_nflen;

    v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
    if (opt->dst0opt && !v6_cork->opt->dst0opt)

    v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
    if (opt->dst1opt && !v6_cork->opt->dst1opt)

    v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
    if (opt->hopopt && !v6_cork->opt->hopopt)

    v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
    if (opt->srcrt && !v6_cork->opt->srcrt)

    /* need source address above miyazawa*/

    cork->base.dst = &rt->dst;
    cork->fl.u.ip6 = *fl6;
    v6_cork->hop_limit = ipc6->hlimit;
    v6_cork->tclass = ipc6->tclass;
    /* MTU selection: probe mode uses the device mtu directly; otherwise
     * use the dst mtu (of the xfrm path dst when tunneled).
     */
    if (rt->dst.flags & DST_XFRM_TUNNEL)
        mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
            READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);

        mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
            READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));

    if (np->frag_size < mtu) {
        mtu = np->frag_size;

    if (mtu < IPV6_MIN_MTU)

    cork->base.fragsize = mtu;
    cork->base.gso_size = ipc6->gso_size;
    cork->base.tx_flags = 0;
    sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

    if (dst_allfrag(xfrm_dst_path(&rt->dst)))
        cork->base.flags |= IPCORK_ALLFRAG;
    cork->base.length = 0;

    cork->base.transmit_time = ipc6->sockc.transmit_time;
1236 static int __ip6_append_data(struct sock *sk,
1238 struct sk_buff_head *queue,
1239 struct inet_cork *cork,
1240 struct inet6_cork *v6_cork,
1241 struct page_frag *pfrag,
1242 int getfrag(void *from, char *to, int offset,
1243 int len, int odd, struct sk_buff *skb),
1244 void *from, int length, int transhdrlen,
1245 unsigned int flags, struct ipcm6_cookie *ipc6)
1247 struct sk_buff *skb, *skb_prev = NULL;
1248 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1250 int dst_exthdrlen = 0;
1256 struct rt6_info *rt = (struct rt6_info *)cork->dst;
1257 struct ipv6_txoptions *opt = v6_cork->opt;
1258 int csummode = CHECKSUM_NONE;
1259 unsigned int maxnonfragsize, headersize;
1260 unsigned int wmem_alloc_delta = 0;
1263 skb = skb_peek_tail(queue);
1265 exthdrlen = opt ? opt->opt_flen : 0;
1266 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1269 paged = !!cork->gso_size;
1270 mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1273 if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1274 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1275 tskey = sk->sk_tskey++;
1277 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1279 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1280 (opt ? opt->opt_nflen : 0);
1281 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1282 sizeof(struct frag_hdr);
1284 headersize = sizeof(struct ipv6hdr) +
1285 (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1286 (dst_allfrag(&rt->dst) ?
1287 sizeof(struct frag_hdr) : 0) +
1288 rt->rt6i_nfheader_len;
1290 /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1291 * the first fragment
1293 if (headersize + transhdrlen > mtu)
1296 if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1297 (sk->sk_protocol == IPPROTO_UDP ||
1298 sk->sk_protocol == IPPROTO_RAW)) {
1299 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1300 sizeof(struct ipv6hdr));
1304 if (ip6_sk_ignore_df(sk))
1305 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1307 maxnonfragsize = mtu;
1309 if (cork->length + length > maxnonfragsize - headersize) {
1311 pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1312 ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1316 /* CHECKSUM_PARTIAL only with no extension headers and when
1317 * we are not going to fragment
1319 if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1320 headersize == sizeof(struct ipv6hdr) &&
1321 length <= mtu - headersize &&
1322 (!(flags & MSG_MORE) || cork->gso_size) &&
1323 rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1324 csummode = CHECKSUM_PARTIAL;
1327 * Let's try using as much space as possible.
1328 * Use MTU if total length of the message fits into the MTU.
1329 * Otherwise, we need to reserve fragment header and
1330 * fragment alignment (= 8-15 octects, in total).
1332 * Note that we may need to "move" the data from the tail of
1333 * of the buffer to the new fragment when we split
1336 * FIXME: It may be fragmented into multiple chunks
1337 * at once if non-fragmentable extension headers
1342 cork->length += length;
1346 while (length > 0) {
1347 /* Check if the remaining data fits into current packet. */
1348 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1350 copy = maxfraglen - skb->len;
1354 unsigned int datalen;
1355 unsigned int fraglen;
1356 unsigned int fraggap;
1357 unsigned int alloclen;
1358 unsigned int pagedlen;
1360 /* There's no room in the current skb */
1362 fraggap = skb->len - maxfraglen;
1365 /* update mtu and maxfraglen if necessary */
1366 if (!skb || !skb_prev)
1367 ip6_append_data_mtu(&mtu, &maxfraglen,
1368 fragheaderlen, skb, rt,
1374 * If remaining data exceeds the mtu,
1375 * we know we need more fragment(s).
1377 datalen = length + fraggap;
1379 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1380 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1381 fraglen = datalen + fragheaderlen;
1384 if ((flags & MSG_MORE) &&
1385 !(rt->dst.dev->features&NETIF_F_SG))
1390 alloclen = min_t(int, fraglen, MAX_HEADER);
1391 pagedlen = fraglen - alloclen;
1394 alloclen += dst_exthdrlen;
1396 if (datalen != length + fraggap) {
1398 * this is not the last fragment, the trailer
1399 * space is regarded as data space.
1401 datalen += rt->dst.trailer_len;
1404 alloclen += rt->dst.trailer_len;
1405 fraglen = datalen + fragheaderlen;
1408 * We just reserve space for fragment header.
1409 * Note: this may be overallocation if the message
1410 * (without MSG_MORE) fits into the MTU.
1412 alloclen += sizeof(struct frag_hdr);
1414 copy = datalen - transhdrlen - fraggap - pagedlen;
1420 skb = sock_alloc_send_skb(sk,
1422 (flags & MSG_DONTWAIT), &err);
1425 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1427 skb = alloc_skb(alloclen + hh_len,
1435 * Fill in the control structures
1437 skb->protocol = htons(ETH_P_IPV6);
1438 skb->ip_summed = csummode;
1440 /* reserve for fragmentation and ipsec header */
1441 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1444 /* Only the initial fragment is time stamped */
1445 skb_shinfo(skb)->tx_flags = cork->tx_flags;
1447 skb_shinfo(skb)->tskey = tskey;
1451 * Find where to start putting bytes
1453 data = skb_put(skb, fraglen - pagedlen);
1454 skb_set_network_header(skb, exthdrlen);
1455 data += fragheaderlen;
1456 skb->transport_header = (skb->network_header +
1459 skb->csum = skb_copy_and_csum_bits(
1460 skb_prev, maxfraglen,
1461 data + transhdrlen, fraggap, 0);
1462 skb_prev->csum = csum_sub(skb_prev->csum,
1465 pskb_trim_unique(skb_prev, maxfraglen);
1468 getfrag(from, data + transhdrlen, offset,
1469 copy, fraggap, skb) < 0) {
1476 length -= copy + transhdrlen;
1481 if ((flags & MSG_CONFIRM) && !skb_prev)
1482 skb_set_dst_pending_confirm(skb, 1);
1485 * Put the packet on the pending queue
1487 if (!skb->destructor) {
1488 skb->destructor = sock_wfree;
1490 wmem_alloc_delta += skb->truesize;
1492 __skb_queue_tail(queue, skb);
1499 if (!(rt->dst.dev->features&NETIF_F_SG) &&
1500 skb_tailroom(skb) >= copy) {
1504 if (getfrag(from, skb_put(skb, copy),
1505 offset, copy, off, skb) < 0) {
1506 __skb_trim(skb, off);
1511 int i = skb_shinfo(skb)->nr_frags;
1514 if (!sk_page_frag_refill(sk, pfrag))
1517 if (!skb_can_coalesce(skb, i, pfrag->page,
1520 if (i == MAX_SKB_FRAGS)
1523 __skb_fill_page_desc(skb, i, pfrag->page,
1525 skb_shinfo(skb)->nr_frags = ++i;
1526 get_page(pfrag->page);
1528 copy = min_t(int, copy, pfrag->size - pfrag->offset);
1530 page_address(pfrag->page) + pfrag->offset,
1531 offset, copy, skb->len, skb) < 0)
1534 pfrag->offset += copy;
1535 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1537 skb->data_len += copy;
1538 skb->truesize += copy;
1539 wmem_alloc_delta += copy;
1545 if (wmem_alloc_delta)
1546 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1552 cork->length -= length;
1553 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1554 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
/*
 * ip6_append_data - append user data to the socket's pending (corked)
 * IPv6 output queue.
 *
 * On the first call for an empty sk_write_queue the cork state is
 * initialised via ip6_setup_cork() and the length of the destination
 * options (ipc6->opt->opt_flen) is folded into both 'length' and
 * 'transhdrlen'.  The actual queueing work is delegated to
 * __ip6_append_data().
 *
 * getfrag() is the caller's copy callback (e.g. copies from user space).
 * Returns 0 on success or a negative errno.
 *
 * NOTE(review): several lines of the original function (local
 * declarations, early returns, error handling) are elided in this
 * excerpt; do not assume the visible lines are contiguous.
 */
1558 int ip6_append_data(struct sock *sk,
1559 int getfrag(void *from, char *to, int offset, int len,
1560 int odd, struct sk_buff *skb),
1561 void *from, int length, int transhdrlen,
1562 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1563 struct rt6_info *rt, unsigned int flags)
1565 struct inet_sock *inet = inet_sk(sk);
1566 struct ipv6_pinfo *np = inet6_sk(sk);
/* MSG_PROBE: path MTU probe only - nothing is queued. */
1570 if (flags&MSG_PROBE)
/* First fragment of a new corked message: set up cork state. */
1572 if (skb_queue_empty(&sk->sk_write_queue)) {
1576 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
/* Destination-option bytes are carried as part of the payload. */
1581 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1582 length += exthdrlen;
1583 transhdrlen += exthdrlen;
/* NOTE(review): in the full source this is the else branch -
 * subsequent calls reuse the flow stored when the cork was set up. */
1585 fl6 = &inet->cork.fl.u.ip6;
1589 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1590 &np->cork, sk_page_frag(sk), getfrag,
1591 from, length, transhdrlen, flags, ipc6);
1593 EXPORT_SYMBOL_GPL(ip6_append_data);
/*
 * ip6_cork_release - free all state held by a cork.
 *
 * Frees the cached IPv6 tx options (each sub-option buffer, then the
 * container), drops the cached dst reference, clears the
 * all-fragments flag, and wipes the stored flow key so the cork can
 * be reused.
 *
 * NOTE(review): the enclosing "if (v6_cork->opt)" guard and the
 * closing braces are elided in this excerpt.
 */
1595 static void ip6_cork_release(struct inet_cork_full *cork,
1596 struct inet6_cork *v6_cork)
1599 kfree(v6_cork->opt->dst0opt);
1600 kfree(v6_cork->opt->dst1opt);
1601 kfree(v6_cork->opt->hopopt);
1602 kfree(v6_cork->opt->srcrt);
1603 kfree(v6_cork->opt);
1604 v6_cork->opt = NULL;
/* Drop the route cached for this corked message, if any. */
1607 if (cork->base.dst) {
1608 dst_release(cork->base.dst);
1609 cork->base.dst = NULL;
1610 cork->base.flags &= ~IPCORK_ALLFRAG;
/* Clear the saved flow so stale addresses cannot leak into reuse. */
1612 memset(&cork->fl, 0, sizeof(cork->fl));
/*
 * __ip6_make_skb - collapse the queued fragments of a corked message
 * into one skb and prepend the IPv6 header.
 *
 * Dequeues the first skb as the head, chains every remaining queued
 * skb onto its frag_list (accounting len/data_len/truesize as it
 * goes), pushes any extension headers from the cork's tx options,
 * then builds the IPv6 header from the cork/flow state.  Finishes by
 * attaching the cached route, bumping output stats, and releasing the
 * cork.
 *
 * NOTE(review): several lines (the NULL-queue check, "out:" label,
 * return statement, closing braces) are elided in this excerpt.
 */
1615 struct sk_buff *__ip6_make_skb(struct sock *sk,
1616 struct sk_buff_head *queue,
1617 struct inet_cork_full *cork,
1618 struct inet6_cork *v6_cork)
1620 struct sk_buff *skb, *tmp_skb;
1621 struct sk_buff **tail_skb;
1622 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1623 struct ipv6_pinfo *np = inet6_sk(sk);
1624 struct net *net = sock_net(sk);
1625 struct ipv6hdr *hdr;
1626 struct ipv6_txoptions *opt = v6_cork->opt;
1627 struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1628 struct flowi6 *fl6 = &cork->fl.u.ip6;
1629 unsigned char proto = fl6->flowi6_proto;
/* First queued skb becomes the head of the resulting packet. */
1631 skb = __skb_dequeue(queue);
1634 tail_skb = &(skb_shinfo(skb)->frag_list);
1636 /* move skb->data to ip header from ext header */
1637 if (skb->data < skb_network_header(skb))
1638 __skb_pull(skb, skb_network_offset(skb));
/* Chain the remaining fragments onto the head's frag_list and fold
 * their sizes into the head skb's accounting. */
1639 while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1640 __skb_pull(tmp_skb, skb_network_header_len(skb));
1641 *tail_skb = tmp_skb;
1642 tail_skb = &(tmp_skb->next);
1643 skb->len += tmp_skb->len;
1644 skb->data_len += tmp_skb->len;
1645 skb->truesize += tmp_skb->truesize;
/* Memory is now accounted on the head skb only. */
1646 tmp_skb->destructor = NULL;
1650 /* Allow local fragmentation. */
1651 skb->ignore_df = ip6_sk_ignore_df(sk);
/* final_dst may be rewritten below if a routing header is pushed. */
1653 *final_dst = fl6->daddr;
1654 __skb_pull(skb, skb_network_header_len(skb));
/* Push fragmentable then non-fragmentable extension headers; each
 * call updates 'proto' to chain the next-header values. */
1655 if (opt && opt->opt_flen)
1656 ipv6_push_frag_opts(skb, opt, &proto);
1657 if (opt && opt->opt_nflen)
1658 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
/* Prepend and populate the fixed IPv6 header. */
1660 skb_push(skb, sizeof(struct ipv6hdr));
1661 skb_reset_network_header(skb);
1662 hdr = ipv6_hdr(skb);
1664 ip6_flow_hdr(hdr, v6_cork->tclass,
1665 ip6_make_flowlabel(net, skb, fl6->flowlabel,
1666 ip6_autoflowlabel(net, np), fl6));
1667 hdr->hop_limit = v6_cork->hop_limit;
1668 hdr->nexthdr = proto;
1669 hdr->saddr = fl6->saddr;
1670 hdr->daddr = *final_dst;
1672 skb->priority = sk->sk_priority;
1673 skb->mark = sk->sk_mark;
1675 skb->tstamp = cork->base.transmit_time;
/* Keep our own reference to the cached route for this skb. */
1677 skb_dst_set(skb, dst_clone(&rt->dst));
1678 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
/* ICMPv6 gets its own per-message-type output counters. */
1679 if (proto == IPPROTO_ICMPV6) {
1680 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1682 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1683 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
/* All cork state has been transferred to the skb - release it. */
1686 ip6_cork_release(cork, v6_cork);
/*
 * ip6_send_skb - hand a finished skb to the IPv6 output path.
 *
 * Sends via ip6_local_out(); congestion-notification return codes are
 * normalised with net_xmit_errno(), and failed transmissions are
 * counted as output discards against the route's idev.
 *
 * NOTE(review): the 'err' declaration, the surrounding "if (err)"
 * checks and the return statement are elided in this excerpt.
 */
1691 int ip6_send_skb(struct sk_buff *skb)
1693 struct net *net = sock_net(skb->sk);
1694 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1697 err = ip6_local_out(net, skb->sk, skb);
/* Map NET_XMIT_* congestion codes to 0/-errno for the caller. */
1700 err = net_xmit_errno(err);
1702 IP6_INC_STATS(net, rt->rt6i_idev,
1703 IPSTATS_MIB_OUTDISCARDS);
/*
 * ip6_push_pending_frames - finalise and transmit the socket's corked
 * data: convert the write queue into one skb via ip6_finish_skb(),
 * then send it with ip6_send_skb().
 *
 * NOTE(review): the NULL check on the skb returned by
 * ip6_finish_skb() is elided in this excerpt.
 */
1709 int ip6_push_pending_frames(struct sock *sk)
1711 struct sk_buff *skb;
1713 skb = ip6_finish_skb(sk);
1717 return ip6_send_skb(skb);
1719 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
/*
 * __ip6_flush_pending_frames - discard every skb queued for a corked
 * message and release the cork state.
 *
 * Each discarded skb with a dst attached is counted as an output
 * discard against its idev.
 *
 * NOTE(review): the "if (skb_dst(skb))" guard and the kfree_skb()
 * call inside the loop are elided in this excerpt.
 */
1721 static void __ip6_flush_pending_frames(struct sock *sk,
1722 struct sk_buff_head *queue,
1723 struct inet_cork_full *cork,
1724 struct inet6_cork *v6_cork)
1726 struct sk_buff *skb;
/* Drain from the tail until the queue is empty. */
1728 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1730 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1731 IPSTATS_MIB_OUTDISCARDS);
1735 ip6_cork_release(cork, v6_cork);
/*
 * ip6_flush_pending_frames - public wrapper that flushes the socket's
 * own write queue and persistent cork state.
 */
1738 void ip6_flush_pending_frames(struct sock *sk)
1740 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1741 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1743 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
/*
 * ip6_make_skb - build a complete IPv6 skb from user data in one shot,
 * using a caller-provided cork instead of the socket's persistent
 * cork state.
 *
 * Initialises the caller's cork, sets it up via ip6_setup_cork(),
 * appends the data to a private queue with __ip6_append_data() (using
 * the current task's page frag), and collapses the queue into one skb
 * with __ip6_make_skb().  On failure the queue is flushed and an
 * ERR_PTR is returned.
 *
 * Fix: "&current" had been corrupted into the mojibake "¤t"
 * (the HTML entity "&curren;" rendered as the currency sign), which
 * does not compile; restored to &current->task_frag.
 *
 * NOTE(review): some lines of the original function (braces, early
 * return for MSG_PROBE, "if (err)" checks) are elided in this
 * excerpt; the visible lines are kept as-is apart from the fix above.
 */
1745 struct sk_buff *ip6_make_skb(struct sock *sk,
1746 int getfrag(void *from, char *to, int offset,
1747 int len, int odd, struct sk_buff *skb),
1748 void *from, int length, int transhdrlen,
1749 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1750 struct rt6_info *rt, unsigned int flags,
1751 struct inet_cork_full *cork)
1753 struct inet6_cork v6_cork;
1754 struct sk_buff_head queue;
1755 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
/* MSG_PROBE: path MTU probe only - no skb is built. */
1758 if (flags & MSG_PROBE)
1761 __skb_queue_head_init(&queue);
/* Start from a clean cork; ip6_setup_cork() fills in the rest. */
1763 cork->base.flags = 0;
1764 cork->base.addr = 0;
1765 cork->base.opt = NULL;
1766 cork->base.dst = NULL;
1768 err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1770 ip6_cork_release(cork, &v6_cork);
1771 return ERR_PTR(err);
/* Negative dontfrag means "not set" - inherit the socket default. */
1773 if (ipc6->dontfrag < 0)
1774 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1776 err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1777 &current->task_frag, getfrag, from,
1778 length + exthdrlen, transhdrlen + exthdrlen,
/* On append failure, free everything queued so far. */
1781 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1782 return ERR_PTR(err);
1785 return __ip6_make_skb(sk, &queue, cork, &v6_cork);