net/ipv6/ip6_output.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  *      IPv6 output functions
   4  *      Linux INET6 implementation
   5  *
   6  *      Authors:
   7  *      Pedro Roque             <roque@di.fc.ul.pt>
   8  *
   9  *      Based on linux/net/ipv4/ip_output.c
  10  *
  11  *      Changes:
   12  *      A.N.Kuznetsov   :       arithmetics in fragmentation.
  13  *                              extension headers are implemented.
  14  *                              route changes now work.
  15  *                              ip6_forward does not confuse sniffers.
  16  *                              etc.
  17  *
  18  *      H. von Brand    :       Added missing #include <linux/string.h>
  19  *      Imran Patel     :       frag id should be in NBO
  20  *      Kazunori MIYAZAWA @USAGI
  21  *                      :       add ip6_append_data and related functions
  22  *                              for datagram xmit
  23  */
  24
  25 #include <linux/errno.h>
  26 #include <linux/kernel.h>
  27 #include <linux/string.h>
  28 #include <linux/socket.h>
  29 #include <linux/net.h>
  30 #include <linux/netdevice.h>
  31 #include <linux/if_arp.h>
  32 #include <linux/in6.h>
  33 #include <linux/tcp.h>
  34 #include <linux/route.h>
  35 #include <linux/module.h>
  36 #include <linux/slab.h>
  37
  38 #include <linux/bpf-cgroup.h>
  39 #include <linux/netfilter.h>
  40 #include <linux/netfilter_ipv6.h>
  41
  42 #include <net/sock.h>
  43 #include <net/snmp.h>
  44
  45 #include <net/ipv6.h>
  46 #include <net/ndisc.h>
  47 #include <net/protocol.h>
  48 #include <net/ip6_route.h>
  49 #include <net/addrconf.h>
  50 #include <net/rawv6.h>
  51 #include <net/icmp.h>
  52 #include <net/xfrm.h>
  53 #include <net/checksum.h>
  54 #include <linux/mroute6.h>
  55 #include <net/l3mdev.h>
  56 #include <net/lwtunnel.h>
  57
  58 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
  59 {
  60         struct dst_entry *dst = skb_dst(skb);
  61         struct net_device *dev = dst->dev;
  62         const struct in6_addr *nexthop;
  63         struct neighbour *neigh;
  64         int ret;
  65
  66         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
  67                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
  68
  69                 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
  70                     ((mroute6_is_socket(net, skb) &&
  71                      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
  72                      ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
  73                                          &ipv6_hdr(skb)->saddr))) {
  74                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
  75
  76                         /* Do not check for IFF_ALLMULTI; multicast routing
  77                            is not supported in any case.
  78                          */
  79                         if (newskb)
  80                                 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
  81                                         net, sk, newskb, NULL, newskb->dev,
  82                                         dev_loopback_xmit);
  83
  84                         if (ipv6_hdr(skb)->hop_limit == 0) {
  85                                 IP6_INC_STATS(net, idev,
  86                                               IPSTATS_MIB_OUTDISCARDS);
  87                                 kfree_skb(skb);
  88                                 return 0;
  89                         }
  90                 }
  91
  92                 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
  93
  94                 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
  95                     IPV6_ADDR_SCOPE_NODELOCAL &&
  96                     !(dev->flags & IFF_LOOPBACK)) {
  97                         kfree_skb(skb);
  98                         return 0;
  99                 }
 100         }
 101
 102         if (lwtunnel_xmit_redirect(dst->lwtstate)) {
 103                 int res = lwtunnel_xmit(skb);
 104
 105                 if (res < 0 || res == LWTUNNEL_XMIT_DONE)
 106                         return res;
 107         }
 108
 109         rcu_read_lock_bh();
 110         nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
 111         neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
 112         if (unlikely(!neigh))
 113                 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
 114         if (!IS_ERR(neigh)) {
 115                 sock_confirm_neigh(skb, neigh);
 116                 ret = neigh_output(neigh, skb, false);
 117                 rcu_read_unlock_bh();
 118                 return ret;
 119         }
 120         rcu_read_unlock_bh();
 121
 122         IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 123         kfree_skb(skb);
 124         return -EINVAL;
 125 }
 126
 127 static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 128 {
 129 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
 130         /* Policy lookup after SNAT yielded a new policy */
 131         if (skb_dst(skb)->xfrm) {
 132                 IPCB(skb)->flags |= IPSKB_REROUTED;
 133                 return dst_output(net, sk, skb);
 134         }
 135 #endif
 136
 137         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 138             dst_allfrag(skb_dst(skb)) ||
 139             (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
 140                 return ip6_fragment(net, sk, skb, ip6_finish_output2);
 141         else
 142                 return ip6_finish_output2(net, sk, skb);
 143 }
 144
 145 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 146 {
 147         int ret;
 148
 149         ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
 150         switch (ret) {
 151         case NET_XMIT_SUCCESS:
 152                 return __ip6_finish_output(net, sk, skb);
 153         case NET_XMIT_CN:
 154                 return __ip6_finish_output(net, sk, skb) ? : ret;
 155         default:
 156                 kfree_skb(skb);
 157                 return ret;
 158         }
 159 }
 160
 161 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 162 {
 163         struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
 164         struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 165
 166         skb->protocol = htons(ETH_P_IPV6);
 167         skb->dev = dev;
 168
 169         if (unlikely(idev->cnf.disable_ipv6)) {
 170                 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 171                 kfree_skb(skb);
 172                 return 0;
 173         }
 174
 175         return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
 176                             net, sk, skb, indev, dev,
 177                             ip6_finish_output,
 178                             !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 179 }
 180
 181 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
 182 {
 183         if (!np->autoflowlabel_set)
 184                 return ip6_default_np_autolabel(net);
 185         else
 186                 return np->autoflowlabel;
 187 }
 188
 189 /*
 190  * xmit an sk_buff (used by TCP, SCTP and DCCP)
 191  * Note : socket lock is not held for SYNACK packets, but might be modified
 192  * by calls to skb_set_owner_w() and ipv6_local_error(),
 193  * which are using proper atomic operations or spinlocks.
 194  */
 195 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 196              __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
 197 {
 198         struct net *net = sock_net(sk);
 199         const struct ipv6_pinfo *np = inet6_sk(sk);
 200         struct in6_addr *first_hop = &fl6->daddr;
 201         struct dst_entry *dst = skb_dst(skb);
 202         unsigned int head_room;
 203         struct ipv6hdr *hdr;
 204         u8  proto = fl6->flowi6_proto;
 205         int seg_len = skb->len;
 206         int hlimit = -1;
 207         u32 mtu;
 208
 209         head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
 210         if (opt)
 211                 head_room += opt->opt_nflen + opt->opt_flen;
 212
 213         if (unlikely(skb_headroom(skb) < head_room)) {
 214                 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
 215                 if (!skb2) {
 216                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 217                                       IPSTATS_MIB_OUTDISCARDS);
 218                         kfree_skb(skb);
 219                         return -ENOBUFS;
 220                 }
 221                 if (skb->sk)
 222                         skb_set_owner_w(skb2, skb->sk);
 223                 consume_skb(skb);
 224                 skb = skb2;
 225         }
 226
 227         if (opt) {
 228                 seg_len += opt->opt_nflen + opt->opt_flen;
 229
 230                 if (opt->opt_flen)
 231                         ipv6_push_frag_opts(skb, opt, &proto);
 232
 233                 if (opt->opt_nflen)
 234                         ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
 235                                              &fl6->saddr);
 236         }
 237
 238         skb_push(skb, sizeof(struct ipv6hdr));
 239         skb_reset_network_header(skb);
 240         hdr = ipv6_hdr(skb);
 241
 242         /*
 243          *      Fill in the IPv6 header
 244          */
 245         if (np)
 246                 hlimit = np->hop_limit;
 247         if (hlimit < 0)
 248                 hlimit = ip6_dst_hoplimit(dst);
 249
 250         ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
 251                                 ip6_autoflowlabel(net, np), fl6));
 252
 253         hdr->payload_len = htons(seg_len);
 254         hdr->nexthdr = proto;
 255         hdr->hop_limit = hlimit;
 256
 257         hdr->saddr = fl6->saddr;
 258         hdr->daddr = *first_hop;
 259
 260         skb->protocol = htons(ETH_P_IPV6);
 261         skb->priority = priority;
 262         skb->mark = mark;
 263
 264         mtu = dst_mtu(dst);
 265         if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
 266                 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
 267                               IPSTATS_MIB_OUT, skb->len);
 268
 269                 /* if egress device is enslaved to an L3 master device pass the
 270                  * skb to its handler for processing
 271                  */
 272                 skb = l3mdev_ip6_out((struct sock *)sk, skb);
 273                 if (unlikely(!skb))
 274                         return 0;
 275
 276                 /* hooks should never assume socket lock is held.
 277                  * we promote our socket to non const
 278                  */
 279                 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
 280                                net, (struct sock *)sk, skb, NULL, dst->dev,
 281                                dst_output);
 282         }
 283
 284         skb->dev = dst->dev;
 285         /* ipv6_local_error() does not require socket lock,
 286          * we promote our socket to non const
 287          */
 288         ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
 289
 290         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
 291         kfree_skb(skb);
 292         return -EMSGSIZE;
 293 }
 294 EXPORT_SYMBOL(ip6_xmit);
 295
 296 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 297 {
 298         struct ip6_ra_chain *ra;
 299         struct sock *last = NULL;
 300
 301         read_lock(&ip6_ra_lock);
 302         for (ra = ip6_ra_chain; ra; ra = ra->next) {
 303                 struct sock *sk = ra->sk;
 304                 if (sk && ra->sel == sel &&
 305                     (!sk->sk_bound_dev_if ||
 306                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
 307                         struct ipv6_pinfo *np = inet6_sk(sk);
 308
 309                         if (np && np->rtalert_isolate &&
 310                             !net_eq(sock_net(sk), dev_net(skb->dev))) {
 311                                 continue;
 312                         }
 313                         if (last) {
 314                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 315                                 if (skb2)
 316                                         rawv6_rcv(last, skb2);
 317                         }
 318                         last = sk;
 319                 }
 320         }
 321
 322         if (last) {
 323                 rawv6_rcv(last, skb);
 324                 read_unlock(&ip6_ra_lock);
 325                 return 1;
 326         }
 327         read_unlock(&ip6_ra_lock);
 328         return 0;
 329 }
 330
 331 static int ip6_forward_proxy_check(struct sk_buff *skb)
 332 {
 333         struct ipv6hdr *hdr = ipv6_hdr(skb);
 334         u8 nexthdr = hdr->nexthdr;
 335         __be16 frag_off;
 336         int offset;
 337
 338         if (ipv6_ext_hdr(nexthdr)) {
 339                 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
 340                 if (offset < 0)
 341                         return 0;
 342         } else
 343                 offset = sizeof(struct ipv6hdr);
 344
 345         if (nexthdr == IPPROTO_ICMPV6) {
 346                 struct icmp6hdr *icmp6;
 347
 348                 if (!pskb_may_pull(skb, (skb_network_header(skb) +
 349                                          offset + 1 - skb->data)))
 350                         return 0;
 351
 352                 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 353
 354                 switch (icmp6->icmp6_type) {
 355                 case NDISC_ROUTER_SOLICITATION:
 356                 case NDISC_ROUTER_ADVERTISEMENT:
 357                 case NDISC_NEIGHBOUR_SOLICITATION:
 358                 case NDISC_NEIGHBOUR_ADVERTISEMENT:
 359                 case NDISC_REDIRECT:
 360                         /* For reaction involving unicast neighbor discovery
 361                          * message destined to the proxied address, pass it to
 362                          * input function.
 363                          */
 364                         return 1;
 365                 default:
 366                         break;
 367                 }
 368         }
 369
 370         /*
 371          * The proxying router can't forward traffic sent to a link-local
 372          * address, so signal the sender and discard the packet. This
 373          * behavior is clarified by the MIPv6 specification.
 374          */
 375         if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
 376                 dst_link_failure(skb);
 377                 return -1;
 378         }
 379
 380         return 0;
 381 }
 382
 383 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
 384                                      struct sk_buff *skb)
 385 {
 386         struct dst_entry *dst = skb_dst(skb);
 387
 388         __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 389         __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
 390
 391 #ifdef CONFIG_NET_SWITCHDEV
 392         if (skb->offload_l3_fwd_mark) {
 393                 consume_skb(skb);
 394                 return 0;
 395         }
 396 #endif
 397
 398         skb->tstamp = 0;
 399         return dst_output(net, sk, skb);
 400 }
 401
 402 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
 403 {
 404         if (skb->len <= mtu)
 405                 return false;
 406
 407         /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
 408         if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
 409                 return true;
 410
 411         if (skb->ignore_df)
 412                 return false;
 413
 414         if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
 415                 return false;
 416
 417         return true;
 418 }
 419
 420 int ip6_forward(struct sk_buff *skb)
 421 {
 422         struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
 423         struct dst_entry *dst = skb_dst(skb);
 424         struct ipv6hdr *hdr = ipv6_hdr(skb);
 425         struct inet6_skb_parm *opt = IP6CB(skb);
 426         struct net *net = dev_net(dst->dev);
 427         u32 mtu;
 428
 429         if (net->ipv6.devconf_all->forwarding == 0)
 430                 goto error;
 431
 432         if (skb->pkt_type != PACKET_HOST)
 433                 goto drop;
 434
 435         if (unlikely(skb->sk))
 436                 goto drop;
 437
 438         if (skb_warn_if_lro(skb))
 439                 goto drop;
 440
 441         if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
 442                 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
 443                 goto drop;
 444         }
 445
 446         skb_forward_csum(skb);
 447
 448         /*
 449          *      We DO NOT make any processing on
 450          *      RA packets, pushing them to user level AS IS
 451          *      without ane WARRANTY that application will be able
 452          *      to interpret them. The reason is that we
 453          *      cannot make anything clever here.
 454          *
 455          *      We are not end-node, so that if packet contains
 456          *      AH/ESP, we cannot make anything.
 457          *      Defragmentation also would be mistake, RA packets
 458          *      cannot be fragmented, because there is no warranty
 459          *      that different fragments will go along one path. --ANK
 460          */
 461         if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
 462                 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
 463                         return 0;
 464         }
 465
 466         /*
 467          *      check and decrement ttl
 468          */
 469         if (hdr->hop_limit <= 1) {
 470                 /* Force OUTPUT device used as source address */
 471                 skb->dev = dst->dev;
 472                 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
 473                 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 474
 475                 kfree_skb(skb);
 476                 return -ETIMEDOUT;
 477         }
 478
 479         /* XXX: idev->cnf.proxy_ndp? */
 480         if (net->ipv6.devconf_all->proxy_ndp &&
 481             pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
 482                 int proxied = ip6_forward_proxy_check(skb);
 483                 if (proxied > 0)
 484                         return ip6_input(skb);
 485                 else if (proxied < 0) {
 486                         __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
 487                         goto drop;
 488                 }
 489         }
 490
 491         if (!xfrm6_route_forward(skb)) {
 492                 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
 493                 goto drop;
 494         }
 495         dst = skb_dst(skb);
 496
 497         /* IPv6 specs say nothing about it, but it is clear that we cannot
 498            send redirects to source routed frames.
 499            We don't send redirects to frames decapsulated from IPsec.
 500          */
 501         if (IP6CB(skb)->iif == dst->dev->ifindex &&
 502             opt->srcrt == 0 && !skb_sec_path(skb)) {
 503                 struct in6_addr *target = NULL;
 504                 struct inet_peer *peer;
 505                 struct rt6_info *rt;
 506
 507                 /*
 508                  *      incoming and outgoing devices are the same
 509                  *      send a redirect.
 510                  */
 511
 512                 rt = (struct rt6_info *) dst;
 513                 if (rt->rt6i_flags & RTF_GATEWAY)
 514                         target = &rt->rt6i_gateway;
 515                 else
 516                         target = &hdr->daddr;
 517
 518                 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
 519
 520                 /* Limit redirects both by destination (here)
 521                    and by source (inside ndisc_send_redirect)
 522                  */
 523                 if (inet_peer_xrlim_allow(peer, 1*HZ))
 524                         ndisc_send_redirect(skb, target);
 525                 if (peer)
 526                         inet_putpeer(peer);
 527         } else {
 528                 int addrtype = ipv6_addr_type(&hdr->saddr);
 529
 530                 /* This check is security critical. */
 531                 if (addrtype == IPV6_ADDR_ANY ||
 532                     addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
 533                         goto error;
 534                 if (addrtype & IPV6_ADDR_LINKLOCAL) {
 535                         icmpv6_send(skb, ICMPV6_DEST_UNREACH,
 536                                     ICMPV6_NOT_NEIGHBOUR, 0);
 537                         goto error;
 538                 }
 539         }
 540
 541         mtu = ip6_dst_mtu_forward(dst);
 542         if (mtu < IPV6_MIN_MTU)
 543                 mtu = IPV6_MIN_MTU;
 544
 545         if (ip6_pkt_too_big(skb, mtu)) {
 546                 /* Again, force OUTPUT device used as source address */
 547                 skb->dev = dst->dev;
 548                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 549                 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
 550                 __IP6_INC_STATS(net, ip6_dst_idev(dst),
 551                                 IPSTATS_MIB_FRAGFAILS);
 552                 kfree_skb(skb);
 553                 return -EMSGSIZE;
 554         }
 555
 556         if (skb_cow(skb, dst->dev->hard_header_len)) {
 557                 __IP6_INC_STATS(net, ip6_dst_idev(dst),
 558                                 IPSTATS_MIB_OUTDISCARDS);
 559                 goto drop;
 560         }
 561
 562         hdr = ipv6_hdr(skb);
 563
 564         /* Mangling hops number delayed to point after skb COW */
 565
 566         hdr->hop_limit--;
 567
 568         return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
 569                        net, NULL, skb, skb->dev, dst->dev,
 570                        ip6_forward_finish);
 571
 572 error:
 573         __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
 574 drop:
 575         kfree_skb(skb);
 576         return -EINVAL;
 577 }
 578
 579 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 580 {
 581         to->pkt_type = from->pkt_type;
 582         to->priority = from->priority;
 583         to->protocol = from->protocol;
 584         skb_dst_drop(to);
 585         skb_dst_set(to, dst_clone(skb_dst(from)));
 586         to->dev = from->dev;
 587         to->mark = from->mark;
 588
 589         skb_copy_hash(to, from);
 590
 591 #ifdef CONFIG_NET_SCHED
 592         to->tc_index = from->tc_index;
 593 #endif
 594         nf_copy(to, from);
 595         skb_ext_copy(to, from);
 596         skb_copy_secmark(to, from);
 597 }
 598
 599 int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
 600                       u8 nexthdr, __be32 frag_id,
 601                       struct ip6_fraglist_iter *iter)
 602 {
 603         unsigned int first_len;
 604         struct frag_hdr *fh;
 605
 606         /* BUILD HEADER */
 607         *prevhdr = NEXTHDR_FRAGMENT;
 608         iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 609         if (!iter->tmp_hdr)
 610                 return -ENOMEM;
 611
 612         iter->frag = skb_shinfo(skb)->frag_list;
 613         skb_frag_list_init(skb);
 614
 615         iter->offset = 0;
 616         iter->hlen = hlen;
 617         iter->frag_id = frag_id;
 618         iter->nexthdr = nexthdr;
 619
 620         __skb_pull(skb, hlen);
 621         fh = __skb_push(skb, sizeof(struct frag_hdr));
 622         __skb_push(skb, hlen);
 623         skb_reset_network_header(skb);
 624         memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
 625
 626         fh->nexthdr = nexthdr;
 627         fh->reserved = 0;
 628         fh->frag_off = htons(IP6_MF);
 629         fh->identification = frag_id;
 630
 631         first_len = skb_pagelen(skb);
 632         skb->data_len = first_len - skb_headlen(skb);
 633         skb->len = first_len;
 634         ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
 635
 636         return 0;
 637 }
 638 EXPORT_SYMBOL(ip6_fraglist_init);
 639
 640 void ip6_fraglist_prepare(struct sk_buff *skb,
 641                           struct ip6_fraglist_iter *iter)
 642 {
 643         struct sk_buff *frag = iter->frag;
 644         unsigned int hlen = iter->hlen;
 645         struct frag_hdr *fh;
 646
 647         frag->ip_summed = CHECKSUM_NONE;
 648         skb_reset_transport_header(frag);
 649         fh = __skb_push(frag, sizeof(struct frag_hdr));
 650         __skb_push(frag, hlen);
 651         skb_reset_network_header(frag);
 652         memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
 653         iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
 654         fh->nexthdr = iter->nexthdr;
 655         fh->reserved = 0;
 656         fh->frag_off = htons(iter->offset);
 657         if (frag->next)
 658                 fh->frag_off |= htons(IP6_MF);
 659         fh->identification = iter->frag_id;
 660         ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
 661         ip6_copy_metadata(frag, skb);
 662 }
 663 EXPORT_SYMBOL(ip6_fraglist_prepare);
 664
 665 void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
 666                    unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
 667                    u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
 668 {
 669         state->prevhdr = prevhdr;
 670         state->nexthdr = nexthdr;
 671         state->frag_id = frag_id;
 672
 673         state->hlen = hlen;
 674         state->mtu = mtu;
 675
 676         state->left = skb->len - hlen;  /* Space per frame */
 677         state->ptr = hlen;              /* Where to start from */
 678
 679         state->hroom = hdr_room;
 680         state->troom = needed_tailroom;
 681
 682         state->offset = 0;
 683 }
 684 EXPORT_SYMBOL(ip6_frag_init);
 685
 686 struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
 687 {
 688         u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
 689         struct sk_buff *frag;
 690         struct frag_hdr *fh;
 691         unsigned int len;
 692
 693         len = state->left;
 694         /* IF: it doesn't fit, use 'mtu' - the data space left */
 695         if (len > state->mtu)
 696                 len = state->mtu;
 697         /* IF: we are not sending up to and including the packet end
 698            then align the next start on an eight byte boundary */
 699         if (len < state->left)
 700                 len &= ~7;
 701
 702         /* Allocate buffer */
 703         frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
 704                          state->hroom + state->troom, GFP_ATOMIC);
 705         if (!frag)
 706                 return ERR_PTR(-ENOMEM);
 707
 708         /*
 709          *      Set up data on packet
 710          */
 711
 712         ip6_copy_metadata(frag, skb);
 713         skb_reserve(frag, state->hroom);
 714         skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
 715         skb_reset_network_header(frag);
 716         fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
 717         frag->transport_header = (frag->network_header + state->hlen +
 718                                   sizeof(struct frag_hdr));
 719
 720         /*
 721          *      Charge the memory for the fragment to any owner
 722          *      it might possess
 723          */
 724         if (skb->sk)
 725                 skb_set_owner_w(frag, skb->sk);
 726
 727         /*
 728          *      Copy the packet header into the new buffer.
 729          */
 730         skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
 731
 732         fragnexthdr_offset = skb_network_header(frag);
 733         fragnexthdr_offset += prevhdr - skb_network_header(skb);
 734         *fragnexthdr_offset = NEXTHDR_FRAGMENT;
 735
 736         /*
 737          *      Build fragment header.
 738          */
 739         fh->nexthdr = state->nexthdr;
 740         fh->reserved = 0;
 741         fh->identification = state->frag_id;
 742
 743         /*
 744          *      Copy a block of the IP datagram.
 745          */
 746         BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
 747                              len));
 748         state->left -= len;
 749
 750         fh->frag_off = htons(state->offset);
 751         if (state->left > 0)
 752                 fh->frag_off |= htons(IP6_MF);
 753         ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
 754
 755         state->ptr += len;
 756         state->offset += len;
 757
 758         return frag;
 759 }
 760 EXPORT_SYMBOL(ip6_frag_next);
 761
 762 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 763                  int (*output)(struct net *, struct sock *, struct sk_buff *))
 764 {
 765         struct sk_buff *frag;
 766         struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 767         struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
 768                                 inet6_sk(skb->sk) : NULL;
 769         struct ip6_frag_state state;
 770         unsigned int mtu, hlen, nexthdr_offset;
 771         ktime_t tstamp = skb->tstamp;
 772         int hroom, err = 0;
 773         __be32 frag_id;
 774         u8 *prevhdr, nexthdr = 0;
 775
 776         err = ip6_find_1stfragopt(skb, &prevhdr);
 777         if (err < 0)
 778                 goto fail;
 779         hlen = err;
 780         nexthdr = *prevhdr;
 781         nexthdr_offset = prevhdr - skb_network_header(skb);
 782
 783         mtu = ip6_skb_dst_mtu(skb);
 784
 785         /* We must not fragment if the socket is set to force MTU discovery
 786          * or if the skb it not generated by a local socket.
 787          */
 788         if (unlikely(!skb->ignore_df && skb->len > mtu))
 789                 goto fail_toobig;
 790
 791         if (IP6CB(skb)->frag_max_size) {
 792                 if (IP6CB(skb)->frag_max_size > mtu)
 793                         goto fail_toobig;
 794
 795                 /* don't send fragments larger than what we received */
 796                 mtu = IP6CB(skb)->frag_max_size;
 797                 if (mtu < IPV6_MIN_MTU)
 798                         mtu = IPV6_MIN_MTU;
 799         }
 800
 801         if (np && np->frag_size < mtu) {
 802                 if (np->frag_size)
 803                         mtu = np->frag_size;
 804         }
 805         if (mtu < hlen + sizeof(struct frag_hdr) + 8)
 806                 goto fail_toobig;
 807         mtu -= hlen + sizeof(struct frag_hdr);
 808
 809         frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
 810                                     &ipv6_hdr(skb)->saddr);
 811
 812         if (skb->ip_summed == CHECKSUM_PARTIAL &&
 813             (err = skb_checksum_help(skb)))
 814                 goto fail;
 815
 816         prevhdr = skb_network_header(skb) + nexthdr_offset;
 817         hroom = LL_RESERVED_SPACE(rt->dst.dev);
 818         if (skb_has_frag_list(skb)) {
 819                 unsigned int first_len = skb_pagelen(skb);
 820                 struct ip6_fraglist_iter iter;
 821                 struct sk_buff *frag2;
 822
 823                 if (first_len - hlen > mtu ||
 824                     ((first_len - hlen) & 7) ||
 825                     skb_cloned(skb) ||
 826                     skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
 827                         goto slow_path;
 828
 829                 skb_walk_frags(skb, frag) {
 830                         /* Correct geometry. */
 831                         if (frag->len > mtu ||
 832                             ((frag->len & 7) && frag->next) ||
 833                             skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
 834                                 goto slow_path_clean;
 835
 836                         /* Partially cloned skb? */
 837                         if (skb_shared(frag))
 838                                 goto slow_path_clean;
 839
 840                         BUG_ON(frag->sk);
 841                         if (skb->sk) {
 842                                 frag->sk = skb->sk;
 843                                 frag->destructor = sock_wfree;
 844                         }
 845                         skb->truesize -= frag->truesize;
 846                 }
 847
 848                 err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
 849                                         &iter);
 850                 if (err < 0)
 851                         goto fail;
 852
 853                 for (;;) {
 854                         /* Prepare header of the next frame,
 855                          * before previous one went down. */
 856                         if (iter.frag)
 857                                 ip6_fraglist_prepare(skb, &iter);
 858
 859                         skb->tstamp = tstamp;
 860                         err = output(net, sk, skb);
 861                         if (!err)
 862                                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 863                                               IPSTATS_MIB_FRAGCREATES);
 864
 865                         if (err || !iter.frag)
 866                                 break;
 867
 868                         skb = ip6_fraglist_next(&iter);
 869                 }
 870
 871                 kfree(iter.tmp_hdr);
 872
 873                 if (err == 0) {
 874                         IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 875                                       IPSTATS_MIB_FRAGOKS);
 876                         return 0;
 877                 }
 878
 879                 kfree_skb_list(iter.frag);
 880
 881                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 882                               IPSTATS_MIB_FRAGFAILS);
 883                 return err;
 884
 885 slow_path_clean:
 886                 skb_walk_frags(skb, frag2) {
 887                         if (frag2 == frag)
 888                                 break;
 889                         frag2->sk = NULL;
 890                         frag2->destructor = NULL;
 891                         skb->truesize += frag2->truesize;
 892                 }
 893         }
 894
 895 slow_path:
 896         /*
 897          *      Fragment the datagram.
 898          */
 899
 900         ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
 901                       LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
 902                       &state);
 903
 904         /*
 905          *      Keep copying data until we run out.
 906          */
 907
 908         while (state.left > 0) {
 909                 frag = ip6_frag_next(skb, &state);
 910                 if (IS_ERR(frag)) {
 911                         err = PTR_ERR(frag);
 912                         goto fail;
 913                 }
 914
 915                 /*
 916                  *      Put this fragment into the sending queue.
 917                  */
 918                 frag->tstamp = tstamp;
 919                 err = output(net, sk, frag);
 920                 if (err)
 921                         goto fail;
 922
 923                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 924                               IPSTATS_MIB_FRAGCREATES);
 925         }
 926         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 927                       IPSTATS_MIB_FRAGOKS);
 928         consume_skb(skb);
 929         return err;
 930
 931 fail_toobig:
 932         if (skb->sk && dst_allfrag(skb_dst(skb)))
 933                 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
 934
 935         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 936         err = -EMSGSIZE;
 937
 938 fail:
 939         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 940                       IPSTATS_MIB_FRAGFAILS);
 941         kfree_skb(skb);
 942         return err;
 943 }
 944
 945 static inline int ip6_rt_check(const struct rt6key *rt_key,
 946                                const struct in6_addr *fl_addr,
 947                                const struct in6_addr *addr_cache)
 948 {
             /*
              * Returns 0 as soon as one of the two tests vouches for @fl_addr,
              * and 1 when neither does.  ip6_sk_dst_check() drops its cached
              * dst on a non-zero result.
              */

             /* Host route (/128) whose key is exactly the flow address. */
             if (rt_key->plen == 128 && ipv6_addr_equal(fl_addr, &rt_key->addr))
                     return 0;

             /* Otherwise the cached last-used address, if any, must match. */
             if (addr_cache && ipv6_addr_equal(fl_addr, addr_cache))
                     return 0;

             return 1;
 951 }
 952
 953 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 954                                           struct dst_entry *dst,
 955                                           const struct flowi6 *fl6)
 956 {
 957         struct ipv6_pinfo *np = inet6_sk(sk);
 958         struct rt6_info *rt;
 959
 960         if (!dst)
 961                 goto out;
 962
 963         if (dst->ops->family != AF_INET6) {
 964                 dst_release(dst);
 965                 return NULL;
 966         }
 967
 968         rt = (struct rt6_info *)dst;
 969         /* Yes, checking route validity in not connected
 970          * case is not very simple. Take into account,
 971          * that we do not support routing by source, TOS,
 972          * and MSG_DONTROUTE            --ANK (980726)
 973          *
 974          * 1. ip6_rt_check(): If route was host route,
 975          *    check that cached destination is current.
 976          *    If it is network route, we still may
 977          *    check its validity using saved pointer
 978          *    to the last used address: daddr_cache.
 979          *    We do not want to save whole address now,
 980          *    (because main consumer of this service
 981          *    is tcp, which has not this problem),
 982          *    so that the last trick works only on connected
 983          *    sockets.
 984          * 2. oif also should be the same.
 985          */
 986         if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
 987 #ifdef CONFIG_IPV6_SUBTREES
 988             ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
 989 #endif
 990            (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
 991               (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
 992                 dst_release(dst);
 993                 dst = NULL;
 994         }
 995
 996 out:
 997         return dst;
 998 }
 999
1000 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
1001                                struct dst_entry **dst, struct flowi6 *fl6)
1002 {
             /*
              * Resolve the route for @fl6 into *@dst, choosing a source
              * address first when @fl6->saddr is the unspecified address.
              *
              * *@dst may be NULL or a dst supplied by the caller; a supplied
              * dst that carries no error is used as-is instead of doing a
              * fresh lookup.
              *
              * Returns 0 with *@dst set on success.  On failure returns the
              * error code, releases *@dst and sets it to NULL, and bumps
              * IPSTATS_MIB_OUTNOROUTES when the error is -ENETUNREACH.
              */
1003 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1004         struct neighbour *n;
1005         struct rt6_info *rt;
1006 #endif
1007         int err;
1008         int flags = 0;
1009
1010         /* The correct way to handle this would be to do
1011          * ip6_route_get_saddr, and then ip6_route_output; however,
1012          * the route-specific preferred source forces the
1013          * ip6_route_output call _before_ ip6_route_get_saddr.
1014          *
1015          * In source specific routing (no src=any default route),
1016          * ip6_route_output will fail given src=any saddr, though, so
1017          * that's why we try it again later.
1018          */
1019         if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
1020                 struct fib6_info *from;
                     /* Shadows the function-scope rt declared under
                      * CONFIG_IPV6_OPTIMISTIC_DAD; only used in this block.
                      */
1021                 struct rt6_info *rt;
1022                 bool had_dst = *dst != NULL;
1023
1024                 if (!had_dst)
1025                         *dst = ip6_route_output(net, sk, fl6);
                     /* An errored lookup result yields rt == NULL, so the
                      * source address is then selected with from == NULL.
                      */
1026                 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
1027
1028                 rcu_read_lock();
1029                 from = rt ? rcu_dereference(rt->from) : NULL;
1030                 err = ip6_route_get_saddr(net, from, &fl6->daddr,
1031                                           sk ? inet6_sk(sk)->srcprefs : 0,
1032                                           &fl6->saddr);
1033                 rcu_read_unlock();
1034
1035                 if (err)
1036                         goto out_err_release;
1037
1038                 /* If we had an erroneous initial result, pretend it
1039                  * never existed and let the SA-enabled version take
1040                  * over.
1041                  */
1042                 if (!had_dst && (*dst)->error) {
1043                         dst_release(*dst);
1044                         *dst = NULL;
1045                 }
1046
                     /* Only affects the retry below, which runs when *dst
                      * was reset to NULL just above.
                      */
1047                 if (fl6->flowi6_oif)
1048                         flags |= RT6_LOOKUP_F_IFACE;
1049         }
1050
             /* Covers both "no dst yet" and the retry now that saddr is set. */
1051         if (!*dst)
1052                 *dst = ip6_route_output_flags(net, sk, fl6, flags);
1053
1054         err = (*dst)->error;
1055         if (err)
1056                 goto out_err_release;
1057
1058 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1059         /*
1060          * Here if the dst entry we've looked up
1061          * has a neighbour entry that is in the INCOMPLETE
1062          * state and the src address from the flow is
1063          * marked as OPTIMISTIC, we release the found
1064          * dst entry and replace it instead with the
1065          * dst entry of the nexthop router
              *
              * Note: the test below is "neighbour exists and its state has
              * no NUD_VALID bit", which is what stands in for INCOMPLETE.
1066          */
1067         rt = (struct rt6_info *) *dst;
1068         rcu_read_lock_bh();
1069         n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1070                                       rt6_nexthop(rt, &fl6->daddr));
             /* err is only a local flag here; it is overwritten or left at
              * a value that does not reach the caller unless the redirect
              * lookup below fails.
              */
1071         err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1072         rcu_read_unlock_bh();
1073
1074         if (err) {
1075                 struct inet6_ifaddr *ifp;
1076                 struct flowi6 fl_gw6;
1077                 int redirect;
1078
1079                 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1080                                       (*dst)->dev, 1);
1081
1082                 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1083                 if (ifp)
1084                         in6_ifa_put(ifp);
1085
1086                 if (redirect) {
1087                         /*
1088                          * We need to get the dst entry for the
1089                          * default router instead
1090                          */
1091                         dst_release(*dst);
                             /* Same flow, but with the destination zeroed. */
1092                         memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1093                         memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1094                         *dst = ip6_route_output(net, sk, &fl_gw6);
1095                         err = (*dst)->error;
1096                         if (err)
1097                                 goto out_err_release;
1098                 }
1099         }
1100 #endif
             /* A v4-mapped source is only accepted together with a v4-mapped
              * or unspecified destination.
              */
1101         if (ipv6_addr_v4mapped(&fl6->saddr) &&
1102             !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1103                 err = -EAFNOSUPPORT;
1104                 goto out_err_release;
1105         }
1106
1107         return 0;
1108
1109 out_err_release:
             /* Every failure path hands back *dst == NULL with the reference
              * dropped, including a dst that the caller passed in.
              */
1110         dst_release(*dst);
1111         *dst = NULL;
1112
1113         if (err == -ENETUNREACH)
1114                 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1115         return err;
1116 }
1117
1118 /**
1119  *      ip6_dst_lookup - perform route lookup on flow
      *      @net: network namespace handed to the route lookup
1120  *      @sk: socket which provides route info
1121  *      @dst: pointer to dst_entry * for result
1122  *      @fl6: flow to lookup
1123  *
1124  *      This function performs a route lookup on the given flow.
      *      Whatever *@dst holds on entry is overwritten with NULL without
      *      being released, so the lookup always starts from scratch.
1125  *
1126  *      It returns zero on success, or a standard errno code on error.
1127  */
1128 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1129                    struct flowi6 *fl6)
1130 {
1131         *dst = NULL;
1132         return ip6_dst_lookup_tail(net, sk, dst, fl6);
1133 }
1134 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1135
1136 /**
      *      ip6_dst_lookup_flow - route a flow, then run the ipsec (xfrm) lookup
      *      @sk: socket which provides the network namespace and route info
      *      @fl6: flow to lookup; once the route lookup has succeeded its
      *            daddr is overwritten with @final_dst when one is given
      *      @final_dst: final destination address for ipsec lookup, may be NULL
      *
      *      The plain route lookup is done first; its result is then handed
      *      to xfrm_lookup_route().
      *
      *      It returns a valid dst pointer on success, or a pointer encoded
      *      error code.
      */
1147 struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
1148                                       const struct in6_addr *final_dst)
1149 {
             struct net *net = sock_net(sk);
             struct dst_entry *dst = NULL;
             int err;

             err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
             if (err)
                     return ERR_PTR(err);

             if (final_dst)
                     fl6->daddr = *final_dst;

             return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1160 }
1161 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1162
1163 /**
      *      ip6_sk_dst_lookup_flow - route lookup that tries the socket cache first
      *      @sk: socket which provides the dst cache and route info
      *      @fl6: flow to lookup
      *      @final_dst: final destination address for ipsec lookup
      *      @connected: whether @sk is connected or not
      *
      *      The route cached in the socket is used when it is still valid
      *      for @fl6; otherwise a full lookup is performed.  The socket dst
      *      lock is taken while operating on the dst cache, so this function
      *      can only be used in process context.
      *
      *      For a connected socket, a freshly looked-up dst is also stored
      *      in the socket when the current cache is not valid.
      *
      *      It returns a valid dst pointer on success, or a pointer encoded
      *      error code.
      */
1181 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1182                                          const struct in6_addr *final_dst,
1183                                          bool connected)
1184 {
             struct dst_entry *dst;

             /* Fetch the cached dst and check it against this flow. */
             dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
             dst = ip6_sk_dst_check(sk, dst, fl6);

             if (!dst) {
                     /* Cache miss or stale entry: do the full lookup. */
                     dst = ip6_dst_lookup_flow(sk, fl6, final_dst);

                     /* The cache gets its own reference via dst_clone(). */
                     if (connected && !IS_ERR(dst))
                             ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
             }

             return dst;
1196 }
1197 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1198
1199 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1200                                                gfp_t gfp)
1201 {
             /*
              * Duplicate an option header with kmemdup(); a NULL @src yields
              * NULL.  The copy is (hdrlen + 1) * 8 bytes long.
              */
             if (!src)
                     return NULL;

             return kmemdup(src, (src->hdrlen + 1) * 8, gfp);
1203 }
1204
1205 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1206                                                 gfp_t gfp)
1207 {
             /*
              * Duplicate a routing header with kmemdup(); a NULL @src yields
              * NULL.  The copy is (hdrlen + 1) * 8 bytes long.
              */
             if (!src)
                     return NULL;

             return kmemdup(src, (src->hdrlen + 1) * 8, gfp);
1209 }
1210
1211 static void ip6_append_data_mtu(unsigned int *mtu,
1212                                 int *maxfraglen,
1213                                 unsigned int fragheaderlen,
1214                                 struct sk_buff *skb,
1215                                 struct rt6_info *rt,
1216                                 unsigned int orig_mtu)
1217 {
             /*
              * Recompute *@mtu and *@maxfraglen for the next fragment.
              * Both are left untouched for DST_XFRM_TUNNEL routes.
              */
             if (rt->dst.flags & DST_XFRM_TUNNEL)
                     return;

             /*
              * No skb yet means this is the first fragment, which reserves
              * header_len.  For later fragments the headers space is
              * regarded as data space.
              */
             *mtu = skb ? orig_mtu : orig_mtu - rt->dst.header_len;

             *maxfraglen = ((*mtu - fragheaderlen) & ~7)
                           + fragheaderlen - sizeof(struct frag_hdr);
1233 }
1234
1235 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1236                           struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1237                           struct rt6_info *rt, struct flowi6 *fl6)
1238 {
             /*
              * Initialise the cork state for a new pending datagram: copy the
              * tx options from @ipc6 into @v6_cork, take a reference on @rt,
              * record the flow, hop limit, traffic class, fragment size and
              * the per-call socket cookie values in @cork / @v6_cork.
              *
              * Returns 0 on success, -EINVAL if options are already attached
              * to @v6_cork or the resulting MTU is below IPV6_MIN_MTU, and
              * -ENOBUFS when an allocation fails.
              *
              * NOTE(review): nothing is undone here on the error returns
              * (partially duplicated options, the dst reference) —
              * presumably the caller releases the cork; confirm.
              */
1239         struct ipv6_pinfo *np = inet6_sk(sk);
1240         unsigned int mtu;
1241         struct ipv6_txoptions *opt = ipc6->opt;
1242
1243         /*
1244          * setup for corking
1245          */
1246         if (opt) {
                     /* Options must not already be attached to this cork. */
1247                 if (WARN_ON(v6_cork->opt))
1248                         return -EINVAL;
1249
                     /* Zeroed, so headers that are not duplicated stay NULL. */
1250                 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1251                 if (unlikely(!v6_cork->opt))
1252                         return -ENOBUFS;
1253
1254                 v6_cork->opt->tot_len = sizeof(*opt);
1255                 v6_cork->opt->opt_flen = opt->opt_flen;
1256                 v6_cork->opt->opt_nflen = opt->opt_nflen;
1257
                     /* Each header is copied individually; a NULL result
                      * is only an error when the source header existed.
                      */
1258                 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1259                                                     sk->sk_allocation);
1260                 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1261                         return -ENOBUFS;
1262
1263                 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1264                                                     sk->sk_allocation);
1265                 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1266                         return -ENOBUFS;
1267
1268                 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1269                                                    sk->sk_allocation);
1270                 if (opt->hopopt && !v6_cork->opt->hopopt)
1271                         return -ENOBUFS;
1272
1273                 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1274                                                     sk->sk_allocation);
1275                 if (opt->srcrt && !v6_cork->opt->srcrt)
1276                         return -ENOBUFS;
1277
1278                 /* need source address above miyazawa*/
1279         }
             /* The cork keeps its own reference on the route. */
1280         dst_hold(&rt->dst);
1281         cork->base.dst = &rt->dst;
1282         cork->fl.u.ip6 = *fl6;
1283         v6_cork->hop_limit = ipc6->hlimit;
1284         v6_cork->tclass = ipc6->tclass;
             /*
              * With pmtudisc at IPV6_PMTUDISC_PROBE or above the device MTU
              * is used.  Otherwise the dst MTU is used: of the route itself
              * for DST_XFRM_TUNNEL, of xfrm_dst_path() of the route if not.
              */
1285         if (rt->dst.flags & DST_XFRM_TUNNEL)
1286                 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1287                       READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1288         else
1289                 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1290                         READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
             /* A non-zero socket frag_size can only lower the MTU. */
1291         if (np->frag_size < mtu) {
1292                 if (np->frag_size)
1293                         mtu = np->frag_size;
1294         }
             /* Checked after the dst reference and options were stored. */
1295         if (mtu < IPV6_MIN_MTU)
1296                 return -EINVAL;
1297         cork->base.fragsize = mtu;
1298         cork->base.gso_size = ipc6->gso_size;
1299         cork->base.tx_flags = 0;
1300         cork->base.mark = ipc6->sockc.mark;
1301         sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1302
             /* flags is only OR-ed into here, never initialised in this
              * function — NOTE(review): presumably cleared by the caller.
              */
1303         if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1304                 cork->base.flags |= IPCORK_ALLFRAG;
1305         cork->base.length = 0;
1306
1307         cork->base.transmit_time = ipc6->sockc.transmit_time;
1308
1309         return 0;
1310 }
1311
1312 static int __ip6_append_data(struct sock *sk,
1313                              struct flowi6 *fl6,
1314                              struct sk_buff_head *queue,
1315                              struct inet_cork *cork,
1316                              struct inet6_cork *v6_cork,
1317                              struct page_frag *pfrag,
1318                              int getfrag(void *from, char *to, int offset,
1319                                          int len, int odd, struct sk_buff *skb),
1320                              void *from, int length, int transhdrlen,
1321                              unsigned int flags, struct ipcm6_cookie *ipc6)
1322 {
1323         struct sk_buff *skb, *skb_prev = NULL;
1324         unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1325         struct ubuf_info *uarg = NULL;
1326         int exthdrlen = 0;
1327         int dst_exthdrlen = 0;
1328         int hh_len;
1329         int copy;
1330         int err;
1331         int offset = 0;
1332         u32 tskey = 0;
1333         struct rt6_info *rt = (struct rt6_info *)cork->dst;
1334         struct ipv6_txoptions *opt = v6_cork->opt;
1335         int csummode = CHECKSUM_NONE;
1336         unsigned int maxnonfragsize, headersize;
1337         unsigned int wmem_alloc_delta = 0;
1338         bool paged, extra_uref = false;
1339
1340         skb = skb_peek_tail(queue);
1341         if (!skb) {
1342                 exthdrlen = opt ? opt->opt_flen : 0;
1343                 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1344         }
1345
1346         paged = !!cork->gso_size;
1347         mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1348         orig_mtu = mtu;
1349
1350         if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1351             sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1352                 tskey = sk->sk_tskey++;
1353
1354         hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1355
1356         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1357                         (opt ? opt->opt_nflen : 0);
1358         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1359                      sizeof(struct frag_hdr);
1360
1361         headersize = sizeof(struct ipv6hdr) +
1362                      (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1363                      (dst_allfrag(&rt->dst) ?
1364                       sizeof(struct frag_hdr) : 0) +
1365                      rt->rt6i_nfheader_len;
1366
1367         /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1368          * the first fragment
1369          */
1370         if (headersize + transhdrlen > mtu)
1371                 goto emsgsize;
1372
1373         if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1374             (sk->sk_protocol == IPPROTO_UDP ||
1375              sk->sk_protocol == IPPROTO_RAW)) {
1376                 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1377                                 sizeof(struct ipv6hdr));
1378                 goto emsgsize;
1379         }
1380
1381         if (ip6_sk_ignore_df(sk))
1382                 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1383         else
1384                 maxnonfragsize = mtu;
1385
1386         if (cork->length + length > maxnonfragsize - headersize) {
1387 emsgsize:
1388                 pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1389                 ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1390                 return -EMSGSIZE;
1391         }
1392
1393         /* CHECKSUM_PARTIAL only with no extension headers and when
1394          * we are not going to fragment
1395          */
1396         if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1397             headersize == sizeof(struct ipv6hdr) &&
1398             length <= mtu - headersize &&
1399             (!(flags & MSG_MORE) || cork->gso_size) &&
1400             rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1401                 csummode = CHECKSUM_PARTIAL;
1402
1403         if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
1404                 uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
1405                 if (!uarg)
1406                         return -ENOBUFS;
1407                 extra_uref = !skb_zcopy(skb);   /* only ref on new uarg */
1408                 if (rt->dst.dev->features & NETIF_F_SG &&
1409                     csummode == CHECKSUM_PARTIAL) {
1410                         paged = true;
1411                 } else {
1412                         uarg->zerocopy = 0;
1413                         skb_zcopy_set(skb, uarg, &extra_uref);
1414                 }
1415         }
1416
1417         /*
1418          * Let's try using as much space as possible.
1419          * Use MTU if total length of the message fits into the MTU.
1420          * Otherwise, we need to reserve fragment header and
1421          * fragment alignment (= 8-15 octects, in total).
1422          *
1423          * Note that we may need to "move" the data from the tail of
1424          * of the buffer to the new fragment when we split
1425          * the message.
1426          *
1427          * FIXME: It may be fragmented into multiple chunks
1428          *        at once if non-fragmentable extension headers
1429          *        are too large.
1430          * --yoshfuji
1431          */
1432
1433         cork->length += length;
1434         if (!skb)
1435                 goto alloc_new_skb;
1436
1437         while (length > 0) {
1438                 /* Check if the remaining data fits into current packet. */
1439                 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1440                 if (copy < length)
1441                         copy = maxfraglen - skb->len;
1442
1443                 if (copy <= 0) {
1444                         char *data;
1445                         unsigned int datalen;
1446                         unsigned int fraglen;
1447                         unsigned int fraggap;
1448                         unsigned int alloclen;
1449                         unsigned int pagedlen;
1450 alloc_new_skb:
1451                         /* There's no room in the current skb */
1452                         if (skb)
1453                                 fraggap = skb->len - maxfraglen;
1454                         else
1455                                 fraggap = 0;
1456                         /* update mtu and maxfraglen if necessary */
1457                         if (!skb || !skb_prev)
1458                                 ip6_append_data_mtu(&mtu, &maxfraglen,
1459                                                     fragheaderlen, skb, rt,
1460                                                     orig_mtu);
1461
1462                         skb_prev = skb;
1463
1464                         /*
1465                          * If remaining data exceeds the mtu,
1466                          * we know we need more fragment(s).
1467                          */
1468                         datalen = length + fraggap;
1469
1470                         if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1471                                 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1472                         fraglen = datalen + fragheaderlen;
1473                         pagedlen = 0;
1474
1475                         if ((flags & MSG_MORE) &&
1476                             !(rt->dst.dev->features&NETIF_F_SG))
1477                                 alloclen = mtu;
1478                         else if (!paged)
1479                                 alloclen = fraglen;
1480                         else {
1481                                 alloclen = min_t(int, fraglen, MAX_HEADER);
1482                                 pagedlen = fraglen - alloclen;
1483                         }
1484
1485                         alloclen += dst_exthdrlen;
1486
1487                         if (datalen != length + fraggap) {
1488                                 /*
1489                                  * this is not the last fragment, the trailer
1490                                  * space is regarded as data space.
1491                                  */
1492                                 datalen += rt->dst.trailer_len;
1493                         }
1494
1495                         alloclen += rt->dst.trailer_len;
1496                         fraglen = datalen + fragheaderlen;
1497
1498                         /*
1499                          * We just reserve space for fragment header.
1500                          * Note: this may be overallocation if the message
1501                          * (without MSG_MORE) fits into the MTU.
1502                          */
1503                         alloclen += sizeof(struct frag_hdr);
1504
1505                         copy = datalen - transhdrlen - fraggap - pagedlen;
1506                         if (copy < 0) {
1507                                 err = -EINVAL;
1508                                 goto error;
1509                         }
1510                         if (transhdrlen) {
1511                                 skb = sock_alloc_send_skb(sk,
1512                                                 alloclen + hh_len,
1513                                                 (flags & MSG_DONTWAIT), &err);
1514                         } else {
1515                                 skb = NULL;
1516                                 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1517                                     2 * sk->sk_sndbuf)
1518                                         skb = alloc_skb(alloclen + hh_len,
1519                                                         sk->sk_allocation);
1520                                 if (unlikely(!skb))
1521                                         err = -ENOBUFS;
1522                         }
1523                         if (!skb)
1524                                 goto error;
1525                         /*
1526                          *      Fill in the control structures
1527                          */
1528                         skb->protocol = htons(ETH_P_IPV6);
1529                         skb->ip_summed = csummode;
1530                         skb->csum = 0;
1531                         /* reserve for fragmentation and ipsec header */
1532                         skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1533                                     dst_exthdrlen);
1534
1535                         /*
1536                          *      Find where to start putting bytes
1537                          */
1538                         data = skb_put(skb, fraglen - pagedlen);
1539                         skb_set_network_header(skb, exthdrlen);
1540                         data += fragheaderlen;
1541                         skb->transport_header = (skb->network_header +
1542                                                  fragheaderlen);
1543                         if (fraggap) {
1544                                 skb->csum = skb_copy_and_csum_bits(
1545                                         skb_prev, maxfraglen,
1546                                         data + transhdrlen, fraggap, 0);
1547                                 skb_prev->csum = csum_sub(skb_prev->csum,
1548                                                           skb->csum);
1549                                 data += fraggap;
1550                                 pskb_trim_unique(skb_prev, maxfraglen);
1551                         }
1552                         if (copy > 0 &&
1553                             getfrag(from, data + transhdrlen, offset,
1554                                     copy, fraggap, skb) < 0) {
1555                                 err = -EFAULT;
1556                                 kfree_skb(skb);
1557                                 goto error;
1558                         }
1559
1560                         offset += copy;
1561                         length -= copy + transhdrlen;
1562                         transhdrlen = 0;
1563                         exthdrlen = 0;
1564                         dst_exthdrlen = 0;
1565
1566                         /* Only the initial fragment is time stamped */
1567                         skb_shinfo(skb)->tx_flags = cork->tx_flags;
1568                         cork->tx_flags = 0;
1569                         skb_shinfo(skb)->tskey = tskey;
1570                         tskey = 0;
1571                         skb_zcopy_set(skb, uarg, &extra_uref);
1572
1573                         if ((flags & MSG_CONFIRM) && !skb_prev)
1574                                 skb_set_dst_pending_confirm(skb, 1);
1575
1576                         /*
1577                          * Put the packet on the pending queue
1578                          */
1579                         if (!skb->destructor) {
1580                                 skb->destructor = sock_wfree;
1581                                 skb->sk = sk;
1582                                 wmem_alloc_delta += skb->truesize;
1583                         }
1584                         __skb_queue_tail(queue, skb);
1585                         continue;
1586                 }
1587
1588                 if (copy > length)
1589                         copy = length;
1590
1591                 if (!(rt->dst.dev->features&NETIF_F_SG) &&
1592                     skb_tailroom(skb) >= copy) {
1593                         unsigned int off;
1594
1595                         off = skb->len;
1596                         if (getfrag(from, skb_put(skb, copy),
1597                                                 offset, copy, off, skb) < 0) {
1598                                 __skb_trim(skb, off);
1599                                 err = -EFAULT;
1600                                 goto error;
1601                         }
1602                 } else if (!uarg || !uarg->zerocopy) {
1603                         int i = skb_shinfo(skb)->nr_frags;
1604
1605                         err = -ENOMEM;
1606                         if (!sk_page_frag_refill(sk, pfrag))
1607                                 goto error;
1608
1609                         if (!skb_can_coalesce(skb, i, pfrag->page,
1610                                               pfrag->offset)) {
1611                                 err = -EMSGSIZE;
1612                                 if (i == MAX_SKB_FRAGS)
1613                                         goto error;
1614
1615                                 __skb_fill_page_desc(skb, i, pfrag->page,
1616                                                      pfrag->offset, 0);
1617                                 skb_shinfo(skb)->nr_frags = ++i;
1618                                 get_page(pfrag->page);
1619                         }
1620                         copy = min_t(int, copy, pfrag->size - pfrag->offset);
1621                         if (getfrag(from,
1622                                     page_address(pfrag->page) + pfrag->offset,
1623                                     offset, copy, skb->len, skb) < 0)
1624                                 goto error_efault;
1625
1626                         pfrag->offset += copy;
1627                         skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1628                         skb->len += copy;
1629                         skb->data_len += copy;
1630                         skb->truesize += copy;
1631                         wmem_alloc_delta += copy;
1632                 } else {
1633                         err = skb_zerocopy_iter_dgram(skb, from, copy);
1634                         if (err < 0)
1635                                 goto error;
1636                 }
1637                 offset += copy;
1638                 length -= copy;
1639         }
1640
1641         if (wmem_alloc_delta)
1642                 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1643         return 0;
1644
1645 error_efault:
1646         err = -EFAULT;
1647 error:
1648         if (uarg)
1649                 sock_zerocopy_put_abort(uarg, extra_uref);
1650         cork->length -= length;
1651         IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1652         refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1653         return err;
1654 }
1655
1656 int ip6_append_data(struct sock *sk,
1657                     int getfrag(void *from, char *to, int offset, int len,
1658                                 int odd, struct sk_buff *skb),
1659                     void *from, int length, int transhdrlen,
1660                     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1661                     struct rt6_info *rt, unsigned int flags)
1662 {
1663         struct inet_sock *inet = inet_sk(sk);
1664         struct ipv6_pinfo *np = inet6_sk(sk);
1665         int exthdrlen;
1666         int err;
1667
1668         if (flags&MSG_PROBE)
1669                 return 0;
1670         if (skb_queue_empty(&sk->sk_write_queue)) {
1671                 /*
1672                  * setup for corking
1673                  */
1674                 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1675                                      ipc6, rt, fl6);
1676                 if (err)
1677                         return err;
1678
1679                 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1680                 length += exthdrlen;
1681                 transhdrlen += exthdrlen;
1682         } else {
1683                 fl6 = &inet->cork.fl.u.ip6;
1684                 transhdrlen = 0;
1685         }
1686
1687         return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1688                                  &np->cork, sk_page_frag(sk), getfrag,
1689                                  from, length, transhdrlen, flags, ipc6);
1690 }
1691 EXPORT_SYMBOL_GPL(ip6_append_data);
1692
1693 static void ip6_cork_release(struct inet_cork_full *cork,
1694                              struct inet6_cork *v6_cork)
1695 {
1696         if (v6_cork->opt) {
1697                 kfree(v6_cork->opt->dst0opt);
1698                 kfree(v6_cork->opt->dst1opt);
1699                 kfree(v6_cork->opt->hopopt);
1700                 kfree(v6_cork->opt->srcrt);
1701                 kfree(v6_cork->opt);
1702                 v6_cork->opt = NULL;
1703         }
1704
1705         if (cork->base.dst) {
1706                 dst_release(cork->base.dst);
1707                 cork->base.dst = NULL;
1708                 cork->base.flags &= ~IPCORK_ALLFRAG;
1709         }
1710         memset(&cork->fl, 0, sizeof(cork->fl));
1711 }
1712
1713 struct sk_buff *__ip6_make_skb(struct sock *sk,
1714                                struct sk_buff_head *queue,
1715                                struct inet_cork_full *cork,
1716                                struct inet6_cork *v6_cork)
1717 {
1718         struct sk_buff *skb, *tmp_skb;
1719         struct sk_buff **tail_skb;
1720         struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1721         struct ipv6_pinfo *np = inet6_sk(sk);
1722         struct net *net = sock_net(sk);
1723         struct ipv6hdr *hdr;
1724         struct ipv6_txoptions *opt = v6_cork->opt;
1725         struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1726         struct flowi6 *fl6 = &cork->fl.u.ip6;
1727         unsigned char proto = fl6->flowi6_proto;
1728
1729         skb = __skb_dequeue(queue);
1730         if (!skb)
1731                 goto out;
1732         tail_skb = &(skb_shinfo(skb)->frag_list);
1733
1734         /* move skb->data to ip header from ext header */
1735         if (skb->data < skb_network_header(skb))
1736                 __skb_pull(skb, skb_network_offset(skb));
1737         while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1738                 __skb_pull(tmp_skb, skb_network_header_len(skb));
1739                 *tail_skb = tmp_skb;
1740                 tail_skb = &(tmp_skb->next);
1741                 skb->len += tmp_skb->len;
1742                 skb->data_len += tmp_skb->len;
1743                 skb->truesize += tmp_skb->truesize;
1744                 tmp_skb->destructor = NULL;
1745                 tmp_skb->sk = NULL;
1746         }
1747
1748         /* Allow local fragmentation. */
1749         skb->ignore_df = ip6_sk_ignore_df(sk);
1750
1751         *final_dst = fl6->daddr;
1752         __skb_pull(skb, skb_network_header_len(skb));
1753         if (opt && opt->opt_flen)
1754                 ipv6_push_frag_opts(skb, opt, &proto);
1755         if (opt && opt->opt_nflen)
1756                 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1757
1758         skb_push(skb, sizeof(struct ipv6hdr));
1759         skb_reset_network_header(skb);
1760         hdr = ipv6_hdr(skb);
1761
1762         ip6_flow_hdr(hdr, v6_cork->tclass,
1763                      ip6_make_flowlabel(net, skb, fl6->flowlabel,
1764                                         ip6_autoflowlabel(net, np), fl6));
1765         hdr->hop_limit = v6_cork->hop_limit;
1766         hdr->nexthdr = proto;
1767         hdr->saddr = fl6->saddr;
1768         hdr->daddr = *final_dst;
1769
1770         skb->priority = sk->sk_priority;
1771         skb->mark = cork->base.mark;
1772
1773         skb->tstamp = cork->base.transmit_time;
1774
1775         skb_dst_set(skb, dst_clone(&rt->dst));
1776         IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1777         if (proto == IPPROTO_ICMPV6) {
1778                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1779
1780                 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1781                 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1782         }
1783
1784         ip6_cork_release(cork, v6_cork);
1785 out:
1786         return skb;
1787 }
1788
1789 int ip6_send_skb(struct sk_buff *skb)
1790 {
1791         struct net *net = sock_net(skb->sk);
1792         struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1793         int err;
1794
1795         err = ip6_local_out(net, skb->sk, skb);
1796         if (err) {
1797                 if (err > 0)
1798                         err = net_xmit_errno(err);
1799                 if (err)
1800                         IP6_INC_STATS(net, rt->rt6i_idev,
1801                                       IPSTATS_MIB_OUTDISCARDS);
1802         }
1803
1804         return err;
1805 }
1806
/*
 * ip6_push_pending_frames - build and send whatever is pending on @sk.
 *
 * Returns 0 when ip6_finish_skb() yields no packet, otherwise the result
 * of ip6_send_skb().
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1818
1819 static void __ip6_flush_pending_frames(struct sock *sk,
1820                                        struct sk_buff_head *queue,
1821                                        struct inet_cork_full *cork,
1822                                        struct inet6_cork *v6_cork)
1823 {
1824         struct sk_buff *skb;
1825
1826         while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1827                 if (skb_dst(skb))
1828                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1829                                       IPSTATS_MIB_OUTDISCARDS);
1830                 kfree_skb(skb);
1831         }
1832
1833         ip6_cork_release(cork, v6_cork);
1834 }
1835
1836 void ip6_flush_pending_frames(struct sock *sk)
1837 {
1838         __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1839                                    &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1840 }
1841 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1842
1843 struct sk_buff *ip6_make_skb(struct sock *sk,
1844                              int getfrag(void *from, char *to, int offset,
1845                                          int len, int odd, struct sk_buff *skb),
1846                              void *from, int length, int transhdrlen,
1847                              struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1848                              struct rt6_info *rt, unsigned int flags,
1849                              struct inet_cork_full *cork)
1850 {
1851         struct inet6_cork v6_cork;
1852         struct sk_buff_head queue;
1853         int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1854         int err;
1855
1856         if (flags & MSG_PROBE)
1857                 return NULL;
1858
1859         __skb_queue_head_init(&queue);
1860
1861         cork->base.flags = 0;
1862         cork->base.addr = 0;
1863         cork->base.opt = NULL;
1864         cork->base.dst = NULL;
1865         v6_cork.opt = NULL;
1866         err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1867         if (err) {
1868                 ip6_cork_release(cork, &v6_cork);
1869                 return ERR_PTR(err);
1870         }
1871         if (ipc6->dontfrag < 0)
1872                 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1873
1874         err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1875                                 &current->task_frag, getfrag, from,
1876                                 length + exthdrlen, transhdrlen + exthdrlen,
1877                                 flags, ipc6);
1878         if (err) {
1879                 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1880                 return ERR_PTR(err);
1881         }
1882
1883         return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1884 }