linux/net/ipv6/ip6_output.c
/*
 *      IPv6 output functions
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on linux/net/ipv4/ip_output.c
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Changes:
 *      A.N.Kuznetsov   :       arithmetic in fragmentation.
 *                              extension headers are implemented.
 *                              route changes now work.
 *                              ip6_forward does not confuse sniffers.
 *                              etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *      Imran Patel     :       frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *                      :       add ip6_append_data and related functions
 *                              for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

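/*
 * Finalise the IPv6 payload length and run the netfilter LOCAL_OUT hook.
 * Payload lengths that do not fit into the 16-bit field (> IPV6_MAXPLEN)
 * are encoded as 0, as done for jumbograms.  nf_hook() returning 1 means
 * the packet was accepted and the caller must continue with dst_output()
 * itself.
 */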
int __ip6_local_out(struct sk_buff *skb)
{
        int len;

        len = skb->len - sizeof(struct ipv6hdr);
        if (len > IPV6_MAXPLEN)
                len = 0;
        ipv6_hdr(skb)->payload_len = htons(len);

        return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
                       skb_dst(skb)->dev, dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
        int err;

        err = __ip6_local_out(skb);
        if (likely(err == 1))
                err = dst_output(skb);

        return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

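/*
 * Last step of the output path: hand the packet to the neighbour entry
 * cached on the route.  Multicast packets are additionally looped back
 * to local listeners (and to the multicast routing daemon) via
 * dev_loopback_xmit() when required.
 */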
static int ip6_finish_output2(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
        struct neighbour *neigh;
        struct rt6_info *rt;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
                    ((mroute6_socket(dev_net(dev), skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                         * is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                                        newskb, NULL, newskb->dev,
                                        dev_loopback_xmit);

                        if (ipv6_hdr(skb)->hop_limit == 0) {
                                IP6_INC_STATS(dev_net(dev), idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
                                 skb->len);
        }

        rcu_read_lock();
        rt = (struct rt6_info *) dst;
        neigh = rt->n;
        if (neigh) {
                int res = dst_neigh_output(dst, neigh, skb);

                rcu_read_unlock();
                return res;
        }
        rcu_read_unlock();
        IP6_INC_STATS_BH(dev_net(dst->dev),
                         ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

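/*
 * Fragment before transmission when the packet exceeds the path MTU
 * (unless GSO will segment it later), or when the route demands a
 * fragment header on every packet (dst_allfrag(), set when the peer
 * reported a path MTU below IPV6_MIN_MTU).
 */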
static int ip6_finish_output(struct sk_buff *skb)
{
        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)))
                return ip6_fragment(skb, ip6_finish_output2);
        else
                return ip6_finish_output2(skb);
}

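/*
 * dst_output() entry point for IPv6 routes: discard if IPv6 is
 * administratively disabled on the device, otherwise run the
 * POST_ROUTING hook (skipped for packets the hook already rerouted).
 */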
int ip6_output(struct sk_buff *skb)
{
        struct net_device *dev = skb_dst(skb)->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(dev_net(dev), idev,
                              IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 *      xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
             struct ipv6_txoptions *opt, int tclass)
{
        struct net *net = sock_net(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr;
        u8  proto = fl6->flowi6_proto;
        int seg_len = skb->len;
        int hlimit = -1;
        u32 mtu;

        if (opt) {
                unsigned int head_room;

                /* First: extension headers may take lots of space
                 * (~8K for now); MAX_HEADER is not enough.
                 */
                head_room = opt->opt_nflen + opt->opt_flen;
                seg_len += head_room;
                head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

                if (skb_headroom(skb) < head_room) {
                        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                        if (skb2 == NULL) {
                                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return -ENOBUFS;
                        }
                        consume_skb(skb);
                        skb = skb2;
                        skb_set_owner_w(skb, sk);
                }
                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);
                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         *      Fill in the IPv6 header
         */
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

        *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        hdr->saddr = fl6->saddr;
        hdr->daddr = *first_hop;

        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                 IPSTATS_MIB_OUT, skb->len);
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
                               dst->dev, dst_output);
        }

        net_dbg_ratelimited("IPv6: sending pkt_too_big to self\n");
        skb->dev = dst->dev;
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *      To avoid extra problems ND packets are sent through this
 *      routine. It's code duplication, but I really want to avoid
 *      extra checks since ipv6_build_header is used by TCP (which
 *      is performance-critical for us)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
               const struct in6_addr *saddr, const struct in6_addr *daddr,
               int proto, int len)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct ipv6hdr *hdr;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        skb_reset_network_header(skb);
        skb_put(skb, sizeof(struct ipv6hdr));
        hdr = ipv6_hdr(skb);

        *(__be32 *)hdr = htonl(0x60000000);

        hdr->payload_len = htons(len);
        hdr->nexthdr = proto;
        hdr->hop_limit = np->hop_limit;

        hdr->saddr = *saddr;
        hdr->daddr = *daddr;

        return 0;
}

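/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * that registered for this alert value with IPV6_ROUTER_ALERT.
 * Returns 1 if at least one socket took the skb, 0 otherwise.
 */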
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}

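/*
 * Classify a packet whose destination address we proxy (proxy_ndp):
 * returns 1 if it is a unicast neighbour discovery message that must
 * go to the local input path, -1 if it must not be forwarded at all
 * (link-local destination), and 0 to forward normally.
 */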
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        __be16 frag_off;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
                if (offset < 0)
                        return 0;
        } else
                offset = sizeof(struct ipv6hdr);

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* Unicast neighbour discovery messages destined
                         * for the proxied address must be passed to the
                         * input function.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
        return dst_output(skb);
}

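/*
 * The forwarding path: validate hop limit, XFRM policy and source
 * address, generate the ICMPv6 errors required by RFC 4443 (time
 * exceeded, packet too big, destination unreachable), send a redirect
 * when the packet leaves through the interface it arrived on, then
 * decrement the hop limit and pass the packet to the FORWARD hook.
 */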
int ip6_forward(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        u32 mtu;

        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        if (skb->pkt_type != PACKET_HOST)
                goto drop;

        skb_forward_csum(skb);

        /*
         *      We do NOT do any processing on RA packets: they are pushed
         *      to user level AS IS, without any warranty that the
         *      application will be able to interpret them. The reason is
         *      that we cannot do anything clever here.
         *
         *      We are not the end node, so if the packet contains
         *      AH/ESP we cannot do anything either. Defragmentation
         *      would also be a mistake: RA packets cannot be fragmented,
         *      because there is no warranty that different fragments
         *      will travel along the same path. --ANK
         */
        if (opt->ra) {
                u8 *ptr = skb_network_header(skb) + opt->ra;
                if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
                        return 0;
        }

        /*
         *      check and decrement hop limit
         */
        if (hdr->hop_limit <= 1) {
                /* Force the OUTPUT device to be used for the source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0)
                        return ip6_input(skb);
                else if (proxied < 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(dst),
                                      IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* The IPv6 specs say nothing about it, but it is clear that we
         * cannot send redirects for source-routed frames.
         * We also don't send redirects for frames decapsulated from IPsec.
         */
        if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct inet_peer *peer;
                struct rt6_info *rt;

                /*
                 *      incoming and outgoing devices are the same:
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if (rt->rt6i_flags & RTF_GATEWAY)
                        target = &rt->rt6i_gateway;
                else
                        target = &hdr->daddr;

                peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);

                /* Limit redirects both by destination (here)
                 * and by source (inside ndisc_send_redirect).
                 */
                if (inet_peer_xrlim_allow(peer, 1*HZ))
                        ndisc_send_redirect(skb, target);
                if (peer)
                        inet_putpeer(peer);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = dst_mtu(dst);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if (skb->len > mtu && !skb_is_gso(skb)) {
                /* Again, force the OUTPUT device to be used for the source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Mangling the hop limit is delayed to this point, after skb COW */

        hdr->hop_limit--;

        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

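/*
 * Propagate per-packet metadata (priority, mark, dst reference,
 * netfilter and security state, ...) from the original skb to a newly
 * built fragment.
 */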
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
        to->nf_trace = from->nf_trace;
#endif
        skb_copy_secmark(to, from);
}

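/*
 * Return the offset at which a Fragment header must be inserted,
 * i.e. the length of the unfragmentable part of the extension header
 * chain.  On return, *nexthdr points at the nexthdr byte that has to
 * be rewritten to NEXTHDR_FRAGMENT.
 */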
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
        u16 offset = sizeof(struct ipv6hdr);
        struct ipv6_opt_hdr *exthdr =
                                (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
        unsigned int packet_len = skb->tail - skb->network_header;
        int found_rhdr = 0;

        *nexthdr = &ipv6_hdr(skb)->nexthdr;

        while (offset + 1 <= packet_len) {
                switch (**nexthdr) {
                case NEXTHDR_HOP:
                        break;
                case NEXTHDR_ROUTING:
                        found_rhdr = 1;
                        break;
                case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
                        if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
                                break;
#endif
                        if (found_rhdr)
                                return offset;
                        break;
                default:
                        return offset;
                }

                offset += ipv6_optlen(exthdr);
                *nexthdr = &exthdr->nexthdr;
                exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
                                                 offset);
        }

        return offset;
}

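/*
 * Choose the 32-bit fragment identification: taken from the
 * per-destination inet_peer counter when one is available, otherwise
 * from a global counter that skips the value 0.
 */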
void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
{
        static atomic_t ipv6_fragmentation_id;
        int old, new;

        if (rt && !(rt->dst.flags & DST_NOPEER)) {
                struct inet_peer *peer;
                struct net *net;

                net = dev_net(rt->dst.dev);
                peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
                if (peer) {
                        fhdr->identification = htonl(inet_getid(peer, 0));
                        inet_putpeer(peer);
                        return;
                }
        }
        do {
                old = atomic_read(&ipv6_fragmentation_id);
                new = old + 1;
                if (!new)
                        new = 1;
        } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
        fhdr->identification = htonl(new);
}

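/*
 * Split a packet into fragments and feed each one to @output.  The
 * fast path reuses an existing frag_list whose members already have
 * fragment-sized, 8-byte-aligned geometry; anything else falls through
 * to the copying slow path below.
 */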
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
        unsigned int mtu, hlen, left, len;
        int hroom, troom;
        __be32 frag_id = 0;
        int ptr, offset = 0, err = 0;
        u8 *prevhdr, nexthdr = 0;
        struct net *net = dev_net(skb_dst(skb)->dev);

        hlen = ip6_find_1stfragopt(skb, &prevhdr);
        nexthdr = *prevhdr;

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb was not generated by a local socket.
         */
        if (unlikely(!skb->local_df && skb->len > mtu)) {
                if (skb->sk && dst_allfrag(skb_dst(skb)))
                        sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

                skb->dev = skb_dst(skb)->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        mtu -= hlen + sizeof(struct frag_hdr);

        if (skb_has_frag_list(skb)) {
                int first_len = skb_pagelen(skb);
                struct sk_buff *frag2;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < hlen)
                                goto slow_path_clean;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path_clean;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                        }
                        skb->truesize -= frag->truesize;
                }

                err = 0;
                offset = 0;
                frag = skb_shinfo(skb)->frag_list;
                skb_frag_list_init(skb);

                /* BUILD HEADER */

                *prevhdr = NEXTHDR_FRAGMENT;
                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
                if (!tmp_hdr) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        return -ENOMEM;
                }

                __skb_pull(skb, hlen);
                fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
                __skb_push(skb, hlen);
                skb_reset_network_header(skb);
                memcpy(skb_network_header(skb), tmp_hdr, hlen);

                ipv6_select_ident(fh, rt);
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->frag_off = htons(IP6_MF);
                frag_id = fh->identification;

                first_len = skb_pagelen(skb);
                skb->data_len = first_len - skb_headlen(skb);
                skb->len = first_len;
                ipv6_hdr(skb)->payload_len = htons(first_len -
                                                   sizeof(struct ipv6hdr));

                dst_hold(&rt->dst);

                for (;;) {
                        /* Prepare the header of the next frame
                         * before the previous one has gone down.
                         */
                        if (frag) {
                                frag->ip_summed = CHECKSUM_NONE;
                                skb_reset_transport_header(frag);
                                fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
                                __skb_push(frag, hlen);
                                skb_reset_network_header(frag);
                                memcpy(skb_network_header(frag), tmp_hdr,
                                       hlen);
                                offset += skb->len - hlen - sizeof(struct frag_hdr);
                                fh->nexthdr = nexthdr;
                                fh->reserved = 0;
                                fh->frag_off = htons(offset);
                                if (frag->next != NULL)
                                        fh->frag_off |= htons(IP6_MF);
                                fh->identification = frag_id;
                                ipv6_hdr(frag)->payload_len =
                                                htons(frag->len -
                                                      sizeof(struct ipv6hdr));
                                ip6_copy_metadata(frag, skb);
                        }

                        err = output(skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !frag)
                                break;

                        skb = frag;
                        frag = skb->next;
                        skb->next = NULL;
                }

                kfree(tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                      IPSTATS_MIB_FRAGOKS);
                        dst_release(&rt->dst);
                        return 0;
                }

                while (frag) {
                        skb = frag->next;
                        kfree_skb(frag);
                        frag = skb;
                }

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                              IPSTATS_MIB_FRAGFAILS);
                dst_release(&rt->dst);
                return err;

slow_path_clean:
                skb_walk_frags(skb, frag2) {
                        if (frag2 == frag)
                                break;
                        frag2->sk = NULL;
                        frag2->destructor = NULL;
                        skb->truesize += frag2->truesize;
                }
        }

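/*
 * Slow path: allocate a fresh skb for every fragment and copy the
 * unfragmentable header plus an 8-byte-aligned block of payload into it.
 */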
slow_path:
        if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
            skb_checksum_help(skb))
                goto fail;

        left = skb->len - hlen;         /* Space per frame */
        ptr = hlen;                     /* Where to start from */

        /*
         *      Fragment the datagram.
         */

        *prevhdr = NEXTHDR_FRAGMENT;
        hroom = LL_RESERVED_SPACE(rt->dst.dev);
        troom = rt->dst.dev->needed_tailroom;

        /*
         *      Keep copying data until we run out.
         */
        while (left > 0) {
                len = left;
                /* If it doesn't fit, use 'mtu' - the data space left */
                if (len > mtu)
                        len = mtu;
                /* If we are not sending up to and including the end of the
                 * packet, align the next start on an eight-byte boundary.
                 */
                if (len < left)
                        len &= ~7;

                /*
                 *      Allocate buffer.
                 */
                frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
                                 hroom + troom, GFP_ATOMIC);
                if (frag == NULL) {
                        NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        err = -ENOMEM;
                        goto fail;
                }

                /*
                 *      Set up data on packet
                 */

                ip6_copy_metadata(frag, skb);
                skb_reserve(frag, hroom);
                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
                skb_reset_network_header(frag);
                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
                frag->transport_header = (frag->network_header + hlen +
                                          sizeof(struct frag_hdr));

                /*
                 *      Charge the memory for the fragment to any owner
                 *      it might possess
                 */
                if (skb->sk)
                        skb_set_owner_w(frag, skb->sk);

                /*
                 *      Copy the packet header into the new buffer.
                 */
                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

                /*
                 *      Build fragment header.
                 */
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                if (!frag_id) {
                        ipv6_select_ident(fh, rt);
                        frag_id = fh->identification;
                } else
                        fh->identification = frag_id;

                /*
                 *      Copy a block of the IP datagram.
                 */
                if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
                        BUG();
                left -= len;

                fh->frag_off = htons(offset);
                if (left > 0)
                        fh->frag_off |= htons(IP6_MF);
                ipv6_hdr(frag)->payload_len = htons(frag->len -
                                                    sizeof(struct ipv6hdr));

                ptr += len;
                offset += len;

                /*
                 *      Put this fragment into the sending queue.
                 */
                err = output(frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        consume_skb(skb);
        return err;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}

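/*
 * Nonzero when a cached route cannot be revalidated against @fl_addr:
 * it is neither a host route matching the address exactly, nor does
 * the address match the socket's cached address (@addr_cache).
 */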
static inline int ip6_rt_check(const struct rt6key *rt_key,
                               const struct in6_addr *fl_addr,
                               const struct in6_addr *addr_cache)
{
        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
                (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

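/*
 * Validate the socket's cached dst against the flow; drop it and
 * return NULL when the destination, source (with subtrees) or the
 * outgoing interface no longer match, forcing a fresh route lookup.
 */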
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          const struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt = (struct rt6_info *)dst;

        if (!dst)
                goto out;

        /* Yes, checking route validity in the unconnected case is not
         * very simple. Take into account that we do not support routing
         * by source, TOS, or MSG_DONTROUTE            --ANK (980726)
         *
         * 1. ip6_rt_check(): If the route was a host route, check that
         *    the cached destination is still current. If it is a network
         *    route, we can still check its validity using the saved
         *    pointer to the last used address: daddr_cache. We do not
         *    want to save the whole address now (because the main
         *    consumer of this service is TCP, which does not have this
         *    problem), so this last trick works only on connected
         *    sockets.
         * 2. oif must also be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
            (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}

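/*
 * Common tail of the dst lookup helpers: do the route lookup if the
 * caller has no dst yet, select a source address when the flow lacks
 * one, and (with optimistic DAD) retarget the lookup at the default
 * router while our chosen source address is still optimistic.
 */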
static int ip6_dst_lookup_tail(struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6)
{
        struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        struct neighbour *n;
        struct rt6_info *rt;
#endif
        int err;

        if (*dst == NULL)
                *dst = ip6_route_output(net, sk, fl6);

        err = (*dst)->error;
        if (err)
                goto out_err_release;

        if (ipv6_addr_any(&fl6->saddr)) {
                struct rt6_info *rt = (struct rt6_info *) *dst;
                err = ip6_route_get_saddr(net, rt, &fl6->daddr,
                                          sk ? inet6_sk(sk)->srcprefs : 0,
                                          &fl6->saddr);
                if (err)
                        goto out_err_release;
        }

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * If the dst entry we've looked up has a neighbour entry that
         * is in the INCOMPLETE state and the source address from the
         * flow is marked as OPTIMISTIC, we release the found dst entry
         * and replace it with the dst entry of the nexthop router.
         */
        rcu_read_lock();
        rt = (struct rt6_info *) *dst;
        n = rt->n;
        if (n && !(n->nud_state & NUD_VALID)) {
                struct inet6_ifaddr *ifp;
                struct flowi6 fl_gw6;
                int redirect;

                rcu_read_unlock();
                ifp = ipv6_get_ifaddr(net, &fl6->saddr,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
                        memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw6);
                        err = (*dst)->error;
                        if (err)
                                goto out_err_release;
                }
        } else {
                rcu_read_unlock();
        }
#endif

        return 0;

out_err_release:
        if (err == -ENETUNREACH)
                IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        dst_release(*dst);
        *dst = NULL;
        return err;
}

/**
 *      ip6_dst_lookup - perform route lookup on flow
 *      @sk: socket which provides route info
 *      @dst: pointer to dst_entry * for result
 *      @fl6: flow to lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *      @sk: socket which provides route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *      @can_sleep: we are in a sleepable context
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns a valid dst pointer on success, or a pointer-encoded
 *      error code on failure.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                      const struct in6_addr *final_dst,
                                      bool can_sleep)
{
        struct dst_entry *dst = NULL;
        int err;

        err = ip6_dst_lookup_tail(sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;
        if (can_sleep)
                fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

        return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *      @sk: socket which provides the dst cache and route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *      @can_sleep: we are in a sleepable context
 *
 *      This function performs a route lookup on the given flow with the
 *      possibility of using the cached route in the socket if it is valid.
 *      It will take the socket dst lock when operating on the dst cache.
 *      As a result, this function can only be used in process context.
 *
 *      It returns a valid dst pointer on success, or a pointer-encoded
 *      error code on failure.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                         const struct in6_addr *final_dst,
                                         bool can_sleep)
{
        struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
        int err;

        dst = ip6_sk_dst_check(sk, dst, fl6);

        err = ip6_dst_lookup_tail(sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;
        if (can_sleep)
                fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

        return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

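/*
 * UDP fragmentation offload: queue the whole datagram as one large skb,
 * set gso_size to the biggest 8-byte-multiple fragment payload and
 * choose the fragment ID up front; the device (or software GSO)
 * produces the actual on-wire fragments.
 */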
static inline int ip6_ufo_append_data(struct sock *sk,
                        int getfrag(void *from, char *to, int offset, int len,
                        int odd, struct sk_buff *skb),
                        void *from, int length, int hh_len, int fragheaderlen,
                        int transhdrlen, int mtu, unsigned int flags,
                        struct rt6_info *rt)
{
        struct sk_buff *skb;
        int err;

        /* The network device supports UDP large send offload, so create
         * one single skb packet containing the complete UDP datagram.
         */
        skb = skb_peek_tail(&sk->sk_write_queue);
        if (skb == NULL) {
                skb = sock_alloc_send_skb(sk,
                        hh_len + fragheaderlen + transhdrlen + 20,
                        (flags & MSG_DONTWAIT), &err);
                if (skb == NULL)
                        return err;

                /* reserve space for the hardware header */
                skb_reserve(skb, hh_len);

                /* create space for the UDP/IP header */
                skb_put(skb, fragheaderlen + transhdrlen);

                /* initialize the network header pointer */
                skb_reset_network_header(skb);

                /* initialize the protocol header pointer */
                skb->transport_header = skb->network_header + fragheaderlen;

                skb->ip_summed = CHECKSUM_PARTIAL;
                skb->csum = 0;
        }

        err = skb_append_datato_frags(sk, skb, getfrag, from,
                                      (length - transhdrlen));
        if (!err) {
                struct frag_hdr fhdr;

                /* Specify the length of each IPv6 datagram fragment.
                 * It has to be a multiple of 8.
                 */
                skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
                                             sizeof(struct frag_hdr)) & ~7;
                skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
                ipv6_select_ident(&fhdr, rt);
                skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
                __skb_queue_tail(&sk->sk_write_queue, skb);

                return 0;
        }
        /* There is not enough support to do UDP LSO,
         * so follow the normal path.
         */
        kfree_skb(skb);

        return err;
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

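/*
 * Recompute mtu/maxfraglen while appending: the first fragment must
 * reserve rt->dst.header_len (e.g. for a tunnel header), later
 * fragments can use the path MTU directly.
 */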
static void ip6_append_data_mtu(int *mtu,
                                int *maxfraglen,
                                unsigned int fragheaderlen,
                                struct sk_buff *skb,
                                struct rt6_info *rt)
{
        if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
                if (skb == NULL) {
                        /* first fragment, reserve header_len */
                        *mtu = *mtu - rt->dst.header_len;
                } else {
                        /*
                         * this fragment is not the first; the header
                         * space is regarded as data space.
                         */
                        *mtu = dst_mtu(rt->dst.path);
                }
                *maxfraglen = ((*mtu - fragheaderlen) & ~7)
                              + fragheaderlen - sizeof(struct frag_hdr);
        }
}

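/*
 * Append data to the socket's write queue, splitting it into MTU-sized,
 * 8-byte-aligned fragments as needed.  The first call for a cork sets
 * up the cork state (copied options, route, fragment size); the queued
 * packet is finally emitted by ip6_push_pending_frames().
 */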
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
        int offset, int len, int odd, struct sk_buff *skb),
        void *from, int length, int transhdrlen,
        int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
        struct rt6_info *rt, unsigned int flags, int dontfrag)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct inet_cork *cork;
        struct sk_buff *skb, *skb_prev = NULL;
        unsigned int maxfraglen, fragheaderlen;
        int exthdrlen;
        int dst_exthdrlen;
        int hh_len;
        int mtu;
        int copy;
        int err;
        int offset = 0;
        __u8 tx_flags = 0;

        if (flags & MSG_PROBE)
                return 0;
        cork = &inet->cork.base;
        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * setup for corking
                 */
                if (opt) {
                        if (WARN_ON(np->cork.opt))
                                return -EINVAL;

                        np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
                        if (unlikely(np->cork.opt == NULL))
                                return -ENOBUFS;

                        np->cork.opt->tot_len = opt->tot_len;
                        np->cork.opt->opt_flen = opt->opt_flen;
                        np->cork.opt->opt_nflen = opt->opt_nflen;

                        np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
                                                            sk->sk_allocation);
                        if (opt->dst0opt && !np->cork.opt->dst0opt)
                                return -ENOBUFS;

                        np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
                                                            sk->sk_allocation);
                        if (opt->dst1opt && !np->cork.opt->dst1opt)
                                return -ENOBUFS;

                        np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
                                                           sk->sk_allocation);
                        if (opt->hopopt && !np->cork.opt->hopopt)
                                return -ENOBUFS;

                        np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
                                                            sk->sk_allocation);
                        if (opt->srcrt && !np->cork.opt->srcrt)
                                return -ENOBUFS;

                        /* need source address above miyazawa */
                }
                dst_hold(&rt->dst);
                cork->dst = &rt->dst;
                inet->cork.fl.u.ip6 = *fl6;
                np->cork.hop_limit = hlimit;
                np->cork.tclass = tclass;
                if (rt->dst.flags & DST_XFRM_TUNNEL)
                        mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
                              rt->dst.dev->mtu : dst_mtu(&rt->dst);
                else
                        mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
                              rt->dst.dev->mtu : dst_mtu(rt->dst.path);
                if (np->frag_size < mtu) {
                        if (np->frag_size)
                                mtu = np->frag_size;
                }
                cork->fragsize = mtu;
                if (dst_allfrag(rt->dst.path))
                        cork->flags |= IPCORK_ALLFRAG;
                cork->length = 0;
                sk->sk_sndmsg_page = NULL;
                sk->sk_sndmsg_off = 0;
                exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
                length += exthdrlen;
                transhdrlen += exthdrlen;
                dst_exthdrlen = rt->dst.header_len;
        } else {
                rt = (struct rt6_info *)cork->dst;
                fl6 = &inet->cork.fl.u.ip6;
                opt = np->cork.opt;
                transhdrlen = 0;
                exthdrlen = 0;
                dst_exthdrlen = 0;
                mtu = cork->fragsize;
        }

        hh_len = LL_RESERVED_SPACE(rt->dst.dev);

        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
                        (opt ? opt->opt_nflen : 0);
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

        if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
                if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
                        ipv6_local_error(sk, EMSGSIZE, fl6, mtu - exthdrlen);
                        return -EMSGSIZE;
                }
        }

        /* For UDP, check if TX timestamp is enabled */
        if (sk->sk_type == SOCK_DGRAM) {
                err = sock_tx_timestamp(sk, &tx_flags);
                if (err)
                        goto error;
        }

        /*
         * Let's try using as much space as possible.
         * Use the MTU if the total length of the message fits into it.
         * Otherwise, we need to reserve space for the fragment header
         * and fragment alignment (= 8-15 octets, in total).
         *
         * Note that we may need to "move" the data from the tail of
         * the buffer to the new fragment when we split the message.
         *
         * FIXME: It may be fragmented into multiple chunks
         *        at once if non-fragmentable extension headers
         *        are too large.
         * --yoshfuji
         */

        cork->length += length;
        if (length > mtu) {
                int proto = sk->sk_protocol;

                if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)) {
                        ipv6_local_rxpmtu(sk, fl6, mtu - exthdrlen);
                        return -EMSGSIZE;
                }

                if (proto == IPPROTO_UDP &&
                    (rt->dst.dev->features & NETIF_F_UFO)) {
                        err = ip6_ufo_append_data(sk, getfrag, from, length,
                                                  hh_len, fragheaderlen,
                                                  transhdrlen, mtu, flags, rt);
                        if (err)
                                goto error;
                        return 0;
                }
        }

        skb = skb_peek_tail(&sk->sk_write_queue);
        if (skb == NULL)
                goto alloc_new_skb;

        while (length > 0) {
                /* Check if the remaining data fits into the current packet. */
                copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;

                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
alloc_new_skb:
                        /* There's no room in the current skb */
                        if (skb)
                                fraggap = skb->len - maxfraglen;
                        else
                                fraggap = 0;
                        /* update mtu and maxfraglen if necessary */
                        if (skb == NULL || skb_prev == NULL)
                                ip6_append_data_mtu(&mtu, &maxfraglen,
                                                    fragheaderlen, skb, rt);

                        skb_prev = skb;

                        /*
                         * If the remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;

                        if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
                        if ((flags & MSG_MORE) &&
                            !(rt->dst.dev->features & NETIF_F_SG))
                                alloclen = mtu;
                        else
                                alloclen = datalen + fragheaderlen;

                        alloclen += dst_exthdrlen;

                        if (datalen != length + fraggap) {
                                /*
                                 * this is not the last fragment; the
                                 * trailer space is regarded as data space.
                                 */
                                datalen += rt->dst.trailer_len;
                        }
1411
1412                        alloclen += rt->dst.trailer_len;
1413                        fraglen = datalen + fragheaderlen;
1414
1415                        /*
1416                         * We just reserve space for fragment header.
1417                         * Note: this may be overallocation if the message
1418                         * (without MSG_MORE) fits into the MTU.
1419                         */
1420                        alloclen += sizeof(struct frag_hdr);
1421
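                            /*
                             * transhdrlen is non-zero only for the first
                             * fragment: allocate it with
                             * sock_alloc_send_skb(), which may block for
                             * send-buffer space.  Follow-up fragments use a
                             * non-blocking sock_wmalloc() bounded by twice
                             * the send buffer, and carry no transmit
                             * timestamp flags.
                             */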
1422                        if (transhdrlen) {
1423                                skb = sock_alloc_send_skb(sk,
1424                                                alloclen + hh_len,
1425                                                (flags & MSG_DONTWAIT), &err);
1426                        } else {
1427                                skb = NULL;
1428                                if (atomic_read(&sk->sk_wmem_alloc) <=
1429                                    2 * sk->sk_sndbuf)
1430                                        skb = sock_wmalloc(sk,
1431                                                           alloclen + hh_len, 1,
1432                                                           sk->sk_allocation);
1433                                if (unlikely(skb == NULL))
1434                                        err = -ENOBUFS;
1435                                else {
1436                                        /* Only the initial fragment
1437                                         * is time stamped.
1438                                         */
1439                                        tx_flags = 0;
1440                                }
1441                        }
1442                        if (skb == NULL)
1443                                goto error;
1444                        /*
1445                         *      Fill in the control structures
1446                         */
1447                        skb->ip_summed = CHECKSUM_NONE;
1448                        skb->csum = 0;
1449                        /* reserve for fragmentation and ipsec header */
1450                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1451                                    dst_exthdrlen);
1452
1453                        if (sk->sk_type == SOCK_DGRAM)
1454                                skb_shinfo(skb)->tx_flags = tx_flags;
1455
1456                        /*
1457                         *      Find where to start putting bytes
1458                         */
1459                        data = skb_put(skb, fraglen);
1460                        skb_set_network_header(skb, exthdrlen);
1461                        data += fragheaderlen;
1462                        skb->transport_header = (skb->network_header +
1463                                                 fragheaderlen);
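                            /*
                             * Move the fraggap bytes that overshot the
                             * previous fragment's boundary into this skb,
                             * adjusting both checksums, and trim the
                             * previous skb back to maxfraglen.
                             */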
1464                        if (fraggap) {
1465                                skb->csum = skb_copy_and_csum_bits(
1466                                        skb_prev, maxfraglen,
1467                                        data + transhdrlen, fraggap, 0);
1468                                skb_prev->csum = csum_sub(skb_prev->csum,
1469                                                          skb->csum);
1470                                data += fraggap;
1471                                pskb_trim_unique(skb_prev, maxfraglen);
1472                        }
1473                        copy = datalen - transhdrlen - fraggap;
1474
1475                        if (copy < 0) {
1476                                err = -EINVAL;
1477                                kfree_skb(skb);
1478                                goto error;
1479                        } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1480                                err = -EFAULT;
1481                                kfree_skb(skb);
1482                                goto error;
1483                        }
1484
1485                        offset += copy;
1486                        length -= datalen - fraggap;
1487                        transhdrlen = 0;
1488                        exthdrlen = 0;
1489                        dst_exthdrlen = 0;
1490
1491                        /*
1492                         * Put the packet on the pending queue
1493                         */
1494                        __skb_queue_tail(&sk->sk_write_queue, skb);
1495                        continue;
1496                }
1497
1498                if (copy > length)
1499                        copy = length;
1500
1501                if (!(rt->dst.dev->features & NETIF_F_SG)) {
1502                        unsigned int off;
1503
1504                        off = skb->len;
1505                        if (getfrag(from, skb_put(skb, copy),
1506                                                offset, copy, off, skb) < 0) {
1507                                __skb_trim(skb, off);
1508                                err = -EFAULT;
1509                                goto error;
1510                        }
1511                } else {
1512                        int i = skb_shinfo(skb)->nr_frags;
1513                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1514                        struct page *page = sk->sk_sndmsg_page;
1515                        int off = sk->sk_sndmsg_off;
1516                        unsigned int left;
1517
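                            /*
                             * Scatter-gather path: copy into page fragments
                             * attached to the skb rather than the linear
                             * area.  sk_sndmsg_page caches the socket's
                             * current, partially filled page so successive
                             * appends can keep sharing it.
                             */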
1518                        if (page && (left = PAGE_SIZE - off) > 0) {
1519                                if (copy >= left)
1520                                        copy = left;
1521                                if (page != skb_frag_page(frag)) {
1522                                        if (i == MAX_SKB_FRAGS) {
1523                                                err = -EMSGSIZE;
1524                                                goto error;
1525                                        }
1526                                        skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1527                                        skb_frag_ref(skb, i);
1528                                        frag = &skb_shinfo(skb)->frags[i];
1529                                }
1530                        } else if (i < MAX_SKB_FRAGS) {
1531                                if (copy > PAGE_SIZE)
1532                                        copy = PAGE_SIZE;
1533                                page = alloc_pages(sk->sk_allocation, 0);
1534                                if (page == NULL) {
1535                                        err = -ENOMEM;
1536                                        goto error;
1537                                }
1538                                sk->sk_sndmsg_page = page;
1539                                sk->sk_sndmsg_off = 0;
1540
1541                                skb_fill_page_desc(skb, i, page, 0, 0);
1542                                frag = &skb_shinfo(skb)->frags[i];
1543                        } else {
1544                                err = -EMSGSIZE;
1545                                goto error;
1546                        }
1547                        if (getfrag(from,
1548                                    skb_frag_address(frag) + skb_frag_size(frag),
1549                                    offset, copy, skb->len, skb) < 0) {
1550                                err = -EFAULT;
1551                                goto error;
1552                        }
1553                        sk->sk_sndmsg_off += copy;
1554                        skb_frag_size_add(frag, copy);
1555                        skb->len += copy;
1556                        skb->data_len += copy;
1557                        skb->truesize += copy;
1558                        atomic_add(copy, &sk->sk_wmem_alloc);
1559                }
1560                offset += copy;
1561                length -= copy;
1562        }
1563        return 0;
1564error:
1565        cork->length -= length;
1566        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1567        return err;
1568}
1569EXPORT_SYMBOL_GPL(ip6_append_data);
1570
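    /*
     * Release everything the cork holds: any corked extension headers,
     * the cached route, and the stored flow.
     */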
1571static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1572{
1573        if (np->cork.opt) {
1574                kfree(np->cork.opt->dst0opt);
1575                kfree(np->cork.opt->dst1opt);
1576                kfree(np->cork.opt->hopopt);
1577                kfree(np->cork.opt->srcrt);
1578                kfree(np->cork.opt);
1579                np->cork.opt = NULL;
1580        }
1581
1582        if (inet->cork.base.dst) {
1583                dst_release(inet->cork.base.dst);
1584                inet->cork.base.dst = NULL;
1585                inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1586        }
1587        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1588}
1589
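    /*
     * Splice everything queued by ip6_append_data() into one skb: the
     * first skb keeps the headers, the rest are chained onto its
     * frag_list.  Once the extension headers and the IPv6 header have
     * been pushed, ip6_local_out() sends the packet; if it exceeds the
     * mtu, ip6_fragment() can split it along the frag_list boundaries,
     * since every piece was already sized to fit one fragment.
     */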
1590int ip6_push_pending_frames(struct sock *sk)
1591{
1592        struct sk_buff *skb, *tmp_skb;
1593        struct sk_buff **tail_skb;
1594        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1595        struct inet_sock *inet = inet_sk(sk);
1596        struct ipv6_pinfo *np = inet6_sk(sk);
1597        struct net *net = sock_net(sk);
1598        struct ipv6hdr *hdr;
1599        struct ipv6_txoptions *opt = np->cork.opt;
1600        struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
1601        struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
1602        unsigned char proto = fl6->flowi6_proto;
1603        int err = 0;
1604
1605        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1606                goto out;
1607        tail_skb = &(skb_shinfo(skb)->frag_list);
1608
1609        /* move skb->data to ip header from ext header */
1610        if (skb->data < skb_network_header(skb))
1611                __skb_pull(skb, skb_network_offset(skb));
1612        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1613                __skb_pull(tmp_skb, skb_network_header_len(skb));
1614                *tail_skb = tmp_skb;
1615                tail_skb = &(tmp_skb->next);
1616                skb->len += tmp_skb->len;
1617                skb->data_len += tmp_skb->len;
1618                skb->truesize += tmp_skb->truesize;
1619                tmp_skb->destructor = NULL;
1620                tmp_skb->sk = NULL;
1621        }
1622
1623        /* Allow local fragmentation. */
1624        if (np->pmtudisc < IPV6_PMTUDISC_DO)
1625                skb->local_df = 1;
1626
1627        *final_dst = fl6->daddr;
1628        __skb_pull(skb, skb_network_header_len(skb));
1629        if (opt && opt->opt_flen)
1630                ipv6_push_frag_opts(skb, opt, &proto);
1631        if (opt && opt->opt_nflen)
1632                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1633
1634        skb_push(skb, sizeof(struct ipv6hdr));
1635        skb_reset_network_header(skb);
1636        hdr = ipv6_hdr(skb);
1637
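            /*
             * First 32 bits of the IPv6 header: version 6 in the top
             * nibble, the corked traffic class in the next eight bits,
             * and the flow label (already in network byte order) in the
             * low 20 bits.
             */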
1638        *(__be32 *)hdr = fl6->flowlabel |
1639                     htonl(0x60000000 | ((int)np->cork.tclass << 20));
1640
1641        hdr->hop_limit = np->cork.hop_limit;
1642        hdr->nexthdr = proto;
1643        hdr->saddr = fl6->saddr;
1644        hdr->daddr = *final_dst;
1645
1646        skb->priority = sk->sk_priority;
1647        skb->mark = sk->sk_mark;
1648
1649        skb_dst_set(skb, dst_clone(&rt->dst));
1650        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1651        if (proto == IPPROTO_ICMPV6) {
1652                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1653
1654                ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1655                ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1656        }
1657
1658        err = ip6_local_out(skb);
1659        if (err) {
1660                if (err > 0)
1661                        err = net_xmit_errno(err);
1662                if (err)
1663                        goto error;
1664        }
1665
1666out:
1667        ip6_cork_release(inet, np);
1668        return err;
1669error:
1670        IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1671        goto out;
1672}
1673EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1674
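    /*
     * Error-path counterpart of ip6_push_pending_frames(): drop
     * everything still queued on the socket, counting each skb that
     * already has a route as an output discard, then release the cork
     * state.
     */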
1675void ip6_flush_pending_frames(struct sock *sk)
1676{
1677        struct sk_buff *skb;
1678
1679        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1680                if (skb_dst(skb))
1681                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1682                                      IPSTATS_MIB_OUTDISCARDS);
1683                kfree_skb(skb);
1684        }
1685
1686        ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1687}
1688EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1689