linux/net/ipv6/ip6_output.c
/*
 *      IPv6 output functions
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on linux/net/ipv4/ip_output.c
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Changes:
 *      A.N.Kuznetsov   :       arithmetic in fragmentation.
 *                              extension headers are implemented.
 *                              route changes now work.
 *                              ip6_forward does not confuse sniffers.
 *                              etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *      Imran Patel     :       frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *                      :       add ip6_append_data and related functions
 *                              for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

int __ip6_local_out(struct sk_buff *skb)
{
        int len;

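        /* payload_len counts everything after the fixed 40-byte IPv6
         * header.  If the payload is too large for the 16-bit field,
         * store 0 instead -- the value RFC 2675 jumbograms use.
         */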
        len = skb->len - sizeof(struct ipv6hdr);
        if (len > IPV6_MAXPLEN)
                len = 0;
        ipv6_hdr(skb)->payload_len = htons(len);

        return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
                       skb_dst(skb)->dev, dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
        int err;

        err = __ip6_local_out(skb);
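        /* nf_hook() returns 1 when the LOCAL_OUT hooks let the packet
         * pass without consuming it, in which case we must hand it to
         * dst_output() ourselves; any other value is the hooks' verdict
         * (or an error) and is returned as-is.
         */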
        if (likely(err == 1))
                err = dst_output(skb);

        return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
        skb_reset_mac_header(newskb);
        __skb_pull(newskb, skb_network_offset(newskb));
        newskb->pkt_type = PACKET_LOOPBACK;
        newskb->ip_summed = CHECKSUM_UNNECESSARY;
        WARN_ON(!skb_dst(newskb));

        netif_rx_ni(newskb);
        return 0;
}

static int ip6_finish_output2(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
                    ((mroute6_socket(dev_net(dev), skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
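                        /* Multicast loopback is done in software: clone
                         * the packet and feed the copy back to the local
                         * stack so local group members receive it.
                         */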
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                         * is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                                        newskb, NULL, newskb->dev,
                                        ip6_dev_loopback_xmit);

                        if (ipv6_hdr(skb)->hop_limit == 0) {
                                IP6_INC_STATS(dev_net(dev), idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
                                 skb->len);
        }

        if (dst->hh)
                return neigh_hh_output(dst->hh, skb);
        else if (dst->neighbour)
                return dst->neighbour->output(skb);

        IP6_INC_STATS_BH(dev_net(dst->dev),
                         ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

static int ip6_finish_output(struct sk_buff *skb)
{
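        /* Fragment if the packet exceeds the path MTU and is not a GSO
         * skb (those are segmented further down the stack), or if the
         * route requires all packets to be fragmented (dst_allfrag).
         */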
        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)))
                return ip6_fragment(skb, ip6_finish_output2);
        else
                return ip6_finish_output2(skb);
}

int ip6_output(struct sk_buff *skb)
{
        struct net_device *dev = skb_dst(skb)->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(dev_net(dev), idev,
                              IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 *      xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
             struct ipv6_txoptions *opt)
{
        struct net *net = sock_net(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl->fl6_dst;
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr;
        u8  proto = fl->proto;
        int seg_len = skb->len;
        int hlimit = -1;
        int tclass = 0;
        u32 mtu;

        if (opt) {
                unsigned int head_room;

                /* First: exthdrs may take lots of space (~8K for now);
                 * MAX_HEADER is not enough.
                 */
                head_room = opt->opt_nflen + opt->opt_flen;
                seg_len += head_room;
                head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

                if (skb_headroom(skb) < head_room) {
                        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                        if (skb2 == NULL) {
                                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return -ENOBUFS;
                        }
                        kfree_skb(skb);
                        skb = skb2;
                        skb_set_owner_w(skb, sk);
                }
                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);
                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         *      Fill in the IPv6 header
         */
        if (np) {
                tclass = np->tclass;
                hlimit = np->hop_limit;
        }
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

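        /* The first word of the header packs version 6 into the top
         * nibble, the traffic class into bits 27-20 and the flow label
         * into the low 20 bits; fl->fl6_flowlabel is already in network
         * byte order.
         */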
        *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
        ipv6_addr_copy(&hdr->daddr, first_hop);

        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        mtu = dst_mtu(dst);
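        /* Packets that fit the MTU, may be fragmented locally
         * (local_df), or will be segmented by GSO go straight out;
         * anything else triggers a PKT_TOOBIG error back to the local
         * sender.
         */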
        if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                 IPSTATS_MIB_OUT, skb->len);
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
                               dst->dev, dst_output);
        }

        if (net_ratelimit())
                printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
        skb->dev = dst->dev;
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *      To avoid extra problems ND packets are sent through this
 *      routine. It's code duplication but I really want to avoid
 *      extra checks since ipv6_build_header is used by TCP (which
 *      is performance critical for us)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
               const struct in6_addr *saddr, const struct in6_addr *daddr,
               int proto, int len)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct ipv6hdr *hdr;
        int totlen;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        totlen = len + sizeof(struct ipv6hdr);

        skb_reset_network_header(skb);
        skb_put(skb, sizeof(struct ipv6hdr));
        hdr = ipv6_hdr(skb);

        *(__be32 *)hdr = htonl(0x60000000);

        hdr->payload_len = htons(len);
        hdr->nexthdr = proto;
        hdr->hop_limit = np->hop_limit;

        ipv6_addr_copy(&hdr->saddr, saddr);
        ipv6_addr_copy(&hdr->daddr, daddr);

        return 0;
}

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

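        /* Deliver a copy to every matching Router Alert socket;
         * intermediate matches receive clones, and the last match
         * consumes the original skb, which saves one clone.
         */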
        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
                if (offset < 0)
                        return 0;
        } else
                offset = sizeof(struct ipv6hdr);

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* Unicast neighbour discovery messages destined
                         * to the proxied address are passed to the input
                         * function.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
        return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        u32 mtu;

        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        skb_forward_csum(skb);

        /*
         *      We do not perform any processing on RA packets, pushing
         *      them to user level AS IS without any guarantee that the
         *      application will be able to interpret them. The reason
         *      is that we cannot make anything clever here.
         *
         *      We are not the end node, so if the packet contains
         *      AH/ESP we cannot do anything. Defragmentation would also
         *      be a mistake: RA packets must not be fragmented, because
         *      there is no guarantee that different fragments will travel
         *      along the same path. --ANK
         */
        if (opt->ra) {
                u8 *ptr = skb_network_header(skb) + opt->ra;
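                /* opt->ra is the offset of the Router Alert option
                 * inside the network header; ptr[2] and ptr[3] hold its
                 * 16-bit value in network byte order.
                 */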
                if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
                        return 0;
        }

        /*
         *      check and decrement hop limit
         */
        if (hdr->hop_limit <= 1) {
                /* Force the OUTPUT device to be used for ICMP source
                 * address selection */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0)
                        return ip6_input(skb);
                else if (proxied < 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(dst),
                                      IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* The IPv6 specs say nothing about it, but it is clear that we
         * cannot send redirects to source-routed frames.
         * We don't send redirects to frames decapsulated from IPsec.
         */
        if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
            !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct rt6_info *rt;
                struct neighbour *n = dst->neighbour;

                /*
                 *      incoming and outgoing devices are the same:
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if ((rt->rt6i_flags & RTF_GATEWAY))
                        target = (struct in6_addr *)&n->primary_key;
                else
                        target = &hdr->daddr;

                /* Limit redirects both by destination (here)
                 * and by source (inside ndisc_send_redirect)
                 */
                if (xrlim_allow(dst, 1*HZ))
                        ndisc_send_redirect(skb, n, target);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = dst_mtu(dst);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if (skb->len > mtu && !skb_is_gso(skb)) {
                /* Again, force the OUTPUT device to be used for ICMP
                 * source address selection */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Mangling the hop limit is delayed until after the skb COW */

        hdr->hop_limit--;

        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
        to->nf_trace = from->nf_trace;
#endif
        skb_copy_secmark(to, from);
}

int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
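        /* Find where the unfragmentable part of the packet ends: per
         * RFC 2460, hop-by-hop options, a routing header, and any
         * destination options that precede a routing header must be
         * repeated in every fragment, so fragmentation starts after
         * them.
         */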
        u16 offset = sizeof(struct ipv6hdr);
        struct ipv6_opt_hdr *exthdr =
                                (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
        unsigned int packet_len = skb->tail - skb->network_header;
        int found_rhdr = 0;

        *nexthdr = &ipv6_hdr(skb)->nexthdr;

        while (offset + 1 <= packet_len) {

                switch (**nexthdr) {

                case NEXTHDR_HOP:
                        break;
                case NEXTHDR_ROUTING:
                        found_rhdr = 1;
                        break;
                case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
                        if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
                                break;
#endif
                        if (found_rhdr)
                                return offset;
                        break;
                default:
                        return offset;
                }

                offset += ipv6_optlen(exthdr);
                *nexthdr = &exthdr->nexthdr;
                exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
                                                 offset);
        }

        return offset;
}

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
        unsigned int mtu, hlen, left, len;
        __be32 frag_id = 0;
        int ptr, offset = 0, err = 0;
        u8 *prevhdr, nexthdr = 0;
        struct net *net = dev_net(skb_dst(skb)->dev);

        hlen = ip6_find_1stfragopt(skb, &prevhdr);
        nexthdr = *prevhdr;

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb is not generated by a local socket.
         */
        if (!skb->local_df && skb->len > mtu) {
                skb->dev = skb_dst(skb)->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
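        /* From here on, mtu means the payload capacity of one fragment:
         * the link MTU minus the unfragmentable headers and the
         * fragment header itself.
         */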
        mtu -= hlen + sizeof(struct frag_hdr);

        if (skb_has_frag_list(skb)) {
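                /* Fast path: the skb already carries a chain of
                 * buffers in its frag_list.  If their geometry matches
                 * what fragmentation would produce anyway, reuse them
                 * in place instead of copying the data.
                 */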
                int first_len = skb_pagelen(skb);
                struct sk_buff *frag2;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < hlen)
                                goto slow_path_clean;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path_clean;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                        }
                        skb->truesize -= frag->truesize;
                }

                err = 0;
                offset = 0;
                frag = skb_shinfo(skb)->frag_list;
                skb_frag_list_init(skb);
                /* BUILD HEADER */

                *prevhdr = NEXTHDR_FRAGMENT;
                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
                if (!tmp_hdr) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        return -ENOMEM;
                }

                __skb_pull(skb, hlen);
                fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
                __skb_push(skb, hlen);
                skb_reset_network_header(skb);
                memcpy(skb_network_header(skb), tmp_hdr, hlen);

                ipv6_select_ident(fh);
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->frag_off = htons(IP6_MF);
                frag_id = fh->identification;

                first_len = skb_pagelen(skb);
                skb->data_len = first_len - skb_headlen(skb);
                skb->len = first_len;
                ipv6_hdr(skb)->payload_len = htons(first_len -
                                                   sizeof(struct ipv6hdr));

                dst_hold(&rt->dst);

                for (;;) {
                        /* Prepare the header of the next frame
                         * before the previous one goes down. */
                        if (frag) {
                                frag->ip_summed = CHECKSUM_NONE;
                                skb_reset_transport_header(frag);
                                fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
                                __skb_push(frag, hlen);
                                skb_reset_network_header(frag);
                                memcpy(skb_network_header(frag), tmp_hdr,
                                       hlen);
                                offset += skb->len - hlen - sizeof(struct frag_hdr);
                                fh->nexthdr = nexthdr;
                                fh->reserved = 0;
                                fh->frag_off = htons(offset);
                                if (frag->next != NULL)
                                        fh->frag_off |= htons(IP6_MF);
                                fh->identification = frag_id;
                                ipv6_hdr(frag)->payload_len =
                                                htons(frag->len -
                                                      sizeof(struct ipv6hdr));
                                ip6_copy_metadata(frag, skb);
                        }

                        err = output(skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !frag)
                                break;

                        skb = frag;
                        frag = skb->next;
                        skb->next = NULL;
                }

                kfree(tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                      IPSTATS_MIB_FRAGOKS);
                        dst_release(&rt->dst);
                        return 0;
                }

                while (frag) {
                        skb = frag->next;
                        kfree_skb(frag);
                        frag = skb;
                }

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                              IPSTATS_MIB_FRAGFAILS);
                dst_release(&rt->dst);
                return err;

slow_path_clean:
                skb_walk_frags(skb, frag2) {
                        if (frag2 == frag)
                                break;
                        frag2->sk = NULL;
                        frag2->destructor = NULL;
                        skb->truesize += frag2->truesize;
                }
        }

slow_path:
        left = skb->len - hlen;         /* Data left to send */
        ptr = hlen;                     /* Where to start from */

        /*
         *      Fragment the datagram.
         */

        *prevhdr = NEXTHDR_FRAGMENT;

        /*
         *      Keep copying data until we run out.
         */
        while (left > 0) {
                len = left;
                /* IF: it doesn't fit, use 'mtu' - the data space left */
                if (len > mtu)
                        len = mtu;
                /* IF: we are not sending up to and including the packet end
                   then align the next start on an eight byte boundary */
                if (len < left) {
                        len &= ~7;
                }
                /*
                 *      Allocate buffer.
                 */

                frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
                                 LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC);
                if (frag == NULL) {
                        NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        err = -ENOMEM;
                        goto fail;
                }

                /*
                 *      Set up data on packet
                 */

                ip6_copy_metadata(frag, skb);
                skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
                skb_reset_network_header(frag);
                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
                frag->transport_header = (frag->network_header + hlen +
                                          sizeof(struct frag_hdr));

                /*
                 *      Charge the memory for the fragment to any owner
                 *      it might possess
                 */
                if (skb->sk)
                        skb_set_owner_w(frag, skb->sk);

                /*
                 *      Copy the packet header into the new buffer.
                 */
                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

                /*
                 *      Build fragment header.
                 */
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                if (!frag_id) {
                        ipv6_select_ident(fh);
                        frag_id = fh->identification;
                } else
                        fh->identification = frag_id;

                /*
                 *      Copy a block of the IP datagram.
                 */
                if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
                        BUG();
                left -= len;

                fh->frag_off = htons(offset);
                if (left > 0)
                        fh->frag_off |= htons(IP6_MF);
                ipv6_hdr(frag)->payload_len = htons(frag->len -
                                                    sizeof(struct ipv6hdr));

                ptr += len;
                offset += len;

                /*
                 *      Put this fragment into the sending queue.
                 */
                err = output(frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        kfree_skb(skb);
        return err;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}

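/* Return nonzero when the cached route can no longer be trusted for
 * this flow: the route is still considered valid if it is a host route
 * (plen == 128) whose key equals the flow's address, or if the saved
 * last-used address (addr_cache) still equals it.
 */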
static inline int ip6_rt_check(struct rt6key *rt_key,
                               struct in6_addr *fl_addr,
                               struct in6_addr *addr_cache)
{
        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
                (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          struct flowi *fl)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt = (struct rt6_info *)dst;

        if (!dst)
                goto out;

        /* Yes, checking route validity in the not-connected case is not
         * very simple. Take into account that we do not support routing
         * by source, TOS, and MSG_DONTROUTE.          --ANK (980726)
         *
         * 1. ip6_rt_check(): If the route was a host route, check that
         *    the cached destination is current. If it is a network
         *    route, we still may check its validity using the saved
         *    pointer to the last used address: daddr_cache. We do not
         *    want to save the whole address now (because the main
         *    consumer of this service is TCP, which does not have this
         *    problem), so the last trick works only on connected
         *    sockets.
         * 2. oif also should be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
            (fl->oif && fl->oif != dst->dev->ifindex)) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
                               struct dst_entry **dst, struct flowi *fl)
{
        int err;
        struct net *net = sock_net(sk);

        if (*dst == NULL)
                *dst = ip6_route_output(net, sk, fl);

        if ((err = (*dst)->error))
                goto out_err_release;

        if (ipv6_addr_any(&fl->fl6_src)) {
                err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
                                         &fl->fl6_dst,
                                         sk ? inet6_sk(sk)->srcprefs : 0,
                                         &fl->fl6_src);
                if (err)
                        goto out_err_release;
        }

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * If the dst entry we've looked up has a neighbour entry that
         * is in the INCOMPLETE state and the source address from the
         * flow is marked as OPTIMISTIC, we release the found dst entry
         * and replace it instead with the dst entry of the nexthop
         * router.
         */
        if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
                struct inet6_ifaddr *ifp;
                struct flowi fl_gw;
                int redirect;

                ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw, fl, sizeof(struct flowi));
                        memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw);
                        if ((err = (*dst)->error))
                                goto out_err_release;
                }
        }
#endif

        return 0;

out_err_release:
        if (err == -ENETUNREACH)
                IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        dst_release(*dst);
        *dst = NULL;
        return err;
}

/**
 *      ip6_dst_lookup - perform route lookup on flow
 *      @sk: socket which provides route info
 *      @dst: pointer to dst_entry * for result
 *      @fl: flow to lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *      ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *      @sk: socket which provides the dst cache and route info
 *      @dst: pointer to dst_entry * for result
 *      @fl: flow to lookup
 *
 *      This function performs a route lookup on the given flow with the
 *      possibility of using the cached route in the socket if it is valid.
 *      It will take the socket dst lock when operating on the dst cache.
 *      As a result, this function can only be used in process context.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
        *dst = NULL;
        if (sk) {
                *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
                *dst = ip6_sk_dst_check(sk, *dst, fl);
        }

        return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);

static inline int ip6_ufo_append_data(struct sock *sk,
                        int getfrag(void *from, char *to, int offset, int len,
                        int odd, struct sk_buff *skb),
                        void *from, int length, int hh_len, int fragheaderlen,
                        int transhdrlen, int mtu, unsigned int flags)

{
        struct sk_buff *skb;
        int err;

        /* There is support for UDP large send offload by the network
         * device, so create one single skb packet containing the
         * complete UDP datagram.
         */
        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
                skb = sock_alloc_send_skb(sk,
                        hh_len + fragheaderlen + transhdrlen + 20,
                        (flags & MSG_DONTWAIT), &err);
                if (skb == NULL)
                        return -ENOMEM;

                /* reserve space for the hardware header */
                skb_reserve(skb, hh_len);

                /* create space for the UDP/IP header */
                skb_put(skb, fragheaderlen + transhdrlen);

                /* initialize the network header pointer */
                skb_reset_network_header(skb);

                /* initialize the protocol header pointer */
                skb->transport_header = skb->network_header + fragheaderlen;

                skb->ip_summed = CHECKSUM_PARTIAL;
                skb->csum = 0;
                sk->sk_sndmsg_off = 0;
        }

        err = skb_append_datato_frags(sk, skb, getfrag, from,
                                      (length - transhdrlen));
        if (!err) {
                struct frag_hdr fhdr;

                /* Specify the length of each IPv6 datagram fragment.
                 * It has to be a multiple of 8.
                 */
                skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
                                             sizeof(struct frag_hdr)) & ~7;
                skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
                ipv6_select_ident(&fhdr);
                skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
                __skb_queue_tail(&sk->sk_write_queue, skb);

                return 0;
        }
        /* There is not enough support to do UDP LSO,
         * so follow the normal path.
         */
        kfree_skb(skb);

        return err;
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
        int offset, int len, int odd, struct sk_buff *skb),
        void *from, int length, int transhdrlen,
        int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
        struct rt6_info *rt, unsigned int flags, int dontfrag)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct sk_buff *skb;
        unsigned int maxfraglen, fragheaderlen;
        int exthdrlen;
        int hh_len;
        int mtu;
        int copy;
        int err;
        int offset = 0;
        int csummode = CHECKSUM_NONE;

        if (flags & MSG_PROBE)
                return 0;
        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * setup for corking
                 */
                if (opt) {
                        if (WARN_ON(np->cork.opt))
                                return -EINVAL;

                        np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
                        if (unlikely(np->cork.opt == NULL))
                                return -ENOBUFS;

                        np->cork.opt->tot_len = opt->tot_len;
                        np->cork.opt->opt_flen = opt->opt_flen;
                        np->cork.opt->opt_nflen = opt->opt_nflen;

                        np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
                                                            sk->sk_allocation);
                        if (opt->dst0opt && !np->cork.opt->dst0opt)
                                return -ENOBUFS;

                        np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
                                                            sk->sk_allocation);
                        if (opt->dst1opt && !np->cork.opt->dst1opt)
                                return -ENOBUFS;

                        np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
                                                           sk->sk_allocation);
                        if (opt->hopopt && !np->cork.opt->hopopt)
                                return -ENOBUFS;

                        np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
                                                            sk->sk_allocation);
                        if (opt->srcrt && !np->cork.opt->srcrt)
                                return -ENOBUFS;

                        /* need source address above --miyazawa */
                }
                dst_hold(&rt->dst);
                inet->cork.dst = &rt->dst;
                inet->cork.fl = *fl;
                np->cork.hop_limit = hlimit;
                np->cork.tclass = tclass;
                mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
                      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
                if (np->frag_size < mtu) {
                        if (np->frag_size)
                                mtu = np->frag_size;
                }
                inet->cork.fragsize = mtu;
                if (dst_allfrag(rt->dst.path))
                        inet->cork.flags |= IPCORK_ALLFRAG;
                inet->cork.length = 0;
                sk->sk_sndmsg_page = NULL;
                sk->sk_sndmsg_off = 0;
                exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
                            rt->rt6i_nfheader_len;
                length += exthdrlen;
                transhdrlen += exthdrlen;
        } else {
                rt = (struct rt6_info *)inet->cork.dst;
                fl = &inet->cork.fl;
                opt = np->cork.opt;
                transhdrlen = 0;
                exthdrlen = 0;
                mtu = inet->cork.fragsize;
        }

        hh_len = LL_RESERVED_SPACE(rt->dst.dev);

        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
                        (opt ? opt->opt_nflen : 0);
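        /* maxfraglen is the largest packet length for which the
         * fragmentable part is still a multiple of eight octets, with
         * room left to insert a fragment header later.
         */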
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

        if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
                if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
                        ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
                        return -EMSGSIZE;
                }
        }

        /*
         * Let's try using as much space as possible.
         * Use the MTU if the total length of the message fits into it.
         * Otherwise, we need to reserve the fragment header and
         * fragment alignment (= 8-15 octets, in total).
         *
         * Note that we may need to "move" the data from the tail
         * of the buffer to the new fragment when we split
         * the message.
         *
         * FIXME: It may be fragmented into multiple chunks
         *        at once if non-fragmentable extension headers
         *        are too large.
         * --yoshfuji
         */

        inet->cork.length += length;
        if (length > mtu) {
                int proto = sk->sk_protocol;

                if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)) {
                        ipv6_local_rxpmtu(sk, fl, mtu - exthdrlen);
                        return -EMSGSIZE;
                }

                if (proto == IPPROTO_UDP &&
                    (rt->dst.dev->features & NETIF_F_UFO)) {

                        err = ip6_ufo_append_data(sk, getfrag, from, length,
                                                  hh_len, fragheaderlen,
                                                  transhdrlen, mtu, flags);
                        if (err)
                                goto error;
                        return 0;
                }
        }

        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
                goto alloc_new_skb;

        while (length > 0) {
                /* Check if the remaining data fits into the current packet. */
                copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;

                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
                        struct sk_buff *skb_prev;
alloc_new_skb:
                        skb_prev = skb;

                        /* There's no room in the current skb */
                        if (skb_prev)
                                fraggap = skb_prev->len - maxfraglen;
                        else
                                fraggap = 0;

                        /*
                         * If the remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;
                        if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen;

                        fraglen = datalen + fragheaderlen;
                        if ((flags & MSG_MORE) &&
                            !(rt->dst.dev->features & NETIF_F_SG))
                                alloclen = mtu;
                        else
                                alloclen = datalen + fragheaderlen;

                        /*
                         * The last fragment gets additional space at the tail.
                         * Note: we overallocate on fragments with MSG_MORE
                         * because we have no idea if we're the last one.
                         */
                        if (datalen == length + fraggap)
                                alloclen += rt->dst.trailer_len;

                        /*
                         * We just reserve space for the fragment header.
                         * Note: this may be overallocation if the message
                         * (without MSG_MORE) fits into the MTU.
                         */
                        alloclen += sizeof(struct frag_hdr);

                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len,
                                                (flags & MSG_DONTWAIT), &err);
                        } else {
                                skb = NULL;
                                if (atomic_read(&sk->sk_wmem_alloc) <=
                                    2 * sk->sk_sndbuf)
                                        skb = sock_wmalloc(sk,
                                                           alloclen + hh_len, 1,
                                                           sk->sk_allocation);
                                if (unlikely(skb == NULL))
                                        err = -ENOBUFS;
                        }
                        if (skb == NULL)
                                goto error;
                        /*
                         *      Fill in the control structures
                         */
                        skb->ip_summed = csummode;
                        skb->csum = 0;
                        /* reserve for fragmentation */
                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

                        /*
                         *      Find where to start putting bytes
                         */
                        data = skb_put(skb, fraglen);
                        skb_set_network_header(skb, exthdrlen);
                        data += fragheaderlen;
                        skb->transport_header = (skb->network_header +
                                                 fragheaderlen);
                        if (fraggap) {
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data + transhdrlen, fraggap, 0);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }
                        copy = datalen - transhdrlen - fraggap;
                        if (copy < 0) {
                                err = -EINVAL;
                                kfree_skb(skb);
                                goto error;
                        } else if (copy > 0 && getfrag(from, data + transhdrlen,
                                                       offset, copy, fraggap,
                                                       skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        }

                        offset += copy;
                        length -= datalen - fraggap;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        csummode = CHECKSUM_NONE;

                        /*
                         * Put the packet on the pending queue
                         */
                        __skb_queue_tail(&sk->sk_write_queue, skb);
                        continue;
                }

                if (copy > length)
                        copy = length;

                if (!(rt->dst.dev->features & NETIF_F_SG)) {
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                    offset, copy, off, skb) < 0) {
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else {
                        int i = skb_shinfo(skb)->nr_frags;
                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
                        struct page *page = sk->sk_sndmsg_page;
                        int off = sk->sk_sndmsg_off;
                        unsigned int left;

                        if (page && (left = PAGE_SIZE - off) > 0) {
                                if (copy >= left)
                                        copy = left;
                                if (page != frag->page) {
                                        if (i == MAX_SKB_FRAGS) {
                                                err = -EMSGSIZE;
                                                goto error;
                                        }
                                        get_page(page);
                                        skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
                                        frag = &skb_shinfo(skb)->frags[i];
                                }
                        } else if (i < MAX_SKB_FRAGS) {
                                if (copy > PAGE_SIZE)
                                        copy = PAGE_SIZE;
                                page = alloc_pages(sk->sk_allocation, 0);
                                if (page == NULL) {
                                        err = -ENOMEM;
                                        goto error;
                                }
                                sk->sk_sndmsg_page = page;
                                sk->sk_sndmsg_off = 0;

                                skb_fill_page_desc(skb, i, page, 0, 0);
                                frag = &skb_shinfo(skb)->frags[i];
                        } else {
                                err = -EMSGSIZE;
                                goto error;
                        }
                        if (getfrag(from,
                                    page_address(frag->page) +
                                    frag->page_offset + frag->size,
                                    offset, copy, skb->len, skb) < 0) {
                                err = -EFAULT;
                                goto error;
                        }
                        sk->sk_sndmsg_off += copy;
                        frag->size += copy;
                        skb->len += copy;
                        skb->data_len += copy;
                        skb->truesize += copy;
                        atomic_add(copy, &sk->sk_wmem_alloc);
                }
                offset += copy;
                length -= copy;
        }
        return 0;
error:
        inet->cork.length -= length;
        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        return err;
}

static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
        if (np->cork.opt) {
                kfree(np->cork.opt->dst0opt);
                kfree(np->cork.opt->dst1opt);
                kfree(np->cork.opt->hopopt);
                kfree(np->cork.opt->srcrt);
                kfree(np->cork.opt);
                np->cork.opt = NULL;
        }

        if (inet->cork.dst) {
                dst_release(inet->cork.dst);
                inet->cork.dst = NULL;
                inet->cork.flags &= ~IPCORK_ALLFRAG;
        }
        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

int ip6_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6hdr *hdr;
        struct ipv6_txoptions *opt = np->cork.opt;
        struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
        struct flowi *fl = &inet->cork.fl;
        unsigned char proto = fl->proto;
        int err = 0;

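        /* Coalesce the whole write queue into a single skb: the first
         * buffer becomes the head and all later buffers are chained
         * onto its frag_list, so one packet goes down the output path.
         */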
        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
                goto out;
        tail_skb = &(skb_shinfo(skb)->frag_list);

        /* if skb->data points below the IP header, pull it up there */
        if (skb->data < skb_network_header(skb))
                __skb_pull(skb, skb_network_offset(skb));
        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
                __skb_pull(tmp_skb, skb_network_header_len(skb));
                *tail_skb = tmp_skb;
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
                skb->truesize += tmp_skb->truesize;
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
        }

        /* Allow local fragmentation. */
        if (np->pmtudisc < IPV6_PMTUDISC_DO)
                skb->local_df = 1;

        ipv6_addr_copy(final_dst, &fl->fl6_dst);
        __skb_pull(skb, skb_network_header_len(skb));
        if (opt && opt->opt_flen)
                ipv6_push_frag_opts(skb, opt, &proto);
        if (opt && opt->opt_nflen)
                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        *(__be32 *)hdr = fl->fl6_flowlabel |
                         htonl(0x60000000 | ((int)np->cork.tclass << 20));

        hdr->hop_limit = np->cork.hop_limit;
        hdr->nexthdr = proto;
        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
        ipv6_addr_copy(&hdr->daddr, final_dst);

        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        skb_dst_set(skb, dst_clone(&rt->dst));
        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
        if (proto == IPPROTO_ICMPV6) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
                ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
        }

        err = ip6_local_out(skb);
        if (err) {
                if (err > 0)
                        err = net_xmit_errno(err);
                if (err)
                        goto error;
        }

out:
        ip6_cork_release(inet, np);
        return err;
error:
        IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        goto out;
}

void ip6_flush_pending_frames(struct sock *sk)
{
        struct sk_buff *skb;

        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
                if (skb_dst(skb))
                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
        }

        ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}