linux/net/ipv6/ip6_output.c
<<
>>
Prefs
   1/*
   2 *      IPv6 output functions
   3 *      Linux INET6 implementation
   4 *
   5 *      Authors:
   6 *      Pedro Roque             <roque@di.fc.ul.pt>
   7 *
   8 *      Based on linux/net/ipv4/ip_output.c
   9 *
  10 *      This program is free software; you can redistribute it and/or
  11 *      modify it under the terms of the GNU General Public License
  12 *      as published by the Free Software Foundation; either version
  13 *      2 of the License, or (at your option) any later version.
  14 *
  15 *      Changes:
   16 *      A.N.Kuznetsov   :       arithmetics in fragmentation.
  17 *                              extension headers are implemented.
  18 *                              route changes now work.
  19 *                              ip6_forward does not confuse sniffers.
  20 *                              etc.
  21 *
  22 *      H. von Brand    :       Added missing #include <linux/string.h>
  23 *      Imran Patel     :       frag id should be in NBO
  24 *      Kazunori MIYAZAWA @USAGI
  25 *                      :       add ip6_append_data and related functions
  26 *                              for datagram xmit
  27 */
  28
  29#include <linux/errno.h>
  30#include <linux/kernel.h>
  31#include <linux/string.h>
  32#include <linux/socket.h>
  33#include <linux/net.h>
  34#include <linux/netdevice.h>
  35#include <linux/if_arp.h>
  36#include <linux/in6.h>
  37#include <linux/tcp.h>
  38#include <linux/route.h>
  39#include <linux/module.h>
  40
  41#include <linux/netfilter.h>
  42#include <linux/netfilter_ipv6.h>
  43
  44#include <net/sock.h>
  45#include <net/snmp.h>
  46
  47#include <net/ipv6.h>
  48#include <net/ndisc.h>
  49#include <net/protocol.h>
  50#include <net/ip6_route.h>
  51#include <net/addrconf.h>
  52#include <net/rawv6.h>
  53#include <net/icmp.h>
  54#include <net/xfrm.h>
  55#include <net/checksum.h>
  56#include <linux/mroute6.h>
  57
  58static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
  59
  60static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
  61{
  62        static u32 ipv6_fragmentation_id = 1;
  63        static DEFINE_SPINLOCK(ip6_id_lock);
  64
  65        spin_lock_bh(&ip6_id_lock);
  66        fhdr->identification = htonl(ipv6_fragmentation_id);
  67        if (++ipv6_fragmentation_id == 0)
  68                ipv6_fragmentation_id = 1;
  69        spin_unlock_bh(&ip6_id_lock);
  70}
  71
  72int __ip6_local_out(struct sk_buff *skb)
  73{
  74        int len;
  75
  76        len = skb->len - sizeof(struct ipv6hdr);
  77        if (len > IPV6_MAXPLEN)
  78                len = 0;
  79        ipv6_hdr(skb)->payload_len = htons(len);
  80
  81        return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
  82                       dst_output);
  83}
  84
  85int ip6_local_out(struct sk_buff *skb)
  86{
  87        int err;
  88
  89        err = __ip6_local_out(skb);
  90        if (likely(err == 1))
  91                err = dst_output(skb);
  92
  93        return err;
  94}
  95EXPORT_SYMBOL_GPL(ip6_local_out);
  96
  97static int ip6_output_finish(struct sk_buff *skb)
  98{
  99        struct dst_entry *dst = skb->dst;
 100
 101        if (dst->hh)
 102                return neigh_hh_output(dst->hh, skb);
 103        else if (dst->neighbour)
 104                return dst->neighbour->output(skb);
 105
 106        IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 107        kfree_skb(skb);
 108        return -EINVAL;
 109
 110}
 111
 112/* dev_loopback_xmit for use with netfilter. */
 113static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
 114{
 115        skb_reset_mac_header(newskb);
 116        __skb_pull(newskb, skb_network_offset(newskb));
 117        newskb->pkt_type = PACKET_LOOPBACK;
 118        newskb->ip_summed = CHECKSUM_UNNECESSARY;
 119        WARN_ON(!newskb->dst);
 120
 121        netif_rx(newskb);
 122        return 0;
 123}
 124
 125
 126static int ip6_output2(struct sk_buff *skb)
 127{
 128        struct dst_entry *dst = skb->dst;
 129        struct net_device *dev = dst->dev;
 130
 131        skb->protocol = htons(ETH_P_IPV6);
 132        skb->dev = dev;
 133
 134        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
 135                struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
 136                struct inet6_dev *idev = ip6_dst_idev(skb->dst);
 137
 138                if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
 139                    ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
 140                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
 141                                         &ipv6_hdr(skb)->saddr))) {
 142                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 143
 144                        /* Do not check for IFF_ALLMULTI; multicast routing
 145                           is not supported in any case.
 146                         */
 147                        if (newskb)
 148                                NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
 149                                        NULL, newskb->dev,
 150                                        ip6_dev_loopback_xmit);
 151
 152                        if (ipv6_hdr(skb)->hop_limit == 0) {
 153                                IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
 154                                kfree_skb(skb);
 155                                return 0;
 156                        }
 157                }
 158
 159                IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
 160        }
 161
 162        return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
 163                       ip6_output_finish);
 164}
 165
 166static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
 167{
 168        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 169
 170        return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
 171               skb->dst->dev->mtu : dst_mtu(skb->dst);
 172}
 173
 174int ip6_output(struct sk_buff *skb)
 175{
 176        struct inet6_dev *idev = ip6_dst_idev(skb->dst);
 177        if (unlikely(idev->cnf.disable_ipv6)) {
 178                IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
 179                kfree_skb(skb);
 180                return 0;
 181        }
 182
 183        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 184                                dst_allfrag(skb->dst))
 185                return ip6_fragment(skb, ip6_output2);
 186        else
 187                return ip6_output2(skb);
 188}
 189
 190/*
 191 *      xmit an sk_buff (used by TCP)
 192 */
 193
 194int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 195             struct ipv6_txoptions *opt, int ipfragok)
 196{
 197        struct ipv6_pinfo *np = inet6_sk(sk);
 198        struct in6_addr *first_hop = &fl->fl6_dst;
 199        struct dst_entry *dst = skb->dst;
 200        struct ipv6hdr *hdr;
 201        u8  proto = fl->proto;
 202        int seg_len = skb->len;
 203        int hlimit, tclass;
 204        u32 mtu;
 205
 206        if (opt) {
 207                unsigned int head_room;
 208
 209                /* First: exthdrs may take lots of space (~8K for now)
 210                   MAX_HEADER is not enough.
 211                 */
 212                head_room = opt->opt_nflen + opt->opt_flen;
 213                seg_len += head_room;
 214                head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
 215
 216                if (skb_headroom(skb) < head_room) {
 217                        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
 218                        if (skb2 == NULL) {
 219                                IP6_INC_STATS(ip6_dst_idev(skb->dst),
 220                                              IPSTATS_MIB_OUTDISCARDS);
 221                                kfree_skb(skb);
 222                                return -ENOBUFS;
 223                        }
 224                        kfree_skb(skb);
 225                        skb = skb2;
 226                        if (sk)
 227                                skb_set_owner_w(skb, sk);
 228                }
 229                if (opt->opt_flen)
 230                        ipv6_push_frag_opts(skb, opt, &proto);
 231                if (opt->opt_nflen)
 232                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
 233        }
 234
 235        skb_push(skb, sizeof(struct ipv6hdr));
 236        skb_reset_network_header(skb);
 237        hdr = ipv6_hdr(skb);
 238
 239        /* Allow local fragmentation. */
 240        if (ipfragok)
 241                skb->local_df = 1;
 242
 243        /*
 244         *      Fill in the IPv6 header
 245         */
 246
 247        hlimit = -1;
 248        if (np)
 249                hlimit = np->hop_limit;
 250        if (hlimit < 0)
 251                hlimit = ip6_dst_hoplimit(dst);
 252
 253        tclass = -1;
 254        if (np)
 255                tclass = np->tclass;
 256        if (tclass < 0)
 257                tclass = 0;
 258
 259        *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
 260
 261        hdr->payload_len = htons(seg_len);
 262        hdr->nexthdr = proto;
 263        hdr->hop_limit = hlimit;
 264
 265        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
 266        ipv6_addr_copy(&hdr->daddr, first_hop);
 267
 268        skb->priority = sk->sk_priority;
 269        skb->mark = sk->sk_mark;
 270
 271        mtu = dst_mtu(dst);
 272        if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
 273                IP6_INC_STATS(ip6_dst_idev(skb->dst),
 274                              IPSTATS_MIB_OUTREQUESTS);
 275                return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
 276                                dst_output);
 277        }
 278
 279        if (net_ratelimit())
 280                printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
 281        skb->dev = dst->dev;
 282        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
 283        IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 284        kfree_skb(skb);
 285        return -EMSGSIZE;
 286}
 287
 288EXPORT_SYMBOL(ip6_xmit);
 289
 290/*
 291 *      To avoid extra problems ND packets are send through this
 292 *      routine. It's code duplication but I really want to avoid
 293 *      extra checks since ipv6_build_header is used by TCP (which
 294 *      is for us performance critical)
 295 */
 296
 297int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
 298               const struct in6_addr *saddr, const struct in6_addr *daddr,
 299               int proto, int len)
 300{
 301        struct ipv6_pinfo *np = inet6_sk(sk);
 302        struct ipv6hdr *hdr;
 303        int totlen;
 304
 305        skb->protocol = htons(ETH_P_IPV6);
 306        skb->dev = dev;
 307
 308        totlen = len + sizeof(struct ipv6hdr);
 309
 310        skb_reset_network_header(skb);
 311        skb_put(skb, sizeof(struct ipv6hdr));
 312        hdr = ipv6_hdr(skb);
 313
 314        *(__be32*)hdr = htonl(0x60000000);
 315
 316        hdr->payload_len = htons(len);
 317        hdr->nexthdr = proto;
 318        hdr->hop_limit = np->hop_limit;
 319
 320        ipv6_addr_copy(&hdr->saddr, saddr);
 321        ipv6_addr_copy(&hdr->daddr, daddr);
 322
 323        return 0;
 324}
 325
 326static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 327{
 328        struct ip6_ra_chain *ra;
 329        struct sock *last = NULL;
 330
 331        read_lock(&ip6_ra_lock);
 332        for (ra = ip6_ra_chain; ra; ra = ra->next) {
 333                struct sock *sk = ra->sk;
 334                if (sk && ra->sel == sel &&
 335                    (!sk->sk_bound_dev_if ||
 336                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
 337                        if (last) {
 338                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 339                                if (skb2)
 340                                        rawv6_rcv(last, skb2);
 341                        }
 342                        last = sk;
 343                }
 344        }
 345
 346        if (last) {
 347                rawv6_rcv(last, skb);
 348                read_unlock(&ip6_ra_lock);
 349                return 1;
 350        }
 351        read_unlock(&ip6_ra_lock);
 352        return 0;
 353}
 354
 355static int ip6_forward_proxy_check(struct sk_buff *skb)
 356{
 357        struct ipv6hdr *hdr = ipv6_hdr(skb);
 358        u8 nexthdr = hdr->nexthdr;
 359        int offset;
 360
 361        if (ipv6_ext_hdr(nexthdr)) {
 362                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
 363                if (offset < 0)
 364                        return 0;
 365        } else
 366                offset = sizeof(struct ipv6hdr);
 367
 368        if (nexthdr == IPPROTO_ICMPV6) {
 369                struct icmp6hdr *icmp6;
 370
 371                if (!pskb_may_pull(skb, (skb_network_header(skb) +
 372                                         offset + 1 - skb->data)))
 373                        return 0;
 374
 375                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 376
 377                switch (icmp6->icmp6_type) {
 378                case NDISC_ROUTER_SOLICITATION:
 379                case NDISC_ROUTER_ADVERTISEMENT:
 380                case NDISC_NEIGHBOUR_SOLICITATION:
 381                case NDISC_NEIGHBOUR_ADVERTISEMENT:
 382                case NDISC_REDIRECT:
 383                        /* For reaction involving unicast neighbor discovery
 384                         * message destined to the proxied address, pass it to
 385                         * input function.
 386                         */
 387                        return 1;
 388                default:
 389                        break;
 390                }
 391        }
 392
 393        /*
 394         * The proxying router can't forward traffic sent to a link-local
 395         * address, so signal the sender and discard the packet. This
 396         * behavior is clarified by the MIPv6 specification.
 397         */
 398        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
 399                dst_link_failure(skb);
 400                return -1;
 401        }
 402
 403        return 0;
 404}
 405
 406static inline int ip6_forward_finish(struct sk_buff *skb)
 407{
 408        return dst_output(skb);
 409}
 410
 411int ip6_forward(struct sk_buff *skb)
 412{
 413        struct dst_entry *dst = skb->dst;
 414        struct ipv6hdr *hdr = ipv6_hdr(skb);
 415        struct inet6_skb_parm *opt = IP6CB(skb);
 416        struct net *net = dev_net(dst->dev);
 417
 418        if (net->ipv6.devconf_all->forwarding == 0)
 419                goto error;
 420
 421        if (skb_warn_if_lro(skb))
 422                goto drop;
 423
 424        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
 425                IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 426                goto drop;
 427        }
 428
 429        skb_forward_csum(skb);
 430
 431        /*
 432         *      We DO NOT make any processing on
 433         *      RA packets, pushing them to user level AS IS
 434         *      without ane WARRANTY that application will be able
 435         *      to interpret them. The reason is that we
 436         *      cannot make anything clever here.
 437         *
 438         *      We are not end-node, so that if packet contains
 439         *      AH/ESP, we cannot make anything.
 440         *      Defragmentation also would be mistake, RA packets
 441         *      cannot be fragmented, because there is no warranty
 442         *      that different fragments will go along one path. --ANK
 443         */
 444        if (opt->ra) {
 445                u8 *ptr = skb_network_header(skb) + opt->ra;
 446                if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
 447                        return 0;
 448        }
 449
 450        /*
 451         *      check and decrement ttl
 452         */
 453        if (hdr->hop_limit <= 1) {
 454                /* Force OUTPUT device used as source address */
 455                skb->dev = dst->dev;
 456                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
 457                            0, skb->dev);
 458                IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
 459
 460                kfree_skb(skb);
 461                return -ETIMEDOUT;
 462        }
 463
 464        /* XXX: idev->cnf.proxy_ndp? */
 465        if (net->ipv6.devconf_all->proxy_ndp &&
 466            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
 467                int proxied = ip6_forward_proxy_check(skb);
 468                if (proxied > 0)
 469                        return ip6_input(skb);
 470                else if (proxied < 0) {
 471                        IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 472                        goto drop;
 473                }
 474        }
 475
 476        if (!xfrm6_route_forward(skb)) {
 477                IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 478                goto drop;
 479        }
 480        dst = skb->dst;
 481
 482        /* IPv6 specs say nothing about it, but it is clear that we cannot
 483           send redirects to source routed frames.
 484           We don't send redirects to frames decapsulated from IPsec.
 485         */
 486        if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
 487            !skb->sp) {
 488                struct in6_addr *target = NULL;
 489                struct rt6_info *rt;
 490                struct neighbour *n = dst->neighbour;
 491
 492                /*
 493                 *      incoming and outgoing devices are the same
 494                 *      send a redirect.
 495                 */
 496
 497                rt = (struct rt6_info *) dst;
 498                if ((rt->rt6i_flags & RTF_GATEWAY))
 499                        target = (struct in6_addr*)&n->primary_key;
 500                else
 501                        target = &hdr->daddr;
 502
 503                /* Limit redirects both by destination (here)
 504                   and by source (inside ndisc_send_redirect)
 505                 */
 506                if (xrlim_allow(dst, 1*HZ))
 507                        ndisc_send_redirect(skb, n, target);
 508        } else {
 509                int addrtype = ipv6_addr_type(&hdr->saddr);
 510
 511                /* This check is security critical. */
 512                if (addrtype == IPV6_ADDR_ANY ||
 513                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
 514                        goto error;
 515                if (addrtype & IPV6_ADDR_LINKLOCAL) {
 516                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
 517                                ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
 518                        goto error;
 519                }
 520        }
 521
 522        if (skb->len > dst_mtu(dst)) {
 523                /* Again, force OUTPUT device used as source address */
 524                skb->dev = dst->dev;
 525                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
 526                IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
 527                IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
 528                kfree_skb(skb);
 529                return -EMSGSIZE;
 530        }
 531
 532        if (skb_cow(skb, dst->dev->hard_header_len)) {
 533                IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
 534                goto drop;
 535        }
 536
 537        hdr = ipv6_hdr(skb);
 538
 539        /* Mangling hops number delayed to point after skb COW */
 540
 541        hdr->hop_limit--;
 542
 543        IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 544        return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
 545                       ip6_forward_finish);
 546
 547error:
 548        IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
 549drop:
 550        kfree_skb(skb);
 551        return -EINVAL;
 552}
 553
 554static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 555{
 556        to->pkt_type = from->pkt_type;
 557        to->priority = from->priority;
 558        to->protocol = from->protocol;
 559        dst_release(to->dst);
 560        to->dst = dst_clone(from->dst);
 561        to->dev = from->dev;
 562        to->mark = from->mark;
 563
 564#ifdef CONFIG_NET_SCHED
 565        to->tc_index = from->tc_index;
 566#endif
 567        nf_copy(to, from);
 568#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
 569    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
 570        to->nf_trace = from->nf_trace;
 571#endif
 572        skb_copy_secmark(to, from);
 573}
 574
 575int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 576{
 577        u16 offset = sizeof(struct ipv6hdr);
 578        struct ipv6_opt_hdr *exthdr =
 579                                (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
 580        unsigned int packet_len = skb->tail - skb->network_header;
 581        int found_rhdr = 0;
 582        *nexthdr = &ipv6_hdr(skb)->nexthdr;
 583
 584        while (offset + 1 <= packet_len) {
 585
 586                switch (**nexthdr) {
 587
 588                case NEXTHDR_HOP:
 589                        break;
 590                case NEXTHDR_ROUTING:
 591                        found_rhdr = 1;
 592                        break;
 593                case NEXTHDR_DEST:
 594#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 595                        if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
 596                                break;
 597#endif
 598                        if (found_rhdr)
 599                                return offset;
 600                        break;
 601                default :
 602                        return offset;
 603                }
 604
 605                offset += ipv6_optlen(exthdr);
 606                *nexthdr = &exthdr->nexthdr;
 607                exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
 608                                                 offset);
 609        }
 610
 611        return offset;
 612}
 613
 614static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 615{
 616        struct net_device *dev;
 617        struct sk_buff *frag;
 618        struct rt6_info *rt = (struct rt6_info*)skb->dst;
 619        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 620        struct ipv6hdr *tmp_hdr;
 621        struct frag_hdr *fh;
 622        unsigned int mtu, hlen, left, len;
 623        __be32 frag_id = 0;
 624        int ptr, offset = 0, err=0;
 625        u8 *prevhdr, nexthdr = 0;
 626
 627        dev = rt->u.dst.dev;
 628        hlen = ip6_find_1stfragopt(skb, &prevhdr);
 629        nexthdr = *prevhdr;
 630
 631        mtu = ip6_skb_dst_mtu(skb);
 632
 633        /* We must not fragment if the socket is set to force MTU discovery
 634         * or if the skb it not generated by a local socket.  (This last
 635         * check should be redundant, but it's free.)
 636         */
 637        if (!skb->local_df) {
 638                skb->dev = skb->dst->dev;
 639                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
 640                IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 641                kfree_skb(skb);
 642                return -EMSGSIZE;
 643        }
 644
 645        if (np && np->frag_size < mtu) {
 646                if (np->frag_size)
 647                        mtu = np->frag_size;
 648        }
 649        mtu -= hlen + sizeof(struct frag_hdr);
 650
 651        if (skb_shinfo(skb)->frag_list) {
 652                int first_len = skb_pagelen(skb);
 653                int truesizes = 0;
 654
 655                if (first_len - hlen > mtu ||
 656                    ((first_len - hlen) & 7) ||
 657                    skb_cloned(skb))
 658                        goto slow_path;
 659
 660                for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
 661                        /* Correct geometry. */
 662                        if (frag->len > mtu ||
 663                            ((frag->len & 7) && frag->next) ||
 664                            skb_headroom(frag) < hlen)
 665                            goto slow_path;
 666
 667                        /* Partially cloned skb? */
 668                        if (skb_shared(frag))
 669                                goto slow_path;
 670
 671                        BUG_ON(frag->sk);
 672                        if (skb->sk) {
 673                                sock_hold(skb->sk);
 674                                frag->sk = skb->sk;
 675                                frag->destructor = sock_wfree;
 676                                truesizes += frag->truesize;
 677                        }
 678                }
 679
 680                err = 0;
 681                offset = 0;
 682                frag = skb_shinfo(skb)->frag_list;
 683                skb_shinfo(skb)->frag_list = NULL;
 684                /* BUILD HEADER */
 685
 686                *prevhdr = NEXTHDR_FRAGMENT;
 687                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 688                if (!tmp_hdr) {
 689                        IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 690                        return -ENOMEM;
 691                }
 692
 693                __skb_pull(skb, hlen);
 694                fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
 695                __skb_push(skb, hlen);
 696                skb_reset_network_header(skb);
 697                memcpy(skb_network_header(skb), tmp_hdr, hlen);
 698
 699                ipv6_select_ident(skb, fh);
 700                fh->nexthdr = nexthdr;
 701                fh->reserved = 0;
 702                fh->frag_off = htons(IP6_MF);
 703                frag_id = fh->identification;
 704
 705                first_len = skb_pagelen(skb);
 706                skb->data_len = first_len - skb_headlen(skb);
 707                skb->truesize -= truesizes;
 708                skb->len = first_len;
 709                ipv6_hdr(skb)->payload_len = htons(first_len -
 710                                                   sizeof(struct ipv6hdr));
 711
 712                dst_hold(&rt->u.dst);
 713
 714                for (;;) {
 715                        /* Prepare header of the next frame,
 716                         * before previous one went down. */
 717                        if (frag) {
 718                                frag->ip_summed = CHECKSUM_NONE;
 719                                skb_reset_transport_header(frag);
 720                                fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
 721                                __skb_push(frag, hlen);
 722                                skb_reset_network_header(frag);
 723                                memcpy(skb_network_header(frag), tmp_hdr,
 724                                       hlen);
 725                                offset += skb->len - hlen - sizeof(struct frag_hdr);
 726                                fh->nexthdr = nexthdr;
 727                                fh->reserved = 0;
 728                                fh->frag_off = htons(offset);
 729                                if (frag->next != NULL)
 730                                        fh->frag_off |= htons(IP6_MF);
 731                                fh->identification = frag_id;
 732                                ipv6_hdr(frag)->payload_len =
 733                                                htons(frag->len -
 734                                                      sizeof(struct ipv6hdr));
 735                                ip6_copy_metadata(frag, skb);
 736                        }
 737
 738                        err = output(skb);
 739                        if(!err)
 740                                IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);
 741
 742                        if (err || !frag)
 743                                break;
 744
 745                        skb = frag;
 746                        frag = skb->next;
 747                        skb->next = NULL;
 748                }
 749
 750                kfree(tmp_hdr);
 751
 752                if (err == 0) {
 753                        IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
 754                        dst_release(&rt->u.dst);
 755                        return 0;
 756                }
 757
 758                while (frag) {
 759                        skb = frag->next;
 760                        kfree_skb(frag);
 761                        frag = skb;
 762                }
 763
 764                IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
 765                dst_release(&rt->u.dst);
 766                return err;
 767        }
 768
 769slow_path:
 770        left = skb->len - hlen;         /* Space per frame */
 771        ptr = hlen;                     /* Where to start from */
 772
 773        /*
 774         *      Fragment the datagram.
 775         */
 776
 777        *prevhdr = NEXTHDR_FRAGMENT;
 778
 779        /*
 780         *      Keep copying data until we run out.
 781         */
 782        while(left > 0) {
 783                len = left;
 784                /* IF: it doesn't fit, use 'mtu' - the data space left */
 785                if (len > mtu)
 786                        len = mtu;
 787                /* IF: we are not sending upto and including the packet end
 788                   then align the next start on an eight byte boundary */
 789                if (len < left) {
 790                        len &= ~7;
 791                }
 792                /*
 793                 *      Allocate buffer.
 794                 */
 795
 796                if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
 797                        NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
 798                        IP6_INC_STATS(ip6_dst_idev(skb->dst),
 799                                      IPSTATS_MIB_FRAGFAILS);
 800                        err = -ENOMEM;
 801                        goto fail;
 802                }
 803
 804                /*
 805                 *      Set up data on packet
 806                 */
 807
 808                ip6_copy_metadata(frag, skb);
 809                skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
 810                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
 811                skb_reset_network_header(frag);
 812                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
 813                frag->transport_header = (frag->network_header + hlen +
 814                                          sizeof(struct frag_hdr));
 815
 816                /*
 817                 *      Charge the memory for the fragment to any owner
 818                 *      it might possess
 819                 */
 820                if (skb->sk)
 821                        skb_set_owner_w(frag, skb->sk);
 822
 823                /*
 824                 *      Copy the packet header into the new buffer.
 825                 */
 826                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
 827
 828                /*
 829                 *      Build fragment header.
 830                 */
 831                fh->nexthdr = nexthdr;
 832                fh->reserved = 0;
 833                if (!frag_id) {
 834                        ipv6_select_ident(skb, fh);
 835                        frag_id = fh->identification;
 836                } else
 837                        fh->identification = frag_id;
 838
 839                /*
 840                 *      Copy a block of the IP datagram.
 841                 */
 842                if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
 843                        BUG();
 844                left -= len;
 845
 846                fh->frag_off = htons(offset);
 847                if (left > 0)
 848                        fh->frag_off |= htons(IP6_MF);
 849                ipv6_hdr(frag)->payload_len = htons(frag->len -
 850                                                    sizeof(struct ipv6hdr));
 851
 852                ptr += len;
 853                offset += len;
 854
 855                /*
 856                 *      Put this fragment into the sending queue.
 857                 */
 858                err = output(frag);
 859                if (err)
 860                        goto fail;
 861
 862                IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
 863        }
 864        IP6_INC_STATS(ip6_dst_idev(skb->dst),
 865                      IPSTATS_MIB_FRAGOKS);
 866        kfree_skb(skb);
 867        return err;
 868
 869fail:
 870        IP6_INC_STATS(ip6_dst_idev(skb->dst),
 871                      IPSTATS_MIB_FRAGFAILS);
 872        kfree_skb(skb);
 873        return err;
 874}
 875
 876static inline int ip6_rt_check(struct rt6key *rt_key,
 877                               struct in6_addr *fl_addr,
 878                               struct in6_addr *addr_cache)
 879{
 880        return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
 881                (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
 882}
 883
 884static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 885                                          struct dst_entry *dst,
 886                                          struct flowi *fl)
 887{
 888        struct ipv6_pinfo *np = inet6_sk(sk);
 889        struct rt6_info *rt = (struct rt6_info *)dst;
 890
 891        if (!dst)
 892                goto out;
 893
 894        /* Yes, checking route validity in not connected
 895         * case is not very simple. Take into account,
 896         * that we do not support routing by source, TOS,
 897         * and MSG_DONTROUTE            --ANK (980726)
 898         *
 899         * 1. ip6_rt_check(): If route was host route,
 900         *    check that cached destination is current.
 901         *    If it is network route, we still may
 902         *    check its validity using saved pointer
 903         *    to the last used address: daddr_cache.
 904         *    We do not want to save whole address now,
 905         *    (because main consumer of this service
 906         *    is tcp, which has not this problem),
 907         *    so that the last trick works only on connected
 908         *    sockets.
 909         * 2. oif also should be the same.
 910         */
 911        if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
 912#ifdef CONFIG_IPV6_SUBTREES
 913            ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
 914#endif
 915            (fl->oif && fl->oif != dst->dev->ifindex)) {
 916                dst_release(dst);
 917                dst = NULL;
 918        }
 919
 920out:
 921        return dst;
 922}
 923
 924static int ip6_dst_lookup_tail(struct sock *sk,
 925                               struct dst_entry **dst, struct flowi *fl)
 926{
 927        int err;
 928        struct net *net = sock_net(sk);
 929
 930        if (*dst == NULL)
 931                *dst = ip6_route_output(net, sk, fl);
 932
 933        if ((err = (*dst)->error))
 934                goto out_err_release;
 935
 936        if (ipv6_addr_any(&fl->fl6_src)) {
 937                err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
 938                                         &fl->fl6_dst,
 939                                         sk ? inet6_sk(sk)->srcprefs : 0,
 940                                         &fl->fl6_src);
 941                if (err)
 942                        goto out_err_release;
 943        }
 944
 945#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 946        /*
 947         * Here if the dst entry we've looked up
 948         * has a neighbour entry that is in the INCOMPLETE
 949         * state and the src address from the flow is
 950         * marked as OPTIMISTIC, we release the found
 951         * dst entry and replace it instead with the
 952         * dst entry of the nexthop router
 953         */
 954        if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
 955                struct inet6_ifaddr *ifp;
 956                struct flowi fl_gw;
 957                int redirect;
 958
 959                ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
 960                                      (*dst)->dev, 1);
 961
 962                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
 963                if (ifp)
 964                        in6_ifa_put(ifp);
 965
 966                if (redirect) {
 967                        /*
 968                         * We need to get the dst entry for the
 969                         * default router instead
 970                         */
 971                        dst_release(*dst);
 972                        memcpy(&fl_gw, fl, sizeof(struct flowi));
 973                        memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
 974                        *dst = ip6_route_output(net, sk, &fl_gw);
 975                        if ((err = (*dst)->error))
 976                                goto out_err_release;
 977                }
 978        }
 979#endif
 980
 981        return 0;
 982
 983out_err_release:
 984        if (err == -ENETUNREACH)
 985                IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
 986        dst_release(*dst);
 987        *dst = NULL;
 988        return err;
 989}
 990
 991/**
 992 *      ip6_dst_lookup - perform route lookup on flow
 993 *      @sk: socket which provides route info
 994 *      @dst: pointer to dst_entry * for result
 995 *      @fl: flow to lookup
 996 *
 997 *      This function performs a route lookup on the given flow.
 998 *
 999 *      It returns zero on success, or a standard errno code on error.
1000 */
1001int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1002{
1003        *dst = NULL;
1004        return ip6_dst_lookup_tail(sk, dst, fl);
1005}
1006EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1007
1008/**
1009 *      ip6_sk_dst_lookup - perform socket cached route lookup on flow
1010 *      @sk: socket which provides the dst cache and route info
1011 *      @dst: pointer to dst_entry * for result
1012 *      @fl: flow to lookup
1013 *
1014 *      This function performs a route lookup on the given flow with the
1015 *      possibility of using the cached route in the socket if it is valid.
1016 *      It will take the socket dst lock when operating on the dst cache.
1017 *      As a result, this function can only be used in process context.
1018 *
1019 *      It returns zero on success, or a standard errno code on error.
1020 */
1021int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1022{
1023        *dst = NULL;
1024        if (sk) {
1025                *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1026                *dst = ip6_sk_dst_check(sk, *dst, fl);
1027        }
1028
1029        return ip6_dst_lookup_tail(sk, dst, fl);
1030}
1031EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1032
1033static inline int ip6_ufo_append_data(struct sock *sk,
1034                        int getfrag(void *from, char *to, int offset, int len,
1035                        int odd, struct sk_buff *skb),
1036                        void *from, int length, int hh_len, int fragheaderlen,
1037                        int transhdrlen, int mtu,unsigned int flags)
1038
1039{
1040        struct sk_buff *skb;
1041        int err;
1042
1043        /* There is support for UDP large send offload by network
1044         * device, so create one single skb packet containing complete
1045         * udp datagram
1046         */
1047        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1048                skb = sock_alloc_send_skb(sk,
1049                        hh_len + fragheaderlen + transhdrlen + 20,
1050                        (flags & MSG_DONTWAIT), &err);
1051                if (skb == NULL)
1052                        return -ENOMEM;
1053
1054                /* reserve space for Hardware header */
1055                skb_reserve(skb, hh_len);
1056
1057                /* create space for UDP/IP header */
1058                skb_put(skb,fragheaderlen + transhdrlen);
1059
1060                /* initialize network header pointer */
1061                skb_reset_network_header(skb);
1062
1063                /* initialize protocol header pointer */
1064                skb->transport_header = skb->network_header + fragheaderlen;
1065
1066                skb->ip_summed = CHECKSUM_PARTIAL;
1067                skb->csum = 0;
1068                sk->sk_sndmsg_off = 0;
1069        }
1070
1071        err = skb_append_datato_frags(sk,skb, getfrag, from,
1072                                      (length - transhdrlen));
1073        if (!err) {
1074                struct frag_hdr fhdr;
1075
1076                /* specify the length of each IP datagram fragment*/
1077                skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
1078                                            sizeof(struct frag_hdr);
1079                skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1080                ipv6_select_ident(skb, &fhdr);
1081                skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1082                __skb_queue_tail(&sk->sk_write_queue, skb);
1083
1084                return 0;
1085        }
1086        /* There is not enough support do UPD LSO,
1087         * so follow normal path
1088         */
1089        kfree_skb(skb);
1090
1091        return err;
1092}
1093
1094static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1095                                               gfp_t gfp)
1096{
1097        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1098}
1099
1100static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1101                                                gfp_t gfp)
1102{
1103        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1104}
1105
1106int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1107        int offset, int len, int odd, struct sk_buff *skb),
1108        void *from, int length, int transhdrlen,
1109        int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1110        struct rt6_info *rt, unsigned int flags)
1111{
1112        struct inet_sock *inet = inet_sk(sk);
1113        struct ipv6_pinfo *np = inet6_sk(sk);
1114        struct sk_buff *skb;
1115        unsigned int maxfraglen, fragheaderlen;
1116        int exthdrlen;
1117        int hh_len;
1118        int mtu;
1119        int copy;
1120        int err;
1121        int offset = 0;
1122        int csummode = CHECKSUM_NONE;
1123
1124        if (flags&MSG_PROBE)
1125                return 0;
1126        if (skb_queue_empty(&sk->sk_write_queue)) {
1127                /*
1128                 * setup for corking
1129                 */
1130                if (opt) {
1131                        if (WARN_ON(np->cork.opt))
1132                                return -EINVAL;
1133
1134                        np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1135                        if (unlikely(np->cork.opt == NULL))
1136                                return -ENOBUFS;
1137
1138                        np->cork.opt->tot_len = opt->tot_len;
1139                        np->cork.opt->opt_flen = opt->opt_flen;
1140                        np->cork.opt->opt_nflen = opt->opt_nflen;
1141
1142                        np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1143                                                            sk->sk_allocation);
1144                        if (opt->dst0opt && !np->cork.opt->dst0opt)
1145                                return -ENOBUFS;
1146
1147                        np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1148                                                            sk->sk_allocation);
1149                        if (opt->dst1opt && !np->cork.opt->dst1opt)
1150                                return -ENOBUFS;
1151
1152                        np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1153                                                           sk->sk_allocation);
1154                        if (opt->hopopt && !np->cork.opt->hopopt)
1155                                return -ENOBUFS;
1156
1157                        np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1158                                                            sk->sk_allocation);
1159                        if (opt->srcrt && !np->cork.opt->srcrt)
1160                                return -ENOBUFS;
1161
1162                        /* need source address above miyazawa*/
1163                }
1164                dst_hold(&rt->u.dst);
1165                inet->cork.dst = &rt->u.dst;
1166                inet->cork.fl = *fl;
1167                np->cork.hop_limit = hlimit;
1168                np->cork.tclass = tclass;
1169                mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1170                      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1171                if (np->frag_size < mtu) {
1172                        if (np->frag_size)
1173                                mtu = np->frag_size;
1174                }
1175                inet->cork.fragsize = mtu;
1176                if (dst_allfrag(rt->u.dst.path))
1177                        inet->cork.flags |= IPCORK_ALLFRAG;
1178                inet->cork.length = 0;
1179                sk->sk_sndmsg_page = NULL;
1180                sk->sk_sndmsg_off = 0;
1181                exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1182                            rt->rt6i_nfheader_len;
1183                length += exthdrlen;
1184                transhdrlen += exthdrlen;
1185        } else {
1186                rt = (struct rt6_info *)inet->cork.dst;
1187                fl = &inet->cork.fl;
1188                opt = np->cork.opt;
1189                transhdrlen = 0;
1190                exthdrlen = 0;
1191                mtu = inet->cork.fragsize;
1192        }
1193
1194        hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1195
1196        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1197                        (opt ? opt->opt_nflen : 0);
1198        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1199
1200        if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1201                if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1202                        ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1203                        return -EMSGSIZE;
1204                }
1205        }
1206
1207        /*
1208         * Let's try using as much space as possible.
1209         * Use MTU if total length of the message fits into the MTU.
1210         * Otherwise, we need to reserve fragment header and
1211         * fragment alignment (= 8-15 octects, in total).
1212         *
1213         * Note that we may need to "move" the data from the tail of
1214         * of the buffer to the new fragment when we split
1215         * the message.
1216         *
1217         * FIXME: It may be fragmented into multiple chunks
1218         *        at once if non-fragmentable extension headers
1219         *        are too large.
1220         * --yoshfuji
1221         */
1222
1223        inet->cork.length += length;
1224        if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1225            (rt->u.dst.dev->features & NETIF_F_UFO)) {
1226
1227                err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1228                                          fragheaderlen, transhdrlen, mtu,
1229                                          flags);
1230                if (err)
1231                        goto error;
1232                return 0;
1233        }
1234
1235        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1236                goto alloc_new_skb;
1237
1238        while (length > 0) {
1239                /* Check if the remaining data fits into current packet. */
1240                copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1241                if (copy < length)
1242                        copy = maxfraglen - skb->len;
1243
1244                if (copy <= 0) {
1245                        char *data;
1246                        unsigned int datalen;
1247                        unsigned int fraglen;
1248                        unsigned int fraggap;
1249                        unsigned int alloclen;
1250                        struct sk_buff *skb_prev;
1251alloc_new_skb:
1252                        skb_prev = skb;
1253
1254                        /* There's no room in the current skb */
1255                        if (skb_prev)
1256                                fraggap = skb_prev->len - maxfraglen;
1257                        else
1258                                fraggap = 0;
1259
1260                        /*
1261                         * If remaining data exceeds the mtu,
1262                         * we know we need more fragment(s).
1263                         */
1264                        datalen = length + fraggap;
1265                        if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1266                                datalen = maxfraglen - fragheaderlen;
1267
1268                        fraglen = datalen + fragheaderlen;
1269                        if ((flags & MSG_MORE) &&
1270                            !(rt->u.dst.dev->features&NETIF_F_SG))
1271                                alloclen = mtu;
1272                        else
1273                                alloclen = datalen + fragheaderlen;
1274
1275                        /*
1276                         * The last fragment gets additional space at tail.
1277                         * Note: we overallocate on fragments with MSG_MODE
1278                         * because we have no idea if we're the last one.
1279                         */
1280                        if (datalen == length + fraggap)
1281                                alloclen += rt->u.dst.trailer_len;
1282
1283                        /*
1284                         * We just reserve space for fragment header.
1285                         * Note: this may be overallocation if the message
1286                         * (without MSG_MORE) fits into the MTU.
1287                         */
1288                        alloclen += sizeof(struct frag_hdr);
1289
1290                        if (transhdrlen) {
1291                                skb = sock_alloc_send_skb(sk,
1292                                                alloclen + hh_len,
1293                                                (flags & MSG_DONTWAIT), &err);
1294                        } else {
1295                                skb = NULL;
1296                                if (atomic_read(&sk->sk_wmem_alloc) <=
1297                                    2 * sk->sk_sndbuf)
1298                                        skb = sock_wmalloc(sk,
1299                                                           alloclen + hh_len, 1,
1300                                                           sk->sk_allocation);
1301                                if (unlikely(skb == NULL))
1302                                        err = -ENOBUFS;
1303                        }
1304                        if (skb == NULL)
1305                                goto error;
1306                        /*
1307                         *      Fill in the control structures
1308                         */
1309                        skb->ip_summed = csummode;
1310                        skb->csum = 0;
1311                        /* reserve for fragmentation */
1312                        skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1313
1314                        /*
1315                         *      Find where to start putting bytes
1316                         */
1317                        data = skb_put(skb, fraglen);
1318                        skb_set_network_header(skb, exthdrlen);
1319                        data += fragheaderlen;
1320                        skb->transport_header = (skb->network_header +
1321                                                 fragheaderlen);
1322                        if (fraggap) {
1323                                skb->csum = skb_copy_and_csum_bits(
1324                                        skb_prev, maxfraglen,
1325                                        data + transhdrlen, fraggap, 0);
1326                                skb_prev->csum = csum_sub(skb_prev->csum,
1327                                                          skb->csum);
1328                                data += fraggap;
1329                                pskb_trim_unique(skb_prev, maxfraglen);
1330                        }
1331                        copy = datalen - transhdrlen - fraggap;
1332                        if (copy < 0) {
1333                                err = -EINVAL;
1334                                kfree_skb(skb);
1335                                goto error;
1336                        } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1337                                err = -EFAULT;
1338                                kfree_skb(skb);
1339                                goto error;
1340                        }
1341
1342                        offset += copy;
1343                        length -= datalen - fraggap;
1344                        transhdrlen = 0;
1345                        exthdrlen = 0;
1346                        csummode = CHECKSUM_NONE;
1347
1348                        /*
1349                         * Put the packet on the pending queue
1350                         */
1351                        __skb_queue_tail(&sk->sk_write_queue, skb);
1352                        continue;
1353                }
1354
1355                if (copy > length)
1356                        copy = length;
1357
1358                if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1359                        unsigned int off;
1360
1361                        off = skb->len;
1362                        if (getfrag(from, skb_put(skb, copy),
1363                                                offset, copy, off, skb) < 0) {
1364                                __skb_trim(skb, off);
1365                                err = -EFAULT;
1366                                goto error;
1367                        }
1368                } else {
1369                        int i = skb_shinfo(skb)->nr_frags;
1370                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1371                        struct page *page = sk->sk_sndmsg_page;
1372                        int off = sk->sk_sndmsg_off;
1373                        unsigned int left;
1374
1375                        if (page && (left = PAGE_SIZE - off) > 0) {
1376                                if (copy >= left)
1377                                        copy = left;
1378                                if (page != frag->page) {
1379                                        if (i == MAX_SKB_FRAGS) {
1380                                                err = -EMSGSIZE;
1381                                                goto error;
1382                                        }
1383                                        get_page(page);
1384                                        skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1385                                        frag = &skb_shinfo(skb)->frags[i];
1386                                }
1387                        } else if(i < MAX_SKB_FRAGS) {
1388                                if (copy > PAGE_SIZE)
1389                                        copy = PAGE_SIZE;
1390                                page = alloc_pages(sk->sk_allocation, 0);
1391                                if (page == NULL) {
1392                                        err = -ENOMEM;
1393                                        goto error;
1394                                }
1395                                sk->sk_sndmsg_page = page;
1396                                sk->sk_sndmsg_off = 0;
1397
1398                                skb_fill_page_desc(skb, i, page, 0, 0);
1399                                frag = &skb_shinfo(skb)->frags[i];
1400                        } else {
1401                                err = -EMSGSIZE;
1402                                goto error;
1403                        }
1404                        if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1405                                err = -EFAULT;
1406                                goto error;
1407                        }
1408                        sk->sk_sndmsg_off += copy;
1409                        frag->size += copy;
1410                        skb->len += copy;
1411                        skb->data_len += copy;
1412                        skb->truesize += copy;
1413                        atomic_add(copy, &sk->sk_wmem_alloc);
1414                }
1415                offset += copy;
1416                length -= copy;
1417        }
1418        return 0;
1419error:
1420        inet->cork.length -= length;
1421        IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1422        return err;
1423}
1424
1425static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1426{
1427        if (np->cork.opt) {
1428                kfree(np->cork.opt->dst0opt);
1429                kfree(np->cork.opt->dst1opt);
1430                kfree(np->cork.opt->hopopt);
1431                kfree(np->cork.opt->srcrt);
1432                kfree(np->cork.opt);
1433                np->cork.opt = NULL;
1434        }
1435
1436        if (inet->cork.dst) {
1437                dst_release(inet->cork.dst);
1438                inet->cork.dst = NULL;
1439                inet->cork.flags &= ~IPCORK_ALLFRAG;
1440        }
1441        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1442}
1443
1444int ip6_push_pending_frames(struct sock *sk)
1445{
1446        struct sk_buff *skb, *tmp_skb;
1447        struct sk_buff **tail_skb;
1448        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1449        struct inet_sock *inet = inet_sk(sk);
1450        struct ipv6_pinfo *np = inet6_sk(sk);
1451        struct ipv6hdr *hdr;
1452        struct ipv6_txoptions *opt = np->cork.opt;
1453        struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1454        struct flowi *fl = &inet->cork.fl;
1455        unsigned char proto = fl->proto;
1456        int err = 0;
1457
1458        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1459                goto out;
1460        tail_skb = &(skb_shinfo(skb)->frag_list);
1461
1462        /* move skb->data to ip header from ext header */
1463        if (skb->data < skb_network_header(skb))
1464                __skb_pull(skb, skb_network_offset(skb));
1465        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1466                __skb_pull(tmp_skb, skb_network_header_len(skb));
1467                *tail_skb = tmp_skb;
1468                tail_skb = &(tmp_skb->next);
1469                skb->len += tmp_skb->len;
1470                skb->data_len += tmp_skb->len;
1471                skb->truesize += tmp_skb->truesize;
1472                __sock_put(tmp_skb->sk);
1473                tmp_skb->destructor = NULL;
1474                tmp_skb->sk = NULL;
1475        }
1476
1477        /* Allow local fragmentation. */
1478        if (np->pmtudisc < IPV6_PMTUDISC_DO)
1479                skb->local_df = 1;
1480
1481        ipv6_addr_copy(final_dst, &fl->fl6_dst);
1482        __skb_pull(skb, skb_network_header_len(skb));
1483        if (opt && opt->opt_flen)
1484                ipv6_push_frag_opts(skb, opt, &proto);
1485        if (opt && opt->opt_nflen)
1486                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1487
1488        skb_push(skb, sizeof(struct ipv6hdr));
1489        skb_reset_network_header(skb);
1490        hdr = ipv6_hdr(skb);
1491
1492        *(__be32*)hdr = fl->fl6_flowlabel |
1493                     htonl(0x60000000 | ((int)np->cork.tclass << 20));
1494
1495        hdr->hop_limit = np->cork.hop_limit;
1496        hdr->nexthdr = proto;
1497        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1498        ipv6_addr_copy(&hdr->daddr, final_dst);
1499
1500        skb->priority = sk->sk_priority;
1501        skb->mark = sk->sk_mark;
1502
1503        skb->dst = dst_clone(&rt->u.dst);
1504        IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
1505        if (proto == IPPROTO_ICMPV6) {
1506                struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1507
1508                ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
1509                ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
1510        }
1511
1512        err = ip6_local_out(skb);
1513        if (err) {
1514                if (err > 0)
1515                        err = np->recverr ? net_xmit_errno(err) : 0;
1516                if (err)
1517                        goto error;
1518        }
1519
1520out:
1521        ip6_cork_release(inet, np);
1522        return err;
1523error:
1524        goto out;
1525}
1526
1527void ip6_flush_pending_frames(struct sock *sk)
1528{
1529        struct sk_buff *skb;
1530
1531        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1532                if (skb->dst)
1533                        IP6_INC_STATS(ip6_dst_idev(skb->dst),
1534                                      IPSTATS_MIB_OUTDISCARDS);
1535                kfree_skb(skb);
1536        }
1537
1538        ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1539}
1540
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.