linux/net/ipv6/ip6_output.c
<<
>>
Prefs
   1/*
   2 *      IPv6 output functions
   3 *      Linux INET6 implementation
   4 *
   5 *      Authors:
   6 *      Pedro Roque             <roque@di.fc.ul.pt>
   7 *
   8 *      Based on linux/net/ipv4/ip_output.c
   9 *
  10 *      This program is free software; you can redistribute it and/or
  11 *      modify it under the terms of the GNU General Public License
  12 *      as published by the Free Software Foundation; either version
  13 *      2 of the License, or (at your option) any later version.
  14 *
  15 *      Changes:
  16 *      A.N.Kuznetsov   :       arithmetic in fragmentation.
  17 *                              extension headers are implemented.
  18 *                              route changes now work.
  19 *                              ip6_forward does not confuse sniffers.
  20 *                              etc.
  21 *
  22 *      H. von Brand    :       Added missing #include <linux/string.h>
  23 *      Imran Patel     :       frag id should be in NBO
  24 *      Kazunori MIYAZAWA @USAGI
  25 *                      :       add ip6_append_data and related functions
  26 *                              for datagram xmit
  27 */
  28
  29#include <linux/errno.h>
  30#include <linux/kernel.h>
  31#include <linux/string.h>
  32#include <linux/socket.h>
  33#include <linux/net.h>
  34#include <linux/netdevice.h>
  35#include <linux/if_arp.h>
  36#include <linux/in6.h>
  37#include <linux/tcp.h>
  38#include <linux/route.h>
  39#include <linux/module.h>
  40
  41#include <linux/netfilter.h>
  42#include <linux/netfilter_ipv6.h>
  43
  44#include <net/sock.h>
  45#include <net/snmp.h>
  46
  47#include <net/ipv6.h>
  48#include <net/ndisc.h>
  49#include <net/protocol.h>
  50#include <net/ip6_route.h>
  51#include <net/addrconf.h>
  52#include <net/rawv6.h>
  53#include <net/icmp.h>
  54#include <net/xfrm.h>
  55#include <net/checksum.h>
  56#include <linux/mroute6.h>
  57
  58static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
  59
  60static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
  61{
  62        static u32 ipv6_fragmentation_id = 1;
  63        static DEFINE_SPINLOCK(ip6_id_lock);
  64
  65        spin_lock_bh(&ip6_id_lock);
  66        fhdr->identification = htonl(ipv6_fragmentation_id);
  67        if (++ipv6_fragmentation_id == 0)
  68                ipv6_fragmentation_id = 1;
  69        spin_unlock_bh(&ip6_id_lock);
  70}
  71
  72int __ip6_local_out(struct sk_buff *skb)
  73{
  74        int len;
  75
  76        len = skb->len - sizeof(struct ipv6hdr);
  77        if (len > IPV6_MAXPLEN)
  78                len = 0;
  79        ipv6_hdr(skb)->payload_len = htons(len);
  80
  81        return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
  82                       dst_output);
  83}
  84
  /*
   * Send a locally generated packet: run __ip6_local_out() and, when it
   * returns 1, hand the skb to dst_output().  Any other result from
   * __ip6_local_out() is returned unchanged.
   */
  int ip6_local_out(struct sk_buff *skb)
  {
          int rc = __ip6_local_out(skb);

          if (unlikely(rc != 1))
                  return rc;

          return dst_output(skb);
  }
  95EXPORT_SYMBOL_GPL(ip6_local_out);
  96
  97static int ip6_output_finish(struct sk_buff *skb)
  98{
  99        struct dst_entry *dst = skb->dst;
 100
 101        if (dst->hh)
 102                return neigh_hh_output(dst->hh, skb);
 103        else if (dst->neighbour)
 104                return dst->neighbour->output(skb);
 105
 106        IP6_INC_STATS_BH(dev_net(dst->dev),
 107                         ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 108        kfree_skb(skb);
 109        return -EINVAL;
 110
 111}
 112
 113/* dev_loopback_xmit for use with netfilter. */
 114static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
 115{
 116        skb_reset_mac_header(newskb);
 117        __skb_pull(newskb, skb_network_offset(newskb));
 118        newskb->pkt_type = PACKET_LOOPBACK;
 119        newskb->ip_summed = CHECKSUM_UNNECESSARY;
 120        WARN_ON(!newskb->dst);
 121
 122        netif_rx(newskb);
 123        return 0;
 124}
 125
 126
 127static int ip6_output2(struct sk_buff *skb)
 128{
 129        struct dst_entry *dst = skb->dst;
 130        struct net_device *dev = dst->dev;
 131
 132        skb->protocol = htons(ETH_P_IPV6);
 133        skb->dev = dev;
 134
 135        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
 136                struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
 137                struct inet6_dev *idev = ip6_dst_idev(skb->dst);
 138
 139                if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
 140                    ((mroute6_socket(dev_net(dev)) &&
 141                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
 142                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
 143                                         &ipv6_hdr(skb)->saddr))) {
 144                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 145
 146                        /* Do not check for IFF_ALLMULTI; multicast routing
 147                           is not supported in any case.
 148                         */
 149                        if (newskb)
 150                                NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
 151                                        NULL, newskb->dev,
 152                                        ip6_dev_loopback_xmit);
 153
 154                        if (ipv6_hdr(skb)->hop_limit == 0) {
 155                                IP6_INC_STATS(dev_net(dev), idev,
 156                                              IPSTATS_MIB_OUTDISCARDS);
 157                                kfree_skb(skb);
 158                                return 0;
 159                        }
 160                }
 161
 162                IP6_INC_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCASTPKTS);
 163        }
 164
 165        return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
 166                       ip6_output_finish);
 167}
 168
 169static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
 170{
 171        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 172
 173        return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
 174               skb->dst->dev->mtu : dst_mtu(skb->dst);
 175}
 176
 177int ip6_output(struct sk_buff *skb)
 178{
 179        struct inet6_dev *idev = ip6_dst_idev(skb->dst);
 180        if (unlikely(idev->cnf.disable_ipv6)) {
 181                IP6_INC_STATS(dev_net(skb->dst->dev), idev,
 182                              IPSTATS_MIB_OUTDISCARDS);
 183                kfree_skb(skb);
 184                return 0;
 185        }
 186
 187        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 188                                dst_allfrag(skb->dst))
 189                return ip6_fragment(skb, ip6_output2);
 190        else
 191                return ip6_output2(skb);
 192}
 193
 194/*
 195 *      xmit an sk_buff (used by TCP)
 196 */
 197
 198int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 199             struct ipv6_txoptions *opt, int ipfragok)
 200{
 201        struct net *net = sock_net(sk);
 202        struct ipv6_pinfo *np = inet6_sk(sk);
 203        struct in6_addr *first_hop = &fl->fl6_dst;
 204        struct dst_entry *dst = skb->dst;
 205        struct ipv6hdr *hdr;
 206        u8  proto = fl->proto;
 207        int seg_len = skb->len;
 208        int hlimit, tclass;
 209        u32 mtu;
 210
 211        if (opt) {
 212                unsigned int head_room;
 213
 214                /* First: exthdrs may take lots of space (~8K for now)
 215                   MAX_HEADER is not enough.
 216                 */
 217                head_room = opt->opt_nflen + opt->opt_flen;
 218                seg_len += head_room;
 219                head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
 220
 221                if (skb_headroom(skb) < head_room) {
 222                        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
 223                        if (skb2 == NULL) {
 224                                IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 225                                              IPSTATS_MIB_OUTDISCARDS);
 226                                kfree_skb(skb);
 227                                return -ENOBUFS;
 228                        }
 229                        kfree_skb(skb);
 230                        skb = skb2;
 231                        if (sk)
 232                                skb_set_owner_w(skb, sk);
 233                }
 234                if (opt->opt_flen)
 235                        ipv6_push_frag_opts(skb, opt, &proto);
 236                if (opt->opt_nflen)
 237                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
 238        }
 239
 240        skb_push(skb, sizeof(struct ipv6hdr));
 241        skb_reset_network_header(skb);
 242        hdr = ipv6_hdr(skb);
 243
 244        /* Allow local fragmentation. */
 245        if (ipfragok)
 246                skb->local_df = 1;
 247
 248        /*
 249         *      Fill in the IPv6 header
 250         */
 251
 252        hlimit = -1;
 253        if (np)
 254                hlimit = np->hop_limit;
 255        if (hlimit < 0)
 256                hlimit = ip6_dst_hoplimit(dst);
 257
 258        tclass = -1;
 259        if (np)
 260                tclass = np->tclass;
 261        if (tclass < 0)
 262                tclass = 0;
 263
 264        *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
 265
 266        hdr->payload_len = htons(seg_len);
 267        hdr->nexthdr = proto;
 268        hdr->hop_limit = hlimit;
 269
 270        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
 271        ipv6_addr_copy(&hdr->daddr, first_hop);
 272
 273        skb->priority = sk->sk_priority;
 274        skb->mark = sk->sk_mark;
 275
 276        mtu = dst_mtu(dst);
 277        if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
 278                IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 279                              IPSTATS_MIB_OUTREQUESTS);
 280                return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
 281                                dst_output);
 282        }
 283
 284        if (net_ratelimit())
 285                printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
 286        skb->dev = dst->dev;
 287        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
 288        IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 289        kfree_skb(skb);
 290        return -EMSGSIZE;
 291}
 292
 293EXPORT_SYMBOL(ip6_xmit);
 294
 295/*
 296 *      To avoid extra problems, ND packets are sent through this
 297 *      routine. It is code duplication, but I really want to avoid
 298 *      extra checks since ipv6_build_header is used by TCP (which
 299 *      is performance critical for us)
 300 */
 301
 302int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
 303               const struct in6_addr *saddr, const struct in6_addr *daddr,
 304               int proto, int len)
 305{
 306        struct ipv6_pinfo *np = inet6_sk(sk);
 307        struct ipv6hdr *hdr;
 308        int totlen;
 309
 310        skb->protocol = htons(ETH_P_IPV6);
 311        skb->dev = dev;
 312
 313        totlen = len + sizeof(struct ipv6hdr);
 314
 315        skb_reset_network_header(skb);
 316        skb_put(skb, sizeof(struct ipv6hdr));
 317        hdr = ipv6_hdr(skb);
 318
 319        *(__be32*)hdr = htonl(0x60000000);
 320
 321        hdr->payload_len = htons(len);
 322        hdr->nexthdr = proto;
 323        hdr->hop_limit = np->hop_limit;
 324
 325        ipv6_addr_copy(&hdr->saddr, saddr);
 326        ipv6_addr_copy(&hdr->daddr, daddr);
 327
 328        return 0;
 329}
 330
 331static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 332{
 333        struct ip6_ra_chain *ra;
 334        struct sock *last = NULL;
 335
 336        read_lock(&ip6_ra_lock);
 337        for (ra = ip6_ra_chain; ra; ra = ra->next) {
 338                struct sock *sk = ra->sk;
 339                if (sk && ra->sel == sel &&
 340                    (!sk->sk_bound_dev_if ||
 341                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
 342                        if (last) {
 343                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 344                                if (skb2)
 345                                        rawv6_rcv(last, skb2);
 346                        }
 347                        last = sk;
 348                }
 349        }
 350
 351        if (last) {
 352                rawv6_rcv(last, skb);
 353                read_unlock(&ip6_ra_lock);
 354                return 1;
 355        }
 356        read_unlock(&ip6_ra_lock);
 357        return 0;
 358}
 359
 360static int ip6_forward_proxy_check(struct sk_buff *skb)
 361{
 362        struct ipv6hdr *hdr = ipv6_hdr(skb);
 363        u8 nexthdr = hdr->nexthdr;
 364        int offset;
 365
 366        if (ipv6_ext_hdr(nexthdr)) {
 367                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
 368                if (offset < 0)
 369                        return 0;
 370        } else
 371                offset = sizeof(struct ipv6hdr);
 372
 373        if (nexthdr == IPPROTO_ICMPV6) {
 374                struct icmp6hdr *icmp6;
 375
 376                if (!pskb_may_pull(skb, (skb_network_header(skb) +
 377                                         offset + 1 - skb->data)))
 378                        return 0;
 379
 380                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 381
 382                switch (icmp6->icmp6_type) {
 383                case NDISC_ROUTER_SOLICITATION:
 384                case NDISC_ROUTER_ADVERTISEMENT:
 385                case NDISC_NEIGHBOUR_SOLICITATION:
 386                case NDISC_NEIGHBOUR_ADVERTISEMENT:
 387                case NDISC_REDIRECT:
 388                        /* For reaction involving unicast neighbor discovery
 389                         * message destined to the proxied address, pass it to
 390                         * input function.
 391                         */
 392                        return 1;
 393                default:
 394                        break;
 395                }
 396        }
 397
 398        /*
 399         * The proxying router can't forward traffic sent to a link-local
 400         * address, so signal the sender and discard the packet. This
 401         * behavior is clarified by the MIPv6 specification.
 402         */
 403        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
 404                dst_link_failure(skb);
 405                return -1;
 406        }
 407
 408        return 0;
 409}
 410
 /*
  * okfn for the FORWARD hook: pass the skb on to dst_output() and return
  * its result.
  */
 static inline int ip6_forward_finish(struct sk_buff *skb)
 {
         int rc = dst_output(skb);

         return rc;
 }
 415
 416int ip6_forward(struct sk_buff *skb)
 417{
 418        struct dst_entry *dst = skb->dst;
 419        struct ipv6hdr *hdr = ipv6_hdr(skb);
 420        struct inet6_skb_parm *opt = IP6CB(skb);
 421        struct net *net = dev_net(dst->dev);
 422
 423        if (net->ipv6.devconf_all->forwarding == 0)
 424                goto error;
 425
 426        if (skb_warn_if_lro(skb))
 427                goto drop;
 428
 429        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
 430                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 431                goto drop;
 432        }
 433
 434        skb_forward_csum(skb);
 435
 436        /*
 437         *      We DO NOT make any processing on
 438         *      RA packets, pushing them to user level AS IS
 439         *      without ane WARRANTY that application will be able
 440         *      to interpret them. The reason is that we
 441         *      cannot make anything clever here.
 442         *
 443         *      We are not end-node, so that if packet contains
 444         *      AH/ESP, we cannot make anything.
 445         *      Defragmentation also would be mistake, RA packets
 446         *      cannot be fragmented, because there is no warranty
 447         *      that different fragments will go along one path. --ANK
 448         */
 449        if (opt->ra) {
 450                u8 *ptr = skb_network_header(skb) + opt->ra;
 451                if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
 452                        return 0;
 453        }
 454
 455        /*
 456         *      check and decrement ttl
 457         */
 458        if (hdr->hop_limit <= 1) {
 459                /* Force OUTPUT device used as source address */
 460                skb->dev = dst->dev;
 461                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
 462                            0, skb->dev);
 463                IP6_INC_STATS_BH(net,
 464                                 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
 465
 466                kfree_skb(skb);
 467                return -ETIMEDOUT;
 468        }
 469
 470        /* XXX: idev->cnf.proxy_ndp? */
 471        if (net->ipv6.devconf_all->proxy_ndp &&
 472            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
 473                int proxied = ip6_forward_proxy_check(skb);
 474                if (proxied > 0)
 475                        return ip6_input(skb);
 476                else if (proxied < 0) {
 477                        IP6_INC_STATS(net, ip6_dst_idev(dst),
 478                                      IPSTATS_MIB_INDISCARDS);
 479                        goto drop;
 480                }
 481        }
 482
 483        if (!xfrm6_route_forward(skb)) {
 484                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 485                goto drop;
 486        }
 487        dst = skb->dst;
 488
 489        /* IPv6 specs say nothing about it, but it is clear that we cannot
 490           send redirects to source routed frames.
 491           We don't send redirects to frames decapsulated from IPsec.
 492         */
 493        if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
 494            !skb_sec_path(skb)) {
 495                struct in6_addr *target = NULL;
 496                struct rt6_info *rt;
 497                struct neighbour *n = dst->neighbour;
 498
 499                /*
 500                 *      incoming and outgoing devices are the same
 501                 *      send a redirect.
 502                 */
 503
 504                rt = (struct rt6_info *) dst;
 505                if ((rt->rt6i_flags & RTF_GATEWAY))
 506                        target = (struct in6_addr*)&n->primary_key;
 507                else
 508                        target = &hdr->daddr;
 509
 510                /* Limit redirects both by destination (here)
 511                   and by source (inside ndisc_send_redirect)
 512                 */
 513                if (xrlim_allow(dst, 1*HZ))
 514                        ndisc_send_redirect(skb, n, target);
 515        } else {
 516                int addrtype = ipv6_addr_type(&hdr->saddr);
 517
 518                /* This check is security critical. */
 519                if (addrtype == IPV6_ADDR_ANY ||
 520                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
 521                        goto error;
 522                if (addrtype & IPV6_ADDR_LINKLOCAL) {
 523                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
 524                                ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
 525                        goto error;
 526                }
 527        }
 528
 529        if (skb->len > dst_mtu(dst)) {
 530                /* Again, force OUTPUT device used as source address */
 531                skb->dev = dst->dev;
 532                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
 533                IP6_INC_STATS_BH(net,
 534                                 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
 535                IP6_INC_STATS_BH(net,
 536                                 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
 537                kfree_skb(skb);
 538                return -EMSGSIZE;
 539        }
 540
 541        if (skb_cow(skb, dst->dev->hard_header_len)) {
 542                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
 543                goto drop;
 544        }
 545
 546        hdr = ipv6_hdr(skb);
 547
 548        /* Mangling hops number delayed to point after skb COW */
 549
 550        hdr->hop_limit--;
 551
 552        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 553        return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
 554                       ip6_forward_finish);
 555
 556error:
 557        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
 558drop:
 559        kfree_skb(skb);
 560        return -EINVAL;
 561}
 562
 563static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 564{
 565        to->pkt_type = from->pkt_type;
 566        to->priority = from->priority;
 567        to->protocol = from->protocol;
 568        dst_release(to->dst);
 569        to->dst = dst_clone(from->dst);
 570        to->dev = from->dev;
 571        to->mark = from->mark;
 572
 573#ifdef CONFIG_NET_SCHED
 574        to->tc_index = from->tc_index;
 575#endif
 576        nf_copy(to, from);
 577#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
 578    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
 579        to->nf_trace = from->nf_trace;
 580#endif
 581        skb_copy_secmark(to, from);
 582}
 583
 584int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 585{
 586        u16 offset = sizeof(struct ipv6hdr);
 587        struct ipv6_opt_hdr *exthdr =
 588                                (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
 589        unsigned int packet_len = skb->tail - skb->network_header;
 590        int found_rhdr = 0;
 591        *nexthdr = &ipv6_hdr(skb)->nexthdr;
 592
 593        while (offset + 1 <= packet_len) {
 594
 595                switch (**nexthdr) {
 596
 597                case NEXTHDR_HOP:
 598                        break;
 599                case NEXTHDR_ROUTING:
 600                        found_rhdr = 1;
 601                        break;
 602                case NEXTHDR_DEST:
 603#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 604                        if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
 605                                break;
 606#endif
 607                        if (found_rhdr)
 608                                return offset;
 609                        break;
 610                default :
 611                        return offset;
 612                }
 613
 614                offset += ipv6_optlen(exthdr);
 615                *nexthdr = &exthdr->nexthdr;
 616                exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
 617                                                 offset);
 618        }
 619
 620        return offset;
 621}
 622
 623static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 624{
 625        struct sk_buff *frag;
 626        struct rt6_info *rt = (struct rt6_info*)skb->dst;
 627        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 628        struct ipv6hdr *tmp_hdr;
 629        struct frag_hdr *fh;
 630        unsigned int mtu, hlen, left, len;
 631        __be32 frag_id = 0;
 632        int ptr, offset = 0, err=0;
 633        u8 *prevhdr, nexthdr = 0;
 634        struct net *net = dev_net(skb->dst->dev);
 635
 636        hlen = ip6_find_1stfragopt(skb, &prevhdr);
 637        nexthdr = *prevhdr;
 638
 639        mtu = ip6_skb_dst_mtu(skb);
 640
 641        /* We must not fragment if the socket is set to force MTU discovery
 642         * or if the skb it not generated by a local socket.  (This last
 643         * check should be redundant, but it's free.)
 644         */
 645        if (!skb->local_df) {
 646                skb->dev = skb->dst->dev;
 647                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
 648                IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 649                              IPSTATS_MIB_FRAGFAILS);
 650                kfree_skb(skb);
 651                return -EMSGSIZE;
 652        }
 653
 654        if (np && np->frag_size < mtu) {
 655                if (np->frag_size)
 656                        mtu = np->frag_size;
 657        }
 658        mtu -= hlen + sizeof(struct frag_hdr);
 659
 660        if (skb_shinfo(skb)->frag_list) {
 661                int first_len = skb_pagelen(skb);
 662                int truesizes = 0;
 663
 664                if (first_len - hlen > mtu ||
 665                    ((first_len - hlen) & 7) ||
 666                    skb_cloned(skb))
 667                        goto slow_path;
 668
 669                for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
 670                        /* Correct geometry. */
 671                        if (frag->len > mtu ||
 672                            ((frag->len & 7) && frag->next) ||
 673                            skb_headroom(frag) < hlen)
 674                            goto slow_path;
 675
 676                        /* Partially cloned skb? */
 677                        if (skb_shared(frag))
 678                                goto slow_path;
 679
 680                        BUG_ON(frag->sk);
 681                        if (skb->sk) {
 682                                sock_hold(skb->sk);
 683                                frag->sk = skb->sk;
 684                                frag->destructor = sock_wfree;
 685                                truesizes += frag->truesize;
 686                        }
 687                }
 688
 689                err = 0;
 690                offset = 0;
 691                frag = skb_shinfo(skb)->frag_list;
 692                skb_shinfo(skb)->frag_list = NULL;
 693                /* BUILD HEADER */
 694
 695                *prevhdr = NEXTHDR_FRAGMENT;
 696                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 697                if (!tmp_hdr) {
 698                        IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 699                                      IPSTATS_MIB_FRAGFAILS);
 700                        return -ENOMEM;
 701                }
 702
 703                __skb_pull(skb, hlen);
 704                fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
 705                __skb_push(skb, hlen);
 706                skb_reset_network_header(skb);
 707                memcpy(skb_network_header(skb), tmp_hdr, hlen);
 708
 709                ipv6_select_ident(skb, fh);
 710                fh->nexthdr = nexthdr;
 711                fh->reserved = 0;
 712                fh->frag_off = htons(IP6_MF);
 713                frag_id = fh->identification;
 714
 715                first_len = skb_pagelen(skb);
 716                skb->data_len = first_len - skb_headlen(skb);
 717                skb->truesize -= truesizes;
 718                skb->len = first_len;
 719                ipv6_hdr(skb)->payload_len = htons(first_len -
 720                                                   sizeof(struct ipv6hdr));
 721
 722                dst_hold(&rt->u.dst);
 723
 724                for (;;) {
 725                        /* Prepare header of the next frame,
 726                         * before previous one went down. */
 727                        if (frag) {
 728                                frag->ip_summed = CHECKSUM_NONE;
 729                                skb_reset_transport_header(frag);
 730                                fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
 731                                __skb_push(frag, hlen);
 732                                skb_reset_network_header(frag);
 733                                memcpy(skb_network_header(frag), tmp_hdr,
 734                                       hlen);
 735                                offset += skb->len - hlen - sizeof(struct frag_hdr);
 736                                fh->nexthdr = nexthdr;
 737                                fh->reserved = 0;
 738                                fh->frag_off = htons(offset);
 739                                if (frag->next != NULL)
 740                                        fh->frag_off |= htons(IP6_MF);
 741                                fh->identification = frag_id;
 742                                ipv6_hdr(frag)->payload_len =
 743                                                htons(frag->len -
 744                                                      sizeof(struct ipv6hdr));
 745                                ip6_copy_metadata(frag, skb);
 746                        }
 747
 748                        err = output(skb);
 749                        if(!err)
 750                                IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
 751                                              IPSTATS_MIB_FRAGCREATES);
 752
 753                        if (err || !frag)
 754                                break;
 755
 756                        skb = frag;
 757                        frag = skb->next;
 758                        skb->next = NULL;
 759                }
 760
 761                kfree(tmp_hdr);
 762
 763                if (err == 0) {
 764                        IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
 765                                      IPSTATS_MIB_FRAGOKS);
 766                        dst_release(&rt->u.dst);
 767                        return 0;
 768                }
 769
 770                while (frag) {
 771                        skb = frag->next;
 772                        kfree_skb(frag);
 773                        frag = skb;
 774                }
 775
 776                IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
 777                              IPSTATS_MIB_FRAGFAILS);
 778                dst_release(&rt->u.dst);
 779                return err;
 780        }
 781
 782slow_path:
 783        left = skb->len - hlen;         /* Space per frame */
 784        ptr = hlen;                     /* Where to start from */
 785
 786        /*
 787         *      Fragment the datagram.
 788         */
 789
 790        *prevhdr = NEXTHDR_FRAGMENT;
 791
 792        /*
 793         *      Keep copying data until we run out.
 794         */
 795        while(left > 0) {
 796                len = left;
 797                /* IF: it doesn't fit, use 'mtu' - the data space left */
 798                if (len > mtu)
 799                        len = mtu;
 800                /* IF: we are not sending upto and including the packet end
 801                   then align the next start on an eight byte boundary */
 802                if (len < left) {
 803                        len &= ~7;
 804                }
 805                /*
 806                 *      Allocate buffer.
 807                 */
 808
 809                if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
 810                        NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
 811                        IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 812                                      IPSTATS_MIB_FRAGFAILS);
 813                        err = -ENOMEM;
 814                        goto fail;
 815                }
 816
 817                /*
 818                 *      Set up data on packet
 819                 */
 820
 821                ip6_copy_metadata(frag, skb);
 822                skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
 823                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
 824                skb_reset_network_header(frag);
 825                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
 826                frag->transport_header = (frag->network_header + hlen +
 827                                          sizeof(struct frag_hdr));
 828
 829                /*
 830                 *      Charge the memory for the fragment to any owner
 831                 *      it might possess
 832                 */
 833                if (skb->sk)
 834                        skb_set_owner_w(frag, skb->sk);
 835
 836                /*
 837                 *      Copy the packet header into the new buffer.
 838                 */
 839                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
 840
 841                /*
 842                 *      Build fragment header.
 843                 */
 844                fh->nexthdr = nexthdr;
 845                fh->reserved = 0;
 846                if (!frag_id) {
 847                        ipv6_select_ident(skb, fh);
 848                        frag_id = fh->identification;
 849                } else
 850                        fh->identification = frag_id;
 851
 852                /*
 853                 *      Copy a block of the IP datagram.
 854                 */
 855                if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
 856                        BUG();
 857                left -= len;
 858
 859                fh->frag_off = htons(offset);
 860                if (left > 0)
 861                        fh->frag_off |= htons(IP6_MF);
 862                ipv6_hdr(frag)->payload_len = htons(frag->len -
 863                                                    sizeof(struct ipv6hdr));
 864
 865                ptr += len;
 866                offset += len;
 867
 868                /*
 869                 *      Put this fragment into the sending queue.
 870                 */
 871                err = output(frag);
 872                if (err)
 873                        goto fail;
 874
 875                IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 876                              IPSTATS_MIB_FRAGCREATES);
 877        }
 878        IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 879                      IPSTATS_MIB_FRAGOKS);
 880        kfree_skb(skb);
 881        return err;
 882
 883fail:
 884        IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 885                      IPSTATS_MIB_FRAGFAILS);
 886        kfree_skb(skb);
 887        return err;
 888}
 889
 890static inline int ip6_rt_check(struct rt6key *rt_key,
 891                               struct in6_addr *fl_addr,
 892                               struct in6_addr *addr_cache)
 893{
 894        return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
 895                (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
 896}
 897
 898static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 899                                          struct dst_entry *dst,
 900                                          struct flowi *fl)
 901{
 902        struct ipv6_pinfo *np = inet6_sk(sk);
 903        struct rt6_info *rt = (struct rt6_info *)dst;
 904
 905        if (!dst)
 906                goto out;
 907
 908        /* Yes, checking route validity in not connected
 909         * case is not very simple. Take into account,
 910         * that we do not support routing by source, TOS,
 911         * and MSG_DONTROUTE            --ANK (980726)
 912         *
 913         * 1. ip6_rt_check(): If route was host route,
 914         *    check that cached destination is current.
 915         *    If it is network route, we still may
 916         *    check its validity using saved pointer
 917         *    to the last used address: daddr_cache.
 918         *    We do not want to save whole address now,
 919         *    (because main consumer of this service
 920         *    is tcp, which has not this problem),
 921         *    so that the last trick works only on connected
 922         *    sockets.
 923         * 2. oif also should be the same.
 924         */
 925        if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
 926#ifdef CONFIG_IPV6_SUBTREES
 927            ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
 928#endif
 929            (fl->oif && fl->oif != dst->dev->ifindex)) {
 930                dst_release(dst);
 931                dst = NULL;
 932        }
 933
 934out:
 935        return dst;
 936}
 937
 938static int ip6_dst_lookup_tail(struct sock *sk,
 939                               struct dst_entry **dst, struct flowi *fl)
 940{
 941        int err;
 942        struct net *net = sock_net(sk);
 943
 944        if (*dst == NULL)
 945                *dst = ip6_route_output(net, sk, fl);
 946
 947        if ((err = (*dst)->error))
 948                goto out_err_release;
 949
 950        if (ipv6_addr_any(&fl->fl6_src)) {
 951                err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
 952                                         &fl->fl6_dst,
 953                                         sk ? inet6_sk(sk)->srcprefs : 0,
 954                                         &fl->fl6_src);
 955                if (err)
 956                        goto out_err_release;
 957        }
 958
 959#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 960        /*
 961         * Here if the dst entry we've looked up
 962         * has a neighbour entry that is in the INCOMPLETE
 963         * state and the src address from the flow is
 964         * marked as OPTIMISTIC, we release the found
 965         * dst entry and replace it instead with the
 966         * dst entry of the nexthop router
 967         */
 968        if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
 969                struct inet6_ifaddr *ifp;
 970                struct flowi fl_gw;
 971                int redirect;
 972
 973                ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
 974                                      (*dst)->dev, 1);
 975
 976                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
 977                if (ifp)
 978                        in6_ifa_put(ifp);
 979
 980                if (redirect) {
 981                        /*
 982                         * We need to get the dst entry for the
 983                         * default router instead
 984                         */
 985                        dst_release(*dst);
 986                        memcpy(&fl_gw, fl, sizeof(struct flowi));
 987                        memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
 988                        *dst = ip6_route_output(net, sk, &fl_gw);
 989                        if ((err = (*dst)->error))
 990                                goto out_err_release;
 991                }
 992        }
 993#endif
 994
 995        return 0;
 996
 997out_err_release:
 998        if (err == -ENETUNREACH)
 999                IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1000        dst_release(*dst);
1001        *dst = NULL;
1002        return err;
1003}
1004
1005/**
1006 *      ip6_dst_lookup - perform route lookup on flow
1007 *      @sk: socket which provides route info
1008 *      @dst: pointer to dst_entry * for result
1009 *      @fl: flow to lookup
1010 *
1011 *      This function performs a route lookup on the given flow.
1012 *
1013 *      It returns zero on success, or a standard errno code on error.
1014 */
1015int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1016{
1017        *dst = NULL;
1018        return ip6_dst_lookup_tail(sk, dst, fl);
1019}
1020EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1021
1022/**
1023 *      ip6_sk_dst_lookup - perform socket cached route lookup on flow
1024 *      @sk: socket which provides the dst cache and route info
1025 *      @dst: pointer to dst_entry * for result
1026 *      @fl: flow to lookup
1027 *
1028 *      This function performs a route lookup on the given flow with the
1029 *      possibility of using the cached route in the socket if it is valid.
1030 *      It will take the socket dst lock when operating on the dst cache.
1031 *      As a result, this function can only be used in process context.
1032 *
1033 *      It returns zero on success, or a standard errno code on error.
1034 */
1035int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1036{
1037        *dst = NULL;
1038        if (sk) {
1039                *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1040                *dst = ip6_sk_dst_check(sk, *dst, fl);
1041        }
1042
1043        return ip6_dst_lookup_tail(sk, dst, fl);
1044}
1045EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1046
1047static inline int ip6_ufo_append_data(struct sock *sk,
1048                        int getfrag(void *from, char *to, int offset, int len,
1049                        int odd, struct sk_buff *skb),
1050                        void *from, int length, int hh_len, int fragheaderlen,
1051                        int transhdrlen, int mtu,unsigned int flags)
1052
1053{
1054        struct sk_buff *skb;
1055        int err;
1056
1057        /* There is support for UDP large send offload by network
1058         * device, so create one single skb packet containing complete
1059         * udp datagram
1060         */
1061        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1062                skb = sock_alloc_send_skb(sk,
1063                        hh_len + fragheaderlen + transhdrlen + 20,
1064                        (flags & MSG_DONTWAIT), &err);
1065                if (skb == NULL)
1066                        return -ENOMEM;
1067
1068                /* reserve space for Hardware header */
1069                skb_reserve(skb, hh_len);
1070
1071                /* create space for UDP/IP header */
1072                skb_put(skb,fragheaderlen + transhdrlen);
1073
1074                /* initialize network header pointer */
1075                skb_reset_network_header(skb);
1076
1077                /* initialize protocol header pointer */
1078                skb->transport_header = skb->network_header + fragheaderlen;
1079
1080                skb->ip_summed = CHECKSUM_PARTIAL;
1081                skb->csum = 0;
1082                sk->sk_sndmsg_off = 0;
1083        }
1084
1085        err = skb_append_datato_frags(sk,skb, getfrag, from,
1086                                      (length - transhdrlen));
1087        if (!err) {
1088                struct frag_hdr fhdr;
1089
1090                /* specify the length of each IP datagram fragment*/
1091                skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
1092                                            sizeof(struct frag_hdr);
1093                skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1094                ipv6_select_ident(skb, &fhdr);
1095                skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1096                __skb_queue_tail(&sk->sk_write_queue, skb);
1097
1098                return 0;
1099        }
1100        /* There is not enough support do UPD LSO,
1101         * so follow normal path
1102         */
1103        kfree_skb(skb);
1104
1105        return err;
1106}
1107
1108static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1109                                               gfp_t gfp)
1110{
1111        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1112}
1113
1114static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1115                                                gfp_t gfp)
1116{
1117        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1118}
1119
1120int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1121        int offset, int len, int odd, struct sk_buff *skb),
1122        void *from, int length, int transhdrlen,
1123        int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1124        struct rt6_info *rt, unsigned int flags)
1125{
1126        struct inet_sock *inet = inet_sk(sk);
1127        struct ipv6_pinfo *np = inet6_sk(sk);
1128        struct sk_buff *skb;
1129        unsigned int maxfraglen, fragheaderlen;
1130        int exthdrlen;
1131        int hh_len;
1132        int mtu;
1133        int copy;
1134        int err;
1135        int offset = 0;
1136        int csummode = CHECKSUM_NONE;
1137
1138        if (flags&MSG_PROBE)
1139                return 0;
1140        if (skb_queue_empty(&sk->sk_write_queue)) {
1141                /*
1142                 * setup for corking
1143                 */
1144                if (opt) {
1145                        if (WARN_ON(np->cork.opt))
1146                                return -EINVAL;
1147
1148                        np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1149                        if (unlikely(np->cork.opt == NULL))
1150                                return -ENOBUFS;
1151
1152                        np->cork.opt->tot_len = opt->tot_len;
1153                        np->cork.opt->opt_flen = opt->opt_flen;
1154                        np->cork.opt->opt_nflen = opt->opt_nflen;
1155
1156                        np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1157                                                            sk->sk_allocation);
1158                        if (opt->dst0opt && !np->cork.opt->dst0opt)
1159                                return -ENOBUFS;
1160
1161                        np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1162                                                            sk->sk_allocation);
1163                        if (opt->dst1opt && !np->cork.opt->dst1opt)
1164                                return -ENOBUFS;
1165
1166                        np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1167                                                           sk->sk_allocation);
1168                        if (opt->hopopt && !np->cork.opt->hopopt)
1169                                return -ENOBUFS;
1170
1171                        np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1172                                                            sk->sk_allocation);
1173                        if (opt->srcrt && !np->cork.opt->srcrt)
1174                                return -ENOBUFS;
1175
1176                        /* need source address above miyazawa*/
1177                }
1178                dst_hold(&rt->u.dst);
1179                inet->cork.dst = &rt->u.dst;
1180                inet->cork.fl = *fl;
1181                np->cork.hop_limit = hlimit;
1182                np->cork.tclass = tclass;
1183                mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1184                      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1185                if (np->frag_size < mtu) {
1186                        if (np->frag_size)
1187                                mtu = np->frag_size;
1188                }
1189                inet->cork.fragsize = mtu;
1190                if (dst_allfrag(rt->u.dst.path))
1191                        inet->cork.flags |= IPCORK_ALLFRAG;
1192                inet->cork.length = 0;
1193                sk->sk_sndmsg_page = NULL;
1194                sk->sk_sndmsg_off = 0;
1195                exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1196                            rt->rt6i_nfheader_len;
1197                length += exthdrlen;
1198                transhdrlen += exthdrlen;
1199        } else {
1200                rt = (struct rt6_info *)inet->cork.dst;
1201                fl = &inet->cork.fl;
1202                opt = np->cork.opt;
1203                transhdrlen = 0;
1204                exthdrlen = 0;
1205                mtu = inet->cork.fragsize;
1206        }
1207
1208        hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1209
1210        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1211                        (opt ? opt->opt_nflen : 0);
1212        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1213
1214        if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1215                if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1216                        ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1217                        return -EMSGSIZE;
1218                }
1219        }
1220
1221        /*
1222         * Let's try using as much space as possible.
1223         * Use MTU if total length of the message fits into the MTU.
1224         * Otherwise, we need to reserve fragment header and
1225         * fragment alignment (= 8-15 octects, in total).
1226         *
1227         * Note that we may need to "move" the data from the tail of
1228         * of the buffer to the new fragment when we split
1229         * the message.
1230         *
1231         * FIXME: It may be fragmented into multiple chunks
1232         *        at once if non-fragmentable extension headers
1233         *        are too large.
1234         * --yoshfuji
1235         */
1236
1237        inet->cork.length += length;
1238        if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1239            (rt->u.dst.dev->features & NETIF_F_UFO)) {
1240
1241                err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1242                                          fragheaderlen, transhdrlen, mtu,
1243                                          flags);
1244                if (err)
1245                        goto error;
1246                return 0;
1247        }
1248
1249        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1250                goto alloc_new_skb;
1251
1252        while (length > 0) {
1253                /* Check if the remaining data fits into current packet. */
1254                copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1255                if (copy < length)
1256                        copy = maxfraglen - skb->len;
1257
1258                if (copy <= 0) {
1259                        char *data;
1260                        unsigned int datalen;
1261                        unsigned int fraglen;
1262                        unsigned int fraggap;
1263                        unsigned int alloclen;
1264                        struct sk_buff *skb_prev;
1265alloc_new_skb:
1266                        skb_prev = skb;
1267
1268                        /* There's no room in the current skb */
1269                        if (skb_prev)
1270                                fraggap = skb_prev->len - maxfraglen;
1271                        else
1272                                fraggap = 0;
1273
1274                        /*
1275                         * If remaining data exceeds the mtu,
1276                         * we know we need more fragment(s).
1277                         */
1278                        datalen = length + fraggap;
1279                        if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1280                                datalen = maxfraglen - fragheaderlen;
1281
1282                        fraglen = datalen + fragheaderlen;
1283                        if ((flags & MSG_MORE) &&
1284                            !(rt->u.dst.dev->features&NETIF_F_SG))
1285                                alloclen = mtu;
1286                        else
1287                                alloclen = datalen + fragheaderlen;
1288
1289                        /*
1290                         * The last fragment gets additional space at tail.
1291                         * Note: we overallocate on fragments with MSG_MODE
1292                         * because we have no idea if we're the last one.
1293                         */
1294                        if (datalen == length + fraggap)
1295                                alloclen += rt->u.dst.trailer_len;
1296
1297                        /*
1298                         * We just reserve space for fragment header.
1299                         * Note: this may be overallocation if the message
1300                         * (without MSG_MORE) fits into the MTU.
1301                         */
1302                        alloclen += sizeof(struct frag_hdr);
1303
1304                        if (transhdrlen) {
1305                                skb = sock_alloc_send_skb(sk,
1306                                                alloclen + hh_len,
1307                                                (flags & MSG_DONTWAIT), &err);
1308                        } else {
1309                                skb = NULL;
1310                                if (atomic_read(&sk->sk_wmem_alloc) <=
1311                                    2 * sk->sk_sndbuf)
1312                                        skb = sock_wmalloc(sk,
1313                                                           alloclen + hh_len, 1,
1314                                                           sk->sk_allocation);
1315                                if (unlikely(skb == NULL))
1316                                        err = -ENOBUFS;
1317                        }
1318                        if (skb == NULL)
1319                                goto error;
1320                        /*
1321                         *      Fill in the control structures
1322                         */
1323                        skb->ip_summed = csummode;
1324                        skb->csum = 0;
1325                        /* reserve for fragmentation */
1326                        skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1327
1328                        /*
1329                         *      Find where to start putting bytes
1330                         */
1331                        data = skb_put(skb, fraglen);
1332                        skb_set_network_header(skb, exthdrlen);
1333                        data += fragheaderlen;
1334                        skb->transport_header = (skb->network_header +
1335                                                 fragheaderlen);
1336                        if (fraggap) {
1337                                skb->csum = skb_copy_and_csum_bits(
1338                                        skb_prev, maxfraglen,
1339                                        data + transhdrlen, fraggap, 0);
1340                                skb_prev->csum = csum_sub(skb_prev->csum,
1341                                                          skb->csum);
1342                                data += fraggap;
1343                                pskb_trim_unique(skb_prev, maxfraglen);
1344                        }
1345                        copy = datalen - transhdrlen - fraggap;
1346                        if (copy < 0) {
1347                                err = -EINVAL;
1348                                kfree_skb(skb);
1349                                goto error;
1350                        } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1351                                err = -EFAULT;
1352                                kfree_skb(skb);
1353                                goto error;
1354                        }
1355
1356                        offset += copy;
1357                        length -= datalen - fraggap;
1358                        transhdrlen = 0;
1359                        exthdrlen = 0;
1360                        csummode = CHECKSUM_NONE;
1361
1362                        /*
1363                         * Put the packet on the pending queue
1364                         */
1365                        __skb_queue_tail(&sk->sk_write_queue, skb);
1366                        continue;
1367                }
1368
1369                if (copy > length)
1370                        copy = length;
1371
1372                if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1373                        unsigned int off;
1374
1375                        off = skb->len;
1376                        if (getfrag(from, skb_put(skb, copy),
1377                                                offset, copy, off, skb) < 0) {
1378                                __skb_trim(skb, off);
1379                                err = -EFAULT;
1380                                goto error;
1381                        }
1382                } else {
1383                        int i = skb_shinfo(skb)->nr_frags;
1384                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1385                        struct page *page = sk->sk_sndmsg_page;
1386                        int off = sk->sk_sndmsg_off;
1387                        unsigned int left;
1388
1389                        if (page && (left = PAGE_SIZE - off) > 0) {
1390                                if (copy >= left)
1391                                        copy = left;
1392                                if (page != frag->page) {
1393                                        if (i == MAX_SKB_FRAGS) {
1394                                                err = -EMSGSIZE;
1395                                                goto error;
1396                                        }
1397                                        get_page(page);
1398                                        skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1399                                        frag = &skb_shinfo(skb)->frags[i];
1400                                }
1401                        } else if(i < MAX_SKB_FRAGS) {
1402                                if (copy > PAGE_SIZE)
1403                                        copy = PAGE_SIZE;
1404                                page = alloc_pages(sk->sk_allocation, 0);
1405                                if (page == NULL) {
1406                                        err = -ENOMEM;
1407                                        goto error;
1408                                }
1409                                sk->sk_sndmsg_page = page;
1410                                sk->sk_sndmsg_off = 0;
1411
1412                                skb_fill_page_desc(skb, i, page, 0, 0);
1413                                frag = &skb_shinfo(skb)->frags[i];
1414                        } else {
1415                                err = -EMSGSIZE;
1416                                goto error;
1417                        }
1418                        if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1419                                err = -EFAULT;
1420                                goto error;
1421                        }
1422                        sk->sk_sndmsg_off += copy;
1423                        frag->size += copy;
1424                        skb->len += copy;
1425                        skb->data_len += copy;
1426                        skb->truesize += copy;
1427                        atomic_add(copy, &sk->sk_wmem_alloc);
1428                }
1429                offset += copy;
1430                length -= copy;
1431        }
1432        return 0;
1433error:
1434        inet->cork.length -= length;
1435        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1436        return err;
1437}
1438
1439static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1440{
1441        if (np->cork.opt) {
1442                kfree(np->cork.opt->dst0opt);
1443                kfree(np->cork.opt->dst1opt);
1444                kfree(np->cork.opt->hopopt);
1445                kfree(np->cork.opt->srcrt);
1446                kfree(np->cork.opt);
1447                np->cork.opt = NULL;
1448        }
1449
1450        if (inet->cork.dst) {
1451                dst_release(inet->cork.dst);
1452                inet->cork.dst = NULL;
1453                inet->cork.flags &= ~IPCORK_ALLFRAG;
1454        }
1455        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1456}
1457
1458int ip6_push_pending_frames(struct sock *sk)
1459{
1460        struct sk_buff *skb, *tmp_skb;
1461        struct sk_buff **tail_skb;
1462        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1463        struct inet_sock *inet = inet_sk(sk);
1464        struct ipv6_pinfo *np = inet6_sk(sk);
1465        struct net *net = sock_net(sk);
1466        struct ipv6hdr *hdr;
1467        struct ipv6_txoptions *opt = np->cork.opt;
1468        struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1469        struct flowi *fl = &inet->cork.fl;
1470        unsigned char proto = fl->proto;
1471        int err = 0;
1472
1473        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1474                goto out;
1475        tail_skb = &(skb_shinfo(skb)->frag_list);
1476
1477        /* move skb->data to ip header from ext header */
1478        if (skb->data < skb_network_header(skb))
1479                __skb_pull(skb, skb_network_offset(skb));
1480        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1481                __skb_pull(tmp_skb, skb_network_header_len(skb));
1482                *tail_skb = tmp_skb;
1483                tail_skb = &(tmp_skb->next);
1484                skb->len += tmp_skb->len;
1485                skb->data_len += tmp_skb->len;
1486                skb->truesize += tmp_skb->truesize;
1487                __sock_put(tmp_skb->sk);
1488                tmp_skb->destructor = NULL;
1489                tmp_skb->sk = NULL;
1490        }
1491
1492        /* Allow local fragmentation. */
1493        if (np->pmtudisc < IPV6_PMTUDISC_DO)
1494                skb->local_df = 1;
1495
1496        ipv6_addr_copy(final_dst, &fl->fl6_dst);
1497        __skb_pull(skb, skb_network_header_len(skb));
1498        if (opt && opt->opt_flen)
1499                ipv6_push_frag_opts(skb, opt, &proto);
1500        if (opt && opt->opt_nflen)
1501                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1502
1503        skb_push(skb, sizeof(struct ipv6hdr));
1504        skb_reset_network_header(skb);
1505        hdr = ipv6_hdr(skb);
1506
1507        *(__be32*)hdr = fl->fl6_flowlabel |
1508                     htonl(0x60000000 | ((int)np->cork.tclass << 20));
1509
1510        hdr->hop_limit = np->cork.hop_limit;
1511        hdr->nexthdr = proto;
1512        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1513        ipv6_addr_copy(&hdr->daddr, final_dst);
1514
1515        skb->priority = sk->sk_priority;
1516        skb->mark = sk->sk_mark;
1517
1518        skb->dst = dst_clone(&rt->u.dst);
1519        IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
1520        if (proto == IPPROTO_ICMPV6) {
1521                struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1522
1523                ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1524                ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1525        }
1526
1527        err = ip6_local_out(skb);
1528        if (err) {
1529                if (err > 0)
1530                        err = np->recverr ? net_xmit_errno(err) : 0;
1531                if (err)
1532                        goto error;
1533        }
1534
1535out:
1536        ip6_cork_release(inet, np);
1537        return err;
1538error:
1539        goto out;
1540}
1541
1542void ip6_flush_pending_frames(struct sock *sk)
1543{
1544        struct sk_buff *skb;
1545
1546        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1547                if (skb->dst)
1548                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb->dst),
1549                                      IPSTATS_MIB_OUTDISCARDS);
1550                kfree_skb(skb);
1551        }
1552
1553        ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1554}
1555
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.