linux/net/ipv6/ip6_output.c
/*
 *      IPv6 output functions
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on linux/net/ipv4/ip_output.c
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Changes:
 *      A.N.Kuznetsov   :       arithmetic in fragmentation.
 *                              extension headers are implemented.
 *                              route changes now work.
 *                              ip6_forward does not confuse sniffers.
 *                              etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *      Imran Patel     :       frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *                      :       add ip6_append_data and related functions
 *                              for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

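/* payload_len counts everything after the fixed 40-byte IPv6 header.
 * A payload_len of 0 is the encoding reserved for jumbograms (RFC 2675),
 * so oversized payloads (> IPV6_MAXPLEN, i.e. 65535) have the field set
 * to 0 here rather than being truncated.
 */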
int __ip6_local_out(struct sk_buff *skb)
{
        int len;

        len = skb->len - sizeof(struct ipv6hdr);
        if (len > IPV6_MAXPLEN)
                len = 0;
        ipv6_hdr(skb)->payload_len = htons(len);

        return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
                       skb_dst(skb)->dev, dst_output);
}

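/* nf_hook() returns 1 when no netfilter hook stole, queued or dropped
 * the packet; the caller is then responsible for the "okfn" step
 * itself, which is why dst_output() is only invoked here when
 * __ip6_local_out() returned 1.
 */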
int ip6_local_out(struct sk_buff *skb)
{
        int err;

        err = __ip6_local_out(skb);
        if (likely(err == 1))
                err = dst_output(skb);

        return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

static int ip6_finish_output2(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
        struct neighbour *neigh;
        struct rt6_info *rt;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
                    ((mroute6_socket(dev_net(dev), skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                           is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                                        newskb, NULL, newskb->dev,
                                        dev_loopback_xmit);

                        if (ipv6_hdr(skb)->hop_limit == 0) {
                                IP6_INC_STATS(dev_net(dev), idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
                                skb->len);
        }

        rt = (struct rt6_info *) dst;
        neigh = rt->n;
        if (neigh)
                return dst_neigh_output(dst, neigh, skb);

        IP6_INC_STATS_BH(dev_net(dst->dev),
                         ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

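/* Fragment on output only when the packet exceeds the path MTU and is
 * not GSO (GSO packets are sized later by the segmentation layer), or
 * when the route demands a Fragment header on everything (dst_allfrag,
 * set when the peer advertised an MTU below IPV6_MIN_MTU, cf. RFC 2460).
 */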
static int ip6_finish_output(struct sk_buff *skb)
{
        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)))
                return ip6_fragment(skb, ip6_finish_output2);
        else
                return ip6_finish_output2(skb);
}

int ip6_output(struct sk_buff *skb)
{
        struct net_device *dev = skb_dst(skb)->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(dev_net(dev), idev,
                              IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 *      xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
             struct ipv6_txoptions *opt, int tclass)
{
        struct net *net = sock_net(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr;
        u8  proto = fl6->flowi6_proto;
        int seg_len = skb->len;
        int hlimit = -1;
        u32 mtu;

        if (opt) {
                unsigned int head_room;

                /* First: exthdrs may take lots of space (~8K for now)
                   MAX_HEADER is not enough.
                 */
                head_room = opt->opt_nflen + opt->opt_flen;
                seg_len += head_room;
                head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

                if (skb_headroom(skb) < head_room) {
                        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                        if (skb2 == NULL) {
                                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return -ENOBUFS;
                        }
                        consume_skb(skb);
                        skb = skb2;
                        skb_set_owner_w(skb, sk);
                }
                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);
                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         *      Fill in the IPv6 header
         */
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

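        /* The first 32 bits of the IPv6 header pack version (4 bits,
         * always 6), traffic class (8 bits) and flow label (20 bits):
         * 0x60000000 sets the version, tclass << 20 places the traffic
         * class just below it, and fl6->flowlabel is already in network
         * byte order, so it is OR-ed in after the htonl().
         */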
        *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        hdr->saddr = fl6->saddr;
        hdr->daddr = *first_hop;

        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_OUT, skb->len);
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
                               dst->dev, dst_output);
        }

        net_dbg_ratelimited("IPv6: sending pkt_too_big to self\n");
        skb->dev = dst->dev;
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *      To avoid extra problems ND packets are sent through this
 *      routine. It's code duplication but I really want to avoid
 *      extra checks since ipv6_build_header is used by TCP (which
 *      is performance critical for us)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
               const struct in6_addr *saddr, const struct in6_addr *daddr,
               int proto, int len)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct ipv6hdr *hdr;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        skb_reset_network_header(skb);
        skb_put(skb, sizeof(struct ipv6hdr));
        hdr = ipv6_hdr(skb);

        *(__be32 *)hdr = htonl(0x60000000);

        hdr->payload_len = htons(len);
        hdr->nexthdr = proto;
        hdr->hop_limit = np->hop_limit;

        hdr->saddr = *saddr;
        hdr->daddr = *daddr;

        return 0;
}

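/* Deliver a copy of the packet to every raw socket that registered for
 * the matching Router Alert value (IPV6_ROUTER_ALERT socket option).
 * The last matching socket gets the original skb, earlier matches get
 * clones; returns 1 if the packet was consumed by at least one socket.
 */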
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}

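/* Decide what to do with a packet destined to a proxied address:
 * returns 1 to hand unicast neighbour-discovery messages to local
 * input, -1 to drop (a proxying router cannot forward traffic sent to
 * a link-local address), and 0 to let normal forwarding proceed.
 */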
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        __be16 frag_off;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
                if (offset < 0)
                        return 0;
        } else
                offset = sizeof(struct ipv6hdr);

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* For reaction involving unicast neighbor discovery
                         * message destined to the proxied address, pass it to
                         * input function.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
        return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        u32 mtu;

        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        if (skb->pkt_type != PACKET_HOST)
                goto drop;

        skb_forward_csum(skb);

        /*
         *      We DO NOT do any processing on RA packets,
         *      pushing them to user level AS IS without any
         *      WARRANTY that the application will be able to
         *      interpret them. The reason is that we cannot
         *      do anything clever here.
         *
         *      We are not an end node, so if the packet contains
         *      AH/ESP we cannot do anything with it.
         *      Defragmentation would also be a mistake; RA packets
         *      cannot be fragmented, because there is no warranty
         *      that different fragments will go along one path. --ANK
         */
        if (opt->ra) {
                u8 *ptr = skb_network_header(skb) + opt->ra;
                if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
                        return 0;
        }

        /*
         *      check and decrement hop limit
         */
        if (hdr->hop_limit <= 1) {
                /* Force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0)
                        return ip6_input(skb);
                else if (proxied < 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(dst),
                                      IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* IPv6 specs say nothing about it, but it is clear that we cannot
           send redirects to source routed frames.
           We don't send redirects to frames decapsulated from IPsec.
         */
        if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct inet_peer *peer;
                struct rt6_info *rt;

                /*
                 *      incoming and outgoing devices are the same:
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if (rt->rt6i_flags & RTF_GATEWAY)
                        target = &rt->rt6i_gateway;
                else
                        target = &hdr->daddr;

                peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);

                /* Limit redirects both by destination (here)
                   and by source (inside ndisc_send_redirect)
                 */
                if (inet_peer_xrlim_allow(peer, 1*HZ))
                        ndisc_send_redirect(skb, target);
                if (peer)
                        inet_putpeer(peer);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = dst_mtu(dst);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

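        /* Unlike IPv4, an IPv6 router never fragments in transit: if
         * the packet does not fit the outgoing link, tell the sender
         * with a Packet Too Big message and drop it.  mtu was clamped
         * to IPV6_MIN_MTU (1280) above, the minimum every IPv6 link
         * must support.
         */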
        if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
            (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
                /* Again, force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Decrementing the hop limit is delayed until after the skb COW */

        hdr->hop_limit--;

        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
        to->nf_trace = from->nf_trace;
#endif
        skb_copy_secmark(to, from);
}

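/* Walk the extension-header chain to find where a Fragment header has
 * to be inserted: after the "unfragmentable part" (Hop-by-Hop, Routing
 * and, for MIPv6, a Destination Options header carrying a Home Address
 * option; cf. RFC 2460 sec. 4.5).  Returns the byte offset at which the
 * Fragment header goes and points *nexthdr at the Next Header field
 * that must be rewritten to NEXTHDR_FRAGMENT.
 */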
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
        u16 offset = sizeof(struct ipv6hdr);
        struct ipv6_opt_hdr *exthdr =
                                (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
        unsigned int packet_len = skb->tail - skb->network_header;
        int found_rhdr = 0;
        *nexthdr = &ipv6_hdr(skb)->nexthdr;

        while (offset + 1 <= packet_len) {

                switch (**nexthdr) {

                case NEXTHDR_HOP:
                        break;
                case NEXTHDR_ROUTING:
                        found_rhdr = 1;
                        break;
                case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
                        if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
                                break;
#endif
                        if (found_rhdr)
                                return offset;
                        break;
                default:
                        return offset;
                }

                offset += ipv6_optlen(exthdr);
                *nexthdr = &exthdr->nexthdr;
                exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
                                                 offset);
        }

        return offset;
}

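/* Choose a fragment identification value: prefer a per-destination
 * counter from the inet_peer cache so that IDs advance independently
 * per peer; otherwise fall back to a global atomic counter, skipping 0
 * because ip6_fragment() uses frag_id == 0 to mean "not selected yet".
 */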
void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
{
        static atomic_t ipv6_fragmentation_id;
        int old, new;

        if (rt && !(rt->dst.flags & DST_NOPEER)) {
                struct inet_peer *peer;
                struct net *net;

                net = dev_net(rt->dst.dev);
                peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
                if (peer) {
                        fhdr->identification = htonl(inet_getid(peer, 0));
                        inet_putpeer(peer);
                        return;
                }
        }
        do {
                old = atomic_read(&ipv6_fragmentation_id);
                new = old + 1;
                if (!new)
                        new = 1;
        } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
        fhdr->identification = htonl(new);
}

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
        unsigned int mtu, hlen, left, len;
        int hroom, troom;
        __be32 frag_id = 0;
        int ptr, offset = 0, err = 0;
        u8 *prevhdr, nexthdr = 0;
        struct net *net = dev_net(skb_dst(skb)->dev);

        hlen = ip6_find_1stfragopt(skb, &prevhdr);
        nexthdr = *prevhdr;

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb was not generated by a local socket.
         */
        if (unlikely(!skb->local_df && skb->len > mtu) ||
                     (IP6CB(skb)->frag_max_size &&
                      IP6CB(skb)->frag_max_size > mtu)) {
                if (skb->sk && dst_allfrag(skb_dst(skb)))
                        sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

                skb->dev = skb_dst(skb)->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        mtu -= hlen + sizeof(struct frag_hdr);

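        /* Fast path: the skb already carries a frag_list whose geometry
         * matches what fragmentation would produce (each piece fits the
         * MTU and all but the last are multiples of 8 bytes).  Then we
         * only push a Fragment header onto each list member instead of
         * copying the payload into freshly allocated skbs.
         */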
        if (skb_has_frag_list(skb)) {
                int first_len = skb_pagelen(skb);
                struct sk_buff *frag2;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < hlen)
                                goto slow_path_clean;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path_clean;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                        }
                        skb->truesize -= frag->truesize;
                }

                err = 0;
                offset = 0;
                frag = skb_shinfo(skb)->frag_list;
                skb_frag_list_init(skb);
                /* BUILD HEADER */

                *prevhdr = NEXTHDR_FRAGMENT;
                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
                if (!tmp_hdr) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        return -ENOMEM;
                }

                __skb_pull(skb, hlen);
                fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
                __skb_push(skb, hlen);
                skb_reset_network_header(skb);
                memcpy(skb_network_header(skb), tmp_hdr, hlen);

                ipv6_select_ident(fh, rt);
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->frag_off = htons(IP6_MF);
                frag_id = fh->identification;

                first_len = skb_pagelen(skb);
                skb->data_len = first_len - skb_headlen(skb);
                skb->len = first_len;
                ipv6_hdr(skb)->payload_len = htons(first_len -
                                                   sizeof(struct ipv6hdr));

                dst_hold(&rt->dst);

                for (;;) {
                        /* Prepare the header of the next frame,
                         * before the previous one goes down. */
                        if (frag) {
                                frag->ip_summed = CHECKSUM_NONE;
                                skb_reset_transport_header(frag);
                                fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
                                __skb_push(frag, hlen);
                                skb_reset_network_header(frag);
                                memcpy(skb_network_header(frag), tmp_hdr,
                                       hlen);
                                offset += skb->len - hlen - sizeof(struct frag_hdr);
                                fh->nexthdr = nexthdr;
                                fh->reserved = 0;
                                fh->frag_off = htons(offset);
                                if (frag->next != NULL)
                                        fh->frag_off |= htons(IP6_MF);
                                fh->identification = frag_id;
                                ipv6_hdr(frag)->payload_len =
                                                htons(frag->len -
                                                      sizeof(struct ipv6hdr));
                                ip6_copy_metadata(frag, skb);
                        }

                        err = output(skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !frag)
                                break;

                        skb = frag;
                        frag = skb->next;
                        skb->next = NULL;
                }

                kfree(tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                      IPSTATS_MIB_FRAGOKS);
                        dst_release(&rt->dst);
                        return 0;
                }

                while (frag) {
                        skb = frag->next;
                        kfree_skb(frag);
                        frag = skb;
                }

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                              IPSTATS_MIB_FRAGFAILS);
                dst_release(&rt->dst);
                return err;

slow_path_clean:
                skb_walk_frags(skb, frag2) {
                        if (frag2 == frag)
                                break;
                        frag2->sk = NULL;
                        frag2->destructor = NULL;
                        skb->truesize += frag2->truesize;
                }
        }

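        /* Slow path: carve the payload into freshly allocated skbs,
         * copying the unfragmentable headers plus a new Fragment header
         * in front of each piece.  Every fragment except the last must
         * carry a multiple of 8 bytes of payload, as required by the
         * fragment offset encoding.
         */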
slow_path:
        if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
            skb_checksum_help(skb))
                goto fail;

        left = skb->len - hlen;         /* Space per frame */
        ptr = hlen;                     /* Where to start from */

        /*
         *      Fragment the datagram.
         */

        *prevhdr = NEXTHDR_FRAGMENT;
        hroom = LL_RESERVED_SPACE(rt->dst.dev);
        troom = rt->dst.dev->needed_tailroom;

        /*
         *      Keep copying data until we run out.
         */
        while (left > 0) {
                len = left;
                /* IF: it doesn't fit, use 'mtu' - the data space left */
                if (len > mtu)
                        len = mtu;
                /* IF: we are not sending up to and including the packet end
                   then align the next start on an eight byte boundary */
                if (len < left) {
                        len &= ~7;
                }
                /*
                 *      Allocate buffer.
                 */

                if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
                                      hroom + troom, GFP_ATOMIC)) == NULL) {
                        NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        err = -ENOMEM;
                        goto fail;
                }

                /*
                 *      Set up data on packet
                 */

                ip6_copy_metadata(frag, skb);
                skb_reserve(frag, hroom);
                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
                skb_reset_network_header(frag);
                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
                frag->transport_header = (frag->network_header + hlen +
                                          sizeof(struct frag_hdr));

                /*
                 *      Charge the memory for the fragment to any owner
                 *      it might possess
                 */
                if (skb->sk)
                        skb_set_owner_w(frag, skb->sk);

                /*
                 *      Copy the packet header into the new buffer.
                 */
                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

                /*
                 *      Build fragment header.
                 */
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                if (!frag_id) {
                        ipv6_select_ident(fh, rt);
                        frag_id = fh->identification;
                } else
                        fh->identification = frag_id;

                /*
                 *      Copy a block of the IP datagram.
                 */
                if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
                        BUG();
                left -= len;

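                /* offset counts payload bytes but is always a multiple
                 * of 8, and the low 3 bits of frag_off are flags
                 * (IP6_MF is bit 0), so the byte offset can be stored
                 * directly without converting to 8-octet units.
                 */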
                fh->frag_off = htons(offset);
                if (left > 0)
                        fh->frag_off |= htons(IP6_MF);
                ipv6_hdr(frag)->payload_len = htons(frag->len -
                                                    sizeof(struct ipv6hdr));

                ptr += len;
                offset += len;

                /*
                 *      Put this fragment into the sending queue.
                 */
                err = output(frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        consume_skb(skb);
        return err;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}

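/* Returns nonzero when a cached route can no longer be trusted for this
 * flow: it is neither a host route to exactly this destination nor a
 * route whose last validated destination (addr_cache) still matches.
 */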
static inline int ip6_rt_check(const struct rt6key *rt_key,
                               const struct in6_addr *fl_addr,
                               const struct in6_addr *addr_cache)
{
        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
                (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          const struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt = (struct rt6_info *)dst;

        if (!dst)
                goto out;

        /* Yes, checking route validity in the not connected
         * case is not very simple. Take into account
         * that we do not support routing by source, TOS,
         * and MSG_DONTROUTE            --ANK (980726)
         *
         * 1. ip6_rt_check(): If the route was a host route,
         *    check that the cached destination is current.
         *    If it is a network route, we still may
         *    check its validity using the saved pointer
         *    to the last used address: daddr_cache.
         *    We do not want to save the whole address now,
         *    (because the main consumer of this service
         *    is tcp, which does not have this problem),
         *    so the last trick works only on connected
         *    sockets.
         * 2. oif also should be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
            (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6)
{
        struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        struct neighbour *n;
        struct rt6_info *rt;
#endif
        int err;

        if (*dst == NULL)
                *dst = ip6_route_output(net, sk, fl6);

        if ((err = (*dst)->error))
                goto out_err_release;

        if (ipv6_addr_any(&fl6->saddr)) {
                struct rt6_info *rt = (struct rt6_info *) *dst;
                err = ip6_route_get_saddr(net, rt, &fl6->daddr,
                                          sk ? inet6_sk(sk)->srcprefs : 0,
                                          &fl6->saddr);
                if (err)
                        goto out_err_release;
        }

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * Here if the dst entry we've looked up
         * has a neighbour entry that is in the INCOMPLETE
         * state and the src address from the flow is
         * marked as OPTIMISTIC, we release the found
         * dst entry and replace it instead with the
         * dst entry of the nexthop router
         */
        rt = (struct rt6_info *) *dst;
        n = rt->n;
        if (n && !(n->nud_state & NUD_VALID)) {
                struct inet6_ifaddr *ifp;
                struct flowi6 fl_gw6;
                int redirect;

                ifp = ipv6_get_ifaddr(net, &fl6->saddr,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
                        memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw6);
                        if ((err = (*dst)->error))
                                goto out_err_release;
                }
        }
#endif

        return 0;

out_err_release:
        if (err == -ENETUNREACH)
                IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        dst_release(*dst);
        *dst = NULL;
        return err;
}

/**
 *      ip6_dst_lookup - perform route lookup on flow
 *      @sk: socket which provides route info
 *      @dst: pointer to dst_entry * for result
 *      @fl6: flow to lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *      @sk: socket which provides route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *      @can_sleep: we are in a sleepable context
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns a valid dst pointer on success, or a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                      const struct in6_addr *final_dst,
                                      bool can_sleep)
{
        struct dst_entry *dst = NULL;
        int err;

        err = ip6_dst_lookup_tail(sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;
        if (can_sleep)
                fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

        return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *      @sk: socket which provides the dst cache and route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *      @can_sleep: we are in a sleepable context
 *
 *      This function performs a route lookup on the given flow with the
 *      possibility of using the cached route in the socket if it is valid.
 *      It will take the socket dst lock when operating on the dst cache.
 *      As a result, this function can only be used in process context.
 *
 *      It returns a valid dst pointer on success, or a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                         const struct in6_addr *final_dst,
                                         bool can_sleep)
{
        struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
        int err;

        dst = ip6_sk_dst_check(sk, dst, fl6);

        err = ip6_dst_lookup_tail(sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;
        if (can_sleep)
                fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

        return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

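/* UDP fragmentation offload (UFO): rather than fragmenting in software,
 * queue one oversized skb and let the device (or the software GSO
 * fallback) split it on transmit.  gso_size is the payload carried by
 * each resulting fragment and, like any non-final IPv6 fragment, must
 * be a multiple of 8 bytes.
 */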
static inline int ip6_ufo_append_data(struct sock *sk,
                        int getfrag(void *from, char *to, int offset, int len,
                        int odd, struct sk_buff *skb),
                        void *from, int length, int hh_len, int fragheaderlen,
                        int transhdrlen, int mtu, unsigned int flags,
                        struct rt6_info *rt)

{
        struct sk_buff *skb;
        int err;

        /* There is support for UDP large send offload by the network
         * device, so create one single skb packet containing the
         * complete UDP datagram.
         */
        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
                skb = sock_alloc_send_skb(sk,
                        hh_len + fragheaderlen + transhdrlen + 20,
                        (flags & MSG_DONTWAIT), &err);
                if (skb == NULL)
                        return err;

                /* reserve space for Hardware header */
                skb_reserve(skb, hh_len);

                /* create space for UDP/IP header */
                skb_put(skb, fragheaderlen + transhdrlen);

                /* initialize network header pointer */
                skb_reset_network_header(skb);

                /* initialize protocol header pointer */
                skb->transport_header = skb->network_header + fragheaderlen;

                skb->ip_summed = CHECKSUM_PARTIAL;
                skb->csum = 0;
        }

        err = skb_append_datato_frags(sk, skb, getfrag, from,
                                      (length - transhdrlen));
        if (!err) {
                struct frag_hdr fhdr;

                /* Specify the length of each IPv6 datagram fragment.
                 * It has to be a multiple of 8.
                 */
                skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
                                             sizeof(struct frag_hdr)) & ~7;
                skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
                ipv6_select_ident(&fhdr, rt);
                skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
                __skb_queue_tail(&sk->sk_write_queue, skb);

                return 0;
        }
        /* There is not enough support to do UDP LSO,
         * so follow the normal path.
         */
        kfree_skb(skb);

        return err;
}

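/* Extension headers encode their length in 8-octet units, not counting
 * the first 8 octets, so (hdrlen + 1) * 8 is the full on-the-wire size.
 */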
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static void ip6_append_data_mtu(int *mtu,
                                int *maxfraglen,
                                unsigned int fragheaderlen,
                                struct sk_buff *skb,
                                struct rt6_info *rt)
{
        if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
                if (skb == NULL) {
                        /* first fragment, reserve header_len */
                        *mtu = *mtu - rt->dst.header_len;

                } else {
                        /*
                         * this fragment is not first, the headers
                         * space is regarded as data space.
                         */
                        *mtu = dst_mtu(rt->dst.path);
                }
                *maxfraglen = ((*mtu - fragheaderlen) & ~7)
                              + fragheaderlen - sizeof(struct frag_hdr);
        }
}

int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
        int offset, int len, int odd, struct sk_buff *skb),
        void *from, int length, int transhdrlen,
        int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
        struct rt6_info *rt, unsigned int flags, int dontfrag)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct inet_cork *cork;
        struct sk_buff *skb, *skb_prev = NULL;
        unsigned int maxfraglen, fragheaderlen;
        int exthdrlen;
        int dst_exthdrlen;
        int hh_len;
        int mtu;
        int copy;
        int err;
        int offset = 0;
        __u8 tx_flags = 0;

        if (flags & MSG_PROBE)
                return 0;
        cork = &inet->cork.base;
        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * setup for corking
                 */
                if (opt) {
                        if (WARN_ON(np->cork.opt))
                                return -EINVAL;

                        np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
                        if (unlikely(np->cork.opt == NULL))
                                return -ENOBUFS;

                        np->cork.opt->tot_len = opt->tot_len;
                        np->cork.opt->opt_flen = opt->opt_flen;
                        np->cork.opt->opt_nflen = opt->opt_nflen;

                        np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
                                                            sk->sk_allocation);
                        if (opt->dst0opt && !np->cork.opt->dst0opt)
                                return -ENOBUFS;

                        np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
                                                            sk->sk_allocation);
                        if (opt->dst1opt && !np->cork.opt->dst1opt)
                                return -ENOBUFS;

                        np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
                                                           sk->sk_allocation);
                        if (opt->hopopt && !np->cork.opt->hopopt)
                                return -ENOBUFS;

                        np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
                                                            sk->sk_allocation);
                        if (opt->srcrt && !np->cork.opt->srcrt)
                                return -ENOBUFS;

                        /* need source address above miyazawa */
                }
                dst_hold(&rt->dst);
                cork->dst = &rt->dst;
                inet->cork.fl.u.ip6 = *fl6;
                np->cork.hop_limit = hlimit;
                np->cork.tclass = tclass;
                if (rt->dst.flags & DST_XFRM_TUNNEL)
                        mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
                              rt->dst.dev->mtu : dst_mtu(&rt->dst);
                else
                        mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
                              rt->dst.dev->mtu : dst_mtu(rt->dst.path);
                if (np->frag_size < mtu) {
                        if (np->frag_size)
                                mtu = np->frag_size;
                }
                cork->fragsize = mtu;
                if (dst_allfrag(rt->dst.path))
                        cork->flags |= IPCORK_ALLFRAG;
                cork->length = 0;
                exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
                length += exthdrlen;
                transhdrlen += exthdrlen;
                dst_exthdrlen = rt->dst.header_len;
        } else {
                rt = (struct rt6_info *)cork->dst;
                fl6 = &inet->cork.fl.u.ip6;
                opt = np->cork.opt;
                transhdrlen = 0;
                exthdrlen = 0;
                dst_exthdrlen = 0;
                mtu = cork->fragsize;
        }

        hh_len = LL_RESERVED_SPACE(rt->dst.dev);

        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
                        (opt ? opt->opt_nflen : 0);
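        /* Non-final fragments must carry a multiple of 8 bytes of
         * payload, so round the fragmentable space down to 8 bytes and
         * reserve room for the Fragment header itself.  E.g. with
         * mtu = 1500 and fragheaderlen = 40:
         * ((1500 - 40) & ~7) + 40 - 8 = 1488 bytes per full fragment.
         */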
1300        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1301
1302        if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1303                if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1304                        ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
1305                        return -EMSGSIZE;
1306                }
1307        }
1308
1309        /* For UDP, check if TX timestamp is enabled */
1310        if (sk->sk_type == SOCK_DGRAM) {
1311                err = sock_tx_timestamp(sk, &tx_flags);
1312                if (err)
1313                        goto error;
1314        }
1315
1316        /*
1317         * Let's try using as much space as possible.
1318         * Use MTU if total length of the message fits into the MTU.
1319         * Otherwise, we need to reserve fragment header and
1320         * fragment alignment (= 8-15 octects, in total).
1321         *
1322         * Note that we may need to "move" the data from the tail of
1323         * of the buffer to the new fragment when we split
1324         * the message.
1325         *
1326         * FIXME: It may be fragmented into multiple chunks
1327         *        at once if non-fragmentable extension headers
1328         *        are too large.
1329         * --yoshfuji
1330         */
1331
1332        cork->length += length;
1333        if (length > mtu) {
1334                int proto = sk->sk_protocol;
1335                if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
1336                        ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
1337                        return -EMSGSIZE;
1338                }
1339
1340                if (proto == IPPROTO_UDP &&
1341                    (rt->dst.dev->features & NETIF_F_UFO)) {
1342
1343                        err = ip6_ufo_append_data(sk, getfrag, from, length,
1344                                                  hh_len, fragheaderlen,
1345                                                  transhdrlen, mtu, flags, rt);
1346                        if (err)
1347                                goto error;
1348                        return 0;
1349                }
1350        }
1351
1352        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1353                goto alloc_new_skb;
1354
1355        while (length > 0) {
1356                /* Check if the remaining data fits into current packet. */
1357                copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1358                if (copy < length)
1359                        copy = maxfraglen - skb->len;
1360
1361                if (copy <= 0) {
1362                        char *data;
1363                        unsigned int datalen;
1364                        unsigned int fraglen;
1365                        unsigned int fraggap;
1366                        unsigned int alloclen;
1367alloc_new_skb:
1368                        /* There's no room in the current skb */
1369                        if (skb)
1370                                fraggap = skb->len - maxfraglen;
1371                        else
1372                                fraggap = 0;
1373                        /* update mtu and maxfraglen if necessary */
1374                        if (skb == NULL || skb_prev == NULL)
1375                                ip6_append_data_mtu(&mtu, &maxfraglen,
1376                                                    fragheaderlen, skb, rt);
1377
1378                        skb_prev = skb;
1379
1380                        /*
1381                         * If remaining data exceeds the mtu,
1382                         * we know we need more fragment(s).
1383                         */
1384                        datalen = length + fraggap;
1385
1386                        if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1387                                datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1388                        if ((flags & MSG_MORE) &&
1389                            !(rt->dst.dev->features&NETIF_F_SG))
1390                                alloclen = mtu;
1391                        else
1392                                alloclen = datalen + fragheaderlen;
1393
1394                        alloclen += dst_exthdrlen;
1395
1396                        if (datalen != length + fraggap) {
1397                                /*
1398                                 * this is not the last fragment, the trailer
1399                                 * space is regarded as data space.
1400                                 */
1401                                datalen += rt->dst.trailer_len;
1402                        }
1403
1404                        alloclen += rt->dst.trailer_len;
1405                        fraglen = datalen + fragheaderlen;
1406
1407                        /*
1408                         * We just reserve space for fragment header.
1409                         * Note: this may be overallocation if the message
1410                         * (without MSG_MORE) fits into the MTU.
1411                         */
1412                        alloclen += sizeof(struct frag_hdr);
1413
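                            /*
                             * The first fragment (transhdrlen != 0) may
                             * block waiting for send-buffer space; the
                             * follow-on fragments are charged to the socket
                             * via sock_wmalloc() and capped at twice the
                             * send buffer.
                             */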
1414                        if (transhdrlen) {
1415                                skb = sock_alloc_send_skb(sk,
1416                                                alloclen + hh_len,
1417                                                (flags & MSG_DONTWAIT), &err);
1418                        } else {
1419                                skb = NULL;
1420                                if (atomic_read(&sk->sk_wmem_alloc) <=
1421                                    2 * sk->sk_sndbuf)
1422                                        skb = sock_wmalloc(sk,
1423                                                           alloclen + hh_len, 1,
1424                                                           sk->sk_allocation);
1425                                if (unlikely(skb == NULL))
1426                                        err = -ENOBUFS;
1427                                else {
1428                                        /* Only the initial fragment
1429                                         * is time stamped.
1430                                         */
1431                                        tx_flags = 0;
1432                                }
1433                        }
1434                        if (skb == NULL)
1435                                goto error;
1436                        /*
1437                         *      Fill in the control structures
1438                         */
1439                        skb->ip_summed = CHECKSUM_NONE;
1440                        skb->csum = 0;
1441                        /* reserve for fragmentation and ipsec header */
1442                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1443                                    dst_exthdrlen);
1444
1445                        if (sk->sk_type == SOCK_DGRAM)
1446                                skb_shinfo(skb)->tx_flags = tx_flags;
1447
1448                        /*
1449                         *      Find where to start putting bytes
1450                         */
1451                        data = skb_put(skb, fraglen);
1452                        skb_set_network_header(skb, exthdrlen);
1453                        data += fragheaderlen;
1454                        skb->transport_header = (skb->network_header +
1455                                                 fragheaderlen);
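                            /*
                             * Move the overhang of the previous fragment
                             * into this one, keeping both checksums
                             * consistent, then trim the previous skb back
                             * to the fragment boundary.
                             */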
1456                        if (fraggap) {
1457                                skb->csum = skb_copy_and_csum_bits(
1458                                        skb_prev, maxfraglen,
1459                                        data + transhdrlen, fraggap, 0);
1460                                skb_prev->csum = csum_sub(skb_prev->csum,
1461                                                          skb->csum);
1462                                data += fraggap;
1463                                pskb_trim_unique(skb_prev, maxfraglen);
1464                        }
1465                        copy = datalen - transhdrlen - fraggap;
1466
1467                        if (copy < 0) {
1468                                err = -EINVAL;
1469                                kfree_skb(skb);
1470                                goto error;
1471                        } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1472                                err = -EFAULT;
1473                                kfree_skb(skb);
1474                                goto error;
1475                        }
1476
1477                        offset += copy;
1478                        length -= datalen - fraggap;
1479                        transhdrlen = 0;
1480                        exthdrlen = 0;
1481                        dst_exthdrlen = 0;
1482
1483                        /*
1484                         * Put the packet on the pending queue
1485                         */
1486                        __skb_queue_tail(&sk->sk_write_queue, skb);
1487                        continue;
1488                }
1489
1490                if (copy > length)
1491                        copy = length;
1492
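                    /*
                     * The current skb still has room: either extend its
                     * linear area (device cannot do scatter-gather) or
                     * attach the bytes as page fragments obtained from
                     * sk_page_frag().
                     */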
1493                if (!(rt->dst.dev->features&NETIF_F_SG)) {
1494                        unsigned int off;
1495
1496                        off = skb->len;
1497                        if (getfrag(from, skb_put(skb, copy),
1498                                                offset, copy, off, skb) < 0) {
1499                                __skb_trim(skb, off);
1500                                err = -EFAULT;
1501                                goto error;
1502                        }
1503                } else {
1504                        int i = skb_shinfo(skb)->nr_frags;
1505                        struct page_frag *pfrag = sk_page_frag(sk);
1506
1507                        err = -ENOMEM;
1508                        if (!sk_page_frag_refill(sk, pfrag))
1509                                goto error;
1510
1511                        if (!skb_can_coalesce(skb, i, pfrag->page,
1512                                              pfrag->offset)) {
1513                                err = -EMSGSIZE;
1514                                if (i == MAX_SKB_FRAGS)
1515                                        goto error;
1516
1517                                __skb_fill_page_desc(skb, i, pfrag->page,
1518                                                     pfrag->offset, 0);
1519                                skb_shinfo(skb)->nr_frags = ++i;
1520                                get_page(pfrag->page);
1521                        }
1522                        copy = min_t(int, copy, pfrag->size - pfrag->offset);
1523                        if (getfrag(from,
1524                                    page_address(pfrag->page) + pfrag->offset,
1525                                    offset, copy, skb->len, skb) < 0)
1526                                goto error_efault;
1527
1528                        pfrag->offset += copy;
1529                        skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1530                        skb->len += copy;
1531                        skb->data_len += copy;
1532                        skb->truesize += copy;
1533                        atomic_add(copy, &sk->sk_wmem_alloc);
1534                }
1535                offset += copy;
1536                length -= copy;
1537        }
1538
1539        return 0;
1540
1541error_efault:
1542        err = -EFAULT;
1543error:
1544        cork->length -= length;
1545        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1546        return err;
1547}
1548EXPORT_SYMBOL_GPL(ip6_append_data);
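
    /*
     * Typical corked-datagram pattern (a sketch only; see real callers such
     * as udpv6_sendmsg() for the actual cork and error handling, and read
     * "corkreq" as the caller's keep-corked flag):
     *
     *      err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
     *                            sizeof(struct udphdr), hlimit, tclass,
     *                            opt, &fl6, (struct rt6_info *)dst,
     *                            msg->msg_flags, dontfrag);
     *      if (err)
     *              ip6_flush_pending_frames(sk);
     *      else if (!corkreq)
     *              err = ip6_push_pending_frames(sk);
     */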
1549
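    /*
     * Free everything stashed for the corked transmission: the duplicated
     * extension headers and the reference held on the cached route.
     */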
1550static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1551{
1552        if (np->cork.opt) {
1553                kfree(np->cork.opt->dst0opt);
1554                kfree(np->cork.opt->dst1opt);
1555                kfree(np->cork.opt->hopopt);
1556                kfree(np->cork.opt->srcrt);
1557                kfree(np->cork.opt);
1558                np->cork.opt = NULL;
1559        }
1560
1561        if (inet->cork.base.dst) {
1562                dst_release(inet->cork.base.dst);
1563                inet->cork.base.dst = NULL;
1564                inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1565        }
1566        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1567}
1568
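    /*
     * Coalesce everything queued by ip6_append_data() into one packet:
     * chain the skbs on the head skb's frag_list, prepend the extension
     * headers and the IPv6 header, then hand the result to ip6_local_out().
     */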
1569int ip6_push_pending_frames(struct sock *sk)
1570{
1571        struct sk_buff *skb, *tmp_skb;
1572        struct sk_buff **tail_skb;
1573        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1574        struct inet_sock *inet = inet_sk(sk);
1575        struct ipv6_pinfo *np = inet6_sk(sk);
1576        struct net *net = sock_net(sk);
1577        struct ipv6hdr *hdr;
1578        struct ipv6_txoptions *opt = np->cork.opt;
1579        struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
1580        struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
1581        unsigned char proto = fl6->flowi6_proto;
1582        int err = 0;
1583
1584        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1585                goto out;
1586        tail_skb = &(skb_shinfo(skb)->frag_list);
1587
1588        /* move skb->data forward to the IP header, past the ext header space */
1589        if (skb->data < skb_network_header(skb))
1590                __skb_pull(skb, skb_network_offset(skb));
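            /*
             * Chain the remaining queued skbs onto the head skb via its
             * frag_list; their memory accounting moves to the head skb,
             * so drop their destructor and socket back-pointers.
             */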
1591        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1592                __skb_pull(tmp_skb, skb_network_header_len(skb));
1593                *tail_skb = tmp_skb;
1594                tail_skb = &(tmp_skb->next);
1595                skb->len += tmp_skb->len;
1596                skb->data_len += tmp_skb->len;
1597                skb->truesize += tmp_skb->truesize;
1598                tmp_skb->destructor = NULL;
1599                tmp_skb->sk = NULL;
1600        }
1601
1602        /* Allow local fragmentation. */
1603        if (np->pmtudisc < IPV6_PMTUDISC_DO)
1604                skb->local_df = 1;
1605
1606        *final_dst = fl6->daddr;
1607        __skb_pull(skb, skb_network_header_len(skb));
1608        if (opt && opt->opt_flen)
1609                ipv6_push_frag_opts(skb, opt, &proto);
1610        if (opt && opt->opt_nflen)
1611                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1612
1613        skb_push(skb, sizeof(struct ipv6hdr));
1614        skb_reset_network_header(skb);
1615        hdr = ipv6_hdr(skb);
1616
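            /*
             * First 32 bits of the IPv6 header: version (6) in the top
             * four bits, the traffic class in the next eight, the flow
             * label in the low twenty.
             */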
1617        *(__be32*)hdr = fl6->flowlabel |
1618                     htonl(0x60000000 | ((int)np->cork.tclass << 20));
1619
1620        hdr->hop_limit = np->cork.hop_limit;
1621        hdr->nexthdr = proto;
1622        hdr->saddr = fl6->saddr;
1623        hdr->daddr = *final_dst;
1624
1625        skb->priority = sk->sk_priority;
1626        skb->mark = sk->sk_mark;
1627
1628        skb_dst_set(skb, dst_clone(&rt->dst));
1629        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1630        if (proto == IPPROTO_ICMPV6) {
1631                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1632
1633                ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1634                ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1635        }
1636
1637        err = ip6_local_out(skb);
1638        if (err) {
1639                if (err > 0)
1640                        err = net_xmit_errno(err);
1641                if (err)
1642                        goto error;
1643        }
1644
1645out:
1646        ip6_cork_release(inet, np);
1647        return err;
1648error:
1649        IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1650        goto out;
1651}
1652EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1653
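    /*
     * Error-path counterpart of ip6_push_pending_frames(): discard
     * whatever is still sitting on the write queue and release the
     * cork state.
     */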
1654void ip6_flush_pending_frames(struct sock *sk)
1655{
1656        struct sk_buff *skb;
1657
1658        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1659                if (skb_dst(skb))
1660                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1661                                      IPSTATS_MIB_OUTDISCARDS);
1662                kfree_skb(skb);
1663        }
1664
1665        ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1666}
1667EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1668