linux/net/ipv4/ip_output.c
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              The Internet Protocol (IP) output module.
 *
 * Version:     $Id: ip_output.c,v 1.100 2002/02/01 22:01:03 davem Exp $
 *
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Donald Becker, <becker@super.org>
 *              Alan Cox, <Alan.Cox@linux.org>
 *              Richard Underwood
 *              Stefan Becker, <stefanb@yello.ping.de>
 *              Jorge Cwik, <jorge@laser.satlink.net>
 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *              Hirokazu Takahashi, <taka@valinux.co.jp>
 *
 *      See ip_input.c for original log
 *
 *      Fixes:
 *              Alan Cox        :       Missing nonblock feature in ip_build_xmit.
 *              Mike Kilburn    :       htons() missing in ip_build_xmit.
 *              Bradford Johnson:       Fix faulty handling of some frames when
 *                                      no route is found.
 *              Alexander Demenshin:    Missing sk/skb free in ip_queue_xmit
 *                                      (in case a packet is not accepted by
 *                                      the output firewall rules)
 *              Mike McLagan    :       Routing by source
 *              Alexey Kuznetsov:       use new route cache
 *              Andi Kleen:             Fix broken PMTU recovery and remove
 *                                      some redundant tests.
 *              Vitaly E. Lavrov:       Transparent proxy revived after a year in a coma.
 *              Andi Kleen      :       Replace ip_reply with ip_send_reply.
 *              Andi Kleen      :       Split fast and slow ip_build_xmit paths
 *                                      for decreased register pressure on x86
 *                                      and more readability.
 *              Marc Boucher    :       When call_out_firewall returns FW_QUEUE,
 *                                      silently drop skb instead of failing with -EPERM.
 *              Detlev Wengorz  :       Copy protocol for fragments.
 *              Hirokazu Takahashi:     HW checksumming for outgoing UDP
 *                                      datagrams.
 *              Hirokazu Takahashi:     sendfile() on UDP works now.
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/highmem.h>

#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/init.h>

#include <net/snmp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/xfrm.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/icmp.h>
#include <net/checksum.h>
#include <net/inetpeer.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
#include <linux/mroute.h>
#include <linux/netlink.h>
#include <linux/tcp.h>

int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;

/* Generate a checksum for an outgoing IP datagram. */
__inline__ void ip_send_check(struct iphdr *iph)
{
        iph->check = 0;
        iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}

/* dev_loopback_xmit for use with netfilter. */
static int ip_dev_loopback_xmit(struct sk_buff *newskb)
{
        newskb->mac.raw = newskb->data;
        __skb_pull(newskb, newskb->nh.raw - newskb->data);
        newskb->pkt_type = PACKET_LOOPBACK;
        newskb->ip_summed = CHECKSUM_UNNECESSARY;
        BUG_TRAP(newskb->dst);
        netif_rx(newskb);
        return 0;
}

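/*
 * Select the TTL for an outgoing packet: the socket's unicast TTL if
 * one was set, otherwise the hop-limit metric of the route.
 */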
static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
{
        int ttl = inet->uc_ttl;

        if (ttl < 0)
                ttl = dst_metric(dst, RTAX_HOPLIMIT);
        return ttl;
}

/*
 *              Add an IP header to a skbuff and send it out.
 */
int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
                          __be32 saddr, __be32 daddr, struct ip_options *opt)
{
        struct inet_sock *inet = inet_sk(sk);
        struct rtable *rt = (struct rtable *)skb->dst;
        struct iphdr *iph;

        /* Build the IP header. */
        if (opt)
                iph = (struct iphdr *)skb_push(skb, sizeof(struct iphdr) + opt->optlen);
        else
                iph = (struct iphdr *)skb_push(skb, sizeof(struct iphdr));

        iph->version  = 4;
        iph->ihl      = 5;
        iph->tos      = inet->tos;
        if (ip_dont_fragment(sk, &rt->u.dst))
                iph->frag_off = htons(IP_DF);
        else
                iph->frag_off = 0;
        iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
        iph->daddr    = rt->rt_dst;
        iph->saddr    = rt->rt_src;
        iph->protocol = sk->sk_protocol;
        iph->tot_len  = htons(skb->len);
        ip_select_ident(iph, &rt->u.dst, sk);
        skb->nh.iph   = iph;

        if (opt && opt->optlen) {
                iph->ihl += opt->optlen>>2;
                ip_options_build(skb, opt, daddr, rt, 0);
        }
        ip_send_check(iph);

        skb->priority = sk->sk_priority;

        /* Send it out. */
        return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
                       dst_output);
}

EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);

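/*
 * Final transmit step: make sure the skb has enough headroom for the
 * link-layer header, then hand it to the neighbour layer, using the
 * cached hardware header when one is available.
 */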
static inline int ip_finish_output2(struct sk_buff *skb)
{
        struct dst_entry *dst = skb->dst;
        struct net_device *dev = dst->dev;
        int hh_len = LL_RESERVED_SPACE(dev);

        /* Be paranoid, rather than too clever. */
        if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) {
                struct sk_buff *skb2;

                skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
                if (skb2 == NULL) {
                        kfree_skb(skb);
                        return -ENOMEM;
                }
                if (skb->sk)
                        skb_set_owner_w(skb2, skb->sk);
                kfree_skb(skb);
                skb = skb2;
        }

        if (dst->hh)
                return neigh_hh_output(dst->hh, skb);
        else if (dst->neighbour)
                return dst->neighbour->output(skb);

        if (net_ratelimit())
                printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
        kfree_skb(skb);
        return -EINVAL;
}

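/*
 * Runs after the POST_ROUTING hook: fragment packets that exceed the
 * path MTU (unless GSO will segment them later) and pass the result on
 * to ip_finish_output2().
 */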
static inline int ip_finish_output(struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
        /* Policy lookup after SNAT yielded a new policy */
        if (skb->dst->xfrm != NULL) {
                IPCB(skb)->flags |= IPSKB_REROUTED;
                return dst_output(skb);
        }
#endif
        if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
                return ip_fragment(skb, ip_finish_output2);
        else
                return ip_finish_output2(skb);
}

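/*
 * Output routine for multicast (and broadcast) datagrams: loop copies
 * back to local listeners where required, then send the original out.
 */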
int ip_mc_output(struct sk_buff *skb)
{
        struct sock *sk = skb->sk;
        struct rtable *rt = (struct rtable*)skb->dst;
        struct net_device *dev = rt->u.dst.dev;

        /*
         *      If the indicated interface is up and running, send the packet.
         */
        IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);

        skb->dev = dev;
        skb->protocol = htons(ETH_P_IP);

        /*
         *      Multicasts are looped back for other local users
         */

        if (rt->rt_flags&RTCF_MULTICAST) {
                if ((!sk || inet_sk(sk)->mc_loop)
#ifdef CONFIG_IP_MROUTE
                /* Small optimization: do not loop back non-local frames
                   returned to us after forwarding; ip_mr_input will drop
                   them in any case.
                   Note that local frames are looped back to be delivered
                   to local recipients.

                   This check is duplicated in ip_mr_input at the moment.
                 */
                    && ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
#endif
                ) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
                        if (newskb)
                                NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
                                        newskb->dev,
                                        ip_dev_loopback_xmit);
                }

                /* Multicasts with ttl 0 must not go beyond the host */

                if (skb->nh.iph->ttl == 0) {
                        kfree_skb(skb);
                        return 0;
                }
        }

        if (rt->rt_flags&RTCF_BROADCAST) {
                struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
                if (newskb)
                        NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
                                newskb->dev, ip_dev_loopback_xmit);
        }

        return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
                            ip_finish_output,
                            !(IPCB(skb)->flags & IPSKB_REROUTED));
}

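/*
 * Standard output routine for locally generated unicast datagrams:
 * run the POST_ROUTING hook and finish via ip_finish_output().
 */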
int ip_output(struct sk_buff *skb)
{
        struct net_device *dev = skb->dst->dev;

        IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);

        skb->dev = dev;
        skb->protocol = htons(ETH_P_IP);

        return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
                            ip_finish_output,
                            !(IPCB(skb)->flags & IPSKB_REROUTED));
}

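/*
 * Queue a packet for transmission on a connected socket: route it if it
 * is not routed yet, build the IP header and hand the result to the
 * LOCAL_OUT hook. With ipfragok != 0 the caller permits fragmentation
 * even when the socket asked for DF.
 */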
int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
{
        struct sock *sk = skb->sk;
        struct inet_sock *inet = inet_sk(sk);
        struct ip_options *opt = inet->opt;
        struct rtable *rt;
        struct iphdr *iph;

        /* Skip all of this if the packet is already routed,
         * e.g. by something like SCTP.
         */
        rt = (struct rtable *) skb->dst;
        if (rt != NULL)
                goto packet_routed;

        /* Make sure we can route this packet. */
        rt = (struct rtable *)__sk_dst_check(sk, 0);
        if (rt == NULL) {
                __be32 daddr;

                /* Use correct destination address if we have options. */
                daddr = inet->daddr;
                if (opt && opt->srr)
                        daddr = opt->faddr;

                {
                        struct flowi fl = { .oif = sk->sk_bound_dev_if,
                                            .nl_u = { .ip4_u =
                                                      { .daddr = daddr,
                                                        .saddr = inet->saddr,
                                                        .tos = RT_CONN_FLAGS(sk) } },
                                            .proto = sk->sk_protocol,
                                            .uli_u = { .ports =
                                                       { .sport = inet->sport,
                                                         .dport = inet->dport } } };

                        /* If this fails, the transport layer's retransmit
                         * mechanism will keep trying until a route appears
                         * or the connection times out.
                         */
                        security_sk_classify_flow(sk, &fl);
                        if (ip_route_output_flow(&rt, &fl, sk, 0))
                                goto no_route;
                }
                sk_setup_caps(sk, &rt->u.dst);
        }
        skb->dst = dst_clone(&rt->u.dst);

packet_routed:
        if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
                goto no_route;

        /* OK, we know where to send it, allocate and build IP header. */
        iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
        *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
        iph->tot_len = htons(skb->len);
        if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
                iph->frag_off = htons(IP_DF);
        else
                iph->frag_off = 0;
        iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
        iph->protocol = sk->sk_protocol;
        iph->saddr    = rt->rt_src;
        iph->daddr    = rt->rt_dst;
        skb->nh.iph   = iph;
        /* The transport layer has already set skb->h.foo itself. */

        if (opt && opt->optlen) {
                iph->ihl += opt->optlen >> 2;
                ip_options_build(skb, opt, inet->daddr, rt, 0);
        }

        ip_select_ident_more(iph, &rt->u.dst, sk,
                             (skb_shinfo(skb)->gso_segs ?: 1) - 1);

        /* Add an IP checksum. */
        ip_send_check(iph);

        skb->priority = sk->sk_priority;

        return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
                       dst_output);

no_route:
        IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EHOSTUNREACH;
}


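/*
 * Copy per-packet metadata (packet type, priority, route, netfilter
 * state, ...) from the original skb to a freshly created fragment.
 */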
static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        dst_release(to->dst);
        to->dst = dst_clone(from->dst);
        to->dev = from->dev;
        to->mark = from->mark;

        /* Copy the flags to each fragment. */
        IPCB(to)->flags = IPCB(from)->flags;

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
#ifdef CONFIG_NETFILTER
        /* Connection association is same as pre-frag packet */
        nf_conntrack_put(to->nfct);
        to->nfct = from->nfct;
        nf_conntrack_get(to->nfct);
        to->nfctinfo = from->nfctinfo;
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
        to->ipvs_property = from->ipvs_property;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
        nf_bridge_put(to->nf_bridge);
        to->nf_bridge = from->nf_bridge;
        nf_bridge_get(to->nf_bridge);
#endif
#endif
        skb_copy_secmark(to, from);
}

/*
 *      This IP datagram is too large to be sent in one piece.  Break it up
 *      into smaller pieces (each consisting of an IP header plus a block of
 *      the data of the original IP datagram) that will fit into a single
 *      device frame, and queue such frames for sending.
 */

int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
{
        struct iphdr *iph;
        int raw = 0;
        int ptr;
        struct net_device *dev;
        struct sk_buff *skb2;
        unsigned int mtu, hlen, left, len, ll_rs, pad;
        int offset;
        __be16 not_last_frag;
        struct rtable *rt = (struct rtable*)skb->dst;
        int err = 0;

        dev = rt->u.dst.dev;

        /*
         *      Point into the IP datagram header.
         */

        iph = skb->nh.iph;

        if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
                IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
                          htonl(dst_mtu(&rt->u.dst)));
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        /*
         *      Setup starting values.
         */

        hlen = iph->ihl * 4;
        mtu = dst_mtu(&rt->u.dst) - hlen;       /* Size of data space */
        IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;

        /* When frag_list is given, use it. First, check its validity:
         * some transformers may create a wrong frag_list or break an
         * existing one; that is not prohibited. In such cases fall back
         * to copying.
         *
         * LATER: this step could be merged into the actual generation of
         * fragments; we could switch to copying on the first bad fragment.
         */
        if (skb_shinfo(skb)->frag_list) {
                struct sk_buff *frag;
                int first_len = skb_pagelen(skb);

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
                    skb_cloned(skb))
                        goto slow_path;

                for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < hlen)
                                goto slow_path;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                sock_hold(skb->sk);
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                                skb->truesize -= frag->truesize;
                        }
                }

                /* Everything is OK. Generate! */

                err = 0;
                offset = 0;
                frag = skb_shinfo(skb)->frag_list;
                skb_shinfo(skb)->frag_list = NULL;
                skb->data_len = first_len - skb_headlen(skb);
                skb->len = first_len;
                iph->tot_len = htons(first_len);
                iph->frag_off = htons(IP_MF);
                ip_send_check(iph);

                for (;;) {
                        /* Prepare the header of the next frame
                         * before the previous one goes out. */
                        if (frag) {
                                frag->ip_summed = CHECKSUM_NONE;
                                frag->h.raw = frag->data;
                                frag->nh.raw = __skb_push(frag, hlen);
                                memcpy(frag->nh.raw, iph, hlen);
                                iph = frag->nh.iph;
                                iph->tot_len = htons(frag->len);
                                ip_copy_metadata(frag, skb);
                                if (offset == 0)
                                        ip_options_fragment(frag);
                                offset += skb->len - hlen;
                                iph->frag_off = htons(offset>>3);
                                if (frag->next != NULL)
                                        iph->frag_off |= htons(IP_MF);
                                /* Ready, complete checksum */
                                ip_send_check(iph);
                        }

                        err = output(skb);

                        if (!err)
                                IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
                        if (err || !frag)
                                break;

                        skb = frag;
                        frag = skb->next;
                        skb->next = NULL;
                }

                if (err == 0) {
                        IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
                        return 0;
                }

                while (frag) {
                        skb = frag->next;
                        kfree_skb(frag);
                        frag = skb;
                }
                IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
                return err;
        }

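/* Slow path: allocate a fresh skb for every fragment and copy the
 * data into it piece by piece.
 */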
slow_path:
        left = skb->len - hlen;         /* Space per frame */
        ptr = raw + hlen;               /* Where to start from */

        /* for bridged IP traffic encapsulated inside e.g. a vlan header,
         * we need to make room for the encapsulating header
         */
        pad = nf_bridge_pad(skb);
        ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
        mtu -= pad;

        /*
         *      Fragment the datagram.
         */

        offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
        not_last_frag = iph->frag_off & htons(IP_MF);

        /*
         *      Keep copying data until we run out.
         */

        while (left > 0) {
                len = left;
                /* IF: it doesn't fit, use 'mtu' - the data space left */
                if (len > mtu)
                        len = mtu;
                /* IF: we are not sending up to and including the packet end
                   then align the next start on an eight byte boundary */
                if (len < left) {
                        len &= ~7;
                }
                /*
                 *      Allocate buffer.
                 */

                if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
                        NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
                        err = -ENOMEM;
                        goto fail;
                }

                /*
                 *      Set up data on packet
                 */

                ip_copy_metadata(skb2, skb);
                skb_reserve(skb2, ll_rs);
                skb_put(skb2, len + hlen);
                skb2->nh.raw = skb2->data;
                skb2->h.raw = skb2->data + hlen;

                /*
                 *      Charge the memory for the fragment to any owner
                 *      it might possess
                 */

                if (skb->sk)
                        skb_set_owner_w(skb2, skb->sk);

                /*
                 *      Copy the packet header into the new buffer.
                 */

                memcpy(skb2->nh.raw, skb->data, hlen);

                /*
                 *      Copy a block of the IP datagram.
                 */
                if (skb_copy_bits(skb, ptr, skb2->h.raw, len))
                        BUG();
                left -= len;

                /*
                 *      Fill in the new header fields.
                 */
                iph = skb2->nh.iph;
                iph->frag_off = htons((offset >> 3));

                /* ANK: dirty, but effective trick. Upgrade options only if
                 * the segment to be fragmented was THE FIRST (otherwise,
                 * options are already fixed) and make it ONCE
                 * on the initial skb, so that all the following fragments
                 * will inherit fixed options.
                 */
                if (offset == 0)
                        ip_options_fragment(skb);

                /*
                 *      Added AC : If we are fragmenting a fragment that's not the
                 *                 last fragment then keep MF on each fragment
                 */
                if (left > 0 || not_last_frag)
                        iph->frag_off |= htons(IP_MF);
                ptr += len;
                offset += len;

                /*
                 *      Put this fragment into the sending queue.
                 */
                iph->tot_len = htons(len + hlen);

                ip_send_check(iph);

                err = output(skb2);
                if (err)
                        goto fail;

                IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
        }
        kfree_skb(skb);
        IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
        return err;

fail:
        kfree_skb(skb);
        IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
        return err;
}

EXPORT_SYMBOL(ip_fragment);

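/*
 * Generic getfrag callback for ip_append_data(): copy data from a user
 * iovec into the skb, accumulating a software checksum unless the
 * hardware will checksum the packet for us.
 */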
int
ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
        struct iovec *iov = from;

        if (skb->ip_summed == CHECKSUM_PARTIAL) {
                if (memcpy_fromiovecend(to, iov, offset, len) < 0)
                        return -EFAULT;
        } else {
                __wsum csum = 0;
                if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
                        return -EFAULT;
                skb->csum = csum_block_add(skb->csum, csum, odd);
        }
        return 0;
}

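/* Checksum a region of a (possibly highmem) page. */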
static inline __wsum
csum_page(struct page *page, int offset, int copy)
{
        char *kaddr;
        __wsum csum;
        kaddr = kmap(page);
        csum = csum_partial(kaddr + offset, copy, 0);
        kunmap(page);
        return csum;
}

static inline int ip_ufo_append_data(struct sock *sk,
                        int getfrag(void *from, char *to, int offset, int len,
                               int odd, struct sk_buff *skb),
                        void *from, int length, int hh_len, int fragheaderlen,
                        int transhdrlen, int mtu, unsigned int flags)
{
        struct sk_buff *skb;
        int err;

        /* The network device supports UDP fragmentation offload, so
         * create a single skb containing the complete UDP datagram.
         */
        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
                skb = sock_alloc_send_skb(sk,
                        hh_len + fragheaderlen + transhdrlen + 20,
                        (flags & MSG_DONTWAIT), &err);

                if (skb == NULL)
                        return err;

                /* reserve space for the hardware header */
                skb_reserve(skb, hh_len);

                /* create space for the UDP/IP header */
                skb_put(skb, fragheaderlen + transhdrlen);

                /* initialize the network header pointer */
                skb->nh.raw = skb->data;

                /* initialize the protocol header pointer */
                skb->h.raw = skb->data + fragheaderlen;

                skb->ip_summed = CHECKSUM_PARTIAL;
                skb->csum = 0;
                sk->sk_sndmsg_off = 0;
        }

        err = skb_append_datato_frags(sk, skb, getfrag, from,
                               (length - transhdrlen));
        if (!err) {
                /* specify the length of each IP datagram fragment */
                skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
                skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
                __skb_queue_tail(&sk->sk_write_queue, skb);

                return 0;
        }
        /* There is not enough support to do UFO,
         * so follow the normal path.
         */
        kfree_skb(skb);
        return err;
}


/*
 *      ip_append_data() and ip_append_page() can make one large IP datagram
 *      from many pieces of data. Each piece is held on the socket until
 *      ip_push_pending_frames() is called. A piece can be a page or
 *      non-page data.
 *
 *      Transport protocols other than UDP, e.g. raw sockets, can
 *      potentially use this interface as well.
 *
 *      LATER: length must be adjusted by pad at tail, when it is required.
 */
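/*
 * Usage sketch (illustrative only; modelled on how udp_sendmsg() drives
 * this interface in this kernel generation): append the payload, then
 * either push or flush depending on corking and errors.
 *
 *      err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen,
 *                           sizeof(struct udphdr), &ipc, rt,
 *                           corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
 *      if (err)
 *              ip_flush_pending_frames(sk);
 *      else if (!corkreq)
 *              err = ip_push_pending_frames(sk);
 */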
int ip_append_data(struct sock *sk,
                   int getfrag(void *from, char *to, int offset, int len,
                               int odd, struct sk_buff *skb),
                   void *from, int length, int transhdrlen,
                   struct ipcm_cookie *ipc, struct rtable *rt,
                   unsigned int flags)
{
        struct inet_sock *inet = inet_sk(sk);
        struct sk_buff *skb;

        struct ip_options *opt = NULL;
        int hh_len;
        int exthdrlen;
        int mtu;
        int copy;
        int err;
        int offset = 0;
        unsigned int maxfraglen, fragheaderlen;
        int csummode = CHECKSUM_NONE;

        if (flags&MSG_PROBE)
                return 0;

        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * setup for corking.
                 */
                opt = ipc->opt;
                if (opt) {
                        if (inet->cork.opt == NULL) {
                                inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
                                if (unlikely(inet->cork.opt == NULL))
                                        return -ENOBUFS;
                        }
                        memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
                        inet->cork.flags |= IPCORK_OPT;
                        inet->cork.addr = ipc->addr;
                }
                dst_hold(&rt->u.dst);
                inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
                inet->cork.rt = rt;
                inet->cork.length = 0;
                sk->sk_sndmsg_page = NULL;
                sk->sk_sndmsg_off = 0;
                if ((exthdrlen = rt->u.dst.header_len) != 0) {
                        length += exthdrlen;
                        transhdrlen += exthdrlen;
                }
        } else {
                rt = inet->cork.rt;
                if (inet->cork.flags & IPCORK_OPT)
                        opt = inet->cork.opt;

                transhdrlen = 0;
                exthdrlen = 0;
                mtu = inet->cork.fragsize;
        }
        hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

        fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

        if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
                ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
                return -EMSGSIZE;
        }

        /*
         * transhdrlen > 0 means that this is the first fragment, and we
         * hope it will not be fragmented later.
         */
        if (transhdrlen &&
            length + fragheaderlen <= mtu &&
            rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
            !exthdrlen)
                csummode = CHECKSUM_PARTIAL;

        inet->cork.length += length;
        if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
                        (rt->u.dst.dev->features & NETIF_F_UFO)) {

                err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
                                         fragheaderlen, transhdrlen, mtu,
                                         flags);
                if (err)
                        goto error;
                return 0;
        }

        /* So, what's going on in the loop below?
         *
         * We use the calculated fragment length to generate a chain of
         * skbs; each one is an IP fragment that is ready for sending to
         * the network once the appropriate IP header has been added.
         */

        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
                goto alloc_new_skb;

        while (length > 0) {
                /* Check if the remaining data fits into current packet. */
                copy = mtu - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;
                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
                        struct sk_buff *skb_prev;
alloc_new_skb:
                        skb_prev = skb;
                        if (skb_prev)
                                fraggap = skb_prev->len - maxfraglen;
                        else
                                fraggap = 0;

                        /*
                         * If remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;
                        if (datalen > mtu - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen;
                        fraglen = datalen + fragheaderlen;

                        if ((flags & MSG_MORE) &&
                            !(rt->u.dst.dev->features&NETIF_F_SG))
                                alloclen = mtu;
                        else
                                alloclen = datalen + fragheaderlen;

                        /* The last fragment gets additional space at tail.
                         * Note, with MSG_MORE we overallocate on fragments,
                         * because we have no idea what fragment will be
                         * the last.
                         */
                        if (datalen == length + fraggap)
                                alloclen += rt->u.dst.trailer_len;

                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len + 15,
                                                (flags & MSG_DONTWAIT), &err);
                        } else {
                                skb = NULL;
                                if (atomic_read(&sk->sk_wmem_alloc) <=
                                    2 * sk->sk_sndbuf)
                                        skb = sock_wmalloc(sk,
                                                           alloclen + hh_len + 15, 1,
                                                           sk->sk_allocation);
                                if (unlikely(skb == NULL))
                                        err = -ENOBUFS;
                        }
                        if (skb == NULL)
                                goto error;

                        /*
                         *      Fill in the control structures
                         */
                        skb->ip_summed = csummode;
                        skb->csum = 0;
                        skb_reserve(skb, hh_len);

                        /*
                         *      Find where to start putting bytes.
                         */
                        data = skb_put(skb, fraglen);
                        skb->nh.raw = data + exthdrlen;
                        data += fragheaderlen;
                        skb->h.raw = data + exthdrlen;

                        if (fraggap) {
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data + transhdrlen, fraggap, 0);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }

                        copy = datalen - transhdrlen - fraggap;
                        if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        }

                        offset += copy;
                        length -= datalen - fraggap;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        csummode = CHECKSUM_NONE;

                        /*
                         * Put the packet on the pending queue.
                         */
                        __skb_queue_tail(&sk->sk_write_queue, skb);
                        continue;
                }

                if (copy > length)
                        copy = length;

                if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                        offset, copy, off, skb) < 0) {
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else {
                        int i = skb_shinfo(skb)->nr_frags;
                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
                        struct page *page = sk->sk_sndmsg_page;
                        int off = sk->sk_sndmsg_off;
                        unsigned int left;

                        if (page && (left = PAGE_SIZE - off) > 0) {
                                if (copy >= left)
                                        copy = left;
                                if (page != frag->page) {
                                        if (i == MAX_SKB_FRAGS) {
                                                err = -EMSGSIZE;
                                                goto error;
                                        }
                                        get_page(page);
                                        skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
                                        frag = &skb_shinfo(skb)->frags[i];
                                }
                        } else if (i < MAX_SKB_FRAGS) {
                                if (copy > PAGE_SIZE)
                                        copy = PAGE_SIZE;
                                page = alloc_pages(sk->sk_allocation, 0);
                                if (page == NULL)  {
                                        err = -ENOMEM;
                                        goto error;
                                }
                                sk->sk_sndmsg_page = page;
                                sk->sk_sndmsg_off = 0;

                                skb_fill_page_desc(skb, i, page, 0, 0);
                                frag = &skb_shinfo(skb)->frags[i];
                                skb->truesize += PAGE_SIZE;
                                atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
                        } else {
                                err = -EMSGSIZE;
                                goto error;
                        }
                        if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
                                err = -EFAULT;
                                goto error;
                        }
                        sk->sk_sndmsg_off += copy;
                        frag->size += copy;
                        skb->len += copy;
                        skb->data_len += copy;
                }
                offset += copy;
                length -= copy;
        }

        return 0;

error:
        inet->cork.length -= length;
        IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
        return err;
}

ssize_t ip_append_page(struct sock *sk, struct page *page,
                       int offset, size_t size, int flags)
{
        struct inet_sock *inet = inet_sk(sk);
        struct sk_buff *skb;
        struct rtable *rt;
        struct ip_options *opt = NULL;
        int hh_len;
        int mtu;
        int len;
        int err;
        unsigned int maxfraglen, fragheaderlen, fraggap;

        if (inet->hdrincl)
                return -EPERM;

        if (flags&MSG_PROBE)
                return 0;

        if (skb_queue_empty(&sk->sk_write_queue))
                return -EINVAL;

        rt = inet->cork.rt;
        if (inet->cork.flags & IPCORK_OPT)
                opt = inet->cork.opt;

        if (!(rt->u.dst.dev->features&NETIF_F_SG))
                return -EOPNOTSUPP;

        hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
        mtu = inet->cork.fragsize;

        fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

        if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
                ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
                return -EMSGSIZE;
        }

        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
                return -EINVAL;

        inet->cork.length += size;
        if ((sk->sk_protocol == IPPROTO_UDP) &&
            (rt->u.dst.dev->features & NETIF_F_UFO)) {
                skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
                skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
        }

        while (size > 0) {
                int i;

                if (skb_is_gso(skb))
                        len = size;
                else {
                        /* Check if the remaining data fits into current packet. */
                        len = mtu - skb->len;
                        if (len < size)
                                len = maxfraglen - skb->len;
                }
                if (len <= 0) {
                        struct sk_buff *skb_prev;
                        char *data;
                        struct iphdr *iph;
                        int alloclen;

                        skb_prev = skb;
                        fraggap = skb_prev->len - maxfraglen;

                        alloclen = fragheaderlen + hh_len + fraggap + 15;
                        skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
                        if (unlikely(!skb)) {
                                err = -ENOBUFS;
                                goto error;
                        }

                        /*
                         *      Fill in the control structures
                         */
                        skb->ip_summed = CHECKSUM_NONE;
                        skb->csum = 0;
                        skb_reserve(skb, hh_len);

                        /*
                         *      Find where to start putting bytes.
                         */
                        data = skb_put(skb, fragheaderlen + fraggap);
                        skb->nh.iph = iph = (struct iphdr *)data;
                        data += fragheaderlen;
                        skb->h.raw = data;

                        if (fraggap) {
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data, fraggap, 0);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }

                        /*
                         * Put the packet on the pending queue.
                         */
                        __skb_queue_tail(&sk->sk_write_queue, skb);
                        continue;
                }

                i = skb_shinfo(skb)->nr_frags;
                if (len > size)
                        len = size;
                if (skb_can_coalesce(skb, i, page, offset)) {
                        skb_shinfo(skb)->frags[i-1].size += len;
                } else if (i < MAX_SKB_FRAGS) {
                        get_page(page);
                        skb_fill_page_desc(skb, i, page, offset, len);
                } else {
                        err = -EMSGSIZE;
                        goto error;
                }

                if (skb->ip_summed == CHECKSUM_NONE) {
                        __wsum csum;
                        csum = csum_page(page, offset, len);
                        skb->csum = csum_block_add(skb->csum, csum, skb->len);
                }

                skb->len += len;
                skb->data_len += len;
                offset += len;
                size -= len;
        }
        return 0;

error:
        inet->cork.length -= size;
        IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
        return err;
}

/*
 *      Combine all pending IP fragments on the socket into one IP datagram
 *      and push them out.
 */
int ip_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
        struct inet_sock *inet = inet_sk(sk);
        struct ip_options *opt = NULL;
        struct rtable *rt = inet->cork.rt;
        struct iphdr *iph;
        __be16 df = 0;
        __u8 ttl;
        int err = 0;

        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
                goto out;
        tail_skb = &(skb_shinfo(skb)->frag_list);

        /* move skb->data to ip header from ext header */
        if (skb->data < skb->nh.raw)
                __skb_pull(skb, skb->nh.raw - skb->data);
        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
                __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
                *tail_skb = tmp_skb;
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
                skb->truesize += tmp_skb->truesize;
                __sock_put(tmp_skb->sk);
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
        }

        /* Unless the user demanded real pmtu discovery (IP_PMTUDISC_DO),
         * we allow the frame generated here to be fragmented. No matter
         * how transforms change the size of the packet, it will go out.
         */
        if (inet->pmtudisc != IP_PMTUDISC_DO)
                skb->local_df = 1;

        /* The DF bit is set when we want to see DF on outgoing frames.
         * If local_df is set too, we still allow this frame to be
         * fragmented locally. */
        if (inet->pmtudisc == IP_PMTUDISC_DO ||
            (skb->len <= dst_mtu(&rt->u.dst) &&
             ip_dont_fragment(sk, &rt->u.dst)))
                df = htons(IP_DF);

        if (inet->cork.flags & IPCORK_OPT)
                opt = inet->cork.opt;

        if (rt->rt_type == RTN_MULTICAST)
                ttl = inet->mc_ttl;
        else
                ttl = ip_select_ttl(inet, &rt->u.dst);

        iph = (struct iphdr *)skb->data;
        iph->version = 4;
        iph->ihl = 5;
        if (opt) {
                iph->ihl += opt->optlen>>2;
                ip_options_build(skb, opt, inet->cork.addr, rt, 0);
        }
        iph->tos = inet->tos;
        iph->tot_len = htons(skb->len);
        iph->frag_off = df;
        ip_select_ident(iph, &rt->u.dst, sk);
        iph->ttl = ttl;
        iph->protocol = sk->sk_protocol;
        iph->saddr = rt->rt_src;
        iph->daddr = rt->rt_dst;
        ip_send_check(iph);

        skb->priority = sk->sk_priority;
        skb->dst = dst_clone(&rt->u.dst);

        /* Netfilter gets the whole, not yet fragmented skb. */
        err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
                      skb->dst->dev, dst_output);
        if (err) {
                if (err > 0)
                        err = inet->recverr ? net_xmit_errno(err) : 0;
                if (err)
                        goto error;
        }

out:
        inet->cork.flags &= ~IPCORK_OPT;
        kfree(inet->cork.opt);
        inet->cork.opt = NULL;
        if (inet->cork.rt) {
                ip_rt_put(inet->cork.rt);
                inet->cork.rt = NULL;
        }
        return err;

error:
        IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
        goto out;
}

/*
 *      Throw away all pending data on the socket.
 */
void ip_flush_pending_frames(struct sock *sk)
{
        struct inet_sock *inet = inet_sk(sk);
        struct sk_buff *skb;

        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
                kfree_skb(skb);

        inet->cork.flags &= ~IPCORK_OPT;
        kfree(inet->cork.opt);
        inet->cork.opt = NULL;
        if (inet->cork.rt) {
                ip_rt_put(inet->cork.rt);
                inet->cork.rt = NULL;
        }
}


/*
 *      Fetch data from kernel space and fill in checksum if needed.
 */
static int ip_reply_glue_bits(void *dptr, char *to, int offset,
                              int len, int odd, struct sk_buff *skb)
{
        __wsum csum;

        csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
        skb->csum = csum_block_add(skb->csum, csum, odd);
        return 0;
}

/*
 *      Generic function to send a packet as a reply to another packet.
 *      Used to send TCP resets so far. ICMP should use this function too.
 *
 *      Should run single threaded per socket because it uses the sock
 *      structure to pass arguments.
 *
 *      LATER: switch from ip_build_xmit to ip_append_*
 */
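/*
 * Usage sketch (illustrative only; modelled on how TCP sends resets in
 * this kernel generation): the caller fills in a struct ip_reply_arg
 * whose iov points at the reply header it has built, along with a
 * pseudo-header checksum and the offset of the checksum field:
 *
 *      arg.iov[0].iov_base = (unsigned char *)&rth;
 *      arg.iov[0].iov_len  = sizeof(rth);
 *      arg.csum = csum_tcpudp_nofold(saddr, daddr, arg.iov[0].iov_len,
 *                                    IPPROTO_TCP, 0);
 *      arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 *      ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
 */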
void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
                   unsigned int len)
{
        struct inet_sock *inet = inet_sk(sk);
        struct {
                struct ip_options       opt;
                char                    data[40];
        } replyopts;
        struct ipcm_cookie ipc;
        __be32 daddr;
        struct rtable *rt = (struct rtable*)skb->dst;

        if (ip_options_echo(&replyopts.opt, skb))
                return;

        daddr = ipc.addr = rt->rt_src;
        ipc.opt = NULL;

        if (replyopts.opt.optlen) {
                ipc.opt = &replyopts.opt;

                if (ipc.opt->srr)
                        daddr = replyopts.opt.faddr;
        }

        {
                struct flowi fl = { .nl_u = { .ip4_u =
                                              { .daddr = daddr,
                                                .saddr = rt->rt_spec_dst,
                                                .tos = RT_TOS(skb->nh.iph->tos) } },
                                    /* Not quite clean, but right. */
                                    .uli_u = { .ports =
                                               { .sport = skb->h.th->dest,
                                                 .dport = skb->h.th->source } },
                                    .proto = sk->sk_protocol };
                security_skb_classify_flow(skb, &fl);
                if (ip_route_output_key(&rt, &fl))
                        return;
        }

        /* And let IP do all the hard work.

           This chunk is not reentrant, hence the spinlock. Note that it
           relies on the fact that this function is called with BHs
           disabled locally and that sk cannot already be spinlocked.
         */
        bh_lock_sock(sk);
        inet->tos = skb->nh.iph->tos;
        sk->sk_priority = skb->priority;
        sk->sk_protocol = skb->nh.iph->protocol;
        ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
                       &ipc, rt, MSG_DONTWAIT);
        if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
                if (arg->csumoffset >= 0)
                        *((__sum16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
                skb->ip_summed = CHECKSUM_NONE;
                ip_push_pending_frames(sk);
        }

        bh_unlock_sock(sk);

        ip_rt_put(rt);
}

void __init ip_init(void)
{
        ip_rt_init();
        inet_initpeers();

#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
        igmp_mc_proc_init();
#endif
}

EXPORT_SYMBOL(ip_generic_getfrag);
EXPORT_SYMBOL(ip_queue_xmit);
EXPORT_SYMBOL(ip_send_check);