linux-old/net/ipv4/ip_input.c
<<
>>
Prefs
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              The Internet Protocol (IP) module.
   7 *
   8 * Version:     $Id: ip_input.c,v 1.36 1999/03/21 05:22:38 davem Exp $
   9 *
  10 * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12 *              Donald Becker, <becker@super.org>
  13 *              Alan Cox, <Alan.Cox@linux.org>
  14 *              Richard Underwood
  15 *              Stefan Becker, <stefanb@yello.ping.de>
  16 *              Jorge Cwik, <jorge@laser.satlink.net>
  17 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  18 *              
  19 *
  20 * Fixes:
  21 *              Alan Cox        :       Commented a couple of minor bits of surplus code
  22 *              Alan Cox        :       Undefining IP_FORWARD doesn't include the code
  23 *                                      (just stops a compiler warning).
  24 *              Alan Cox        :       Frames with >=MAX_ROUTE record routes, strict routes or loose routes
  25 *                                      are junked rather than corrupting things.
  26 *              Alan Cox        :       Frames to bad broadcast subnets are dumped
  27 *                                      We used to process them non broadcast and
  28 *                                      boy could that cause havoc.
  29 *              Alan Cox        :       ip_forward sets the free flag on the
  30 *                                      new frame it queues. Still crap because
  31 *                                      it copies the frame but at least it
  32 *                                      doesn't eat memory too.
  33 *              Alan Cox        :       Generic queue code and memory fixes.
  34 *              Fred Van Kempen :       IP fragment support (borrowed from NET2E)
  35 *              Gerhard Koerting:       Forward fragmented frames correctly.
  36 *              Gerhard Koerting:       Fixes to my fix of the above 8-).
  37 *              Gerhard Koerting:       IP interface addressing fix.
  38 *              Linus Torvalds  :       More robustness checks
  39 *              Alan Cox        :       Even more checks: Still not as robust as it ought to be
  40 *              Alan Cox        :       Save IP header pointer for later
  41 *              Alan Cox        :       ip option setting
  42 *              Alan Cox        :       Use ip_tos/ip_ttl settings
  43 *              Alan Cox        :       Fragmentation bogosity removed
  44 *                                      (Thanks to Mark.Bush@prg.ox.ac.uk)
  45 *              Dmitry Gorodchanin :    Send of a raw packet crash fix.
  46 *              Alan Cox        :       Silly ip bug when an overlength
  47 *                                      fragment turns up. Now frees the
  48 *                                      queue.
  49 *              Linus Torvalds/ :       Memory leakage on fragmentation
  50 *              Alan Cox        :       handling.
  51 *              Gerhard Koerting:       Forwarding uses IP priority hints
  52 *              Teemu Rantanen  :       Fragment problems.
  53 *              Alan Cox        :       General cleanup, comments and reformat
  54 *              Alan Cox        :       SNMP statistics
  55 *              Alan Cox        :       BSD address rule semantics. Also see
  56 *                                      UDP as there is a nasty checksum issue
  57 *                                      if you do things the wrong way.
  58 *              Alan Cox        :       Always defrag, moved IP_FORWARD to the config.in file
  59 *              Alan Cox        :       IP options adjust sk->priority.
  60 *              Pedro Roque     :       Fix mtu/length error in ip_forward.
  61 *              Alan Cox        :       Avoid ip_chk_addr when possible.
  62 *      Richard Underwood       :       IP multicasting.
  63 *              Alan Cox        :       Cleaned up multicast handlers.
  64 *              Alan Cox        :       RAW sockets demultiplex in the BSD style.
  65 *              Gunther Mayer   :       Fix the SNMP reporting typo
  66 *              Alan Cox        :       Always in group 224.0.0.1
  67 *      Pauline Middelink       :       Fast ip_checksum update when forwarding
  68 *                                      Masquerading support.
  69 *              Alan Cox        :       Multicast loopback error for 224.0.0.1
  70 *              Alan Cox        :       IP_MULTICAST_LOOP option.
  71 *              Alan Cox        :       Use notifiers.
  72 *              Bjorn Ekwall    :       Removed ip_csum (from slhc.c too)
  73 *              Bjorn Ekwall    :       Moved ip_fast_csum to ip.h (inline!)
  74 *              Stefan Becker   :       Send out ICMP HOST REDIRECT
  75 *      Arnt Gulbrandsen        :       ip_build_xmit
  76 *              Alan Cox        :       Per socket routing cache
  77 *              Alan Cox        :       Fixed routing cache, added header cache.
  78 *              Alan Cox        :       Loopback didn't work right in original ip_build_xmit - fixed it.
  79 *              Alan Cox        :       Only send ICMP_REDIRECT if src/dest are the same net.
  80 *              Alan Cox        :       Incoming IP option handling.
  81 *              Alan Cox        :       Set saddr on raw output frames as per BSD.
  82 *              Alan Cox        :       Stopped broadcast source route explosions.
  83 *              Alan Cox        :       Can disable source routing
  84 *              Takeshi Sone    :       Masquerading didn't work.
  85 *      Dave Bonn,Alan Cox      :       Faster IP forwarding whenever possible.
  86 *              Alan Cox        :       Memory leaks, tramples, misc debugging.
  87 *              Alan Cox        :       Fixed multicast (by popular demand 8))
  88 *              Alan Cox        :       Fixed forwarding (by even more popular demand 8))
  89 *              Alan Cox        :       Fixed SNMP statistics [I think]
  90 *      Gerhard Koerting        :       IP fragmentation forwarding fix
  91 *              Alan Cox        :       Device lock against page fault.
  92 *              Alan Cox        :       IP_HDRINCL facility.
  93 *      Werner Almesberger      :       Zero fragment bug
  94 *              Alan Cox        :       RAW IP frame length bug
  95 *              Alan Cox        :       Outgoing firewall on build_xmit
  96 *              A.N.Kuznetsov   :       IP_OPTIONS support throughout the kernel
  97 *              Alan Cox        :       Multicast routing hooks
  98 *              Jos Vos         :       Do accounting *before* call_in_firewall
  99 *      Willy Konynenberg       :       Transparent proxying support
 100 *
 101 *  
 102 *
 103 * To Fix:
 104 *              IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient
 105 *              and could be made very efficient with the addition of some virtual memory hacks to permit
 106 *              the allocation of a buffer that can then be 'grown' by twiddling page tables.
 107 *              Output fragmentation wants updating along with the buffer management to use a single 
 108 *              interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet
 109 *              output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause
 110 *              fragmentation anyway.
 111 *
 112 *              This program is free software; you can redistribute it and/or
 113 *              modify it under the terms of the GNU General Public License
 114 *              as published by the Free Software Foundation; either version
 115 *              2 of the License, or (at your option) any later version.
 116 */
 117
 118#include <asm/system.h>
 119#include <linux/types.h>
 120#include <linux/kernel.h>
 121#include <linux/string.h>
 122#include <linux/errno.h>
 123#include <linux/config.h>
 124
 125#include <linux/net.h>
 126#include <linux/socket.h>
 127#include <linux/sockios.h>
 128#include <linux/in.h>
 129#include <linux/inet.h>
 130#include <linux/netdevice.h>
 131#include <linux/etherdevice.h>
 132
 133#include <net/snmp.h>
 134#include <net/ip.h>
 135#include <net/protocol.h>
 136#include <net/route.h>
 137#include <linux/skbuff.h>
 138#include <net/sock.h>
 139#include <net/arp.h>
 140#include <net/icmp.h>
 141#include <net/raw.h>
 142#include <net/checksum.h>
 143#include <linux/ip_fw.h>
 144#ifdef CONFIG_IP_MASQUERADE
 145#include <net/ip_masq.h>
 146#endif
 147#include <linux/firewall.h>
 148#include <linux/mroute.h>
 149#include <linux/netlink.h>
 150
 151/*
 152 *      SNMP management statistics
 153 */
 154
 155struct ip_mib ip_statistics={2,IPDEFTTL,};      /* Forwarding=No, Default TTL=64 */
 156
 157
 158/*
 159 *      Handle the issuing of an ioctl() request
 160 *      for the ip device. This is scheduled to
 161 *      disappear
 162 */
 163
 164int ip_ioctl(struct sock *sk, int cmd, unsigned long arg)
 165{
 166        switch(cmd)
 167        {
 168                default:
 169                        return(-EINVAL);
 170        }
 171}
 172
 173
 174#if defined(CONFIG_IP_TRANSPARENT_PROXY) && !defined(CONFIG_IP_ALWAYS_DEFRAG)
 175#define CONFIG_IP_ALWAYS_DEFRAG 1
 176#endif
 177
 178/*
 179 *      0 - deliver
 180 *      1 - block
 181 */
 182static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
 183{
 184        int    type;
 185
 186        type = skb->h.icmph->type;
 187        if (type < 32)
 188                return test_bit(type, &sk->tp_pinfo.tp_raw4.filter);
 189
 190        /* Do not block unknown ICMP types */
 191        return 0;
 192}
 193
 194/*
 195 *      Process Router Attention IP option
 196 */ 
 197int ip_call_ra_chain(struct sk_buff *skb)
 198{
 199        struct ip_ra_chain *ra;
 200        u8 protocol = skb->nh.iph->protocol;
 201        struct sock *last = NULL;
 202
 203        for (ra = ip_ra_chain; ra; ra = ra->next) {
 204                struct sock *sk = ra->sk;
 205                if (sk && sk->num == protocol) {
 206                        if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
 207                                skb = ip_defrag(skb);
 208                                if (skb == NULL)
 209                                        return 1;
 210                        }
 211                        if (last) {
 212                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 213                                if (skb2)
 214                                        raw_rcv(last, skb2);
 215                        }
 216                        last = sk;
 217                }
 218        }
 219
 220        if (last) {
 221                raw_rcv(last, skb);
 222                return 1;
 223        }
 224        return 0;
 225}
 226
 227/*
 228 *      Deliver IP Packets to the higher protocol layers.
 229 */ 
 230int ip_local_deliver(struct sk_buff *skb)
 231{
 232        struct iphdr *iph = skb->nh.iph;
 233        struct inet_protocol *ipprot;
 234        struct sock *raw_sk=NULL;
 235        unsigned char hash;
 236        int flag = 0;
 237
 238#ifndef CONFIG_IP_ALWAYS_DEFRAG
 239        /*
 240         *      Reassemble IP fragments.
 241         */
 242
 243        if (iph->frag_off & htons(IP_MF|IP_OFFSET)) {
 244                skb = ip_defrag(skb);
 245                if (!skb)
 246                        return 0;
 247                iph = skb->nh.iph;
 248        }
 249#endif
 250
 251#ifdef CONFIG_IP_MASQUERADE
 252        /*
 253         * Do we need to de-masquerade this packet?
 254         */
 255        {
 256                int ret = ip_fw_demasquerade(&skb);
 257                if (ret < 0) {
 258                        kfree_skb(skb);
 259                        return 0;
 260                }
 261
 262                if (ret) {
 263                        iph=skb->nh.iph;
 264                        IPCB(skb)->flags |= IPSKB_MASQUERADED;
 265                        dst_release(skb->dst);
 266                        skb->dst = NULL;
 267                        if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, skb->dev)) {
 268                                kfree_skb(skb);
 269                                return 0;
 270                        }
 271                        return skb->dst->input(skb);
 272                }
 273        }
 274#endif
 275
 276        /*
 277         *      Point into the IP datagram, just past the header.
 278         */
 279
 280        skb->h.raw = skb->nh.raw + iph->ihl*4;
 281
 282        /*
 283         *      Deliver to raw sockets. This is fun as to avoid copies we want to make no 
 284         *      surplus copies.
 285         *
 286         *      RFC 1122: SHOULD pass TOS value up to the transport layer.
 287         *      -> It does. And not only TOS, but all IP header.
 288         */
 289 
 290        /* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
 291        hash = iph->protocol & (MAX_INET_PROTOS - 1);
 292
 293        /* 
 294         *      If there maybe a raw socket we must check - if not we don't care less 
 295         */
 296                 
 297        if((raw_sk = raw_v4_htable[hash]) != NULL) {
 298                struct sock *sknext = NULL;
 299                struct sk_buff *skb1;
 300                raw_sk = raw_v4_lookup(raw_sk, iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex);
 301                if(raw_sk) {    /* Any raw sockets */
 302                        do {
 303                                /* Find the next */
 304                                sknext = raw_v4_lookup(raw_sk->next, iph->protocol,
 305                                                       iph->saddr, iph->daddr, skb->dev->ifindex);
 306                                if (iph->protocol != IPPROTO_ICMP || !icmp_filter(raw_sk, skb)) {
 307                                        if (sknext == NULL)
 308                                                break;
 309                                        skb1 = skb_clone(skb, GFP_ATOMIC);
 310                                        if(skb1)
 311                                        {
 312                                                raw_rcv(raw_sk, skb1);
 313                                        }
 314                                }
 315                                raw_sk = sknext;
 316                        } while(raw_sk!=NULL);
 317                                
 318                        /*      Here either raw_sk is the last raw socket, or NULL if
 319                         *      none.  We deliver to the last raw socket AFTER the
 320                         *      protocol checks as it avoids a surplus copy.
 321                         */
 322                }
 323        }
 324        
 325        /*
 326         *      skb->h.raw now points at the protocol beyond the IP header.
 327         */
 328        
 329        for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next)
 330        {
 331                struct sk_buff *skb2;
 332        
 333                if (ipprot->protocol != iph->protocol)
 334                        continue;
 335                /*
 336                 *      See if we need to make a copy of it.  This will
 337                 *      only be set if more than one protocol wants it.
 338                 *      and then not for the last one. If there is a pending
 339                 *      raw delivery wait for that
 340                 */
 341        
 342                if (ipprot->copy || raw_sk)
 343                {
 344                        skb2 = skb_clone(skb, GFP_ATOMIC);
 345                        if(skb2==NULL)
 346                                continue;
 347                }
 348                else
 349                {
 350                        skb2 = skb;
 351                }
 352                flag = 1;
 353
 354                /*
 355                 *      Pass on the datagram to each protocol that wants it,
 356                 *      based on the datagram protocol.  We should really
 357                 *      check the protocol handler's return values here...
 358                 */
 359
 360                ipprot->handler(skb2, ntohs(iph->tot_len) - (iph->ihl * 4));
 361        }
 362
 363        /*
 364         *      All protocols checked.
 365         *      If this packet was a broadcast, we may *not* reply to it, since that
 366         *      causes (proven, grin) ARP storms and a leakage of memory (i.e. all
 367         *      ICMP reply messages get queued up for transmission...)
 368         */
 369
 370        if(raw_sk!=NULL)        /* Shift to last raw user */
 371        {
 372                raw_rcv(raw_sk, skb);
 373
 374        }
 375        else if (!flag)         /* Free and report errors */
 376        {
 377                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);        
 378                kfree_skb(skb);
 379        }
 380
 381        return(0);
 382}
 383
 384/*
 385 *      Main IP Receive routine.
 386 */ 
 387int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
 388{
 389        struct iphdr *iph = skb->nh.iph;
 390#ifdef  CONFIG_FIREWALL
 391        int fwres;
 392        u16 rport;
 393#endif /* CONFIG_FIREWALL */
 394
 395        /*
 396         *      When the interface is in promisc. mode, drop all the crap
 397         *      that it receives, do not try to analyse it.
 398         */
 399        if (skb->pkt_type == PACKET_OTHERHOST)
 400                goto drop;
 401
 402        ip_statistics.IpInReceives++;
 403
 404        /*
 405         *      RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
 406         *
 407         *      Is the datagram acceptable?
 408         *
 409         *      1.      Length at least the size of an ip header
 410         *      2.      Version of 4
 411         *      3.      Checksums correctly. [Speed optimisation for later, skip loopback checksums]
 412         *      4.      Doesn't have a bogus length
 413         */
 414
 415        if (skb->len < sizeof(struct iphdr))
 416                goto inhdr_error; 
 417        if (iph->ihl < 5 || iph->version != 4 || ip_fast_csum((u8 *)iph, iph->ihl) != 0)
 418                goto inhdr_error; 
 419
 420        {
 421        __u32 len = ntohs(iph->tot_len); 
 422        if (skb->len < len)
 423                goto inhdr_error; 
 424
 425        /*
 426         *      Our transport medium may have padded the buffer out. Now we know it
 427         *      is IP we can trim to the true length of the frame.
 428         *      Note this now means skb->len holds ntohs(iph->tot_len).
 429         */
 430
 431        __skb_trim(skb, len);
 432        }
 433        
 434#ifdef CONFIG_IP_ALWAYS_DEFRAG
 435        /* Won't send ICMP reply, since skb->dst == NULL. --RR */
 436        if (iph->frag_off & htons(IP_MF|IP_OFFSET)) {
 437                skb = ip_defrag(skb);
 438                if (!skb)
 439                        return 0;
 440                iph = skb->nh.iph;
 441                ip_send_check(iph);
 442        }
 443#endif
 444
 445#ifdef CONFIG_FIREWALL
 446        /*
 447         *      See if the firewall wants to dispose of the packet. 
 448         *
 449         * We can't do ICMP reply or local delivery before routing,
 450         * so we delay those decisions until after route. --RR
 451         */
 452        fwres = call_in_firewall(PF_INET, dev, iph, &rport, &skb);
 453        if (fwres < FW_ACCEPT && fwres != FW_REJECT)
 454                goto drop;
 455        iph = skb->nh.iph;
 456#endif /* CONFIG_FIREWALL */
 457
 458        /*
 459         *      Initialise the virtual path cache for the packet. It describes
 460         *      how the packet travels inside Linux networking.
 461         */ 
 462        if (skb->dst == NULL) {
 463                if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))
 464                        goto drop; 
 465#ifdef CONFIG_CPU_IS_SLOW
 466                if (net_cpu_congestion > 10 && !(iph->tos&IPTOS_RELIABILITY) &&
 467                    IPTOS_PREC(iph->tos) < IPTOS_PREC_INTERNETCONTROL) {
 468                        goto drop;
 469                }
 470#endif
 471        }
 472
 473#ifdef CONFIG_NET_CLS_ROUTE
 474        if (skb->dst->tclassid) {
 475                u32 idx = skb->dst->tclassid;
 476                ip_rt_acct[idx&0xFF].o_packets++;
 477                ip_rt_acct[idx&0xFF].o_bytes+=skb->len;
 478                ip_rt_acct[(idx>>16)&0xFF].i_packets++;
 479                ip_rt_acct[(idx>>16)&0xFF].i_bytes+=skb->len;
 480        }
 481#endif
 482
 483        if (iph->ihl > 5) {
 484                struct ip_options *opt;
 485
 486                /* It looks as overkill, because not all
 487                   IP options require packet mangling.
 488                   But it is the easiest for now, especially taking
 489                   into account that combination of IP options
 490                   and running sniffer is extremely rare condition.
 491                                                      --ANK (980813)
 492                */
 493
 494                skb = skb_cow(skb, skb_headroom(skb));
 495                if (skb == NULL)
 496                        return 0;
 497                iph = skb->nh.iph;
 498
 499                skb->ip_summed = 0;
 500                if (ip_options_compile(NULL, skb))
 501                        goto inhdr_error;
 502
 503                opt = &(IPCB(skb)->opt);
 504                if (opt->srr) {
 505                        struct in_device *in_dev = dev->ip_ptr;
 506                        if (in_dev && !IN_DEV_SOURCE_ROUTE(in_dev)) {
 507                                if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
 508                                        printk(KERN_INFO "source route option %d.%d.%d.%d -> %d.%d.%d.%d\n",
 509                                               NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
 510                                goto drop;
 511                        }
 512                        if (ip_options_rcv_srr(skb))
 513                                goto drop;
 514                }
 515        }
 516
 517#ifdef CONFIG_FIREWALL
 518#ifdef  CONFIG_IP_TRANSPARENT_PROXY
 519        if (fwres == FW_REDIRECT && (IPCB(skb)->redirport = rport) != 0)
 520                return ip_local_deliver(skb);
 521#endif /* CONFIG_IP_TRANSPARENT_PROXY */
 522
 523        if (fwres == FW_REJECT) {
 524                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
 525                goto drop;
 526        }
 527#endif /* CONFIG_FIREWALL */
 528
 529        return skb->dst->input(skb);
 530
 531inhdr_error:
 532        ip_statistics.IpInHdrErrors++;
 533drop:
 534        kfree_skb(skb);
 535        return(0);
 536}
 537
 538
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.