linux-old/net/ipv4/ip_fw.c
<<
>>
Prefs
   1/*
   2 * This code is heavily based on the code on the old ip_fw.c code; see below for
   3 * copyrights and attributions of the old code.  This code is basically GPL.
   4 *
   5 * 15-Aug-1997: Major changes to allow graphs for firewall rules.
   6 *              Paul Russell <Paul.Russell@rustcorp.com.au> and
   7 *              Michael Neuling <Michael.Neuling@rustcorp.com.au> 
   8 * 24-Aug-1997: Generalised protocol handling (not just TCP/UDP/ICMP).
   9 *              Added explicit RETURN from chains.
  10 *              Removed TOS mangling (done in ipchains 1.0.1).
  11 *              Fixed read & reset bug by reworking proc handling.
  12 *              Paul Russell <Paul.Russell@rustcorp.com.au>
  13 * 28-Sep-1997: Added packet marking for net sched code.
  14 *              Removed fw_via comparisons: all done on device name now,
  15 *              similar to changes in ip_fw.c in DaveM's CVS970924 tree.
  16 *              Paul Russell <Paul.Russell@rustcorp.com.au>
  17 * 2-Nov-1997:  Moved types across to __u16, etc.
  18 *              Added inverse flags.
  19 *              Fixed fragment bug (in args to port_match).
  20 *              Changed mark to only one flag (MARKABS).
  21 * 21-Nov-1997: Added ability to test ICMP code.
  22 * 19-Jan-1998: Added wildcard interfaces.
  23 * 6-Feb-1998:  Merged 2.0 and 2.1 versions.
  24 *              Initialised ip_masq for 2.0.x version.
  25 *              Added explicit NETLINK option for 2.1.x version.
  26 *              Added packet and byte counters for policy matches.
  27 * 26-Feb-1998: Fixed race conditions, added SMP support.
  28 * 18-Mar-1998: Fix SMP, fix race condition fix.
  29 * 1-May-1998:  Remove caching of device pointer.
  30 * 12-May-1998: Allow tiny fragment case for TCP/UDP.
  31 * 15-May-1998: Treat short packets as fragments, don't just block.
  32 * 3-Jan-1999:  Fixed serious procfs security hole -- users should never
  33 *              be allowed to view the chains!
  34 *              Marc Santoro <ultima@snicker.emoti.com>
  35 */
  36
  37/*
  38 *
  39 * The origina Linux port was done Alan Cox, with changes/fixes from
  40 * Pauline Middlelink, Jos Vos, Thomas Quinot, Wouter Gadeyne, Juan
  41 * Jose Ciarlante, Bernd Eckenfels, Keith Owens and others.
  42 * 
  43 * Copyright from the original FreeBSD version follows:
  44 *
  45 * Copyright (c) 1993 Daniel Boulet
  46 * Copyright (c) 1994 Ugen J.S.Antsilevich
  47 *
  48 * Redistribution and use in source forms, with and without modification,
  49 * are permitted provided that this entire comment appears intact.
  50 *
  51 * Redistribution in binary form may occur without any restrictions.
  52 * Obviously, it would be nice if you gave credit where credit is due
  53 * but requiring it would be too onerous.
  54 *
  55 * This software is provided ``AS IS'' without any warranties of any kind.  */
  56
  57
  58#include <linux/config.h>
  59
  60#include <asm/uaccess.h>
  61#include <asm/system.h>
  62#include <linux/types.h>
  63#include <linux/sched.h>
  64#include <linux/string.h>
  65#include <linux/errno.h>
  66
  67#include <linux/socket.h>
  68#include <linux/sockios.h>
  69#include <linux/in.h>
  70#include <linux/inet.h>
  71#include <linux/netdevice.h>
  72#include <linux/icmp.h>
  73#include <linux/udp.h>
  74#include <net/ip.h>
  75#include <net/protocol.h>
  76#include <net/route.h>
  77#include <net/tcp.h>
  78#include <net/udp.h>
  79#include <net/sock.h>
  80#include <net/icmp.h>
  81#include <linux/netlink.h>
  82#include <linux/init.h>
  83#include <linux/firewall.h>
  84#include <linux/ip_fw.h>
  85
  86#ifdef CONFIG_IP_MASQUERADE
  87#include <net/ip_masq.h>
  88#endif
  89
  90#include <net/checksum.h>
  91#include <linux/proc_fs.h>
  92#include <linux/stat.h>
  93
  94/* Understanding locking in this code: (thanks to Alan Cox for using
  95 * little words to explain this to me). -- PR
  96 *
  97 * In UP, there can be two packets traversing the chains:
  98 * 1) A packet from the current userspace context
  99 * 2) A packet off the bh handlers (timer or net).
 100 *
 101 * For SMP (kernel v2.1+), multiply this by # CPUs.
 102 *
  103 * [Note that this is not correct for 2.2 - because the socket code always
 104 *  uses lock_kernel() to serialize, and bottom halves (timers and net_bhs)
 105 *  only run on one CPU at a time.  This will probably change for 2.3.
 106 *  It is still good to use spinlocks because that avoids the global cli() 
 107 *  for updating the tables, which is rather costly in SMP kernels -AK]
 108 *
 109 * This means counters and backchains can get corrupted if no precautions
 110 * are taken.
 111 *
 112 * To actually alter a chain on UP, we need only do a cli(), as this will
 113 * stop a bh handler firing, as we are in the current userspace context
 114 * (coming from a setsockopt()).
 115 *
 116 * On SMP, we need a write_lock_irqsave(), which is a simple cli() in
 117 * UP.
 118 *
 119 * For backchains and counters, we use an array, indexed by
 120 * [cpu_number_map[smp_processor_id()]*2 + !in_interrupt()]; the array is of 
 121 * size [smp_num_cpus*2].  For v2.0, smp_num_cpus is effectively 1.  So,
 122 * confident of uniqueness, we modify counters even though we only
 123 * have a read lock (to read the counters, you need a write lock,
 124 * though).  */
 125
 126/* Why I didn't use straight locking... -- PR
 127 * 
 128 * The backchains can be separated out of the ip_chains structure, and
 129 * allocated as needed inside ip_fw_check().
 130 *
 131 * The counters, however, can't.  Trying to lock these means blocking
 132 * interrupts every time we want to access them.  This would suck HARD
 133 * performance-wise.  Not locking them leads to possible corruption,
 134 * made worse on 32-bit machines (counters are 64-bit).  */
 135
 136/*#define DEBUG_IP_FIREWALL*/
 137/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
 138/*#define DEBUG_IP_FIREWALL_USER*/
 139/*#define DEBUG_IP_FIREWALL_LOCKING*/
 140
/* Socket that ip_fw_domatch() hands to netlink_broadcast() when a rule
 * has IP_FW_F_NETLINK set.  Only present with CONFIG_IP_FIREWALL_NETLINK. */
#ifdef CONFIG_IP_FIREWALL_NETLINK
static struct sock *ipfwsk;
#endif

/* Slot index for the current execution context.  Each CPU gets two
 * consecutive slots; !in_interrupt() selects the second of the pair when
 * we are not in interrupt context.  Without __SMP__ there is a single
 * pair (slots 0 and 1). */
#ifdef __SMP__
#define SLOT_NUMBER() (cpu_number_map[smp_processor_id()]*2 + !in_interrupt())
#else
#define SLOT_NUMBER() (!in_interrupt())
#endif
/* Total number of slots: two per CPU. */
#define NUM_SLOTS (smp_num_cpus*2)

/* Sizes of a chain / rule including NUM_SLOTS entries for the trailing
 * zero-length reent[] / counters[] arrays of the structures. */
#define SIZEOF_STRUCT_IP_CHAIN (sizeof(struct ip_chain) \
                                + NUM_SLOTS*sizeof(struct ip_reent))
#define SIZEOF_STRUCT_IP_FW_KERNEL (sizeof(struct ip_fwkernel) \
                                    + NUM_SLOTS*sizeof(struct ip_counters))
 156
/* Lock-usage checking.  With DEBUG_IP_FIREWALL_LOCKING defined,
 * fwc_rlocks and fwc_wlocks are bitmasks with one bit per slot (bit
 * number SLOT_NUMBER()); the macros below set/clear the current slot's
 * bit and printk() a complaint when the bit is not in the expected
 * state.  Without it, all four macros expand to empty statements. */
#ifdef DEBUG_IP_FIREWALL_LOCKING
static unsigned int fwc_rlocks, fwc_wlocks;
/* Complain if the current slot's bit is already set in d, then set it. */
#define FWC_DEBUG_LOCK(d)                       \
do {                                            \
        FWC_DONT_HAVE_LOCK(d);                  \
        d |= (1 << SLOT_NUMBER());              \
} while (0)

/* Complain if the current slot's bit is not set in d, then clear it. */
#define FWC_DEBUG_UNLOCK(d)                     \
do {                                            \
        FWC_HAVE_LOCK(d);                       \
        d &= ~(1 << SLOT_NUMBER());             \
} while (0)

/* Print a diagnostic if the current slot's bit is set in d. */
#define FWC_DONT_HAVE_LOCK(d)                                   \
do {                                                            \
        if ((d) & (1 << SLOT_NUMBER()))                         \
                printk("%s:%i: Got lock on %i already!\n",      \
                       __FILE__, __LINE__, SLOT_NUMBER());      \
} while(0)

/* Print a diagnostic if the current slot's bit is NOT set in d. */
#define FWC_HAVE_LOCK(d)                                \
do {                                                    \
        if (!((d) & (1 << SLOT_NUMBER())))              \
        printk("%s:%i:No lock on %i!\n",                \
               __FILE__, __LINE__, SLOT_NUMBER());      \
} while (0)

#else
#define FWC_DEBUG_LOCK(d) do { } while(0)
#define FWC_DEBUG_UNLOCK(d) do { } while(0)
#define FWC_DONT_HAVE_LOCK(d) do { } while(0)
#define FWC_HAVE_LOCK(d) do { } while(0)
#endif /*DEBUG_IP_FIREWALL_LOCKING*/

/* Locking wrappers: each one performs the debug bookkeeping on the
 * matching mask (fwc_rlocks for readers, fwc_wlocks for writers) and then
 * the real rwlock operation on l.  The _IRQ variants use the
 * irqsave/irqrestore forms and take the flags variable f. */
#define FWC_READ_LOCK(l) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock(l); } while (0)
#define FWC_WRITE_LOCK(l) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock(l); } while (0)
#define FWC_READ_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock_irqsave(l,f); } while (0)
#define FWC_WRITE_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock_irqsave(l,f); } while (0)
#define FWC_READ_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock(l); } while (0)
#define FWC_WRITE_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock(l); } while (0)
#define FWC_READ_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock_irqrestore(l,f); } while (0)
#define FWC_WRITE_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock_irqrestore(l,f); } while (0)
 200
struct ip_chain;

/* One pair of traffic counters. */
struct ip_counters
{
        __u64 pcnt, bcnt;                       /* Packet and byte counters */
};

/* Kernel-side form of one firewall rule.  Allocated with
 * SIZEOF_STRUCT_IP_FW_KERNEL so that counters[] has NUM_SLOTS entries. */
struct ip_fwkernel
{
        struct ip_fw ipfw;              /* The rule specification itself */
        struct ip_fwkernel *next;       /* where to go next if current
                                         * rule doesn't match */
        struct ip_chain *branch;        /* which branch to jump to if
                                         * current rule matches */
        int simplebranch;               /* Use this if branch == NULL */
        struct ip_counters counters[0]; /* Actually several of these:
                                         * indexed by slot */
};

/* Per-slot traversal state of a chain: where to resume when the end of
 * the chain is reached, plus the counters ip_fw_check() bumps when the
 * chain's policy decides the packet. */
struct ip_reent 
{
        struct ip_chain *prevchain;     /* Pointer to referencing chain */
        struct ip_fwkernel *prevrule;   /* Pointer to referencing rule */
        struct ip_counters counters;
};

/* One chain of rules.  Allocated with SIZEOF_STRUCT_IP_CHAIN so that
 * reent[] has NUM_SLOTS entries. */
struct ip_chain
{
        ip_chainlabel label;        /* Defines the label for each block */
        struct ip_chain *next;      /* Pointer to next block */
        struct ip_fwkernel *chain;  /* Pointer to first rule in block */
        __u32 refcount;             /* Number of references to block */
        int policy;                 /* Default rule for chain.  Only *
                                     * used in built in chains */
        struct ip_reent reent[0];   /* Actually several of these:
                                     * indexed by slot */
};
 236
 237/*
 238 *      Implement IP packet firewall
 239 */
 240
/* dprintf(): packet-path debug output.  Forwards to printk() when
 * DEBUG_IP_FIREWALL is defined; otherwise expands to nothing, so its
 * arguments are not evaluated. */
#ifdef DEBUG_IP_FIREWALL 
#define dprintf(format, args...)  printk(format , ## args)
#else
#define dprintf(format, args...)
#endif

/* duprintf(): same scheme, controlled by DEBUG_IP_FIREWALL_USER. */
#ifdef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif
 252
/* Lock around ip_fw_chains linked list structure */
rwlock_t ip_fw_lock = RW_LOCK_UNLOCKED;

/* Head of linked list of fw rules */
static struct ip_chain *ip_fw_chains; 

/* The first, second and third entries of the chain list are used as the
 * input, forward and output chains.  These macros dereference the list
 * without checking for NULL.
 * NOTE(review): presumably the list always holds at least three chains
 * once initialised -- confirm against the init code. */
#define IP_FW_INPUT_CHAIN ip_fw_chains
#define IP_FW_FORWARD_CHAIN (ip_fw_chains->next)
#define IP_FW_OUTPUT_CHAIN (ip_fw_chains->next->next)
 262
 263/* Returns 1 if the port is matched by the range, 0 otherwise */
 264extern inline int port_match(__u16 min, __u16 max, __u16 port,
 265                             int frag, int invert)
 266{
 267        if (frag) /* Fragments fail ANY port test. */
 268                return (min == 0 && max == 0xFFFF);
 269        else return (port >= min && port <= max) ^ invert;
 270}
 271
 272/* Returns whether matches rule or not. */
 273static int ip_rule_match(struct ip_fwkernel *f, 
 274                         const char *ifname, 
 275                         struct iphdr *ip, 
 276                         char tcpsyn,
 277                         __u16 src_port, __u16 dst_port,
 278                         char isfrag)
 279{
 280#define FWINV(bool,invflg) ((bool) ^ !!(f->ipfw.fw_invflg & invflg))
 281        /*
 282         *      This is a bit simpler as we don't have to walk
 283         *      an interface chain as you do in BSD - same logic
 284         *      however.
 285         */
 286
 287        if (FWINV((ip->saddr&f->ipfw.fw_smsk.s_addr) != f->ipfw.fw_src.s_addr,
 288                  IP_FW_INV_SRCIP)
 289            || FWINV((ip->daddr&f->ipfw.fw_dmsk.s_addr)!=f->ipfw.fw_dst.s_addr,
 290                     IP_FW_INV_DSTIP)) {
 291                dprintf("Source or dest mismatch.\n");
 292
 293                dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
 294                        f->ipfw.fw_smsk.s_addr, f->ipfw.fw_src.s_addr,
 295                        f->ipfw.fw_invflg & IP_FW_INV_SRCIP ? " (INV)" : "");
 296                dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
 297                        f->ipfw.fw_dmsk.s_addr, f->ipfw.fw_dst.s_addr,
 298                        f->ipfw.fw_invflg & IP_FW_INV_DSTIP ? " (INV)" : "");
 299                return 0;
 300        }
 301
 302        /*
 303         *      Look for a VIA device match 
 304         */
 305        if (f->ipfw.fw_flg & IP_FW_F_WILDIF) {
 306            if (FWINV(strncmp(ifname, f->ipfw.fw_vianame,
 307                              strlen(f->ipfw.fw_vianame)) != 0,
 308                      IP_FW_INV_VIA)) { 
 309                dprintf("Wildcard interface mismatch.%s\n",
 310                        f->ipfw.fw_invflg & IP_FW_INV_VIA ? " (INV)" : "");
 311                return 0;       /* Mismatch */
 312            }
 313        }
 314        else if (FWINV(strcmp(ifname, f->ipfw.fw_vianame) != 0,
 315                       IP_FW_INV_VIA)) {
 316            dprintf("Interface name does not match.%s\n",
 317                    f->ipfw.fw_invflg & IP_FW_INV_VIA
 318                    ? " (INV)" : "");
 319            return 0;   /* Mismatch */
 320        }
 321
 322        /*
 323         *      Ok the chain addresses match.
 324         */
 325        
 326        /* If we have a fragment rule but the packet is not a fragment
 327         * the we return zero */
 328        if (FWINV((f->ipfw.fw_flg&IP_FW_F_FRAG) && !isfrag, IP_FW_INV_FRAG)) { 
 329                dprintf("Fragment rule but not fragment.%s\n",
 330                        f->ipfw.fw_invflg & IP_FW_INV_FRAG ? " (INV)" : "");
 331                return 0;
 332        }
 333
 334        /* Fragment NEVER passes a SYN test, even an inverted one. */
 335        if (FWINV((f->ipfw.fw_flg&IP_FW_F_TCPSYN) && !tcpsyn, IP_FW_INV_SYN)
 336            || (isfrag && (f->ipfw.fw_flg&IP_FW_F_TCPSYN))) {
 337                dprintf("Rule requires SYN and packet has no SYN.%s\n",
 338                        f->ipfw.fw_invflg & IP_FW_INV_SYN ? " (INV)" : "");
 339                return 0;
 340        }
 341
 342        if (f->ipfw.fw_proto) {
 343                /*
 344                 *      Specific firewall - packet's protocol
 345                 *      must match firewall's.
 346                 */
 347
 348                if (FWINV(ip->protocol!=f->ipfw.fw_proto, IP_FW_INV_PROTO)) {
 349                        dprintf("Packet protocol %hi does not match %hi.%s\n",
 350                                ip->protocol, f->ipfw.fw_proto,
 351                                f->ipfw.fw_invflg&IP_FW_INV_PROTO ? " (INV)":"");
 352                        return 0;
 353                }
 354
 355                /* For non TCP/UDP/ICMP, port range is max anyway. */
 356                if (!port_match(f->ipfw.fw_spts[0], 
 357                                f->ipfw.fw_spts[1],
 358                                src_port, isfrag, 
 359                                !!(f->ipfw.fw_invflg&IP_FW_INV_SRCPT))
 360                    || !port_match(f->ipfw.fw_dpts[0], 
 361                                   f->ipfw.fw_dpts[1],
 362                                   dst_port, isfrag, 
 363                                   !!(f->ipfw.fw_invflg
 364                                      &IP_FW_INV_DSTPT))) {
 365                    dprintf("Port match failed.\n");
 366                    return 0;
 367                }
 368        }
 369
 370        dprintf("Match succeeded.\n");
 371        return 1;
 372}
 373
 374static const char *branchname(struct ip_chain *branch,int simplebranch)
 375{
 376        if (branch)
 377                return branch->label;
 378        switch (simplebranch)
 379        {
 380        case FW_BLOCK: return IP_FW_LABEL_BLOCK;
 381        case FW_ACCEPT: return IP_FW_LABEL_ACCEPT;
 382        case FW_REJECT: return IP_FW_LABEL_REJECT;
 383        case FW_REDIRECT: return IP_FW_LABEL_REDIRECT;
 384        case FW_MASQUERADE: return IP_FW_LABEL_MASQUERADE;
 385        case FW_SKIP: return "-";
 386        case FW_SKIP+1: return IP_FW_LABEL_RETURN;
 387        default:
 388                return "UNKNOWN";
 389        }
 390}
 391
 392/*
 393 * VERY ugly piece of code which actually
 394 * makes kernel printf for matching packets...
 395 */
 396static void dump_packet(const struct iphdr *ip, 
 397                        const char *ifname,
 398                        struct ip_fwkernel *f, 
 399                        const ip_chainlabel chainlabel,
 400                        __u16 src_port, 
 401                        __u16 dst_port)
 402{
 403        __u32 *opt = (__u32 *) (ip + 1);
 404        int opti;
 405        
 406        if (f)
 407        {
 408                printk(KERN_INFO "Packet log: %s ",chainlabel);
 409                
 410                printk("%s ",branchname(f->branch,f->simplebranch));
 411                if (f->simplebranch==FW_REDIRECT)
 412                        printk("%d ",f->ipfw.fw_redirpt);
 413        }
 414
 415        printk("%s PROTO=%d %ld.%ld.%ld.%ld:%hu %ld.%ld.%ld.%ld:%hu"
 416               " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
 417               ifname, ip->protocol,
 418               (ntohl(ip->saddr)>>24)&0xFF,
 419               (ntohl(ip->saddr)>>16)&0xFF,
 420               (ntohl(ip->saddr)>>8)&0xFF,
 421               (ntohl(ip->saddr))&0xFF,
 422               src_port,
 423               (ntohl(ip->daddr)>>24)&0xFF,
 424               (ntohl(ip->daddr)>>16)&0xFF,
 425               (ntohl(ip->daddr)>>8)&0xFF,
 426               (ntohl(ip->daddr))&0xFF,
 427               dst_port,
 428               ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
 429               ntohs(ip->frag_off), ip->ttl);
 430
 431        for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
 432                printk(" O=0x%8.8X", *opt++);
 433        printk("\n");
 434}
 435
 436/* function for checking chain labels for user space. */
 437static int check_label(ip_chainlabel label)
 438{
 439        unsigned int i;
 440        /* strlen must be < IP_FW_MAX_LABEL_LENGTH. */
 441        for (i = 0; i < IP_FW_MAX_LABEL_LENGTH + 1; i++)
 442                if (label[i] == '\0') return 1;
 443
 444        return 0;
 445}       
 446
 447/*      This function returns a pointer to the first chain with a label
 448 *      that matches the one given. */
 449static struct ip_chain *find_label(ip_chainlabel label)
 450{
 451        struct ip_chain *tmp;
 452        FWC_HAVE_LOCK(fwc_rlocks | fwc_wlocks);
 453        for (tmp = ip_fw_chains; tmp; tmp = tmp->next)
 454                if (strcmp(tmp->label,label) == 0)
 455                        break;
 456        return tmp;
 457}
 458
 459/* This function returns a boolean which when true sets answer to one
 460   of the FW_*. */
 461static int find_special(ip_chainlabel label, int *answer)
 462{
 463        if (label[0] == '\0') {
 464                *answer = FW_SKIP; /* => pass-through rule */
 465                return 1;
 466        } else if (strcmp(label,IP_FW_LABEL_ACCEPT) == 0) {
 467                *answer = FW_ACCEPT;
 468                return 1;
 469        } else if (strcmp(label,IP_FW_LABEL_BLOCK) == 0) {
 470                *answer = FW_BLOCK;
 471                return 1;
 472        } else if (strcmp(label,IP_FW_LABEL_REJECT) == 0) {
 473                *answer = FW_REJECT;
 474                return 1;
 475#ifdef CONFIG_IP_TRANSPARENT_PROXY
 476        } else if (strcmp(label,IP_FW_LABEL_REDIRECT) == 0) {
 477                *answer = FW_REDIRECT;
 478                return 1;
 479#endif
 480#ifdef CONFIG_IP_MASQUERADE
 481        } else if (strcmp(label,IP_FW_LABEL_MASQUERADE) == 0) {
 482                *answer = FW_MASQUERADE;
 483                return 1;
 484#endif
 485        } else if (strcmp(label, IP_FW_LABEL_RETURN) == 0) {
 486                *answer = FW_SKIP+1;
 487                return 1;
 488        } else {
 489                return 0;
 490        }
 491}
 492
 493/* This function cleans up the prevchain and prevrule.  If the verbose
 494 * flag is set then he names of the chains will be printed as it
 495 * cleans up.  */
 496static void cleanup(struct ip_chain *chain, 
 497                    const int verbose, 
 498                    unsigned int slot)
 499{ 
 500        struct ip_chain *tmpchain = chain->reent[slot].prevchain;
 501        if (verbose)
 502                printk(KERN_ERR "Chain backtrace: ");
 503        while (tmpchain) {
 504                if (verbose)
 505                        printk("%s<-",chain->label);
 506                chain->reent[slot].prevchain = NULL;
 507                chain = tmpchain;
 508                tmpchain = chain->reent[slot].prevchain;
 509        }
 510        if (verbose)
 511                printk("%s\n",chain->label);
 512}
 513
 514static inline int
 515ip_fw_domatch(struct ip_fwkernel *f,
 516              struct iphdr *ip, 
 517              const char *rif,
 518              const ip_chainlabel label,
 519              struct sk_buff *skb,
 520              unsigned int slot,
 521              __u16 src_port, __u16 dst_port)
 522{
 523        f->counters[slot].bcnt+=ntohs(ip->tot_len);
 524        f->counters[slot].pcnt++;
 525        if (f->ipfw.fw_flg & IP_FW_F_PRN) {
 526                dump_packet(ip,rif,f,label,src_port,dst_port);
 527        }
 528        ip->tos = (ip->tos & f->ipfw.fw_tosand) ^ f->ipfw.fw_tosxor;
 529
 530/* This functionality is useless in stock 2.0.x series, but we don't
 531 * discard the mark thing altogether, to avoid breaking ipchains (and,
 532 * more importantly, the ipfwadm wrapper) --PR */
 533        if (f->ipfw.fw_flg & IP_FW_F_MARKABS)
 534                skb->fwmark = f->ipfw.fw_mark;
 535        else
 536                skb->fwmark+=f->ipfw.fw_mark;
 537#ifdef CONFIG_IP_FIREWALL_NETLINK
 538        if (f->ipfw.fw_flg & IP_FW_F_NETLINK) {
 539                size_t len = min(f->ipfw.fw_outputsize, ntohs(ip->tot_len)) 
 540                        + sizeof(__u32) + sizeof(skb->fwmark) + IFNAMSIZ;
 541                struct sk_buff *outskb=alloc_skb(len, GFP_ATOMIC);
 542
 543                duprintf("Sending packet out NETLINK (length = %u).\n", 
 544                         (unsigned int)len);
 545                if (outskb) {
 546                        /* Prepend length, mark & interface */
 547                        skb_put(outskb, len);
 548                        *((__u32 *)outskb->data) = (__u32)len;
 549                        *((__u32 *)(outskb->data+sizeof(__u32))) = skb->fwmark;
 550                        strcpy(outskb->data+sizeof(__u32)*2, rif);
 551                        memcpy(outskb->data+sizeof(__u32)*2+IFNAMSIZ, ip, 
 552                               len-(sizeof(__u32)*2+IFNAMSIZ));
 553                        netlink_broadcast(ipfwsk, outskb, 0, ~0, GFP_KERNEL);
 554                }
 555                else {
 556                        if (net_ratelimit())
 557                                printk(KERN_WARNING "ip_fw: packet drop due to "
 558                                       "netlink failure\n");
 559                        return 0;
 560                }
 561        }
 562#endif
 563        return 1;
 564}
 565
 566/*
 567 *      Returns one of the generic firewall policies, like FW_ACCEPT.
 568 *
 569 *      The testing is either false for normal firewall mode or true for
 570 *      user checking mode (counters are not updated, TOS & mark not done).
 571 */
 572static int 
 573ip_fw_check(struct iphdr *ip, 
 574            const char *rif,
 575            __u16 *redirport,
 576            struct ip_chain *chain,
 577            struct sk_buff *skb,
 578            unsigned int slot,
 579            int testing)
 580{
 581        struct tcphdr           *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
 582        struct udphdr           *udp=(struct udphdr *)((__u32 *)ip+ip->ihl);
 583        struct icmphdr          *icmp=(struct icmphdr *)((__u32 *)ip+ip->ihl);
 584        __u32                   src, dst;
 585        __u16                   src_port = 0xFFFF, dst_port = 0xFFFF;
 586        char                    tcpsyn=0;
 587        __u16                   offset;
 588        unsigned char           oldtos;
 589        struct ip_fwkernel      *f;     
 590        int                     ret = FW_SKIP+2;
 591
 592        /* We handle fragments by dealing with the first fragment as
 593         * if it was a normal packet.  All other fragments are treated
 594         * normally, except that they will NEVER match rules that ask
 595         * things we don't know, ie. tcp syn flag or ports).  If the
 596         * rule is also a fragment-specific rule, non-fragments won't
 597         * match it. */
 598
 599        offset = ntohs(ip->frag_off) & IP_OFFSET;
 600        
 601        /*
 602         *      Don't allow a fragment of TCP 8 bytes in. Nobody
 603         *      normal causes this. Its a cracker trying to break
 604         *      in by doing a flag overwrite to pass the direction
 605         *      checks.
 606         */
 607         
 608        if (offset == 1 && ip->protocol == IPPROTO_TCP) {
 609                if (!testing && net_ratelimit()) {
 610                        printk("Suspect TCP fragment.\n");
 611                        dump_packet(ip,rif,NULL,NULL,0,0);
 612                }
 613                return FW_BLOCK;
 614        }
 615
 616        /* If we can't investigate ports, treat as fragment.  It's
 617         * either a trucated whole packet, or a truncated first
 618         * fragment, or a TCP first fragment of length 8-15, in which
 619         * case the above rule stops reassembly.
 620         */
 621        if (offset == 0) {
 622                unsigned int size_req;
 623                switch (ip->protocol) {
 624                case IPPROTO_TCP:
 625                        /* Don't care about things past flags word */
 626                        size_req = 16; 
 627                        break;
 628
 629                case IPPROTO_UDP:
 630                case IPPROTO_ICMP:
 631                        size_req = 8;
 632                        break;
 633
 634                default:
 635                        size_req = 0;
 636                }
 637                offset = (ntohs(ip->tot_len) < (ip->ihl<<2)+size_req);
 638        }
 639
 640        src = ip->saddr;
 641        dst = ip->daddr;
 642        oldtos = ip->tos;
 643        
 644        /*
 645         *      If we got interface from which packet came
 646         *      we can use the address directly. Linux 2.1 now uses address
 647         *      chains per device too, but unlike BSD we first check if the
 648         *      incoming packet matches a device address and the routing
 649         *      table before calling the firewall. 
 650         */
 651         
 652        dprintf("Packet ");
 653        switch(ip->protocol) 
 654        {
 655                case IPPROTO_TCP:
 656                        dprintf("TCP ");
 657                        if (!offset) {
 658                                src_port=ntohs(tcp->source);
 659                                dst_port=ntohs(tcp->dest);
 660
 661                                /* Connection initilisation can only
 662                                 * be made when the syn bit is set and
 663                                 * neither of the ack or reset is
 664                                 * set. */
 665                                if(tcp->syn && !(tcp->ack || tcp->rst))
 666                                        tcpsyn=1;
 667                        }
 668                        break;
 669                case IPPROTO_UDP:
 670                        dprintf("UDP ");
 671                        if (!offset) {
 672                                src_port=ntohs(udp->source);
 673                                dst_port=ntohs(udp->dest);
 674                        }
 675                        break;
 676                case IPPROTO_ICMP:
 677                        if (!offset) {
 678                                src_port=(__u16)icmp->type;
 679                                dst_port=(__u16)icmp->code;
 680                        }
 681                        dprintf("ICMP ");
 682                        break;
 683                default:
 684                        dprintf("p=%d ",ip->protocol);
 685                        break;
 686        }
 687#ifdef DEBUG_IP_FIREWALL
 688        print_ip(ip->saddr);
 689        
 690        if (offset) 
 691                dprintf(":fragment (%i) ", ((int)offset)<<2);
 692        else if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP 
 693                 || ip->protocol==IPPROTO_ICMP)
 694                dprintf(":%hu:%hu", src_port, dst_port);
 695        dprintf("\n");
 696#endif
 697
 698        if (!testing) FWC_READ_LOCK(&ip_fw_lock);
 699        else FWC_HAVE_LOCK(fwc_rlocks);
 700
 701        f = chain->chain;
 702        do {
 703                for (; f; f = f->next) {
 704                        if (ip_rule_match(f,rif,ip,
 705                                          tcpsyn,src_port,dst_port,offset)) {
 706                                if (!testing
 707                                    && !ip_fw_domatch(f, ip, rif, chain->label,
 708                                                      skb, slot, 
 709                                                      src_port, dst_port)) {
 710                                        ret = FW_BLOCK;
 711                                        goto out;
 712                                }
 713                                break;
 714                        }
 715                }
 716                if (f) {
 717                        if (f->branch) {
 718                                /* Do sanity check to see if we have
 719                                 * already set prevchain and if so we
 720                                 * must be in a loop */
 721                                if (f->branch->reent[slot].prevchain) {
 722                                        if (!testing) {
 723                                                printk(KERN_ERR 
 724                                                       "IP firewall: "
 725                                                       "Loop detected "
 726                                                       "at `%s'.\n",
 727                                                       f->branch->label);
 728                                                cleanup(chain, 1, slot);
 729                                                ret = FW_BLOCK;
 730                                        } else {
 731                                                cleanup(chain, 0, slot);
 732                                                ret = FW_SKIP+1;
 733                                        }
 734                                }
 735                                else {
 736                                        f->branch->reent[slot].prevchain 
 737                                                = chain;
 738                                        f->branch->reent[slot].prevrule 
 739                                                = f->next;
 740                                        chain = f->branch;
 741                                        f = chain->chain;
 742                                }
 743                        }
 744                        else if (f->simplebranch == FW_SKIP) 
 745                                f = f->next;
 746                        else if (f->simplebranch == FW_SKIP+1) {
 747                                /* Just like falling off the chain */
 748                                goto fall_off_chain;
 749                        }
 750                        else {  
 751                                cleanup(chain, 0, slot);
 752                                ret = f->simplebranch;
 753                        }
 754                } /* f == NULL */
 755                else {
 756                fall_off_chain:
 757                        if (chain->reent[slot].prevchain) {
 758                                struct ip_chain *tmp = chain;
 759                                f = chain->reent[slot].prevrule;
 760                                chain = chain->reent[slot].prevchain;
 761                                tmp->reent[slot].prevchain = NULL;
 762                        }
 763                        else {
 764                                ret = chain->policy;
 765                                if (!testing) {
 766                                        chain->reent[slot].counters.pcnt++;
 767                                        chain->reent[slot].counters.bcnt
 768                                                += ntohs(ip->tot_len);
 769                                }
 770                        }
 771                }
 772        } while (ret == FW_SKIP+2);
 773
 774 out:
 775        if (!testing) FWC_READ_UNLOCK(&ip_fw_lock);
 776
 777        /* Recalculate checksum if not going to reject, and TOS changed. */
 778        if (ip->tos != oldtos 
 779            && ret != FW_REJECT && ret != FW_BLOCK 
 780            && !testing)
 781                ip_send_check(ip);
 782
 783#ifdef CONFIG_IP_TRANSPARENT_PROXY
 784        if (ret == FW_REDIRECT && redirport) {
 785                if ((*redirport = htons(f->ipfw.fw_redirpt)) == 0) {
 786                        /* Wildcard redirection.
 787                         * Note that redirport will become
 788                         * 0xFFFF for non-TCP/UDP packets.
 789                         */
 790                        *redirport = htons(dst_port);
 791                }
 792        }
 793#endif
 794
 795#ifdef DEBUG_ALLOW_ALL
 796        return (testing ? ret : FW_ACCEPT);
 797#else
 798        return ret;
 799#endif
 800}
 801
 802/* Must have write lock & interrupts off for any of these */
 803
 804/* This function sets all the byte counters in a chain to zero.  The
 805 * input is a pointer to the chain required for zeroing */
 806static int zero_fw_chain(struct ip_chain *chainptr)
 807{
 808        struct ip_fwkernel *i;
 809
 810        FWC_HAVE_LOCK(fwc_wlocks);
 811        for (i = chainptr->chain; i; i = i->next)
 812                memset(i->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS);
 813        return 0;
 814}
 815
 816static int clear_fw_chain(struct ip_chain *chainptr)
 817{
 818        struct ip_fwkernel *i= chainptr->chain;
 819
 820        FWC_HAVE_LOCK(fwc_wlocks);
 821        chainptr->chain=NULL;
 822
 823        while (i) {
 824                struct ip_fwkernel *tmp = i->next;
 825                if (i->branch)
 826                        i->branch->refcount--;
 827                kfree(i);
 828                i = tmp;
 829        }
 830        return 0;
 831}
 832
 833static int replace_in_chain(struct ip_chain *chainptr, 
 834                            struct ip_fwkernel *frwl,
 835                            __u32 position)
 836{
 837        struct ip_fwkernel *f = chainptr->chain;
 838        
 839        FWC_HAVE_LOCK(fwc_wlocks);
 840
 841        while (--position && f != NULL) f = f->next;
 842        if (f == NULL)
 843                return EINVAL;
 844                
 845        if (f->branch) f->branch->refcount--;
 846        if (frwl->branch) frwl->branch->refcount++;
 847
 848        frwl->next = f->next;
 849        memcpy(f,frwl,sizeof(struct ip_fwkernel));
 850        kfree(frwl);
 851        return 0;
 852}
 853
 854static int append_to_chain(struct ip_chain *chainptr, struct ip_fwkernel *rule)
 855{
 856        struct ip_fwkernel *i;
 857
 858        FWC_HAVE_LOCK(fwc_wlocks);
 859        /* Special case if no rules already present */
 860        if (chainptr->chain == NULL) {
 861
 862                /* If pointer writes are atomic then turning off
 863                 * interupts is not necessary. */
 864                chainptr->chain = rule;
 865                if (rule->branch) rule->branch->refcount++;
 866                return 0;
 867        }
 868
 869        /* Find the rule before the end of the chain */
 870        for (i = chainptr->chain; i->next; i = i->next); 
 871        i->next = rule;
 872        if (rule->branch) rule->branch->refcount++;
 873        return 0;
 874}
 875
 876/* This function inserts a rule at the position of position in the
 877 * chain refenced by chainptr.  If position is 1 then this rule will
 878 * become the new rule one. */
 879static int insert_in_chain(struct ip_chain *chainptr, 
 880                           struct ip_fwkernel *frwl,
 881                           __u32 position)
 882{
 883        struct ip_fwkernel *f = chainptr->chain;
 884        
 885        FWC_HAVE_LOCK(fwc_wlocks);
 886        /* special case if the position is number 1 */
 887        if (position == 1) {
 888                frwl->next = chainptr->chain;
 889                if (frwl->branch) frwl->branch->refcount++; 
 890                chainptr->chain = frwl;
 891                return 0;
 892        }
 893        position--;
 894        while (--position && f != NULL) f = f->next;
 895        if (f == NULL)
 896                return EINVAL;
 897        if (frwl->branch) frwl->branch->refcount++;
 898        frwl->next = f->next;
 899        
 900        f->next = frwl;
 901        return 0;
 902}
 903
 904/* This function deletes the a rule from a given rulenum and chain.
 905 * With rulenum = 1 is the first rule is deleted. */
 906
 907static int del_num_from_chain(struct ip_chain *chainptr, __u32 rulenum)
 908{
 909        struct ip_fwkernel *i=chainptr->chain,*tmp;
 910        
 911        FWC_HAVE_LOCK(fwc_wlocks);
 912
 913        if (!chainptr->chain)
 914                return ENOENT;
 915
 916        /* Need a special case for the first rule */
 917        if (rulenum == 1) {
 918                /* store temp to allow for freeing up of memory */
 919                tmp = chainptr->chain;
 920                if (chainptr->chain->branch) chainptr->chain->branch->refcount--;
 921                chainptr->chain = chainptr->chain->next;
 922                kfree(tmp); /* free memory that is now unused */
 923        } else {  
 924                rulenum--;
 925                while (--rulenum && i->next ) i = i->next;
 926                if (!i->next)
 927                        return ENOENT;
 928                tmp = i->next;
 929                if (i->next->branch)
 930                        i->next->branch->refcount--;
 931                i->next = i->next->next;
 932                kfree(tmp);
 933        }
 934        return 0;
 935}
 936
 937
 938/* This function deletes the a rule from a given rule and chain.
 939 * The rule that is deleted is the first occursance of that rule. */
 940static int del_rule_from_chain(struct ip_chain *chainptr, 
 941                               struct ip_fwkernel *frwl)
 942{
 943        struct ip_fwkernel *ltmp,*ftmp = chainptr->chain ;
 944        int was_found;
 945
 946        FWC_HAVE_LOCK(fwc_wlocks);
 947        
 948        /* Sure, we should compare marks, but since the `ipfwadm'
 949         * script uses it for an unholy hack... well, life is easier
 950         * this way.  We also mask it out of the flags word. --PR */
 951        for (ltmp=NULL, was_found=0; 
 952             !was_found && ftmp != NULL;
 953             ltmp = ftmp,ftmp = ftmp->next) {   
 954                if (ftmp->ipfw.fw_src.s_addr!=frwl->ipfw.fw_src.s_addr 
 955                    || ftmp->ipfw.fw_dst.s_addr!=frwl->ipfw.fw_dst.s_addr
 956                    || ftmp->ipfw.fw_smsk.s_addr!=frwl->ipfw.fw_smsk.s_addr
 957                    || ftmp->ipfw.fw_dmsk.s_addr!=frwl->ipfw.fw_dmsk.s_addr
 958#if 0
 959                    || ftmp->ipfw.fw_flg!=frwl->ipfw.fw_flg
 960#else
 961                    || ((ftmp->ipfw.fw_flg & ~IP_FW_F_MARKABS) 
 962                        != (frwl->ipfw.fw_flg & ~IP_FW_F_MARKABS))
 963#endif
 964                    || ftmp->ipfw.fw_invflg!=frwl->ipfw.fw_invflg
 965                    || ftmp->ipfw.fw_proto!=frwl->ipfw.fw_proto
 966#if 0
 967                    || ftmp->ipfw.fw_mark!=frwl->ipfw.fw_mark
 968#endif
 969                    || ftmp->ipfw.fw_redirpt!=frwl->ipfw.fw_redirpt
 970                    || ftmp->ipfw.fw_spts[0]!=frwl->ipfw.fw_spts[0]
 971                    || ftmp->ipfw.fw_spts[1]!=frwl->ipfw.fw_spts[1]
 972                    || ftmp->ipfw.fw_dpts[0]!=frwl->ipfw.fw_dpts[0]
 973                    || ftmp->ipfw.fw_dpts[1]!=frwl->ipfw.fw_dpts[1]
 974                    || ftmp->ipfw.fw_outputsize!=frwl->ipfw.fw_outputsize) {
 975                        duprintf("del_rule_from_chain: mismatch:"
 976                                 "src:%u/%u dst:%u/%u smsk:%u/%u dmsk:%u/%u "
 977                                 "flg:%hX/%hX invflg:%hX/%hX proto:%u/%u "
 978                                 "mark:%u/%u "
 979                                 "ports:%hu-%hu/%hu-%hu %hu-%hu/%hu-%hu "
 980                                 "outputsize:%hu-%hu\n",
 981                                 ftmp->ipfw.fw_src.s_addr,
 982                                 frwl->ipfw.fw_src.s_addr,
 983                                 ftmp->ipfw.fw_dst.s_addr,
 984                                 frwl->ipfw.fw_dst.s_addr,
 985                                 ftmp->ipfw.fw_smsk.s_addr,
 986                                 frwl->ipfw.fw_smsk.s_addr,
 987                                 ftmp->ipfw.fw_dmsk.s_addr,
 988                                 frwl->ipfw.fw_dmsk.s_addr,
 989                                 ftmp->ipfw.fw_flg,
 990                                 frwl->ipfw.fw_flg,
 991                                 ftmp->ipfw.fw_invflg,
 992                                 frwl->ipfw.fw_invflg,
 993                                 ftmp->ipfw.fw_proto,
 994                                 frwl->ipfw.fw_proto,
 995                                 ftmp->ipfw.fw_mark,
 996                                 frwl->ipfw.fw_mark,
 997                                 ftmp->ipfw.fw_spts[0],
 998                                 frwl->ipfw.fw_spts[0],
 999                                 ftmp->ipfw.fw_spts[1],
1000                                 frwl->ipfw.fw_spts[1],
1001                                 ftmp->ipfw.fw_dpts[0],
1002                                 frwl->ipfw.fw_dpts[0],
1003                                 ftmp->ipfw.fw_dpts[1],
1004                                 frwl->ipfw.fw_dpts[1],
1005                                 ftmp->ipfw.fw_outputsize,
1006                                 frwl->ipfw.fw_outputsize);
1007                        continue;
1008                }
1009
1010                if (strncmp(ftmp->ipfw.fw_vianame, 
1011                            frwl->ipfw.fw_vianame, 
1012                            IFNAMSIZ)) {
1013                        duprintf("del_rule_from_chain: if mismatch: %s/%s\n",
1014                                 ftmp->ipfw.fw_vianame, 
1015                                 frwl->ipfw.fw_vianame);
1016                        continue;
1017                }
1018                if (ftmp->branch != frwl->branch) {
1019                        duprintf("del_rule_from_chain: branch mismatch: "
1020                                 "%s/%s\n", 
1021                                 ftmp->branch?ftmp->branch->label:"(null)",
1022                                 frwl->branch?frwl->branch->label:"(null)");
1023                        continue;
1024                }
1025                if (ftmp->branch == NULL 
1026                    && ftmp->simplebranch != frwl->simplebranch) {
1027                        duprintf("del_rule_from_chain: simplebranch mismatch: "
1028                                 "%i/%i\n", 
1029                                 ftmp->simplebranch, frwl->simplebranch);
1030                        continue;
1031                }
1032                was_found = 1;
1033                if (ftmp->branch)
1034                        ftmp->branch->refcount--;
1035                if (ltmp)
1036                        ltmp->next = ftmp->next;
1037                else
1038                        chainptr->chain = ftmp->next; 
1039                kfree(ftmp);
1040                break;
1041        }
1042        
1043        if (was_found)
1044                return 0;
1045        else {
1046                duprintf("del_rule_from_chain: no matching rule found\n");
1047                return EINVAL;
1048        }
1049}
1050
1051/* This function takes the label of a chain and deletes the first
1052 * chain with that name.  No special cases required for the built in
1053 * chains as they have their refcount initilised to 1 so that they are
1054 * never deleted.  */
1055static int del_chain(ip_chainlabel label) 
1056{
1057        struct ip_chain *tmp,*tmp2;
1058
1059        FWC_HAVE_LOCK(fwc_wlocks);
1060        /* Corner case: return EBUSY not ENOENT for first elem ("input") */
1061        if (strcmp(label, ip_fw_chains->label) == 0) 
1062                return EBUSY;
1063
1064        for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next)
1065                if(strcmp(tmp->next->label,label) == 0)
1066                        break;
1067
1068        tmp2 = tmp->next;
1069        if (!tmp2)
1070                return ENOENT;
1071
1072        if (tmp2->refcount)
1073                return EBUSY;
1074
1075        if (tmp2->chain)
1076                return ENOTEMPTY;
1077        
1078        tmp->next = tmp2->next;
1079        kfree(tmp2);
1080        return 0;
1081}
1082
1083/* This is a function to initilise a chain.  Built in rules start with
1084 * refcount = 1 so that they cannot be deleted.  User defined rules
1085 * start with refcount = 0 so they can be deleted. */
1086static struct ip_chain *ip_init_chain(ip_chainlabel name, 
1087                                      __u32 ref, 
1088                                      int policy)
1089{
1090        unsigned int i;
1091        struct ip_chain *label 
1092                = kmalloc(SIZEOF_STRUCT_IP_CHAIN, GFP_KERNEL);
1093        if (label == NULL)
1094                panic("Can't kmalloc for firewall chains.\n");
1095        strcpy(label->label,name);
1096        label->next = NULL;
1097        label->chain = NULL;
1098        label->refcount = ref;
1099        label->policy = policy;
1100        for (i = 0; i < smp_num_cpus*2; i++) {
1101                label->reent[i].counters.pcnt = label->reent[i].counters.bcnt 
1102                        = 0;
1103                label->reent[i].prevchain = NULL;
1104                label->reent[i].prevrule = NULL;
1105        }
1106
1107        return label;
1108}
1109
1110/* This is a function for reating a new chain.  The chains is not
1111 * created if a chain of the same name already exists */
1112static int create_chain(ip_chainlabel label) 
1113{
1114        struct ip_chain *tmp;
1115
1116        if (!check_label(label))
1117                return EINVAL;
1118
1119        FWC_HAVE_LOCK(fwc_wlocks);
1120        for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next)
1121                if (strcmp(tmp->label,label) == 0)
1122                        return EEXIST;
1123        
1124        if (strcmp(tmp->label,label) == 0)
1125                return EEXIST;
1126        
1127        tmp->next = ip_init_chain(label, 0, FW_SKIP); /* refcount is
1128                                              * zero since this is a
1129                                              * user defined chain *
1130                                              * and therefore can be
1131                                              * deleted */
1132        return 0;
1133}
1134
1135/* This function simply changes the policy on one of the built in
1136 * chains.  checking must be done before this is call to ensure that
1137 * chainptr is pointing to one of the three possible chains */
1138static int change_policy(struct ip_chain *chainptr, int policy)
1139{ 
1140        FWC_HAVE_LOCK(fwc_wlocks);
1141        chainptr->policy = policy;
1142        return 0;
1143}
1144
1145/* This function takes an ip_fwuser and converts it to a ip_fwkernel.  It also
1146 * performs some checks in the structure. */
1147static struct ip_fwkernel *convert_ipfw(struct ip_fwuser *fwuser, int *errno)
1148{
1149        struct ip_fwkernel *fwkern;
1150
1151        if ( (fwuser->ipfw.fw_flg & ~IP_FW_F_MASK) != 0 ) {
1152                duprintf("convert_ipfw: undefined flag bits set (flags=%x)\n",
1153                         fwuser->ipfw.fw_flg);
1154                *errno = EINVAL;
1155                return NULL;
1156        }
1157
1158#ifdef DEBUG_IP_FIREWALL_USER
1159        /* These are sanity checks that don't really matter.
1160         * We can get rid of these once testing is complete. 
1161         */
1162        if ((fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN)
1163            && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO)
1164                || fwuser->ipfw.fw_proto != IPPROTO_TCP)) {
1165                duprintf("convert_ipfw: TCP SYN flag set but proto != TCP!\n");
1166                *errno = EINVAL;
1167                return NULL;
1168        }
1169
1170        if (strcmp(fwuser->label, IP_FW_LABEL_REDIRECT) != 0
1171            && fwuser->ipfw.fw_redirpt != 0) {
1172                duprintf("convert_ipfw: Target not REDIR but redirpt != 0!\n");
1173                *errno = EINVAL;
1174                return NULL;
1175        }
1176
1177        if ((!(fwuser->ipfw.fw_flg & IP_FW_F_FRAG) 
1178             && (fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG))
1179            || (!(fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN) 
1180                && (fwuser->ipfw.fw_invflg & IP_FW_INV_SYN))) {
1181                duprintf("convert_ipfw: Can't have INV flag if flag unset!\n");
1182                *errno = EINVAL;
1183                return NULL;
1184        }
1185
1186        if (((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCPT) 
1187             && fwuser->ipfw.fw_spts[0] == 0 
1188             && fwuser->ipfw.fw_spts[1] == 0xFFFF)
1189            || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTPT) 
1190                && fwuser->ipfw.fw_dpts[0] == 0 
1191                && fwuser->ipfw.fw_dpts[1] == 0xFFFF)
1192            || ((fwuser->ipfw.fw_invflg & IP_FW_INV_VIA) 
1193                && (fwuser->ipfw.fw_vianame)[0] == '\0')
1194            || ((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCIP)
1195                && fwuser->ipfw.fw_smsk.s_addr == 0)
1196            || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTIP)
1197                && fwuser->ipfw.fw_dmsk.s_addr == 0)) {
1198                duprintf("convert_ipfw: INV flag makes rule unmatchable!\n");
1199                *errno = EINVAL;
1200                return NULL;
1201        }
1202
1203        if ((fwuser->ipfw.fw_flg & IP_FW_F_FRAG)
1204            && !(fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG)
1205            && (fwuser->ipfw.fw_spts[0] != 0
1206                || fwuser->ipfw.fw_spts[1] != 0xFFFF
1207                || fwuser->ipfw.fw_dpts[0] != 0
1208                || fwuser->ipfw.fw_dpts[1] != 0xFFFF
1209                || (fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN))) {
1210                duprintf("convert_ipfw: Can't test ports or SYN with frag!\n");
1211                *errno = EINVAL;
1212                return NULL;
1213        }
1214#endif
1215
1216        if ((fwuser->ipfw.fw_spts[0] != 0
1217             || fwuser->ipfw.fw_spts[1] != 0xFFFF
1218             || fwuser->ipfw.fw_dpts[0] != 0
1219             || fwuser->ipfw.fw_dpts[1] != 0xFFFF)
1220            && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO)
1221                || (fwuser->ipfw.fw_proto != IPPROTO_TCP
1222                    && fwuser->ipfw.fw_proto != IPPROTO_UDP
1223                    && fwuser->ipfw.fw_proto != IPPROTO_ICMP))) {
1224                duprintf("convert_ipfw: Can only test ports for TCP/UDP/ICMP!\n");
1225                *errno = EINVAL;
1226                return NULL;
1227        }
1228
1229        fwkern = kmalloc(SIZEOF_STRUCT_IP_FW_KERNEL, GFP_KERNEL);
1230        if (!fwkern) {
1231                duprintf("convert_ipfw: kmalloc failed!\n");
1232                *errno = ENOMEM;
1233                return NULL;
1234        }
1235        memcpy(&fwkern->ipfw,&fwuser->ipfw,sizeof(struct ip_fw));
1236
1237        if (!find_special(fwuser->label, &fwkern->simplebranch)) {
1238                fwkern->branch = find_label(fwuser->label);
1239                if (!fwkern->branch) { 
1240                        duprintf("convert_ipfw: chain doesn't exist `%s'.\n",
1241                                 fwuser->label);
1242                        kfree(fwkern);
1243                        *errno = ENOENT;
1244                        return NULL;
1245                } else if (fwkern->branch == IP_FW_INPUT_CHAIN 
1246                           || fwkern->branch == IP_FW_FORWARD_CHAIN
1247                           || fwkern->branch == IP_FW_OUTPUT_CHAIN) {
1248                        duprintf("convert_ipfw: Can't branch to builtin chain `%s'.\n",
1249                                 fwuser->label);
1250                        kfree(fwkern);
1251                        *errno = ENOENT;
1252                        return NULL; 
1253                }
1254        } else 
1255                fwkern->branch = NULL;
1256        memset(fwkern->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS);
1257
1258        /* Handle empty vianame by making it a wildcard */
1259        if ((fwkern->ipfw.fw_vianame)[0] == '\0')
1260            fwkern->ipfw.fw_flg |= IP_FW_F_WILDIF;
1261
1262        fwkern->next = NULL;
1263        return fwkern;
1264}
1265
1266int ip_fw_ctl(int cmd, void *m, int len)
1267{
1268        int ret;
1269        struct ip_chain *chain;
1270        unsigned long flags;
1271
1272        FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
1273
1274        switch (cmd) {
1275        case IP_FW_FLUSH:
1276                if (len != sizeof(ip_chainlabel) || !check_label(m))
1277                        ret = EINVAL;
1278                else if ((chain = find_label(m)) == NULL)
1279                        ret = ENOENT;           
1280                else ret = clear_fw_chain(chain);
1281                break;
1282
1283        case IP_FW_ZERO:
1284                if (len != sizeof(ip_chainlabel) || !check_label(m))
1285                        ret = EINVAL;
1286                else if ((chain = find_label(m)) == NULL)
1287                        ret = ENOENT;
1288                else ret = zero_fw_chain(chain);
1289                break;
1290
1291        case IP_FW_CHECK: {
1292                struct ip_fwtest *new = m;
1293                struct iphdr *ip;
1294
1295                /* Don't need write lock. */
1296                FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1297                
1298                if (len != sizeof(struct ip_fwtest) || !check_label(m))
1299                        return EINVAL;
1300
1301                /* Need readlock to do find_label */
1302                FWC_READ_LOCK(&ip_fw_lock);
1303
1304                if ((chain = find_label(new->fwt_label)) == NULL)
1305                        ret = ENOENT;
1306                else {
1307                        ip = &(new->fwt_packet.fwp_iph);
1308
1309                        if (ip->ihl != sizeof(struct iphdr) / sizeof(int)) {
1310                            duprintf("ip_fw_ctl: ip->ihl=%d, want %d\n",
1311                                     ip->ihl,
1312                                     sizeof(struct iphdr) / sizeof(int));
1313                            ret = EINVAL;
1314                        }
1315                        else {
1316                                ret = ip_fw_check(ip, new->fwt_packet.fwp_vianame,
1317                                                  NULL, chain,
1318                                                  NULL, SLOT_NUMBER(), 1);
1319                                switch (ret) {
1320                                case FW_ACCEPT:
1321                                        ret = 0; break;
1322                                case FW_REDIRECT:
1323                                        ret = ECONNABORTED; break;
1324                                case FW_MASQUERADE:
1325                                        ret = ECONNRESET; break;
1326                                case FW_REJECT:
1327                                        ret = ECONNREFUSED; break;
1328                                        /* Hack to help diag; these only get
1329                                           returned when testing. */
1330                                case FW_SKIP+1:
1331                                        ret = ELOOP; break;
1332                                case FW_SKIP:
1333                                        ret = ENFILE; break;
1334                                default: /* FW_BLOCK */
1335                                        ret = ETIMEDOUT; break;
1336                                }
1337                        }
1338                }
1339                FWC_READ_UNLOCK(&ip_fw_lock);
1340                return ret;
1341        }
1342
1343        case IP_FW_MASQ_TIMEOUTS: {
1344#ifdef CONFIG_IP_MASQUERADE
1345                ret = ip_fw_masq_timeouts(m, len);
1346#else
1347                ret = EINVAL;
1348#endif
1349        }
1350        break;
1351
1352        case IP_FW_REPLACE: {
1353                struct ip_fwkernel *ip_fwkern;
1354                struct ip_fwnew *new = m;
1355
1356                if (len != sizeof(struct ip_fwnew) 
1357                    || !check_label(new->fwn_label))
1358                        ret = EINVAL;
1359                else if ((chain = find_label(new->fwn_label)) == NULL)
1360                        ret = ENOENT;
1361                else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret))
1362                         != NULL)
1363                        ret = replace_in_chain(chain, ip_fwkern, 
1364                                               new->fwn_rulenum);
1365        }
1366        break;
1367
1368        case IP_FW_APPEND: {
1369                struct ip_fwchange *new = m;
1370                struct ip_fwkernel *ip_fwkern;
1371
1372                if (len != sizeof(struct ip_fwchange)
1373                    || !check_label(new->fwc_label))
1374                        ret = EINVAL;
1375                else if ((chain = find_label(new->fwc_label)) == NULL)
1376                        ret = ENOENT;
1377                else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret))
1378                         != NULL)
1379                        ret = append_to_chain(chain, ip_fwkern);
1380        }
1381        break;
1382
1383        case IP_FW_INSERT: {
1384                struct ip_fwkernel *ip_fwkern;
1385                struct ip_fwnew *new = m;
1386
1387                if (len != sizeof(struct ip_fwnew)
1388                    || !check_label(new->fwn_label))
1389                        ret = EINVAL;
1390                else if ((chain = find_label(new->fwn_label)) == NULL)
1391                        ret = ENOENT;
1392                else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret))
1393                         != NULL)
1394                        ret = insert_in_chain(chain, ip_fwkern,
1395                                              new->fwn_rulenum);
1396        }
1397        break;
1398
1399        case IP_FW_DELETE: {
1400                struct ip_fwchange *new = m;
1401                struct ip_fwkernel *ip_fwkern;
1402
1403                if (len != sizeof(struct ip_fwchange)
1404                    || !check_label(new->fwc_label))
1405                        ret = EINVAL;
1406                else if ((chain = find_label(new->fwc_label)) == NULL)
1407                        ret = ENOENT;
1408                else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret))
1409                         != NULL)
1410                        ret = del_rule_from_chain(chain, ip_fwkern);
1411        }
1412        break;
1413
1414        case IP_FW_DELETE_NUM: {
1415                struct ip_fwdelnum *new = m;
1416
1417                if (len != sizeof(struct ip_fwdelnum)
1418                    || !check_label(new->fwd_label))
1419                        ret = EINVAL;
1420                else if ((chain = find_label(new->fwd_label)) == NULL)
1421                        ret = ENOENT;           
1422                else ret = del_num_from_chain(chain, new->fwd_rulenum);
1423        }
1424        break;
1425
1426        case IP_FW_CREATECHAIN: {
1427                if (len != sizeof(ip_chainlabel)) {
1428                        duprintf("create_chain: bad size %i\n", len);
1429                        ret = EINVAL;
1430                }
1431                else ret = create_chain(m);
1432        }
1433        break;
1434
1435        case IP_FW_DELETECHAIN: {
1436                if (len != sizeof(ip_chainlabel)) {
1437                        duprintf("delete_chain: bad size %i\n", len);
1438                        ret = EINVAL;
1439                }
1440                else ret = del_chain(m);
1441        }
1442        break;
1443
1444        case IP_FW_POLICY: {
1445                struct ip_fwpolicy *new = m;
1446
1447                if (len != sizeof(struct ip_fwpolicy)
1448                    || !check_label(new->fwp_label))
1449                        ret = EINVAL;
1450                else if ((chain = find_label(new->fwp_label)) == NULL)
1451                        ret = ENOENT;
1452                else if (chain != IP_FW_INPUT_CHAIN
1453                         && chain != IP_FW_FORWARD_CHAIN
1454                         && chain != IP_FW_OUTPUT_CHAIN) {
1455                        duprintf("change_policy: can't change policy on user" 
1456                                 " defined chain.\n");
1457                        ret = EINVAL;
1458                }
1459                else {
1460                        int pol = FW_SKIP;
1461                        find_special(new->fwp_policy, &pol);
1462
1463                        switch(pol) {
1464                        case FW_MASQUERADE:
1465                                if (chain != IP_FW_FORWARD_CHAIN) {
1466                                        ret = EINVAL;
1467                                        break;
1468                                }
1469                                /* Fall thru... */
1470                        case FW_BLOCK:
1471                        case FW_ACCEPT:
1472                        case FW_REJECT:
1473                                ret = change_policy(chain, pol);
1474                                break;
1475                        default:
1476                                duprintf("change_policy: bad policy `%s'\n",
1477                                         new->fwp_policy);
1478                                ret = EINVAL;
1479                        }
1480                }
1481                break;
1482                
1483        }
1484        default:
1485                duprintf("ip_fw_ctl:  unknown request %d\n",cmd);
1486                ret = EINVAL;
1487        }
1488
1489        FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1490        return ret;
1491}
1492
1493/* Returns bytes used - doesn't NUL terminate */
1494static int dump_rule(char *buffer, 
1495                     const char *chainlabel, 
1496                     const struct ip_fwkernel *rule)
1497{
1498        int len;
1499        unsigned int i;
1500        __u64 packets = 0, bytes = 0;
1501
1502        FWC_HAVE_LOCK(fwc_wlocks);
1503        for (i = 0; i < NUM_SLOTS; i++) {
1504                packets += rule->counters[i].pcnt;
1505                bytes += rule->counters[i].bcnt;
1506        }
1507
1508        len=sprintf(buffer,
1509                    "%9s "                      /* Chain name */
1510                    "%08lX/%08lX->%08lX/%08lX " /* Source & Destination IPs */
1511                    "%.16s "                    /* Interface */
1512                    "%X %X "                    /* fw_flg and fw_invflg fields */
1513                    "%u "                       /* Protocol */
1514                    "%-9u %-9u %-9u %-9u "      /* Packet & byte counters */
1515                    "%u-%u %u-%u "              /* Source & Dest port ranges */
1516                    "A%02X X%02X "              /* TOS and and xor masks */
1517                    "%08X "                     /* Redirection port */
1518                    "%u "                       /* fw_mark field */
1519                    "%u "                       /* output size */
1520                    "%9s\n",                    /* Target */
1521                    chainlabel,
1522                    ntohl(rule->ipfw.fw_src.s_addr),
1523                    ntohl(rule->ipfw.fw_smsk.s_addr),
1524                    ntohl(rule->ipfw.fw_dst.s_addr),
1525                    ntohl(rule->ipfw.fw_dmsk.s_addr),
1526                    (rule->ipfw.fw_vianame)[0] ? rule->ipfw.fw_vianame : "-",
1527                    rule->ipfw.fw_flg,
1528                    rule->ipfw.fw_invflg,
1529                    rule->ipfw.fw_proto,
1530                    (__u32)(packets >> 32), (__u32)packets,
1531                    (__u32)(bytes >> 32), (__u32)bytes,
1532                    rule->ipfw.fw_spts[0], rule->ipfw.fw_spts[1],
1533                    rule->ipfw.fw_dpts[0], rule->ipfw.fw_dpts[1], 
1534                    rule->ipfw.fw_tosand, rule->ipfw.fw_tosxor, 
1535                    rule->ipfw.fw_redirpt, 
1536                    rule->ipfw.fw_mark, 
1537                    rule->ipfw.fw_outputsize,
1538                    branchname(rule->branch,rule->simplebranch));
1539
1540        duprintf("dump_rule: %i bytes done.\n", len);
1541        return len;
1542}
1543
1544/* File offset is actually in records, not bytes. */
1545static int ip_chain_procinfo(char *buffer, char **start,
1546                             off_t offset, int length, int reset)
1547{
1548        struct ip_chain *i;
1549        struct ip_fwkernel *j = ip_fw_chains->chain;
1550        unsigned long flags;
1551        int len = 0;
1552        int last_len = 0;
1553        off_t upto = 0;
1554
1555        duprintf("Offset starts at %lu\n", offset);
1556        duprintf("ip_fw_chains is 0x%0lX\n", (unsigned long int)ip_fw_chains);
1557
1558        /* Need a write lock to lock out ``readers'' which update counters. */
1559        FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
1560
1561        for (i = ip_fw_chains; i; i = i->next) {
1562            for (j = i->chain; j; j = j->next) {
1563                if (upto == offset) break;
1564                duprintf("Skipping rule in chain `%s'\n", 
1565                         i->label);
1566                upto++;
1567            }
1568            if (upto == offset) break;
1569        }
1570
1571        /* Don't init j first time, or once i = NULL */
1572        for (; i; (void)((i = i->next) && (j = i->chain))) {
1573                duprintf("Dumping chain `%s'\n", i->label);
1574                for (; j; j = j->next, upto++, last_len = len)
1575                {
1576                        len += dump_rule(buffer+len, i->label, j);
1577                        if (len > length) {
1578                                duprintf("Dumped to %i (past %i).  "
1579                                         "Moving back to %i.\n",
1580                                         len, length, last_len);
1581                                len = last_len;
1582                                goto outside;
1583                        }
1584                        else if (reset)
1585                                memset(j->counters, 0,
1586                                       sizeof(struct ip_counters)*NUM_SLOTS);
1587                }
1588        }
1589outside:
1590        FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1591        buffer[len] = '\0';
1592
1593        duprintf("ip_chain_procinfo: Length = %i (of %i).  Offset = %li.\n",
1594                 len, length, upto);
1595        /* `start' hack - see fs/proc/generic.c line ~165 */
1596        *start=(char *)((unsigned int)upto-offset);
1597        return len;
1598}
1599
1600static int ip_chain_name_procinfo(char *buffer, char **start,
1601                                  off_t offset, int length, int reset)
1602{
1603        struct ip_chain *i;
1604        int len = 0,last_len = 0;
1605        off_t pos = 0,begin = 0;
1606        unsigned long flags;
1607
1608        /* Need a write lock to lock out ``readers'' which update counters. */
1609        FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
1610
1611        for (i = ip_fw_chains; i; i = i->next)
1612        {
1613                unsigned int j;
1614                __u32 packetsHi = 0, packetsLo = 0, bytesHi = 0, bytesLo = 0;
1615
1616                for (j = 0; j < NUM_SLOTS; j++) {
1617                        packetsLo += i->reent[j].counters.pcnt & 0xFFFFFFFF;
1618                        packetsHi += ((i->reent[j].counters.pcnt >> 32) 
1619                                      & 0xFFFFFFFF);
1620                        bytesLo += i->reent[j].counters.bcnt & 0xFFFFFFFF;
1621                        bytesHi += ((i->reent[j].counters.bcnt >> 32) 
1622                                    & 0xFFFFFFFF);
1623                }
1624
1625                /* print the label and the policy */
1626                len+=sprintf(buffer+len,"%s %s %i %u %u %u %u\n",
1627                             i->label,branchname(NULL, i->policy),i->refcount,
1628                             packetsHi, packetsLo, bytesHi, bytesLo);
1629                pos=begin+len;
1630                if(pos<offset) {
1631                        len=0;
1632                        begin=pos;
1633                }
1634                else if(pos>offset+length) {
1635                        len = last_len;
1636                        break;          
1637                }
1638                
1639                last_len = len;
1640        }
1641        FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1642
1643        *start = buffer+(offset-begin);
1644        len-=(offset-begin);
1645        if(len>length)
1646                len=length;
1647        return len;
1648}
1649
1650/*
1651 *      Interface to the generic firewall chains.
1652 */
1653int ipfw_input_check(struct firewall_ops *this, int pf, struct device *dev, 
1654                     void *phdr, void *arg, struct sk_buff **pskb)
1655{
1656        return ip_fw_check(phdr, dev->name,
1657                           arg, IP_FW_INPUT_CHAIN, *pskb, SLOT_NUMBER(), 0);
1658}
1659
1660int ipfw_output_check(struct firewall_ops *this, int pf, struct device *dev, 
1661                      void *phdr, void *arg, struct sk_buff **pskb)
1662{
1663        return ip_fw_check(phdr, dev->name,
1664                           arg, IP_FW_OUTPUT_CHAIN, *pskb, SLOT_NUMBER(), 0);
1665}
1666
1667int ipfw_forward_check(struct firewall_ops *this, int pf, struct device *dev, 
1668                       void *phdr, void *arg, struct sk_buff **pskb)
1669{
1670        return ip_fw_check(phdr, dev->name,
1671                           arg, IP_FW_FORWARD_CHAIN, *pskb, SLOT_NUMBER(), 0);
1672}
1673
1674struct firewall_ops ipfw_ops=
1675{
1676        NULL,
1677        ipfw_forward_check,
1678        ipfw_input_check,
1679        ipfw_output_check,
1680        PF_INET,
1681        0       /* We don't even allow a fall through so we are last */
1682};
1683
1684#ifdef CONFIG_PROC_FS           
1685static struct proc_dir_entry proc_net_ipfwchains_chain = {
1686        PROC_NET_IPFW_CHAINS, sizeof(IP_FW_PROC_CHAINS)-1, 
1687        IP_FW_PROC_CHAINS, S_IFREG | S_IRUSR | S_IWUSR, 1, 0, 0,
1688        0, &proc_net_inode_operations, ip_chain_procinfo
1689};
1690
1691static struct proc_dir_entry proc_net_ipfwchains_chainnames = {
1692        PROC_NET_IPFW_CHAIN_NAMES, sizeof(IP_FW_PROC_CHAIN_NAMES)-1, 
1693        IP_FW_PROC_CHAIN_NAMES, S_IFREG | S_IRUSR | S_IWUSR, 1, 0, 0,
1694        0, &proc_net_inode_operations, ip_chain_name_procinfo
1695};
1696
1697#endif
1698
1699__initfunc(void ip_fw_init(void))
1700{
1701#ifdef DEBUG_IP_FIRWALL_LOCKING
1702        fwc_wlocks = fwc_rlocks = 0;
1703#endif
1704
1705        IP_FW_INPUT_CHAIN = ip_init_chain(IP_FW_LABEL_INPUT, 1, FW_ACCEPT);
1706        IP_FW_FORWARD_CHAIN = ip_init_chain(IP_FW_LABEL_FORWARD, 1, FW_ACCEPT);
1707        IP_FW_OUTPUT_CHAIN = ip_init_chain(IP_FW_LABEL_OUTPUT, 1, FW_ACCEPT);
1708
1709        if(register_firewall(PF_INET,&ipfw_ops)<0)
1710                panic("Unable to register IP firewall.\n");
1711
1712#ifdef CONFIG_PROC_FS           
1713        proc_net_register(&proc_net_ipfwchains_chain);
1714        proc_net_register(&proc_net_ipfwchains_chainnames);
1715#endif
1716
1717#ifdef CONFIG_IP_FIREWALL_NETLINK
1718        ipfwsk = netlink_kernel_create(NETLINK_FIREWALL, NULL);
1719        if (ipfwsk == NULL)
1720                panic("ip_fw_init: cannot initialize netlink\n");
1721#endif
1722#if defined(DEBUG_IP_FIREWALL) || defined(DEBUG_IP_FIREWALL_USER)
1723        printk("Firewall graphs enabled! Untested kernel coming thru. \n");
1724#endif
1725}
1726
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.