linux-old/net/ipv4/ip_masq.c
<<
>>
Prefs
   1/*
   2 *
   3 *      Masquerading functionality
   4 *
   5 *      Copyright (c) 1994 Pauline Middelink
   6 *
   7 *      $Id: ip_masq.c,v 1.34 1999/03/17 01:53:51 davem Exp $
   8 *
   9 *
  10 *      See ip_fw.c for original log
  11 *
  12 * Fixes:
  13 *      Juan Jose Ciarlante     :       Modularized application masquerading (see ip_masq_app.c)
  14 *      Juan Jose Ciarlante     :       New struct ip_masq_seq that holds output/input delta seq.
  15 *      Juan Jose Ciarlante     :       Added hashed lookup by proto,maddr,mport and proto,saddr,sport
  16 *      Juan Jose Ciarlante     :       Fixed deadlock if free ports get exhausted
  17 *      Juan Jose Ciarlante     :       Added NO_ADDR status flag.
  18 *      Richard Lynch           :       Added IP Autoforward
  19 *      Nigel Metheringham      :       Added ICMP handling for demasquerade
  20 *      Nigel Metheringham      :       Checksum checking of masqueraded data
  21 *      Nigel Metheringham      :       Better handling of timeouts of TCP conns
  22 *      Delian Delchev          :       Added support for ICMP requests and replies
  23 *      Nigel Metheringham      :       ICMP in ICMP handling, tidy ups, bug fixes, made ICMP optional
  24 *      Juan Jose Ciarlante     :       re-assign maddr if no packet received from outside
  25 *      Juan Jose Ciarlante     :       ported to 2.1 tree
  26 *      Juan Jose Ciarlante     :       reworked control connections
  27 *      Steven Clarke           :       Added Port Forwarding
  28 *      Juan Jose Ciarlante     :       Just ONE ip_masq_new (!)
  29 *      Juan Jose Ciarlante     :       IP masq modules support
  30 *      Juan Jose Ciarlante     :       don't go into search loop if mport specified
  31 *      Juan Jose Ciarlante     :       locking
  32 *      Steven Clarke           :       IP_MASQ_S_xx state design
  33 *      Juan Jose Ciarlante     :       IP_MASQ_S state implementation 
  34 *      Juan Jose Ciarlante     :       xx_get() clears timer, _put() inserts it
  35 *      Juan Jose Ciarlante     :       create /proc/net/ip_masq/ 
  36 *      Juan Jose Ciarlante     :       reworked checksums (save payload csum if possible)
  37 *      Juan Jose Ciarlante     :       added missing ip_fw_masquerade checksum
  38 *      Juan Jose Ciarlante     :       csum savings
  39 *      Juan Jose Ciarlante     :       added user-space tunnel creation/del, etc
  40 *      Juan Jose Ciarlante     :       (last) moved to ip_masq_user runtime module
  41 *      Juan Jose Ciarlante     :       user timeout handling again
  42 *      Juan Jose Ciarlante     :       make new modules support optional
  43 *      Juan Jose Ciarlante     :       u-space context => locks reworked
  44 *      Juan Jose Ciarlante     :       fixed stupid SMP locking bug
  45 *      Juan Jose Ciarlante     :       fixed "tap"ing in demasq path by copy-on-w
  46 *      Juan Jose Ciarlante     :       make masq_proto_doff() robust against fake sized/corrupted packets
  47 *      Kai Bankett             :       do not toss other IP protos in proto_doff()
  48 *      Dan Kegel               :       pointed out correct NAT behavior for UDP streams
  49 *      
  50 */
  51
  52#include <linux/config.h>
  53#include <linux/module.h>
  54#ifdef CONFIG_KMOD
  55#include <linux/kmod.h>
  56#endif
  57#include <linux/types.h>
  58#include <linux/kernel.h>
  59#include <linux/errno.h>
  60#include <linux/skbuff.h>
  61#include <asm/system.h>
  62#include <linux/stat.h>
  63#include <linux/proc_fs.h>
  64#include <linux/in.h>
  65#include <linux/ip.h>
  66#include <linux/inet.h>
  67#include <linux/init.h>
  68#include <net/protocol.h>
  69#include <net/icmp.h>
  70#include <net/tcp.h>
  71#include <net/udp.h>
  72#include <net/checksum.h>
  73#include <net/ip_masq.h>
  74
  75#ifdef CONFIG_IP_MASQUERADE_MOD
  76#include <net/ip_masq_mod.h>
  77#endif
  78
  79#include <linux/sysctl.h>
  80#include <linux/ip_fw.h>
  81#include <linux/ip_masq.h>
  82
  83int sysctl_ip_masq_debug = 0;
  84
  85/*
  86 *      Exported wrapper 
  87 */
  88int ip_masq_get_debug_level(void)
  89{
  90        return sysctl_ip_masq_debug;
  91}
  92
  93struct ip_masq_hook *ip_masq_user_hook = NULL;
  94
  95/*
  96 *      Timeout table[state]
  97 */
  98/* static int masq_timeout_table[IP_MASQ_S_LAST+1] = { */
  99static struct ip_masq_timeout_table masq_timeout_table = {
 100        ATOMIC_INIT(0), /* refcnt */
 101        0,              /* scale  */
 102        {
 103                30*60*HZ,       /*      IP_MASQ_S_NONE, */
 104                15*60*HZ,       /*      IP_MASQ_S_ESTABLISHED,  */
 105                2*60*HZ,        /*      IP_MASQ_S_SYN_SENT,     */
 106                1*60*HZ,        /*      IP_MASQ_S_SYN_RECV,     */
 107                2*60*HZ,        /*      IP_MASQ_S_FIN_WAIT,     */
 108                2*60*HZ,        /*      IP_MASQ_S_TIME_WAIT,    */
 109                10*HZ,          /*      IP_MASQ_S_CLOSE,        */
 110                60*HZ,          /*      IP_MASQ_S_CLOSE_WAIT,   */
 111                30*HZ,          /*      IP_MASQ_S_LAST_ACK,     */
 112                2*60*HZ,        /*      IP_MASQ_S_LISTEN,       */
 113                5*60*HZ,        /*      IP_MASQ_S_UDP,  */
 114                1*60*HZ,        /*      IP_MASQ_S_ICMP, */
 115                2*HZ,/* IP_MASQ_S_LAST  */
 116        },      /* timeout */
 117};
 118
 119#define MASQUERADE_EXPIRE_RETRY      masq_timeout_table.timeout[IP_MASQ_S_TIME_WAIT]
 120
 121static const char * state_name_table[IP_MASQ_S_LAST+1] = {
 122        "NONE",         /*      IP_MASQ_S_NONE, */
 123        "ESTABLISHED",  /*      IP_MASQ_S_ESTABLISHED,  */
 124        "SYN_SENT",     /*      IP_MASQ_S_SYN_SENT,     */
 125        "SYN_RECV",     /*      IP_MASQ_S_SYN_RECV,     */
 126        "FIN_WAIT",     /*      IP_MASQ_S_FIN_WAIT,     */
 127        "TIME_WAIT",    /*      IP_MASQ_S_TIME_WAIT,    */
 128        "CLOSE",        /*      IP_MASQ_S_CLOSE,        */
 129        "CLOSE_WAIT",   /*      IP_MASQ_S_CLOSE_WAIT,   */
 130        "LAST_ACK",     /*      IP_MASQ_S_LAST_ACK,     */
 131        "LISTEN",       /*      IP_MASQ_S_LISTEN,       */
 132        "UDP",          /*      IP_MASQ_S_UDP,  */
 133        "ICMP",         /*      IP_MASQ_S_ICMP, */
 134        "BUG!",         /*      IP_MASQ_S_LAST  */
 135};
 136
 137#define mNO IP_MASQ_S_NONE
 138#define mES IP_MASQ_S_ESTABLISHED
 139#define mSS IP_MASQ_S_SYN_SENT
 140#define mSR IP_MASQ_S_SYN_RECV
 141#define mFW IP_MASQ_S_FIN_WAIT
 142#define mTW IP_MASQ_S_TIME_WAIT
 143#define mCL IP_MASQ_S_CLOSE
 144#define mCW IP_MASQ_S_CLOSE_WAIT
 145#define mLA IP_MASQ_S_LAST_ACK
 146#define mLI IP_MASQ_S_LISTEN
 147
 148struct masq_tcp_states_t {
 149        int next_state[IP_MASQ_S_LAST]; /* should be _LAST_TCP */
 150};
 151
 152const char * ip_masq_state_name(int state)
 153{
 154        if (state >= IP_MASQ_S_LAST)
 155                return "ERR!";
 156        return state_name_table[state];
 157}
 158
 159struct masq_tcp_states_t masq_tcp_states [] = {
 160/*      INPUT */
 161/*        mNO, mES, mSS, mSR, mFW, mTW, mCL, mCW, mLA, mLI      */
 162/*syn*/ {{mSR, mES, mES, mSR, mSR, mSR, mSR, mSR, mSR, mSR }},
 163/*fin*/ {{mCL, mCW, mSS, mTW, mTW, mTW, mCL, mCW, mLA, mLI }},
 164/*ack*/ {{mCL, mES, mSS, mSR, mFW, mTW, mCL, mCW, mCL, mLI }},
 165/*rst*/ {{mCL, mCL, mCL, mSR, mCL, mCL, mCL, mCL, mLA, mLI }},
 166
 167/*      OUTPUT */
 168/*        mNO, mES, mSS, mSR, mFW, mTW, mCL, mCW, mLA, mLI      */
 169/*syn*/ {{mSS, mES, mSS, mES, mSS, mSS, mSS, mSS, mSS, mLI }},
 170/*fin*/ {{mTW, mFW, mSS, mTW, mFW, mTW, mCL, mTW, mLA, mLI }},
 171/*ack*/ {{mES, mES, mSS, mSR, mFW, mTW, mCL, mCW, mLA, mES }},
 172/*rst*/ {{mCL, mCL, mSS, mCL, mCL, mTW, mCL, mCL, mCL, mCL }},
 173};
 174
 175static __inline__ int masq_tcp_state_idx(struct tcphdr *th, int output) 
 176{
 177        /*
 178         *      [0-3]: input states, [4-7]: output.
 179         */
 180        if (output) 
 181                output=4;
 182
 183        if (th->rst)
 184                return output+3;
 185        if (th->syn)
 186                return output+0;
 187        if (th->fin)
 188                return output+1;
 189        if (th->ack)
 190                return output+2;
 191        return -1;
 192}
 193
 194
 195
 196static int masq_set_state_timeout(struct ip_masq *ms, int state)
 197{
 198        struct ip_masq_timeout_table *mstim = ms->timeout_table;
 199        int scale;
 200
 201        /*
 202         *      Use default timeout table if no specific for this entry
 203         */
 204        if (!mstim) 
 205                mstim = &masq_timeout_table;
 206
 207        ms->timeout = mstim->timeout[ms->state=state];
 208        scale = mstim->scale;
 209
 210        if (scale<0)
 211                ms->timeout >>= -scale;
 212        else if (scale > 0)
 213                ms->timeout <<= scale;
 214
 215        return state;
 216}
 217
 218static int masq_tcp_state(struct ip_masq *ms, int output, struct tcphdr *th)
 219{
 220        int state_idx;
 221        int new_state = IP_MASQ_S_CLOSE;
 222
 223        if ((state_idx = masq_tcp_state_idx(th, output)) < 0) {
 224                IP_MASQ_DEBUG(1, "masq_state_idx(%d)=%d!!!\n", 
 225                        output, state_idx);
 226                goto tcp_state_out;
 227        }
 228
 229        new_state = masq_tcp_states[state_idx].next_state[ms->state];
 230        
 231tcp_state_out:
 232        if (new_state!=ms->state)
 233                IP_MASQ_DEBUG(1, "%s %s [%c%c%c%c] %08lX:%04X-%08lX:%04X state: %s->%s\n",
 234                                masq_proto_name(ms->protocol),
 235                                output? "output" : "input ",
 236                                th->syn? 'S' : '.',
 237                                th->fin? 'F' : '.',
 238                                th->ack? 'A' : '.',
 239                                th->rst? 'R' : '.',
 240                                ntohl(ms->saddr), ntohs(ms->sport),
 241                                ntohl(ms->daddr), ntohs(ms->dport),
 242                                ip_masq_state_name(ms->state),
 243                                ip_masq_state_name(new_state));
 244        return masq_set_state_timeout(ms, new_state);
 245}
 246
 247
 248/*
 249 *      Handle state transitions
 250 */
 251static int masq_set_state(struct ip_masq *ms, int output, struct iphdr *iph, void *tp)
 252{
 253        switch (iph->protocol) {
 254                case IPPROTO_ICMP:
 255                        return masq_set_state_timeout(ms, IP_MASQ_S_ICMP);
 256                case IPPROTO_UDP:
 257                        return masq_set_state_timeout(ms, IP_MASQ_S_UDP);
 258                case IPPROTO_TCP:
 259                        return masq_tcp_state(ms, output, tp);
 260        }
 261        return -1;
 262}
 263
 264/*
 265 *      Set LISTEN timeout. (ip_masq_put will setup timer)
 266 */
 267int ip_masq_listen(struct ip_masq *ms)
 268{
 269        masq_set_state_timeout(ms, IP_MASQ_S_LISTEN);
 270        return ms->timeout;
 271}
 272
 273/* 
 274 *      Dynamic address rewriting 
 275 */
 276extern int sysctl_ip_dynaddr;
 277
 278/*
 279 *      Lookup lock
 280 */
 281rwlock_t __ip_masq_lock = RW_LOCK_UNLOCKED;
 282
 283/*
 284 *      Implement IP packet masquerading
 285 */
 286
 287/*
 288 * Converts an ICMP reply code into the equivalent request code
 289 */
 290static __inline__ const __u8 icmp_type_request(__u8 type)
 291{
 292   switch (type)
 293   {
 294      case ICMP_ECHOREPLY: return ICMP_ECHO; break;
 295      case ICMP_TIMESTAMPREPLY: return ICMP_TIMESTAMP; break;
 296      case ICMP_INFO_REPLY: return ICMP_INFO_REQUEST; break;
 297      case ICMP_ADDRESSREPLY: return ICMP_ADDRESS; break;
 298      default: return (255); break;
 299   }
 300}
 301
 302/*
 303 * Helper macros - attempt to make code clearer! 
 304 */
 305
 306/* ID used in ICMP lookups */
 307#define icmp_id(icmph)          ((icmph->un).echo.id)
 308/* (port) hash value using in ICMP lookups for requests */
 309#define icmp_hv_req(icmph)      ((__u16)(icmph->code+(__u16)(icmph->type<<8)))
 310/* (port) hash value using in ICMP lookups for replies */
 311#define icmp_hv_rep(icmph)      ((__u16)(icmph->code+(__u16)(icmp_type_request(icmph->type)<<8)))
 312
 313/*
 314 *      Last masq_port number in use.
 315 *      Will cycle in MASQ_PORT boundaries.
 316 */
 317static __u16 masq_port = PORT_MASQ_BEGIN;
 318#ifdef __SMP__
 319static spinlock_t masq_port_lock = SPIN_LOCK_UNLOCKED;
 320#endif
 321
 322/*
 323 *      free ports counters (UDP & TCP)
 324 *
 325 *      Their value is _less_ than or _equal_ to the actual free ports:
 326 *      entries allocated with the same masq port but a different masq addr
 327 *      (firewall iface address) are all counted, yet they use only 1 port.
 328 *
 329 *      Greater values could lower MASQ_EXPIRATION setting as a way to
 330 *      manage 'masq_entries resource'.
 331 *
 332 *      By default we will reuse masq.port iff the (output) connection
 333 *      (5-tuple) is not duplicated.
 334 *      This may break midentd and others ...
 335 */
 336
 337#ifdef CONFIG_IP_MASQ_NREUSE
 338#define PORT_MASQ_MUL 1
 339#else
 340#define PORT_MASQ_MUL 10
 341#endif
 342
 343/*
 344 *      At the moment, hardcore in sync with masq_proto_num
 345 */
 346atomic_t ip_masq_free_ports[3] = {
 347        ATOMIC_INIT((PORT_MASQ_END-PORT_MASQ_BEGIN) * PORT_MASQ_MUL),/* UDP */
 348        ATOMIC_INIT((PORT_MASQ_END-PORT_MASQ_BEGIN) * PORT_MASQ_MUL),/* TCP */
 349        ATOMIC_INIT((PORT_MASQ_END-PORT_MASQ_BEGIN) * PORT_MASQ_MUL),/* ICMP */
 350};
 351
 352/*
 353 *      Counts entries that have been requested with specific mport.
 354 *      Used for incoming packets to "relax" input rule (port in MASQ range).
 355 */
 356atomic_t mport_count = ATOMIC_INIT(0);
 357
 358EXPORT_SYMBOL(ip_masq_get_debug_level);
 359EXPORT_SYMBOL(ip_masq_new);
 360EXPORT_SYMBOL(ip_masq_listen);
 361EXPORT_SYMBOL(ip_masq_free_ports);
 362EXPORT_SYMBOL(ip_masq_out_get);
 363EXPORT_SYMBOL(ip_masq_in_get);
 364EXPORT_SYMBOL(ip_masq_put);
 365EXPORT_SYMBOL(ip_masq_control_add);
 366EXPORT_SYMBOL(ip_masq_control_del);
 367EXPORT_SYMBOL(ip_masq_control_get);
 368EXPORT_SYMBOL(ip_masq_user_hook);
 369EXPORT_SYMBOL(ip_masq_m_tab);
 370EXPORT_SYMBOL(ip_masq_state_name);
 371EXPORT_SYMBOL(ip_masq_select_addr);
 372EXPORT_SYMBOL(__ip_masq_lock);
 373
 374/*
 375 *      2 ip_masq hash tables: for input and output pkts lookups.
 376 */
 377
 378struct ip_masq *ip_masq_m_tab[IP_MASQ_TAB_SIZE];
 379struct ip_masq *ip_masq_s_tab[IP_MASQ_TAB_SIZE];
 380
 381/*
 382 * timeouts
 383 */
 384
 385#if 000 /* FIXED timeout handling */
 386static struct ip_fw_masq ip_masq_dummy = {
 387        MASQUERADE_EXPIRE_TCP,
 388        MASQUERADE_EXPIRE_TCP_FIN,
 389        MASQUERADE_EXPIRE_UDP
 390};
 391
 392EXPORT_SYMBOL(ip_masq_expire);
 393struct ip_fw_masq *ip_masq_expire = &ip_masq_dummy;
 394#endif
 395
 396/*
 397 *      These flags enable non-strict d{addr,port} checks
 398 *      Given that both (in/out) lookup tables are hashed
 399 *      by m{addr,port} and s{addr,port} this is quite easy 
 400 */
 401
 402#define MASQ_DADDR_PASS (IP_MASQ_F_NO_DADDR|IP_MASQ_F_DLOOSE)
 403#define MASQ_DPORT_PASS (IP_MASQ_F_NO_DPORT|IP_MASQ_F_DLOOSE)
 404
 405/*
 406 *      By default enable dest loose semantics
 407 */
 408#define CONFIG_IP_MASQ_LOOSE_DEFAULT 1
 409
 410
 411/*
 412 *      Set masq expiration (deletion) and adds timer,
 413 *      if timeout==0 cancel expiration.
 414 *      Warning: it does not check/delete previous timer!
 415 */
 416
 417static void __ip_masq_set_expire(struct ip_masq *ms, unsigned long tout)
 418{
 419        if (tout) {
 420                ms->timer.expires = jiffies+tout;
 421                add_timer(&ms->timer);
 422        } else {
 423                del_timer(&ms->timer);
 424        }
 425}
 426
 427
 428/*
 429 *      Returns hash value
 430 */
 431
 432static __inline__ unsigned 
 433ip_masq_hash_key(unsigned proto, __u32 addr, __u16 port)
 434{
 435        return (proto^ntohl(addr)^ntohs(port)) & (IP_MASQ_TAB_SIZE-1);
 436}
 437
 438/*
 439 *      Hashes ip_masq by its proto,addrs,ports.
 440 *      should be called with locked tables.
 441 *      returns bool success.
 442 */
 443
 444static int ip_masq_hash(struct ip_masq *ms)
 445{
 446        unsigned hash;
 447
 448        if (ms->flags & IP_MASQ_F_HASHED) {
 449                IP_MASQ_ERR( "ip_masq_hash(): request for already hashed, called from %p\n",
 450                        __builtin_return_address(0));
 451                return 0;
 452        }
 453        /*
 454         *      Hash by proto,m{addr,port}
 455         */
 456        hash = ip_masq_hash_key(ms->protocol, ms->maddr, ms->mport);
 457        ms->m_link = ip_masq_m_tab[hash];
 458        atomic_inc(&ms->refcnt);
 459        ip_masq_m_tab[hash] = ms;
 460
 461        /*
 462         *      Hash by proto,s{addr,port}
 463         */
 464        hash = ip_masq_hash_key(ms->protocol, ms->saddr, ms->sport);
 465        ms->s_link = ip_masq_s_tab[hash];
 466        atomic_inc(&ms->refcnt);
 467        ip_masq_s_tab[hash] = ms;
 468
 469
 470        ms->flags |= IP_MASQ_F_HASHED;
 471        return 1;
 472}
 473
 474/*
 475 *      UNhashes ip_masq from ip_masq_[ms]_tables.
 476 *      should be called with locked tables.
 477 *      returns bool success.
 478 */
 479
 480static int ip_masq_unhash(struct ip_masq *ms)
 481{
 482        unsigned hash;
 483        struct ip_masq ** ms_p;
 484        if (!(ms->flags & IP_MASQ_F_HASHED)) {
 485                IP_MASQ_ERR( "ip_masq_unhash(): request for unhash flagged, called from %p\n",
 486                        __builtin_return_address(0));
 487                return 0;
 488        }
 489        /*
 490         *      UNhash by m{addr,port}
 491         */
 492        hash = ip_masq_hash_key(ms->protocol, ms->maddr, ms->mport);
 493        for (ms_p = &ip_masq_m_tab[hash]; *ms_p ; ms_p = &(*ms_p)->m_link)
 494                if (ms == (*ms_p))  {
 495                        atomic_dec(&ms->refcnt);
 496                        *ms_p = ms->m_link;
 497                        break;
 498                }
 499
 500        /*
 501         *      UNhash by s{addr,port}
 502         */
 503        hash = ip_masq_hash_key(ms->protocol, ms->saddr, ms->sport);
 504        for (ms_p = &ip_masq_s_tab[hash]; *ms_p ; ms_p = &(*ms_p)->s_link)
 505                if (ms == (*ms_p))  {
 506                        atomic_dec(&ms->refcnt);
 507                        *ms_p = ms->s_link;
 508                        break;
 509                }
 510
 511        ms->flags &= ~IP_MASQ_F_HASHED;
 512        return 1;
 513}
 514
 515/*
 516 *      Returns ip_masq associated with supplied parameters, either
 517 *      broken out of the ip/tcp headers or directly supplied for those
 518 *      pathological protocols with address/port in the data stream
 519 *      (ftp, irc).  addresses and ports are in network order.
 520 *      called for pkts coming from OUTside-to-INside the firewall.
 521 *
 522 *      s_addr, s_port: pkt source address (foreign host)
 523 *      d_addr, d_port: pkt dest address (firewall)
 524 *
 525 *      NB. Cannot check destination address, just for the incoming port.
 526 *      reason: archie.doc.ac.uk has 6 interfaces, you send to
 527 *      phoenix and get a reply from any other interface(==dst)!
 528 *
 529 *      [Only for UDP] - AC
 530 *      
 531 *      Caller must lock tables
 532 */
 533
 534static struct ip_masq * __ip_masq_in_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
 535{
 536        unsigned hash;
 537        struct ip_masq *ms = NULL;
 538
 539        hash = ip_masq_hash_key(protocol, d_addr, d_port);
 540
 541        for(ms = ip_masq_m_tab[hash]; ms ; ms = ms->m_link) {
 542                if (protocol==ms->protocol && 
 543                    (d_addr==ms->maddr && d_port==ms->mport) &&
 544                    (s_addr==ms->daddr || ms->flags & MASQ_DADDR_PASS) &&
 545                    (s_port==ms->dport || ms->flags & MASQ_DPORT_PASS)
 546                    ) {
 547                        IP_MASQ_DEBUG(2, "look/in %d %08X:%04hX->%08X:%04hX OK\n",
 548                               protocol,
 549                               s_addr,
 550                               s_port,
 551                               d_addr,
 552                               d_port);
 553                        atomic_inc(&ms->refcnt);
 554                        goto out;
 555                }
 556        }
 557        IP_MASQ_DEBUG(2, "look/in %d %08X:%04hX->%08X:%04hX fail\n",
 558               protocol,
 559               s_addr,
 560               s_port,
 561               d_addr,
 562               d_port);
 563
 564out:
 565        return ms;
 566}
 567
 568/*
 569 *      Returns ip_masq associated with supplied parameters, either
 570 *      broken out of the ip/tcp headers or directly supplied for those
 571 *      pathological protocols with address/port in the data stream
 572 *      (ftp, irc).  addresses and ports are in network order.
 573 *      called for pkts coming from inside-to-OUTside the firewall.
 574 *
 575 *      Normally we know the source address and port but for some protocols
 576 *      (e.g. ftp PASV) we do not know the source port initially.  Alas the
 577 *      hash is keyed on source port so if the first lookup fails then try again
 578 *      with a zero port, this time only looking at entries marked "no source
 579 *      port".
 580 *      
 581 *      Caller must lock tables
 582 */
 583
 584static struct ip_masq * __ip_masq_out_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
 585{
 586        unsigned hash;
 587        struct ip_masq *ms = NULL;
 588
 589        /*      
 590         *      Check for "full" addressed entries
 591         */
 592        hash = ip_masq_hash_key(protocol, s_addr, s_port);
 593        
 594        for(ms = ip_masq_s_tab[hash]; ms ; ms = ms->s_link) {
 595                if (protocol == ms->protocol &&
 596                    s_addr == ms->saddr && s_port == ms->sport &&
 597                    (d_addr==ms->daddr || ms->flags & MASQ_DADDR_PASS) &&
 598                    (d_port==ms->dport || ms->flags & MASQ_DPORT_PASS)
 599                   ) {
 600                        IP_MASQ_DEBUG(2, "lk/out1 %d %08X:%04hX->%08X:%04hX OK\n",
 601                               protocol,
 602                               s_addr,
 603                               s_port,
 604                               d_addr,
 605                               d_port);
 606
 607                        atomic_inc(&ms->refcnt);
 608                        goto out;
 609                }
 610
 611        }
 612
 613        /*      
 614         *      Check for NO_SPORT entries
 615         */
 616        hash = ip_masq_hash_key(protocol, s_addr, 0);
 617        for(ms = ip_masq_s_tab[hash]; ms ; ms = ms->s_link) {
 618                if (ms->flags & IP_MASQ_F_NO_SPORT &&
 619                    protocol == ms->protocol &&
 620                    s_addr == ms->saddr && 
 621                    (d_addr==ms->daddr || ms->flags & MASQ_DADDR_PASS) &&
 622                    (d_port==ms->dport || ms->flags & MASQ_DPORT_PASS)
 623                    ) {
 624                        IP_MASQ_DEBUG(2, "lk/out2 %d %08X:%04hX->%08X:%04hX OK\n",
 625                               protocol,
 626                               s_addr,
 627                               s_port,
 628                               d_addr,
 629                               d_port);
 630
 631                        atomic_inc(&ms->refcnt);
 632                        goto out;
 633                }
 634        }
 635        IP_MASQ_DEBUG(2, "lk/out1 %d %08X:%04hX->%08X:%04hX fail\n",
 636               protocol,
 637               s_addr,
 638               s_port,
 639               d_addr,
 640               d_port);
 641
 642out:
 643        return ms;
 644}
 645
 646#ifdef CONFIG_IP_MASQ_NREUSE
 647/*
 648 *      Returns ip_masq for given proto,m_addr,m_port.
 649 *      called by allocation routine to find an unused m_port.
 650 *      
 651 *      Caller must lock tables
 652 */
 653
 654static struct ip_masq * __ip_masq_getbym(int protocol, __u32 m_addr, __u16 m_port)
 655{
 656        unsigned hash;
 657        struct ip_masq *ms = NULL;
 658
 659        hash = ip_masq_hash_key(protocol, m_addr, m_port);
 660
 661        for(ms = ip_masq_m_tab[hash]; ms ; ms = ms->m_link) {
 662                if ( protocol==ms->protocol &&
 663                    (m_addr==ms->maddr && m_port==ms->mport)) {
 664                        atomic_inc(&ms->refcnt);
 665                        goto out;
 666                }
 667        }
 668
 669out:
 670        return ms;
 671}
 672#endif
 673
 674struct ip_masq * ip_masq_out_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) 
 675{
 676        struct ip_masq *ms;
 677
 678        read_lock(&__ip_masq_lock);
 679        ms = __ip_masq_out_get(protocol, s_addr, s_port, d_addr, d_port);
 680        read_unlock(&__ip_masq_lock);
 681
 682        if (ms)
 683                __ip_masq_set_expire(ms, 0);
 684        return ms;
 685}
 686
 687struct ip_masq * ip_masq_in_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
 688{
 689        struct ip_masq *ms;
 690
 691        read_lock(&__ip_masq_lock);
 692        ms =  __ip_masq_in_get(protocol, s_addr, s_port, d_addr, d_port);
 693        read_unlock(&__ip_masq_lock);
 694
 695        if (ms)
 696                __ip_masq_set_expire(ms, 0);
 697        return ms;
 698}
 699
 700static __inline__ void __ip_masq_put(struct ip_masq *ms) 
 701{
 702        atomic_dec(&ms->refcnt);
 703}
 704
 705void ip_masq_put(struct ip_masq *ms)
 706{
 707        /*
 708         *      Decrement refcnt
 709         */
 710        __ip_masq_put(ms);
 711
 712        /*
 713         *      if refcnt==2  (2 hashes)        
 714         */
 715        if (atomic_read(&ms->refcnt)==2) {
 716                __ip_masq_set_expire(ms, ms->timeout);
 717        } else {
 718                IP_MASQ_DEBUG(0, "did not set timer with refcnt=%d, called from %p\n",
 719                        atomic_read(&ms->refcnt),
 720                        __builtin_return_address(0));
 721        }
 722}
 723
 724static void masq_expire(unsigned long data)
 725{
 726        struct ip_masq *ms = (struct ip_masq *)data;
 727        ms->timeout = MASQUERADE_EXPIRE_RETRY;
 728
 729        /*
 730         *      hey, I'm using it
 731         */
 732        atomic_inc(&ms->refcnt);
 733
 734        IP_MASQ_DEBUG(1, "Masqueraded %s %08lX:%04X expired\n",
 735                        masq_proto_name(ms->protocol),
 736                        ntohl(ms->saddr),ntohs(ms->sport));
 737
 738        write_lock(&__ip_masq_lock);
 739
 740#if 0000
 741        /*
 742         *      Already locked, do bounce ...
 743         */
 744        if (ip_masq_nlocks(&__ip_masq_lock) != 1) {
 745                goto masq_expire_later;
 746        }
 747
 748#endif
 749        /*
 750         *      do I control anybody?
 751         */
 752        if (atomic_read(&ms->n_control)) 
 753                goto masq_expire_later;
 754
 755        /*      
 756         *      does anybody controls me?
 757         */
 758
 759        if (ms->control) 
 760                ip_masq_control_del(ms);
 761
 762        if (ip_masq_unhash(ms)) {
 763                if (ms->flags&IP_MASQ_F_MPORT) {
 764                        atomic_dec(&mport_count);
 765                } else {
 766                        atomic_inc(ip_masq_free_ports + masq_proto_num(ms->protocol));
 767                }
 768                ip_masq_unbind_app(ms);
 769        }
 770
 771        /*
 772         *      refcnt==1 implies I'm the only one referrer
 773         */
 774        if (atomic_read(&ms->refcnt) == 1) {
 775                kfree_s(ms,sizeof(*ms));
 776                MOD_DEC_USE_COUNT;
 777                goto masq_expire_out;
 778        }
 779
 780masq_expire_later:
 781        IP_MASQ_DEBUG(0, "masq_expire delayed: %s %08lX:%04X->%08lX:%04X masq.refcnt-1=%d masq.n_control=%d\n",
 782                masq_proto_name(ms->protocol),
 783                ntohl(ms->saddr), ntohs(ms->sport),
 784                ntohl(ms->daddr), ntohs(ms->dport),
 785                atomic_read(&ms->refcnt)-1,
 786                atomic_read(&ms->n_control));
 787
 788        ip_masq_put(ms);
 789
 790masq_expire_out:
 791        write_unlock(&__ip_masq_lock);
 792}
 793
 794static __u16 get_next_mport(void)
 795{
 796        __u16 mport;
 797        
 798        spin_lock_irq(&masq_port_lock);
 799        /*
 800         *      Try the next available port number
 801         */
 802        mport = htons(masq_port++);
 803        if (masq_port==PORT_MASQ_END) masq_port = PORT_MASQ_BEGIN;
 804
 805        spin_unlock_irq(&masq_port_lock);
 806        return mport;
 807}
 808
 809/*
 810 *      Create a new masquerade list entry, also allocate an
 811 *      unused mport, keeping the portnumber between the
 812 *      given boundaries MASQ_BEGIN and MASQ_END.
 813 *
 814 *      Be careful, it can be called from u-space
 815 */
 816
 817struct ip_masq * ip_masq_new(int proto, __u32 maddr, __u16 mport, __u32 saddr, __u16 sport, __u32 daddr, __u16 dport, unsigned mflags)
 818{
 819        struct ip_masq *ms, *mst;
 820        int ports_tried;
 821        atomic_t *free_ports_p = NULL;
 822        static int n_fails = 0;
 823        int prio;
 824
 825
 826        if (masq_proto_num(proto)!=-1 && mport == 0) {
 827                free_ports_p = ip_masq_free_ports + masq_proto_num(proto);
 828
 829                if (atomic_read(free_ports_p) == 0) {
 830                        if (++n_fails < 5)
 831                                IP_MASQ_ERR( "ip_masq_new(proto=%s): no free ports.\n",
 832                                       masq_proto_name(proto));
 833                        return NULL;
 834                }
 835        }
 836
 837        prio = (mflags&IP_MASQ_F_USER) ? GFP_KERNEL : GFP_ATOMIC;
 838
 839        ms = (struct ip_masq *) kmalloc(sizeof(struct ip_masq), prio);
 840        if (ms == NULL) {
 841                if (++n_fails < 5)
 842                        IP_MASQ_ERR("ip_masq_new(proto=%s): no memory available.\n",
 843                               masq_proto_name(proto));
 844                return NULL;
 845        }
 846        MOD_INC_USE_COUNT;
 847        memset(ms, 0, sizeof(*ms));
 848        init_timer(&ms->timer);
 849        ms->timer.data     = (unsigned long)ms;
 850        ms->timer.function = masq_expire;
 851        ms->protocol       = proto;
 852        ms->saddr          = saddr;
 853        ms->sport          = sport;
 854        ms->daddr          = daddr;
 855        ms->dport          = dport;
 856        ms->flags          = mflags;
 857        ms->app_data       = NULL;
 858        ms->control        = NULL;
 859        
 860        atomic_set(&ms->n_control,0);
 861        atomic_set(&ms->refcnt,0);
 862
 863        if (proto == IPPROTO_UDP && !mport)
 864#ifdef CONFIG_IP_MASQ_LOOSE_DEFAULT
 865                /*
 866                 *      Flag this tunnel as "dest loose"
 867                 *      
 868                 */
 869                ms->flags |= IP_MASQ_F_DLOOSE;
 870#else
 871                ms->flags |= IP_MASQ_F_NO_DADDR;
 872#endif
 873
 874        
 875        /* get masq address from rif */
 876        ms->maddr          = maddr;
 877
 878        /*
 879         *      This flag will allow masq. addr (ms->maddr)
 880         *      to follow forwarding interface address.
 881         */
 882        ms->flags         |= IP_MASQ_F_NO_REPLY;
 883  
 884        /*
 885         *      We want a specific mport. Be careful.
 886         */
 887        if (masq_proto_num(proto) == -1 || mport) {
 888                ms->mport = mport;
 889
 890                /* 
 891                 *      Check 5-upla uniqueness
 892                 */
 893                if (mflags & IP_MASQ_F_USER)    
 894                        write_lock_bh(&__ip_masq_lock);
 895                else 
 896                        write_lock(&__ip_masq_lock);
 897
 898                mst = __ip_masq_in_get(proto, daddr, dport, maddr, mport);
 899                if (mst==NULL) {
 900                        ms->flags |= IP_MASQ_F_MPORT;
 901
 902                        atomic_inc(&mport_count);
 903                        ip_masq_hash(ms);
 904
 905                        if (mflags & IP_MASQ_F_USER)    
 906                                write_unlock_bh(&__ip_masq_lock);
 907                        else 
 908                                write_unlock(&__ip_masq_lock);
 909
 910                        ip_masq_bind_app(ms);
 911                        atomic_inc(&ms->refcnt);
 912                        masq_set_state_timeout(ms, IP_MASQ_S_NONE);
 913                        return ms;
 914                }
 915                if (mflags & IP_MASQ_F_USER)    
 916                        write_unlock_bh(&__ip_masq_lock);
 917                else 
 918                        write_unlock(&__ip_masq_lock);
 919
 920                __ip_masq_put(mst);
 921
 922                IP_MASQ_ERR( "Already used connection: %s, %d.%d.%d.%d:%d => %d.%d.%d.%d:%d, called from %p\n",
 923                        masq_proto_name(proto),
 924                        NIPQUAD(maddr), ntohs(mport),
 925                        NIPQUAD(daddr), ntohs(dport),
 926                        __builtin_return_address(0));
 927
 928
 929                goto mport_nono;
 930        }
 931        
 932
 933        for (ports_tried = 0; 
 934             (atomic_read(free_ports_p) && (ports_tried <= (PORT_MASQ_END - PORT_MASQ_BEGIN)));
 935             ports_tried++){
 936
 937                mport = ms->mport = get_next_mport();
 938                /*
 939                 *      lookup to find out if this connection is used.
 940                 */
 941
 942                if (mflags & IP_MASQ_F_USER) 
 943                        write_lock_bh(&__ip_masq_lock);
 944                else
 945                        write_lock(&__ip_masq_lock);
 946
 947#ifdef CONFIG_IP_MASQ_NREUSE
 948                mst = __ip_masq_getbym(proto, maddr, mport);
 949#else
 950                mst = __ip_masq_in_get(proto, daddr, dport, maddr, mport);
 951#endif
 952                if (mst == NULL) {
 953
 954                        if (atomic_read(free_ports_p) == 0) {
 955                                if (mflags & IP_MASQ_F_USER) 
 956                                        write_unlock_bh(&__ip_masq_lock);
 957                                else
 958                                        write_unlock(&__ip_masq_lock);
 959
 960                                break;
 961                        }
 962                        atomic_dec(free_ports_p);
 963                        ip_masq_hash(ms);
 964
 965                        if (mflags & IP_MASQ_F_USER) 
 966                                write_unlock_bh(&__ip_masq_lock);
 967                        else
 968                                write_unlock(&__ip_masq_lock);
 969
 970                        ip_masq_bind_app(ms);
 971                        n_fails = 0;
 972                        atomic_inc(&ms->refcnt);
 973                        masq_set_state_timeout(ms, IP_MASQ_S_NONE);
 974                        return ms;
 975                }
 976                if (mflags & IP_MASQ_F_USER) 
 977                        write_unlock_bh(&__ip_masq_lock);
 978                else
 979                        write_unlock(&__ip_masq_lock);
 980
 981                __ip_masq_put(mst);
 982        }
 983
 984        if (++n_fails < 5)
 985                IP_MASQ_ERR( "ip_masq_new(proto=%s): could not get free masq entry (free=%d).\n",
 986                       masq_proto_name(ms->protocol), 
 987                       atomic_read(free_ports_p));
 988mport_nono:
 989        kfree_s(ms, sizeof(*ms));
 990
 991        MOD_DEC_USE_COUNT;
 992        return NULL;
 993}
 994
 995/*
 996 *      Get transport protocol data offset, check against size
 997 *      return:
 998 *              0  if other IP proto
 999 *              -1 if error
1000 */
1001static __inline__ int proto_doff(unsigned proto, char *th, unsigned size)
1002{
1003        int ret = -1;
1004        switch (proto) {
1005                case IPPROTO_ICMP:
1006                        if (size >= sizeof(struct icmphdr))
1007                                ret = sizeof(struct icmphdr);
1008                        break;
1009                case IPPROTO_UDP:
1010                        if (size >= sizeof(struct udphdr))
1011                                ret = sizeof(struct udphdr);
1012                        break;
1013                case IPPROTO_TCP:
1014                        /*
1015                        *       Is this case, this check _also_ avoids
1016                        *       touching an invalid pointer if 
1017                        *       size is invalid
1018                        */
1019                        if (size >= sizeof(struct tcphdr)) {
1020                                ret = ((struct tcphdr*)th)->doff << 2;
1021                                if (ret > size) {
1022                                        ret = -1 ;
1023                                }
1024                        }
1025
1026                        break;
1027                default:
1028                        /*      Other proto: nothing to say, by now :) */
1029                        ret = 0;
1030        }
1031        if (ret < 0)
1032                IP_MASQ_DEBUG(0, "mess proto_doff for proto=%d, size =%d\n",
1033                        proto, size);
1034        return ret;
1035}
1036
1037int ip_fw_masquerade(struct sk_buff **skb_p, __u32 maddr)
1038{
1039        struct sk_buff  *skb = *skb_p;
1040        struct iphdr    *iph = skb->nh.iph;
1041        union ip_masq_tphdr h;
1042        struct ip_masq  *ms;
1043        int             size;
1044
1045        /* 
1046         *      doff holds transport protocol data offset
1047         *      csum holds its checksum
1048         *      csum_ok says if csum is valid
1049         */
1050        int doff = 0;
1051        int csum = 0;
1052        int csum_ok = 0;
1053
1054        /*
1055         * We can only masquerade protocols with ports... and hack some ICMPs
1056         */
1057
1058        h.raw = (char*) iph + iph->ihl * 4;
1059        size = ntohs(iph->tot_len) - (iph->ihl * 4);
1060
1061
1062        doff = proto_doff(iph->protocol, h.raw, size);
1063        if (doff <= 0) {
1064                /*      
1065                 *      Output path: do not pass other IP protos nor
1066                 *      invalid packets.
1067                 */
1068                return -1;
1069        }
1070
1071        switch (iph->protocol) {
1072        case IPPROTO_ICMP:
1073                return(ip_fw_masq_icmp(skb_p, maddr));
1074        case IPPROTO_UDP:
1075                if (h.uh->check == 0)
1076                        /* No UDP checksum */
1077                        break;
1078        case IPPROTO_TCP:
1079                /* Make sure packet is in the masq range */
1080                IP_MASQ_DEBUG(3, "O-pkt: %s size=%d\n",
1081                                masq_proto_name(iph->protocol),
1082                                size);
1083
1084#ifdef CONFIG_IP_MASQ_DEBUG
1085                if (ip_masq_get_debug_level() > 3) {
1086                        skb->ip_summed = CHECKSUM_NONE;
1087                }
1088#endif
1089                /* Check that the checksum is OK */
1090                switch (skb->ip_summed)
1091                {
1092                        case CHECKSUM_NONE:
1093                        {
1094                                csum = csum_partial(h.raw + doff, size - doff, 0);
1095                                IP_MASQ_DEBUG(3, "O-pkt: %s I-datacsum=%d\n",
1096                                                masq_proto_name(iph->protocol),
1097                                                csum);
1098
1099                                skb->csum = csum_partial(h.raw , doff, csum);
1100                        }
1101                        case CHECKSUM_HW:
1102                                if (csum_tcpudp_magic(iph->saddr, iph->daddr, 
1103                                                size, iph->protocol, skb->csum))
1104                                {
1105                                        IP_MASQ_DEBUG(0, "Outgoing failed %s checksum from %d.%d.%d.%d (size=%d)!\n",
1106                                               masq_proto_name(iph->protocol),
1107                                               NIPQUAD(iph->saddr),
1108                                               size);
1109                                        return -1;
1110                                }
1111                        default:
1112                                /* CHECKSUM_UNNECESSARY */
1113                }
1114                break;
1115        default:
1116                return -1;
1117        }
1118        /*
1119         *      Now hunt the list to see if we have an old entry
1120         */
1121
1122        /* h.raw = (char*) iph + iph->ihl * 4; */
1123
1124        IP_MASQ_DEBUG(2, "Outgoing %s %08lX:%04X -> %08lX:%04X\n",
1125                masq_proto_name(iph->protocol),
1126                ntohl(iph->saddr), ntohs(h.portp[0]),
1127                ntohl(iph->daddr), ntohs(h.portp[1]));
1128
1129        ms = ip_masq_out_get_iph(iph);
1130        if (ms!=NULL) {
1131
1132                /*
1133                 *      If sysctl !=0 and no pkt has been received yet
1134                 *      in this tunnel and routing iface address has changed...
1135                 *       "You are welcome, diald".
1136                 */
1137                if ( sysctl_ip_dynaddr && ms->flags & IP_MASQ_F_NO_REPLY && maddr != ms->maddr) {
1138
1139                        if (sysctl_ip_dynaddr > 1) {
1140                                IP_MASQ_INFO( "ip_fw_masquerade(): change masq.addr from %d.%d.%d.%d to %d.%d.%d.%d\n",
1141                                       NIPQUAD(ms->maddr),NIPQUAD(maddr));
1142                        }
1143
1144                        write_lock(&__ip_masq_lock);
1145
1146                        ip_masq_unhash(ms);
1147                        ms->maddr = maddr;
1148                        ip_masq_hash(ms);
1149
1150                        write_unlock(&__ip_masq_lock);
1151                }
1152                
1153                /*
1154                 *      Set sport if not defined yet (e.g. ftp PASV).  Because
1155                 *      masq entries are hashed on sport, unhash with old value
1156                 *      and hash with new.
1157                 */
1158
1159                if ( ms->flags & IP_MASQ_F_NO_SPORT && ms->protocol == IPPROTO_TCP ) {
1160                        ms->flags &= ~IP_MASQ_F_NO_SPORT;
1161
1162                        write_lock(&__ip_masq_lock);
1163                        
1164                        ip_masq_unhash(ms);
1165                        ms->sport = h.portp[0];
1166                        ip_masq_hash(ms);       /* hash on new sport */
1167
1168                        write_unlock(&__ip_masq_lock);
1169                        
1170                        IP_MASQ_DEBUG(1, "ip_fw_masquerade(): filled sport=%d\n",
1171                               ntohs(ms->sport));
1172                }
1173                if (ms->flags & IP_MASQ_F_DLOOSE) {
1174                        /*
1175                         *      update dest loose values
1176                         */
1177                        ms->dport = h.portp[1];
1178                        ms->daddr = iph->daddr;
1179                }
1180        } else {
1181                /*
1182                 *      Nope, not found, create a new entry for it
1183                 */
1184
1185#ifdef CONFIG_IP_MASQUERADE_MOD
1186                if (!(ms = ip_masq_mod_out_create(skb, iph, maddr))) 
1187#endif
1188                        ms = ip_masq_new(iph->protocol,
1189                                        maddr, 0,
1190                                        iph->saddr, h.portp[0],
1191                                        iph->daddr, h.portp[1],
1192                                        0);
1193                if (ms == NULL)
1194                        return -1;
1195        }
1196
1197        /*
1198         *      Call module's output update hook
1199         */
1200
1201#ifdef CONFIG_IP_MASQUERADE_MOD
1202        ip_masq_mod_out_update(skb, iph, ms);
1203#endif
1204
1205        /*
1206         *      Change the fragments origin
1207         */
1208
1209        size = skb->len - (h.raw - skb->nh.raw);
1210
1211        /*
1212         *      Set iph addr and port from ip_masq obj.
1213         */
1214        iph->saddr = ms->maddr;
1215        h.portp[0] = ms->mport;
1216
1217        /*
1218         *      Invalidate csum saving if tunnel has masq helper
1219         */
1220
1221        if (ms->app) 
1222                csum_ok = 0;
1223
1224        /*
1225         *      Attempt ip_masq_app call.
1226         *      will fix ip_masq and iph seq stuff
1227         */
1228        if (ip_masq_app_pkt_out(ms, skb_p, maddr) != 0)
1229        {
1230                /*
1231                 *      skb has possibly changed, update pointers.
1232                 */
1233                skb = *skb_p;
1234                iph = skb->nh.iph;
1235                h.raw = (char*) iph + iph->ihl *4;
1236                size = skb->len - (h.raw - skb->nh.raw);
1237                /* doff should have not changed */
1238        }
1239
1240        /*
1241         *      Adjust packet accordingly to protocol
1242         */
1243
1244        /*
1245         *      Transport's payload partial csum
1246         */
1247
1248        if (!csum_ok) {
1249                csum = csum_partial(h.raw + doff, size - doff, 0);
1250        }
1251        skb->csum = csum;
1252
1253        IP_MASQ_DEBUG(3, "O-pkt: %s size=%d O-datacsum=%d\n",
1254                        masq_proto_name(iph->protocol),
1255                        size,
1256                        csum);
1257
1258        /*
1259         *      Protocol csum
1260         */
1261        switch (iph->protocol) {
1262                case IPPROTO_TCP:
1263                        h.th->check = 0;
1264                        h.th->check=csum_tcpudp_magic(iph->saddr, iph->daddr, 
1265                                        size, iph->protocol, 
1266                                        csum_partial(h.raw , doff, csum));
1267                        IP_MASQ_DEBUG(3, "O-pkt: %s O-csum=%d (+%d)\n",
1268                                        masq_proto_name(iph->protocol),
1269                                        h.th->check,
1270                                        (char*) & (h.th->check) - (char*) h.raw);
1271
1272                        break;
1273                case IPPROTO_UDP:
1274                        h.uh->check = 0;
1275                        h.uh->check=csum_tcpudp_magic(iph->saddr, iph->daddr, 
1276                                        size, iph->protocol, 
1277                                        csum_partial(h.raw , doff, csum));
1278                        if (h.uh->check == 0) 
1279                                h.uh->check = 0xFFFF;
1280                        IP_MASQ_DEBUG(3, "O-pkt: %s O-csum=%d (+%d)\n",
1281                                        masq_proto_name(iph->protocol),
1282                                        h.uh->check,
1283                                        (char*) &(h.uh->check)- (char*) h.raw);
1284                        break;
1285        }
1286        ip_send_check(iph);
1287
1288        IP_MASQ_DEBUG(2, "O-routed from %08lX:%04X with masq.addr %08lX\n",
1289                ntohl(ms->maddr),ntohs(ms->mport),ntohl(maddr));
1290
1291        masq_set_state(ms, 1, iph, h.portp);
1292        ip_masq_put(ms);
1293
1294        return 0;
1295 }
1296
1297/*
1298 *      Restore original addresses and ports in the original IP
1299 *      datagram if the failing packet has been [de]masqueraded.
1300 *      This is ugly in the extreme.  We no longer have the original
1301 *      packet so we have to reconstruct it from the failing packet
1302 *      plus data in the masq tables.  The resulting "original data"
1303 *      should be good enough to tell the sender which session to
1304 *      throttle.  Relies on far too much knowledge of masq internals,
1305 *      there ought to be a better way - KAO 990303.
1306 *
1307 *      Moved here from icmp.c - JJC.
1308 *      Already known: type == ICMP_DEST_UNREACH, IPSKB_MASQUERADED
1309 *      skb->nh.iph points to original header.
1310 *
1311 *      Must try both OUT and IN tables; we could add a flag
1312 *      ala IPSKB_MASQUERADED to avoid 2nd tables lookup, but this is VERY
1313 *      unlike because routing makes mtu decision before reaching 
1314 *      ip_fw_masquerade().
1315 *      
1316 */
1317int ip_fw_unmasq_icmp(struct sk_buff *skb) {
1318        struct ip_masq *ms;
1319        struct iphdr *iph = skb->nh.iph;
1320        __u16 *portp = (__u16 *)&(((char *)iph)[iph->ihl*4]);
1321
1322        /* 
1323         *      Always called from _bh context: use read_[un]lock()
1324         */
1325
1326        /*
1327         *      Peek "out" table, this packet has bounced:
1328         *      out->in(frag_needed!)->OUT[icmp]
1329         *
1330         *      iph->daddr is IN host
1331         *      iph->saddr is OUT host
1332         */
1333        read_lock(&__ip_masq_lock);
1334        ms = __ip_masq_out_get(iph->protocol,
1335                        iph->daddr, portp[1],
1336                        iph->saddr, portp[0]);
1337        read_unlock(&__ip_masq_lock);
1338        if (ms) {
1339                IP_MASQ_DEBUG(1, "Incoming frag_need rewrited from %d.%d.%d.%d to %d.%d.%d.%d\n",
1340                        NIPQUAD(iph->daddr), NIPQUAD(ms->maddr));
1341                iph->daddr = ms->maddr;
1342                portp[1] = ms->mport;
1343                __ip_masq_put(ms);
1344                return 1;
1345        }
1346        /*
1347         *      Peek "in" table
1348         *      in->out(frag_needed!)->IN[icmp]
1349         *
1350         *      iph->daddr is OUT host
1351         *      iph->saddr is MASQ host
1352         *
1353         */
1354        read_lock(&__ip_masq_lock);
1355        ms = __ip_masq_in_get(iph->protocol,
1356                        iph->daddr, portp[1],
1357                        iph->saddr, portp[0]);
1358        read_unlock(&__ip_masq_lock);
1359        if (ms) {
1360                IP_MASQ_DEBUG(1, "Outgoing frag_need rewrited from %d.%d.%d.%d to %d.%d.%d.%d\n",
1361                        NIPQUAD(iph->saddr), NIPQUAD(ms->saddr));
1362                iph->saddr = ms->saddr;
1363                portp[0] = ms->sport;
1364                __ip_masq_put(ms);
1365                return 1;
1366        }
1367        return 0;
1368
1369}
1370/*
1371 *      Handle ICMP messages in forward direction.
1372 *      Find any that might be relevant, check against existing connections,
1373 *      forward to masqueraded host if relevant.
1374 *      Currently handles error types - unreachable, quench, ttl exceeded
1375 */
1376
1377int ip_fw_masq_icmp(struct sk_buff **skb_p, __u32 maddr)
1378{
1379        struct sk_buff  *skb   = *skb_p;
1380        struct iphdr    *iph   = skb->nh.iph;
1381        struct icmphdr  *icmph = (struct icmphdr *)((char *)iph + (iph->ihl<<2));
1382        struct iphdr    *ciph;  /* The ip header contained within the ICMP */
1383        __u16           *pptr;  /* port numbers from TCP/UDP contained header */
1384        struct ip_masq  *ms;
1385        unsigned short   len   = ntohs(iph->tot_len) - (iph->ihl * 4);
1386
1387        IP_MASQ_DEBUG(2, "Incoming forward ICMP (%d,%d) %lX -> %lX\n",
1388                icmph->type, ntohs(icmp_id(icmph)),
1389                ntohl(iph->saddr), ntohl(iph->daddr));
1390
1391#ifdef CONFIG_IP_MASQUERADE_ICMP                
1392        if ((icmph->type == ICMP_ECHO ) ||
1393            (icmph->type == ICMP_TIMESTAMP ) ||
1394            (icmph->type == ICMP_INFO_REQUEST ) ||
1395            (icmph->type == ICMP_ADDRESS )) {
1396
1397                IP_MASQ_DEBUG(2, "icmp request rcv %lX->%lX  id %d type %d\n",
1398                       ntohl(iph->saddr),
1399                       ntohl(iph->daddr),
1400                       ntohs(icmp_id(icmph)),
1401                       icmph->type);
1402
1403                ms = ip_masq_out_get(iph->protocol,
1404                                       iph->saddr,
1405                                       icmp_id(icmph),
1406                                       iph->daddr,
1407                                       icmp_hv_req(icmph));
1408                if (ms == NULL) {
1409                        ms = ip_masq_new(iph->protocol,
1410                                         maddr, 0,
1411                                         iph->saddr, icmp_id(icmph),
1412                                         iph->daddr, icmp_hv_req(icmph),
1413                                         0);
1414                        if (ms == NULL)
1415                                return (-1);
1416                        IP_MASQ_DEBUG(1, "Created new icmp entry\n");
1417                }
1418                /* Rewrite source address */
1419                
1420                /*
1421                 *      If sysctl !=0 and no pkt has been received yet
1422                 *      in this tunnel and routing iface address has changed...
1423                 *       "You are welcome, diald".
1424                 */
1425                if ( sysctl_ip_dynaddr && ms->flags & IP_MASQ_F_NO_REPLY && maddr != ms->maddr) {
1426
1427                        if (sysctl_ip_dynaddr > 1) {
1428                                IP_MASQ_INFO( "ip_fw_masq_icmp(): change masq.addr %d.%d.%d.%d to %d.%d.%d.%d",
1429                                       NIPQUAD(ms->maddr), NIPQUAD(maddr));
1430                        }
1431
1432                        write_lock(&__ip_masq_lock);
1433                        
1434                        ip_masq_unhash(ms);
1435                        ms->maddr = maddr;
1436                        ip_masq_hash(ms);
1437
1438                        write_unlock(&__ip_masq_lock);
1439                }
1440                
1441                iph->saddr = ms->maddr;
1442                ip_send_check(iph);
1443                /* Rewrite port (id) */
1444                (icmph->un).echo.id = ms->mport;
1445                icmph->checksum = 0;
1446                icmph->checksum = ip_compute_csum((unsigned char *)icmph, len);
1447
1448                IP_MASQ_DEBUG(2, "icmp request rwt %lX->%lX id %d type %d\n",
1449                       ntohl(iph->saddr),
1450                       ntohl(iph->daddr),
1451                       ntohs(icmp_id(icmph)),
1452                       icmph->type);
1453
1454                masq_set_state(ms, 1, iph, icmph);
1455                ip_masq_put(ms);
1456
1457                return 1;
1458        }
1459#endif
1460
1461        /*
1462         * Work through seeing if this is for us.
1463         * These checks are supposed to be in an order that
1464         * means easy things are checked first to speed up
1465         * processing.... however this means that some
1466         * packets will manage to get a long way down this
1467         * stack and then be rejected, but thats life
1468         */
1469        if ((icmph->type != ICMP_DEST_UNREACH) &&
1470            (icmph->type != ICMP_SOURCE_QUENCH) &&
1471            (icmph->type != ICMP_TIME_EXCEEDED))
1472                return 0;
1473
1474        /* Now find the contained IP header */
1475        ciph = (struct iphdr *) (icmph + 1);
1476
1477#ifdef CONFIG_IP_MASQUERADE_ICMP
1478        if (ciph->protocol == IPPROTO_ICMP) {
1479                /*
1480                 * This section handles ICMP errors for ICMP packets
1481                 */
1482                struct icmphdr  *cicmph = (struct icmphdr *)((char *)ciph + 
1483                                                             (ciph->ihl<<2));
1484
1485
1486                IP_MASQ_DEBUG(2, "fw icmp/icmp rcv %lX->%lX id %d type %d\n",
1487                       ntohl(ciph->saddr),
1488                       ntohl(ciph->daddr),
1489                       ntohs(icmp_id(cicmph)),
1490                       cicmph->type);
1491
1492                read_lock(&__ip_masq_lock);
1493                ms = __ip_masq_out_get(ciph->protocol, 
1494                                      ciph->daddr,
1495                                      icmp_id(cicmph),
1496                                      ciph->saddr,
1497                                      icmp_hv_rep(cicmph));
1498                read_unlock(&__ip_masq_lock);
1499
1500                if (ms == NULL)
1501                        return 0;
1502
1503                /* Now we do real damage to this packet...! */
1504                /* First change the source IP address, and recalc checksum */
1505                iph->saddr = ms->maddr;
1506                ip_send_check(iph);
1507        
1508                /* Now change the *dest* address in the contained IP */
1509                ciph->daddr = ms->maddr;
1510                __ip_masq_put(ms);
1511
1512                ip_send_check(ciph);
1513
1514                /* Change the ID to the masqed one! */
1515                (cicmph->un).echo.id = ms->mport;
1516        
1517                /* And finally the ICMP checksum */
1518                icmph->checksum = 0;
1519                icmph->checksum = ip_compute_csum((unsigned char *) icmph, len);
1520
1521
1522                IP_MASQ_DEBUG(2, "fw icmp/icmp rwt %lX->%lX id %d type %d\n",
1523                       ntohl(ciph->saddr),
1524                       ntohl(ciph->daddr),
1525                       ntohs(icmp_id(cicmph)),
1526                       cicmph->type);
1527
1528                return 1;
1529        }
1530#endif /* CONFIG_IP_MASQUERADE_ICMP */
1531
1532        /* We are only interested ICMPs generated from TCP or UDP packets */
1533        if ((ciph->protocol != IPPROTO_UDP) && (ciph->protocol != IPPROTO_TCP))
1534                return 0;
1535
1536        /*
1537         * Find the ports involved - this packet was
1538         * incoming so the ports are right way round
1539         * (but reversed relative to outer IP header!)
1540         */
1541        pptr = (__u16 *)&(((char *)ciph)[ciph->ihl*4]);
1542#if 0
1543        if (ntohs(pptr[1]) < PORT_MASQ_BEGIN ||
1544            ntohs(pptr[1]) > PORT_MASQ_END)
1545                return 0;
1546#endif
1547
1548        /* Ensure the checksum is correct */
1549        if (ip_compute_csum((unsigned char *) icmph, len))
1550        {
1551                /* Failed checksum! */
1552                IP_MASQ_DEBUG(0, "forward ICMP: failed checksum from %d.%d.%d.%d!\n",
1553                              NIPQUAD(iph->saddr));
1554                return(-1);
1555        }
1556
1557
1558        IP_MASQ_DEBUG(2, "Handling forward ICMP for %08lX:%04X -> %08lX:%04X\n",
1559               ntohl(ciph->saddr), ntohs(pptr[0]),
1560               ntohl(ciph->daddr), ntohs(pptr[1]));
1561
1562
1563#if 0
1564        /* This is pretty much what __ip_masq_in_get_iph() does */
1565        ms = __ip_masq_in_get(ciph->protocol, ciph->saddr, pptr[0], ciph->daddr, pptr[1]);
1566#endif
1567        read_lock(&__ip_masq_lock);
1568        ms = __ip_masq_out_get(ciph->protocol,
1569                               ciph->daddr,
1570                               pptr[1],
1571                               ciph->saddr,
1572                               pptr[0]);
1573        read_unlock(&__ip_masq_lock);
1574
1575        if (ms == NULL)
1576                return 0;
1577
1578        /* Now we do real damage to this packet...! */
1579        /* First change the source IP address, and recalc checksum */
1580        iph->saddr = ms->maddr;
1581        ip_send_check(iph);
1582
1583        /* Now change the *dest* address in the contained IP */
1584        ciph->daddr = ms->maddr;
1585        ip_send_check(ciph);
1586
1587        /* the TCP/UDP dest port - cannot redo check */
1588        pptr[1] = ms->mport;
1589        __ip_masq_put(ms);
1590
1591        /* And finally the ICMP checksum */
1592        icmph->checksum = 0;
1593        icmph->checksum = ip_compute_csum((unsigned char *) icmph, len);
1594
1595
1596        IP_MASQ_DEBUG(2, "Rewrote forward ICMP to %08lX:%04X -> %08lX:%04X\n",
1597               ntohl(ciph->saddr), ntohs(pptr[0]),
1598               ntohl(ciph->daddr), ntohs(pptr[1]));
1599
1600
1601        return 1;
1602}
1603
1604
1605/*
1606 *      Own skb_cow() beast, tweaked for rewriting commonly
1607 *      used pointers in masq code
1608 */
1609static struct sk_buff * masq_skb_cow(struct sk_buff **skb_p, 
1610                        struct iphdr **iph_p, unsigned char **t_p) {
1611        struct sk_buff *skb=(*skb_p);
1612        if (skb_cloned(skb)) {
1613                skb = skb_copy(skb, GFP_ATOMIC);
1614                if (skb) {
1615                        /*
1616                         *      skb changed, update other pointers
1617                         */
1618                        struct iphdr *iph = skb->nh.iph;
1619                        kfree_skb(*skb_p);
1620                        *skb_p = skb;
1621                        *iph_p = iph;
1622                        *t_p = (char*) iph + iph->ihl * 4;
1623                }
1624        }
1625        return skb;
1626}
1627
1628/*
1629 *      Handle ICMP messages in reverse (demasquerade) direction.
1630 *      Find any that might be relevant, check against existing connections,
1631 *      forward to masqueraded host if relevant.
1632 *      Currently handles error types - unreachable, quench, ttl exceeded
1633 */
1634
1635int ip_fw_demasq_icmp(struct sk_buff **skb_p)
1636{
1637        struct sk_buff  *skb   = *skb_p;
1638        struct iphdr    *iph   = skb->nh.iph;
1639        struct icmphdr  *icmph = (struct icmphdr *)((char *)iph + (iph->ihl<<2));
1640        struct iphdr    *ciph;  /* The ip header contained within the ICMP */
1641        __u16           *pptr;  /* port numbers from TCP/UDP contained header */
1642        struct ip_masq  *ms;
1643        unsigned short   len   = ntohs(iph->tot_len) - (iph->ihl * 4);
1644
1645
1646        IP_MASQ_DEBUG(2, "icmp in/rev (%d,%d) %lX -> %lX\n",
1647                icmph->type, ntohs(icmp_id(icmph)),
1648                ntohl(iph->saddr), ntohl(iph->daddr));
1649
1650
1651#ifdef CONFIG_IP_MASQUERADE_ICMP                
1652        if ((icmph->type == ICMP_ECHOREPLY) ||
1653            (icmph->type == ICMP_TIMESTAMPREPLY) ||
1654            (icmph->type == ICMP_INFO_REPLY) ||
1655            (icmph->type == ICMP_ADDRESSREPLY)) {
1656
1657                IP_MASQ_DEBUG(2, "icmp reply rcv %lX->%lX id %d type %d, req %d\n",
1658                       ntohl(iph->saddr),
1659                       ntohl(iph->daddr),
1660                       ntohs(icmp_id(icmph)),
1661                       icmph->type,
1662                       icmp_type_request(icmph->type));
1663
1664                ms = ip_masq_in_get(iph->protocol,
1665                                      iph->saddr,
1666                                      icmp_hv_rep(icmph),
1667                                      iph->daddr,
1668                                      icmp_id(icmph));
1669                if (ms == NULL)
1670                        return 0;
1671
1672                /*
1673                 *      got reply, so clear flag
1674                 */
1675                ms->flags &= ~IP_MASQ_F_NO_REPLY;
1676
1677                if ((skb=masq_skb_cow(skb_p, &iph, (unsigned char**)&icmph)) == NULL) {
1678                        ip_masq_put(ms);
1679                        return -1;
1680                }
1681
1682                /* Reset source address */
1683                iph->daddr = ms->saddr;
1684                /* Redo IP header checksum */
1685                ip_send_check(iph);
1686                /* Set ID to fake port number */
1687                (icmph->un).echo.id = ms->sport;
1688                /* Reset ICMP checksum and set expiry */
1689                icmph->checksum=0;
1690                icmph->checksum=ip_compute_csum((unsigned char *)icmph,len);
1691
1692
1693
1694                IP_MASQ_DEBUG(2, "icmp reply rwt %lX->%lX id %d type %d\n",
1695                       ntohl(iph->saddr),
1696                       ntohl(iph->daddr),
1697                       ntohs(icmp_id(icmph)),
1698                       icmph->type);
1699
1700                masq_set_state(ms, 0, iph, icmph);
1701                ip_masq_put(ms);
1702
1703                return 1;
1704        } else {
1705#endif
1706                if ((icmph->type != ICMP_DEST_UNREACH) &&
1707                    (icmph->type != ICMP_SOURCE_QUENCH) &&
1708                    (icmph->type != ICMP_TIME_EXCEEDED))
1709                        return 0;
1710#ifdef CONFIG_IP_MASQUERADE_ICMP
1711        }
1712#endif
1713        /*
1714         * If we get here we have an ICMP error of one of the above 3 types
1715         * Now find the contained IP header
1716         */
1717
1718        ciph = (struct iphdr *) (icmph + 1);
1719
1720#ifdef CONFIG_IP_MASQUERADE_ICMP
1721        if (ciph->protocol == IPPROTO_ICMP) {
1722                /*
1723                 * This section handles ICMP errors for ICMP packets
1724                 *
1725                 * First get a new ICMP header structure out of the IP packet
1726                 */
1727                struct icmphdr  *cicmph = (struct icmphdr *)((char *)ciph + 
1728                                                             (ciph->ihl<<2));
1729
1730
1731                IP_MASQ_DEBUG(2, "rv icmp/icmp rcv %lX->%lX id %d type %d\n",
1732                       ntohl(ciph->saddr),
1733                       ntohl(ciph->daddr),
1734                       ntohs(icmp_id(cicmph)),
1735                       cicmph->type);
1736
1737                read_lock(&__ip_masq_lock);
1738                ms = __ip_masq_in_get(ciph->protocol, 
1739                                      ciph->daddr, 
1740                                      icmp_hv_req(cicmph),
1741                                      ciph->saddr, 
1742                                      icmp_id(cicmph));
1743                read_unlock(&__ip_masq_lock);
1744
1745                if (ms == NULL)
1746                        return 0;
1747
1748                if ((skb=masq_skb_cow(skb_p, &iph, (unsigned char**)&icmph)) == NULL) {
1749                        __ip_masq_put(ms);
1750                        return -1;
1751                }
1752                ciph = (struct iphdr *) (icmph + 1);
1753                cicmph = (struct icmphdr *)((char *)ciph + 
1754                                            (ciph->ihl<<2));
1755                /* Now we do real damage to this packet...! */
1756                /* First change the dest IP address, and recalc checksum */
1757                iph->daddr = ms->saddr;
1758                ip_send_check(iph);
1759        
1760                /* Now change the *source* address in the contained IP */
1761                ciph->saddr = ms->saddr;
1762                ip_send_check(ciph);
1763
1764                /* Change the ID to the original one! */
1765                (cicmph->un).echo.id = ms->sport;
1766                __ip_masq_put(ms);
1767
1768                /* And finally the ICMP checksum */
1769                icmph->checksum = 0;
1770                icmph->checksum = ip_compute_csum((unsigned char *) icmph, len);
1771
1772
1773                IP_MASQ_DEBUG(2, "rv icmp/icmp rwt %lX->%lX id %d type %d\n",
1774                       ntohl(ciph->saddr),
1775                       ntohl(ciph->daddr),
1776                       ntohs(icmp_id(cicmph)),
1777                       cicmph->type);
1778
1779                return 1;
1780        }
1781#endif /* CONFIG_IP_MASQUERADE_ICMP */
1782
1783        /* We are only interested ICMPs generated from TCP or UDP packets */
1784        if ((ciph->protocol != IPPROTO_UDP) && 
1785            (ciph->protocol != IPPROTO_TCP))
1786                return 0;
1787
1788        /*
1789         * Find the ports involved - remember this packet was
1790         * *outgoing* so the ports are reversed (and addresses)
1791         */
1792        pptr = (__u16 *)&(((char *)ciph)[ciph->ihl*4]);
1793        if (ntohs(pptr[0]) < PORT_MASQ_BEGIN ||
1794            ntohs(pptr[0]) > PORT_MASQ_END)
1795                return 0;
1796
1797        /* Ensure the checksum is correct */
1798        if (ip_compute_csum((unsigned char *) icmph, len))
1799        {
1800                /* Failed checksum! */
1801                IP_MASQ_ERR( "reverse ICMP: failed checksum from %d.%d.%d.%d!\n",
1802                       NIPQUAD(iph->saddr));
1803                return(-1);
1804        }
1805
1806
1807        IP_MASQ_DEBUG(2, "Handling reverse ICMP for %08lX:%04X -> %08lX:%04X\n",
1808               ntohl(ciph->saddr), ntohs(pptr[0]),
1809               ntohl(ciph->daddr), ntohs(pptr[1]));
1810
1811
1812        /* This is pretty much what __ip_masq_in_get_iph() does, except params are wrong way round */
1813        read_lock(&__ip_masq_lock);
1814        ms = __ip_masq_in_get(ciph->protocol,
1815                              ciph->daddr,
1816                              pptr[1],
1817                              ciph->saddr,
1818                              pptr[0]);
1819        read_unlock(&__ip_masq_lock);
1820
1821        if (ms == NULL)
1822                return 0;
1823
1824        if ((skb=masq_skb_cow(skb_p, &iph, (unsigned char**)&icmph)) == NULL) {
1825                __ip_masq_put(ms);
1826                return -1;
1827        }
1828        ciph = (struct iphdr *) (icmph + 1);
1829        pptr = (__u16 *)&(((char *)ciph)[ciph->ihl*4]);
1830
1831        /* Now we do real damage to this packet...! */
1832        /* First change the dest IP address, and recalc checksum */
1833        iph->daddr = ms->saddr;
1834        ip_send_check(iph);
1835
1836        /* Now change the *source* address in the contained IP */
1837        ciph->saddr = ms->saddr;
1838        ip_send_check(ciph);
1839
1840        /* the TCP/UDP source port - cannot redo check */
1841        pptr[0] = ms->sport;
1842        __ip_masq_put(ms);
1843
1844        /* And finally the ICMP checksum */
1845        icmph->checksum = 0;
1846        icmph->checksum = ip_compute_csum((unsigned char *) icmph, len);
1847
1848
1849        IP_MASQ_DEBUG(2, "Rewrote reverse ICMP to %08lX:%04X -> %08lX:%04X\n",
1850               ntohl(ciph->saddr), ntohs(pptr[0]),
1851               ntohl(ciph->daddr), ntohs(pptr[1]));
1852
1853
1854        return 1;
1855}
1856
1857 /*
1858  *     Check if it's an masqueraded port, look it up,
1859  *     and send it on its way...
1860  *
1861  *     Better not have many hosts using the designated portrange
1862  *     as 'normal' ports, or you'll be spending many time in
1863  *     this function.
1864  */
1865
1866int ip_fw_demasquerade(struct sk_buff **skb_p)
1867{
1868        struct sk_buff  *skb = *skb_p;
1869        struct iphdr    *iph = skb->nh.iph;
1870        union ip_masq_tphdr h;
1871        struct ip_masq  *ms;
1872        unsigned short size;
1873        int doff = 0;
1874        int csum = 0;
1875        int csum_ok = 0;
1876        __u32 maddr;
1877
1878        /*
1879         *      Big tappo: only PACKET_HOST (nor loopback neither mcasts)
1880         *      ... don't know why 1st test DOES NOT include 2nd (?)
1881         */
1882
1883        if (skb->pkt_type != PACKET_HOST || skb->dev == &loopback_dev) {
1884                IP_MASQ_DEBUG(2, "ip_fw_demasquerade(): packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
1885                        skb->pkt_type,
1886                        iph->protocol,
1887                        NIPQUAD(iph->daddr));
1888                return 0;
1889        }
1890
1891        h.raw = (char*) iph + iph->ihl * 4;
1892
1893        /*
1894         *      IP payload size
1895         */
1896        size = ntohs(iph->tot_len) - (iph->ihl * 4);
1897
1898        doff = proto_doff(iph->protocol, h.raw, size);
1899
1900        switch (doff) {
1901                case 0:
1902                        /*
1903                         *      Input path: other IP protos Ok, will
1904                         *      reach local sockets path.
1905                         */
1906                        return 0;
1907                case -1:
1908                        IP_MASQ_DEBUG(0, "I-pkt invalid packet data size\n");
1909                        return -1;
1910        }
1911
1912        maddr = iph->daddr;
1913        switch (iph->protocol) {
1914        case IPPROTO_ICMP:
1915                return(ip_fw_demasq_icmp(skb_p));
1916        case IPPROTO_TCP:
1917        case IPPROTO_UDP:
1918                /* 
1919                 *      Make sure packet is in the masq range 
1920                 *      ... or some mod-ule relaxes input range
1921                 *      ... or there is still some `special' mport opened
1922                 */
1923                if ((ntohs(h.portp[1]) < PORT_MASQ_BEGIN
1924                                || ntohs(h.portp[1]) > PORT_MASQ_END)
1925#ifdef CONFIG_IP_MASQUERADE_MOD
1926                                && (ip_masq_mod_in_rule(skb, iph) != 1) 
1927#endif
1928                                && atomic_read(&mport_count) == 0 )
1929                        return 0;
1930
1931                /* Check that the checksum is OK */
1932                if ((iph->protocol == IPPROTO_UDP) && (h.uh->check == 0))
1933                        /* No UDP checksum */
1934                        break;
1935#ifdef CONFIG_IP_MASQ_DEBUG
1936                if (ip_masq_get_debug_level() > 3) {
1937                        skb->ip_summed = CHECKSUM_NONE;
1938                }
1939#endif
1940
1941                switch (skb->ip_summed)
1942                {
1943                        case CHECKSUM_NONE:
1944                                csum = csum_partial(h.raw + doff, size - doff, 0);
1945                                csum_ok++;
1946                                skb->csum = csum_partial(h.raw , doff, csum);
1947
1948                        case CHECKSUM_HW:
1949                                if (csum_tcpudp_magic(iph->saddr, iph->daddr, 
1950                                                size, iph->protocol, skb->csum))
1951                                {
1952                                        IP_MASQ_DEBUG(0, "Incoming failed %s checksum from %d.%d.%d.%d (size=%d)!\n",
1953                                               masq_proto_name(iph->protocol),
1954                                               NIPQUAD(iph->saddr),
1955                                               size);
1956                                        return -1;
1957                                }
1958                        default:
1959                                /* CHECKSUM_UNNECESSARY */
1960                }
1961                break;
1962        default:
1963                return 0;
1964        }
1965
1966
1967
1968        IP_MASQ_DEBUG(2, "Incoming %s %08lX:%04X -> %08lX:%04X\n",
1969                masq_proto_name(iph->protocol),
1970                ntohl(iph->saddr), ntohs(h.portp[0]),
1971                ntohl(iph->daddr), ntohs(h.portp[1]));
1972
1973        /*
1974         * reroute to original host:port if found...
1975         */
1976
1977        ms = ip_masq_in_get_iph(iph);
1978
1979        /*
1980         *      Give additional modules a chance to create an entry
1981         */
1982#ifdef CONFIG_IP_MASQUERADE_MOD
1983        if (!ms) 
1984                ms = ip_masq_mod_in_create(skb, iph, maddr);
1985
1986        /*
1987         *      Call module's input update hook
1988         */
1989        ip_masq_mod_in_update(skb, iph, ms);
1990#endif
1991
1992
1993        if (ms != NULL)
1994        {
1995
1996                /*
1997                 *      got reply, so clear flag
1998                 */
1999                ms->flags &= ~IP_MASQ_F_NO_REPLY;
2000                
2001                /*
2002                 *      Set daddr,dport if not defined yet
2003                 *      and tunnel is not setup as "dest loose"
2004                 */
2005
2006                if (ms->flags & IP_MASQ_F_DLOOSE) {
2007                        /*
2008                         *      update dest loose values
2009                         */
2010                        ms->dport = h.portp[0];
2011                        ms->daddr = iph->saddr;
2012                } else {
2013                if ( ms->flags & IP_MASQ_F_NO_DPORT ) { /*  && ms->protocol == IPPROTO_TCP ) { */
2014                        ms->flags &= ~IP_MASQ_F_NO_DPORT;
2015                        ms->dport = h.portp[0];
2016
2017                        IP_MASQ_DEBUG(1, "ip_fw_demasquerade(): filled dport=%d\n",
2018                               ntohs(ms->dport));
2019
2020                }
2021                if (ms->flags & IP_MASQ_F_NO_DADDR ) { /*  && ms->protocol == IPPROTO_TCP)  { */
2022                        ms->flags &= ~IP_MASQ_F_NO_DADDR;
2023                        ms->daddr = iph->saddr;
2024
2025                        IP_MASQ_DEBUG(1, "ip_fw_demasquerade(): filled daddr=%lX\n",
2026                               ntohl(ms->daddr));
2027
2028                }
2029                }
2030                if ((skb=masq_skb_cow(skb_p, &iph, &h.raw)) == NULL) {
2031                        ip_masq_put(ms);
2032                        return -1;
2033                }
2034                iph->daddr = ms->saddr;
2035                h.portp[1] = ms->sport;
2036
2037                /*
2038                 *      Invalidate csum saving if tunnel has masq helper
2039                 */
2040
2041                if (ms->app) 
2042                        csum_ok = 0;
2043
2044                /*
2045                 *      Attempt ip_masq_app call.
2046                 *      will fix ip_masq and iph ack_seq stuff
2047                 */
2048
2049                if (ip_masq_app_pkt_in(ms, skb_p, maddr) != 0)
2050                {
2051                        /*
2052                         *      skb has changed, update pointers.
2053                         */
2054
2055                        skb = *skb_p;
2056                        iph = skb->nh.iph;
2057                        h.raw = (char*) iph + iph->ihl*4;
2058                        size = ntohs(iph->tot_len) - (iph->ihl * 4);
2059                }
2060
2061                /*
2062                 * Yug! adjust UDP/TCP checksums
2063                 */
2064
2065                /*
2066                 *      Transport's payload partial csum
2067                 */
2068
2069                if (!csum_ok) {
2070                        csum = csum_partial(h.raw + doff, size - doff, 0);
2071                }
2072                skb->csum = csum;
2073
2074                /*
2075                 *      Protocol csum
2076                 */
2077                switch (iph->protocol) {
2078                        case IPPROTO_TCP:
2079                                h.th->check = 0;
2080                                h.th->check=csum_tcpudp_magic(iph->saddr, iph->daddr, 
2081                                                size, iph->protocol, 
2082                                                csum_partial(h.raw , doff, csum));
2083                                break;
2084                        case IPPROTO_UDP:
2085                                h.uh->check = 0;
2086                                h.uh->check=csum_tcpudp_magic(iph->saddr, iph->daddr, 
2087                                                size, iph->protocol, 
2088                                                csum_partial(h.raw , doff, csum));
2089                                if (h.uh->check == 0) 
2090                                        h.uh->check = 0xFFFF;
2091                                break;
2092                }
2093                ip_send_check(iph);
2094
2095                IP_MASQ_DEBUG(2, "I-routed to %08lX:%04X\n",ntohl(iph->daddr),ntohs(h.portp[1]));
2096
2097                masq_set_state (ms, 0, iph, h.portp);
2098                ip_masq_put(ms);
2099
2100                return 1;
2101        }
2102
2103        /* sorry, all this trouble for a no-hit :) */
2104        return 0;
2105}
2106
2107
2108void ip_masq_control_add(struct ip_masq *ms, struct ip_masq* ctl_ms)
2109{
2110        if (ms->control) {
2111                IP_MASQ_ERR( "request control ADD for already controlled: %d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
2112                                NIPQUAD(ms->saddr),ntohs(ms->sport),
2113                                NIPQUAD(ms->daddr),ntohs(ms->dport));
2114                ip_masq_control_del(ms);
2115        }
2116        IP_MASQ_DEBUG(1, "ADDing control for: ms.dst=%d.%d.%d.%d:%d ctl_ms.dst=%d.%d.%d.%d:%d\n",
2117                                NIPQUAD(ms->daddr),ntohs(ms->dport),
2118                                NIPQUAD(ctl_ms->daddr),ntohs(ctl_ms->dport));
2119        ms->control = ctl_ms;
2120        atomic_inc(&ctl_ms->n_control);
2121}
2122
2123void ip_masq_control_del(struct ip_masq *ms)
2124{
2125        struct ip_masq *ctl_ms = ms->control;
2126        if (!ctl_ms) {
2127                IP_MASQ_ERR( "request control DEL for uncontrolled: %d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
2128                                NIPQUAD(ms->saddr),ntohs(ms->sport),
2129                                NIPQUAD(ms->daddr),ntohs(ms->dport));
2130                        return;
2131        }
2132        IP_MASQ_DEBUG(1, "DELeting control for: ms.dst=%d.%d.%d.%d:%d ctl_ms.dst=%d.%d.%d.%d:%d\n",
2133                                NIPQUAD(ms->daddr),ntohs(ms->dport),
2134                                NIPQUAD(ctl_ms->daddr),ntohs(ctl_ms->dport));
2135        ms->control = NULL;
2136        if (atomic_read(&ctl_ms->n_control) == 0) {
2137                IP_MASQ_ERR( "BUG control DEL with n=0 : %d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
2138                                NIPQUAD(ms->saddr),ntohs(ms->sport),
2139                                NIPQUAD(ms->daddr),ntohs(ms->dport));
2140                        return;
2141                
2142        }
2143        atomic_dec(&ctl_ms->n_control);
2144}
2145
2146struct ip_masq * ip_masq_control_get(struct ip_masq *ms)
2147{
2148        return ms->control;
2149}
2150
2151
#ifdef CONFIG_PROC_FS
/*
 *      /proc/net entries
 *      From userspace
 *
 *      Dump the masquerading table as fixed-width text records of
 *      128 bytes each (127 padded characters plus newline): one header
 *      record followed by one record per entry found in ip_masq_m_tab.
 *
 *      buffer: output area
 *      start:  set to where the data for the requested offset begins
 *      offset: byte position the reader wants to continue from
 *      length: number of bytes the reader asked for
 *      unused: not used
 *
 *      Returns the number of valid bytes at *start (at most length).
 */
static int ip_msqhst_procinfo(char *buffer, char **start, off_t offset,
                              int length, int unused)
{
        off_t cursor = 0, skip;
        struct ip_masq *ms;
        char line[129];
        int bucket = 0;
        int filled = 0;
        int full = 0;

        /* Header record, only when the reader starts inside it */
        if (offset < 128) {
                sprintf(line,
                        "Prc FromIP   FPrt ToIP     TPrt Masq Init-seq  Delta PDelta Expires (free=%d,%d,%d)",
                        atomic_read(&ip_masq_free_ports[0]),
                        atomic_read(&ip_masq_free_ports[1]),
                        atomic_read(&ip_masq_free_ports[2]));
                filled = sprintf(buffer, "%-127s\n", line);
        }
        cursor = 128;

        for (bucket = 0; bucket < IP_MASQ_TAB_SIZE && !full; bucket++) {
                /*
                 *      Lock is actually only need in next loop
                 *      we are called from uspace: must stop bh.
                 */
                read_lock_bh(&__ip_masq_lock);

                for (ms = ip_masq_m_tab[bucket]; ms != NULL; ms = ms->m_link) {
                        cursor += 128;
                        if (cursor <= offset) {
                                /* Record lies before the requested offset */
                                filled = 0;
                                continue;
                        }

                        /*
                         *      We have locked the tables, no need to del/add timers
                         *      nor cli()  8)
                         */

                        sprintf(line,"%s %08lX:%04X %08lX:%04X %04X %08X %6d %6d %7lu",
                                masq_proto_name(ms->protocol),
                                ntohl(ms->saddr), ntohs(ms->sport),
                                ntohl(ms->daddr), ntohs(ms->dport),
                                ntohs(ms->mport),
                                ms->out_seq.init_seq,
                                ms->out_seq.delta,
                                ms->out_seq.previous_delta,
                                ms->timer.expires-jiffies);
                        filled += sprintf(buffer+filled, "%-127s\n", line);

                        if (filled >= length) {
                                /* Enough for this read: stop both loops */
                                full = 1;
                                break;
                        }
                }

                read_unlock_bh(&__ip_masq_lock);
        }

        /* Discard the part of the first record that precedes offset */
        skip = filled - (cursor - offset);
        *start = buffer + skip;
        filled -= skip;
        if (filled > length)
                filled = length;
        return filled;
}

#endif
2231
2232/* 
2233 *      Timeouts handling by ipfwadm/ipchains
2234 *      From ip_fw.c
2235 */
2236
2237int ip_fw_masq_timeouts(void *m, int len) 
2238{
2239        struct ip_fw_masq *masq;
2240        int ret = EINVAL;
2241
2242        if (len != sizeof(struct ip_fw_masq)) {
2243                IP_MASQ_DEBUG(1, "ip_fw_masq_timeouts: length %d, expected %d\n",
2244                                len, sizeof(struct ip_fw_masq));
2245        } else {
2246                masq = (struct ip_fw_masq *)m;
2247                if (masq->tcp_timeout)
2248                        masq_timeout_table.timeout[IP_MASQ_S_ESTABLISHED]
2249                                = masq->tcp_timeout;
2250
2251                if (masq->tcp_fin_timeout)
2252                        masq_timeout_table.timeout[IP_MASQ_S_FIN_WAIT]
2253                                = masq->tcp_fin_timeout;
2254
2255                if (masq->udp_timeout)
2256                        masq_timeout_table.timeout[IP_MASQ_S_UDP]
2257                                = masq->udp_timeout;
2258                ret = 0;
2259        }
2260        return ret;
2261}
2262/*
2263 *      Module autoloading stuff
2264 */
2265
2266static int ip_masq_user_check_hook(void) {
2267#ifdef CONFIG_KMOD
2268        if (ip_masq_user_hook == NULL) {
2269                IP_MASQ_DEBUG(1, "About to request \"ip_masq_user\" module\n");
2270                request_module("ip_masq_user");
2271        }
2272#endif /* CONFIG_KMOD */
2273        return (ip_masq_user_hook != NULL);
2274}
2275
2276/*
2277 *      user module hook- info
2278 */
2279static int ip_masq_user_info(char *buffer, char **start, off_t offset,
2280                              int len, int *eof, void *data)
2281{
2282        int ret = -ENOPKG;
2283        if (ip_masq_user_check_hook()) {
2284                ret = ip_masq_user_hook->info(buffer, start, offset, len, (int) data);
2285        }
2286        return ret;
2287}
2288
2289/*
2290 *      user module hook- entry mgmt
2291 */
2292static int ip_masq_user_ctl(int optname, void *arg, int arglen)
2293{
2294        int ret = -ENOPKG;
2295        if (ip_masq_user_check_hook())  {
2296                ret = ip_masq_user_hook->ctl(optname, arg, arglen);
2297        }
2298        return ret;
2299}
2300
2301/*
2302 *      Control from ip_sockglue
2303 *      MAIN ENTRY point from userspace (apart from /proc *info entries)
2304 *      Returns errno
2305 */
2306int ip_masq_uctl(int optname, char * optval , int optlen)
2307{
2308        struct ip_masq_ctl masq_ctl;
2309        int ret = -EINVAL;
2310
2311        if(optlen>sizeof(masq_ctl))
2312                return -EINVAL;
2313
2314        if(copy_from_user(&masq_ctl,optval,optlen))
2315                return -EFAULT;
2316
2317        IP_MASQ_DEBUG(1,"ip_masq_ctl(optname=%d, optlen=%d, target=%d, cmd=%d)\n",
2318                optname, optlen, masq_ctl.m_target, masq_ctl.m_cmd);
2319
2320        switch (masq_ctl.m_target) {
2321                case IP_MASQ_TARGET_USER:
2322                        ret = ip_masq_user_ctl(optname, &masq_ctl, optlen);
2323                        break;
2324#ifdef CONFIG_IP_MASQUERADE_MOD
2325                case IP_MASQ_TARGET_MOD:
2326                        ret = ip_masq_mod_ctl(optname, &masq_ctl, optlen);
2327                        break;
2328#endif
2329        }
2330
2331        /*      
2332         *      If ret>0, copy to user space 
2333         */
2334
2335        if (ret > 0 && ret <= sizeof (masq_ctl)) {
2336                if (copy_to_user(optval, &masq_ctl, ret) )
2337                        return -EFAULT;
2338                ret = 0;
2339        }
2340
2341        return ret;
2342}
2343
2344#ifdef CONFIG_PROC_FS
2345static struct proc_dir_entry    *proc_net_ip_masq = NULL;
2346
#ifdef MODULE
/*
 *      fill_inode callback for the ip_masq /proc directory: bump the
 *      module use count when fill is non-zero, drop it otherwise.
 *      inode is not used.
 */
static void ip_masq_proc_count(struct inode *inode, int fill)
{
        if (!fill) {
                MOD_DEC_USE_COUNT;
                return;
        }
        MOD_INC_USE_COUNT;
}
#endif
2356
2357int ip_masq_proc_register(struct proc_dir_entry *ent)
2358{
2359        if (!proc_net_ip_masq) return -1;
2360        IP_MASQ_DEBUG(1, "registering \"/proc/net/ip_masq/%s\" entry\n",
2361                        ent->name);
2362        return proc_register(proc_net_ip_masq, ent);
2363}
2364void ip_masq_proc_unregister(struct proc_dir_entry *ent)
2365{
2366        if (!proc_net_ip_masq) return;
2367        IP_MASQ_DEBUG(1, "unregistering \"/proc/net/ip_masq/%s\" entry\n",
2368                        ent->name);
2369        proc_unregister(proc_net_ip_masq, ent->low_ino);
2370}
2371
2372
2373__initfunc(static void masq_proc_init(void))
2374{       
2375        IP_MASQ_DEBUG(1,"registering /proc/net/ip_masq\n");
2376        if (!proc_net_ip_masq) {
2377                struct proc_dir_entry *ent;
2378                ent = create_proc_entry("net/ip_masq", S_IFDIR, 0);
2379                if (ent) {
2380#ifdef MODULE
2381                        ent->fill_inode = ip_masq_proc_count;
2382#endif
2383                        proc_net_ip_masq = ent;
2384                 } else {
2385                         IP_MASQ_ERR("Could not create \"/proc/net/ip_masq\" entry\n");
2386                 }
2387        }
2388}
2389#endif  /* CONFIG_PROC_FS */
2390/*
2391 *      Wrapper over inet_select_addr()
2392 */
2393u32 ip_masq_select_addr(struct device *dev, u32 dst, int scope)
2394{
2395        return inet_select_addr(dev, dst, scope);
2396}
2397
/*
 *      Initialize ip masquerading
 *
 *      Registers the /proc entries (with CONFIG_PROC_FS), then runs the
 *      init routine of every configured forwarding helper and of the
 *      application helper layer.  Registration results are not checked.
 *
 *      Always returns 0.
 */
__initfunc(int ip_masq_init(void))
{
#ifdef CONFIG_PROC_FS
        /*
         * The proc code is handed the address of each entry, so the
         * entries must outlive this function.  A compound literal written
         * inside a function body only has automatic storage in ISO C;
         * use static objects instead of relying on compiler-specific
         * placement.
         */
        static struct proc_dir_entry masq_proc_hosts = {
                PROC_NET_IPMSQHST, 13, "ip_masquerade",
                S_IFREG | S_IRUGO, 1, 0, 0,
                0, &proc_net_inode_operations,
                ip_msqhst_procinfo
        };
        static struct proc_dir_entry masq_proc_tcp = {
                0, 3, "tcp",
                S_IFREG | S_IRUGO, 1, 0, 0,
                0, &proc_net_inode_operations,
                NULL,   /* get_info */
                NULL,   /* fill_inode */
                NULL, NULL, NULL,
                (char *) IPPROTO_TCP,
                ip_masq_user_info
        };
        static struct proc_dir_entry masq_proc_udp = {
                0, 3, "udp",
                S_IFREG | S_IRUGO, 1, 0, 0,
                0, &proc_net_inode_operations,
                NULL,   /* get_info */
                NULL,   /* fill_inode */
                NULL, NULL, NULL,
                (char *) IPPROTO_UDP,
                ip_masq_user_info
        };
        static struct proc_dir_entry masq_proc_icmp = {
                0, 4, "icmp",
                S_IFREG | S_IRUGO, 1, 0, 0,
                0, &proc_net_inode_operations,
                NULL,   /* get_info */
                NULL,   /* fill_inode */
                NULL, NULL, NULL,
                (char *) IPPROTO_ICMP,
                ip_masq_user_info
        };

        proc_net_register(&masq_proc_hosts);

        /* The directory must exist before the per-protocol entries */
        masq_proc_init();

        ip_masq_proc_register(&masq_proc_tcp);
        ip_masq_proc_register(&masq_proc_udp);
        ip_masq_proc_register(&masq_proc_icmp);
#endif
#ifdef CONFIG_IP_MASQUERADE_IPAUTOFW
        ip_autofw_init();
#endif
#ifdef CONFIG_IP_MASQUERADE_IPPORTFW
        ip_portfw_init();
#endif
#ifdef CONFIG_IP_MASQUERADE_MFW
        ip_mfw_init();
#endif
        ip_masq_app_init();

        return 0;
}
2456
/* Listing retrieved from lxr.linux.no (hosted by Redpill Linpro AS). */