linux/net/netfilter/nf_conntrack_proto_tcp.c
<<
>>
Prefs
   1/* (C) 1999-2001 Paul `Rusty' Russell
   2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
   3 *
   4 * This program is free software; you can redistribute it and/or modify
   5 * it under the terms of the GNU General Public License version 2 as
   6 * published by the Free Software Foundation.
   7 */
   8
   9#include <linux/types.h>
  10#include <linux/timer.h>
  11#include <linux/module.h>
  12#include <linux/in.h>
  13#include <linux/tcp.h>
  14#include <linux/spinlock.h>
  15#include <linux/skbuff.h>
  16#include <linux/ipv6.h>
  17#include <net/ip6_checksum.h>
  18#include <asm/unaligned.h>
  19
  20#include <net/tcp.h>
  21
  22#include <linux/netfilter.h>
  23#include <linux/netfilter_ipv4.h>
  24#include <linux/netfilter_ipv6.h>
  25#include <net/netfilter/nf_conntrack.h>
  26#include <net/netfilter/nf_conntrack_l4proto.h>
  27#include <net/netfilter/nf_conntrack_ecache.h>
  28#include <net/netfilter/nf_log.h>
  29#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
  30#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
  31
  32/* Protects ct->proto.tcp */
  33static DEFINE_RWLOCK(tcp_lock);
  34
  35/* "Be conservative in what you do,
  36    be liberal in what you accept from others."
  37    If it's non-zero, we mark only out of window RST segments as INVALID. */
  38static int nf_ct_tcp_be_liberal __read_mostly = 0;
  39
  40/* If it is set to zero, we disable picking up already established
  41   connections. */
  42static int nf_ct_tcp_loose __read_mostly = 1;
  43
  44/* Max number of the retransmitted packets without receiving an (acceptable)
  45   ACK from the destination. If this number is reached, a shorter timer
  46   will be started. */
  47static int nf_ct_tcp_max_retrans __read_mostly = 3;
  48
  49  /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
  50     closely.  They're more complex. --RR */
  51
  52static const char *const tcp_conntrack_names[] = {
  53        "NONE",
  54        "SYN_SENT",
  55        "SYN_RECV",
  56        "ESTABLISHED",
  57        "FIN_WAIT",
  58        "CLOSE_WAIT",
  59        "LAST_ACK",
  60        "TIME_WAIT",
  61        "CLOSE",
  62        "LISTEN"
  63};
  64
  65#define SECS * HZ
  66#define MINS * 60 SECS
  67#define HOURS * 60 MINS
  68#define DAYS * 24 HOURS
  69
  70/* RFC1122 says the R2 limit should be at least 100 seconds.
  71   Linux uses 15 packets as limit, which corresponds
  72   to ~13-30min depending on RTO. */
  73static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly    =   5 MINS;
  74static unsigned int nf_ct_tcp_timeout_unacknowledged __read_mostly =   5 MINS;
  75
  76static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
  77        [TCP_CONNTRACK_SYN_SENT]        = 2 MINS,
  78        [TCP_CONNTRACK_SYN_RECV]        = 60 SECS,
  79        [TCP_CONNTRACK_ESTABLISHED]     = 5 DAYS,
  80        [TCP_CONNTRACK_FIN_WAIT]        = 2 MINS,
  81        [TCP_CONNTRACK_CLOSE_WAIT]      = 60 SECS,
  82        [TCP_CONNTRACK_LAST_ACK]        = 30 SECS,
  83        [TCP_CONNTRACK_TIME_WAIT]       = 2 MINS,
  84        [TCP_CONNTRACK_CLOSE]           = 10 SECS,
  85};
  86
  87#define sNO TCP_CONNTRACK_NONE
  88#define sSS TCP_CONNTRACK_SYN_SENT
  89#define sSR TCP_CONNTRACK_SYN_RECV
  90#define sES TCP_CONNTRACK_ESTABLISHED
  91#define sFW TCP_CONNTRACK_FIN_WAIT
  92#define sCW TCP_CONNTRACK_CLOSE_WAIT
  93#define sLA TCP_CONNTRACK_LAST_ACK
  94#define sTW TCP_CONNTRACK_TIME_WAIT
  95#define sCL TCP_CONNTRACK_CLOSE
  96#define sLI TCP_CONNTRACK_LISTEN
  97#define sIV TCP_CONNTRACK_MAX
  98#define sIG TCP_CONNTRACK_IGNORE
  99
 100/* What TCP flags are set from RST/SYN/FIN/ACK. */
 101enum tcp_bit_set {
 102        TCP_SYN_SET,
 103        TCP_SYNACK_SET,
 104        TCP_FIN_SET,
 105        TCP_ACK_SET,
 106        TCP_RST_SET,
 107        TCP_NONE_SET,
 108};
 109
 110/*
 111 * The TCP state transition table needs a few words...
 112 *
 113 * We are the man in the middle. All the packets go through us
 114 * but might get lost in transit to the destination.
 115 * It is assumed that the destinations can't receive segments
 116 * we haven't seen.
 117 *
 118 * The checked segment is in window, but our windows are *not*
 119 * equivalent with the ones of the sender/receiver. We always
 120 * try to guess the state of the current sender.
 121 *
 122 * The meaning of the states are:
 123 *
 124 * NONE:        initial state
 125 * SYN_SENT:    SYN-only packet seen
 126 * SYN_RECV:    SYN-ACK packet seen
 127 * ESTABLISHED: ACK packet seen
 128 * FIN_WAIT:    FIN packet seen
 129 * CLOSE_WAIT:  ACK seen (after FIN)
 130 * LAST_ACK:    FIN seen (after FIN)
 131 * TIME_WAIT:   last ACK seen
 132 * CLOSE:       closed connection (RST)
 133 *
 134 * LISTEN state is not used.
 135 *
 136 * Packets marked as IGNORED (sIG):
 137 *      if they may be either invalid or valid
 138 *      and the receiver may send back a connection
 139 *      closing RST or a SYN/ACK.
 140 *
 141 * Packets marked as INVALID (sIV):
 142 *      if they are invalid
 143 *      or we do not support the request (simultaneous open)
 144 */
 145static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
 146        {
 147/* ORIGINAL */
 148/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
 149/*syn*/    { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
 150/*
 151 *      sNO -> sSS      Initialize a new connection
 152 *      sSS -> sSS      Retransmitted SYN
 153 *      sSR -> sIG      Late retransmitted SYN?
 154 *      sES -> sIG      Error: SYNs in window outside the SYN_SENT state
 155 *                      are errors. Receiver will reply with RST
 156 *                      and close the connection.
 157 *                      Or we are not in sync and hold a dead connection.
 158 *      sFW -> sIG
 159 *      sCW -> sIG
 160 *      sLA -> sIG
 161 *      sTW -> sSS      Reopened connection (RFC 1122).
 162 *      sCL -> sSS
 163 */
 164/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
 165/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
 166/*
 167 * A SYN/ACK from the client is always invalid:
 168 *      - either it tries to set up a simultaneous open, which is
 169 *        not supported;
 170 *      - or the firewall has just been inserted between the two hosts
 171 *        during the session set-up. The SYN will be retransmitted
 172 *        by the true client (or it'll time out).
 173 */
 174/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
 175/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
 176/*
 177 *      sNO -> sIV      Too late and no reason to do anything...
 178 *      sSS -> sIV      Client migth not send FIN in this state:
 179 *                      we enforce waiting for a SYN/ACK reply first.
 180 *      sSR -> sFW      Close started.
 181 *      sES -> sFW
 182 *      sFW -> sLA      FIN seen in both directions, waiting for
 183 *                      the last ACK.
 184 *                      Migth be a retransmitted FIN as well...
 185 *      sCW -> sLA
 186 *      sLA -> sLA      Retransmitted FIN. Remain in the same state.
 187 *      sTW -> sTW
 188 *      sCL -> sCL
 189 */
 190/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
 191/*ack*/    { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
 192/*
 193 *      sNO -> sES      Assumed.
 194 *      sSS -> sIV      ACK is invalid: we haven't seen a SYN/ACK yet.
 195 *      sSR -> sES      Established state is reached.
 196 *      sES -> sES      :-)
 197 *      sFW -> sCW      Normal close request answered by ACK.
 198 *      sCW -> sCW
 199 *      sLA -> sTW      Last ACK detected.
 200 *      sTW -> sTW      Retransmitted last ACK. Remain in the same state.
 201 *      sCL -> sCL
 202 */
 203/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
 204/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
 205/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
 206        },
 207        {
 208/* REPLY */
 209/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
 210/*syn*/    { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
 211/*
 212 *      sNO -> sIV      Never reached.
 213 *      sSS -> sIV      Simultaneous open, not supported
 214 *      sSR -> sIV      Simultaneous open, not supported.
 215 *      sES -> sIV      Server may not initiate a connection.
 216 *      sFW -> sIV
 217 *      sCW -> sIV
 218 *      sLA -> sIV
 219 *      sTW -> sIV      Reopened connection, but server may not do it.
 220 *      sCL -> sIV
 221 */
 222/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
 223/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
 224/*
 225 *      sSS -> sSR      Standard open.
 226 *      sSR -> sSR      Retransmitted SYN/ACK.
 227 *      sES -> sIG      Late retransmitted SYN/ACK?
 228 *      sFW -> sIG      Might be SYN/ACK answering ignored SYN
 229 *      sCW -> sIG
 230 *      sLA -> sIG
 231 *      sTW -> sIG
 232 *      sCL -> sIG
 233 */
 234/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
 235/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
 236/*
 237 *      sSS -> sIV      Server might not send FIN in this state.
 238 *      sSR -> sFW      Close started.
 239 *      sES -> sFW
 240 *      sFW -> sLA      FIN seen in both directions.
 241 *      sCW -> sLA
 242 *      sLA -> sLA      Retransmitted FIN.
 243 *      sTW -> sTW
 244 *      sCL -> sCL
 245 */
 246/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
 247/*ack*/    { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
 248/*
 249 *      sSS -> sIG      Might be a half-open connection.
 250 *      sSR -> sSR      Might answer late resent SYN.
 251 *      sES -> sES      :-)
 252 *      sFW -> sCW      Normal close request answered by ACK.
 253 *      sCW -> sCW
 254 *      sLA -> sTW      Last ACK detected.
 255 *      sTW -> sTW      Retransmitted last ACK.
 256 *      sCL -> sCL
 257 */
 258/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
 259/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
 260/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
 261        }
 262};
 263
 264static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 265                             struct nf_conntrack_tuple *tuple)
 266{
 267        const struct tcphdr *hp;
 268        struct tcphdr _hdr;
 269
 270        /* Actually only need first 8 bytes. */
 271        hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
 272        if (hp == NULL)
 273                return false;
 274
 275        tuple->src.u.tcp.port = hp->source;
 276        tuple->dst.u.tcp.port = hp->dest;
 277
 278        return true;
 279}
 280
 281static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
 282                             const struct nf_conntrack_tuple *orig)
 283{
 284        tuple->src.u.tcp.port = orig->dst.u.tcp.port;
 285        tuple->dst.u.tcp.port = orig->src.u.tcp.port;
 286        return true;
 287}
 288
 289/* Print out the per-protocol part of the tuple. */
 290static int tcp_print_tuple(struct seq_file *s,
 291                           const struct nf_conntrack_tuple *tuple)
 292{
 293        return seq_printf(s, "sport=%hu dport=%hu ",
 294                          ntohs(tuple->src.u.tcp.port),
 295                          ntohs(tuple->dst.u.tcp.port));
 296}
 297
 298/* Print out the private part of the conntrack. */
 299static int tcp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
 300{
 301        enum tcp_conntrack state;
 302
 303        read_lock_bh(&tcp_lock);
 304        state = ct->proto.tcp.state;
 305        read_unlock_bh(&tcp_lock);
 306
 307        return seq_printf(s, "%s ", tcp_conntrack_names[state]);
 308}
 309
 310static unsigned int get_conntrack_index(const struct tcphdr *tcph)
 311{
 312        if (tcph->rst) return TCP_RST_SET;
 313        else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
 314        else if (tcph->fin) return TCP_FIN_SET;
 315        else if (tcph->ack) return TCP_ACK_SET;
 316        else return TCP_NONE_SET;
 317}
 318
 319/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
 320   in IP Filter' by Guido van Rooij.
 321
 322   http://www.nluug.nl/events/sane2000/papers.html
 323   http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
 324
 325   The boundaries and the conditions are changed according to RFC793:
 326   the packet must intersect the window (i.e. segments may be
 327   after the right or before the left edge) and thus receivers may ACK
 328   segments after the right edge of the window.
 329
 330        td_maxend = max(sack + max(win,1)) seen in reply packets
 331        td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
 332        td_maxwin += seq + len - sender.td_maxend
 333                        if seq + len > sender.td_maxend
 334        td_end    = max(seq + len) seen in sent packets
 335
 336   I.   Upper bound for valid data:     seq <= sender.td_maxend
 337   II.  Lower bound for valid data:     seq + len >= sender.td_end - receiver.td_maxwin
 338   III. Upper bound for valid (s)ack:   sack <= receiver.td_end
 339   IV.  Lower bound for valid (s)ack:   sack >= receiver.td_end - MAXACKWINDOW
 340
 341   where sack is the highest right edge of sack block found in the packet
 342   or ack in the case of packet without SACK option.
 343
   The upper bound limit for a valid (s)ack is not ignored -
   we don't have to deal with fragments.
 346*/
 347
 348static inline __u32 segment_seq_plus_len(__u32 seq,
 349                                         size_t len,
 350                                         unsigned int dataoff,
 351                                         const struct tcphdr *tcph)
 352{
 353        /* XXX Should I use payload length field in IP/IPv6 header ?
 354         * - YK */
 355        return (seq + len - dataoff - tcph->doff*4
 356                + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
 357}
 358
 359/* Fixme: what about big packets? */
 360#define MAXACKWINCONST                  66000
 361#define MAXACKWINDOW(sender)                                            \
 362        ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin     \
 363                                              : MAXACKWINCONST)
 364
 365/*
 366 * Simplified tcp_parse_options routine from tcp_input.c
 367 */
 368static void tcp_options(const struct sk_buff *skb,
 369                        unsigned int dataoff,
 370                        const struct tcphdr *tcph,
 371                        struct ip_ct_tcp_state *state)
 372{
 373        unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
 374        const unsigned char *ptr;
 375        int length = (tcph->doff*4) - sizeof(struct tcphdr);
 376
 377        if (!length)
 378                return;
 379
 380        ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
 381                                 length, buff);
 382        BUG_ON(ptr == NULL);
 383
 384        state->td_scale =
 385        state->flags = 0;
 386
 387        while (length > 0) {
 388                int opcode=*ptr++;
 389                int opsize;
 390
 391                switch (opcode) {
 392                case TCPOPT_EOL:
 393                        return;
 394                case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
 395                        length--;
 396                        continue;
 397                default:
 398                        opsize=*ptr++;
 399                        if (opsize < 2) /* "silly options" */
 400                                return;
 401                        if (opsize > length)
 402                                break;  /* don't parse partial options */
 403
 404                        if (opcode == TCPOPT_SACK_PERM
 405                            && opsize == TCPOLEN_SACK_PERM)
 406                                state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
 407                        else if (opcode == TCPOPT_WINDOW
 408                                 && opsize == TCPOLEN_WINDOW) {
 409                                state->td_scale = *(u_int8_t *)ptr;
 410
 411                                if (state->td_scale > 14) {
 412                                        /* See RFC1323 */
 413                                        state->td_scale = 14;
 414                                }
 415                                state->flags |=
 416                                        IP_CT_TCP_FLAG_WINDOW_SCALE;
 417                        }
 418                        ptr += opsize - 2;
 419                        length -= opsize;
 420                }
 421        }
 422}
 423
 424static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
 425                     const struct tcphdr *tcph, __u32 *sack)
 426{
 427        unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
 428        const unsigned char *ptr;
 429        int length = (tcph->doff*4) - sizeof(struct tcphdr);
 430        __u32 tmp;
 431
 432        if (!length)
 433                return;
 434
 435        ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
 436                                 length, buff);
 437        BUG_ON(ptr == NULL);
 438
 439        /* Fast path for timestamp-only option */
 440        if (length == TCPOLEN_TSTAMP_ALIGNED*4
 441            && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
 442                                       | (TCPOPT_NOP << 16)
 443                                       | (TCPOPT_TIMESTAMP << 8)
 444                                       | TCPOLEN_TIMESTAMP))
 445                return;
 446
 447        while (length > 0) {
 448                int opcode = *ptr++;
 449                int opsize, i;
 450
 451                switch (opcode) {
 452                case TCPOPT_EOL:
 453                        return;
 454                case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
 455                        length--;
 456                        continue;
 457                default:
 458                        opsize = *ptr++;
 459                        if (opsize < 2) /* "silly options" */
 460                                return;
 461                        if (opsize > length)
 462                                break;  /* don't parse partial options */
 463
 464                        if (opcode == TCPOPT_SACK
 465                            && opsize >= (TCPOLEN_SACK_BASE
 466                                          + TCPOLEN_SACK_PERBLOCK)
 467                            && !((opsize - TCPOLEN_SACK_BASE)
 468                                 % TCPOLEN_SACK_PERBLOCK)) {
 469                                for (i = 0;
 470                                     i < (opsize - TCPOLEN_SACK_BASE);
 471                                     i += TCPOLEN_SACK_PERBLOCK) {
 472                                        tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
 473
 474                                        if (after(tmp, *sack))
 475                                                *sack = tmp;
 476                                }
 477                                return;
 478                        }
 479                        ptr += opsize - 2;
 480                        length -= opsize;
 481                }
 482        }
 483}
 484
 485static bool tcp_in_window(const struct nf_conn *ct,
 486                          struct ip_ct_tcp *state,
 487                          enum ip_conntrack_dir dir,
 488                          unsigned int index,
 489                          const struct sk_buff *skb,
 490                          unsigned int dataoff,
 491                          const struct tcphdr *tcph,
 492                          u_int8_t pf)
 493{
 494        struct net *net = nf_ct_net(ct);
 495        struct ip_ct_tcp_state *sender = &state->seen[dir];
 496        struct ip_ct_tcp_state *receiver = &state->seen[!dir];
 497        const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
 498        __u32 seq, ack, sack, end, win, swin;
 499        bool res;
 500
 501        /*
 502         * Get the required data from the packet.
 503         */
 504        seq = ntohl(tcph->seq);
 505        ack = sack = ntohl(tcph->ack_seq);
 506        win = ntohs(tcph->window);
 507        end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
 508
 509        if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
 510                tcp_sack(skb, dataoff, tcph, &sack);
 511
 512        pr_debug("tcp_in_window: START\n");
 513        pr_debug("tcp_in_window: ");
 514        nf_ct_dump_tuple(tuple);
 515        pr_debug("seq=%u ack=%u sack=%u win=%u end=%u\n",
 516                 seq, ack, sack, win, end);
 517        pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
 518                 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
 519                 sender->td_end, sender->td_maxend, sender->td_maxwin,
 520                 sender->td_scale,
 521                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
 522                 receiver->td_scale);
 523
 524        if (sender->td_end == 0) {
 525                /*
 526                 * Initialize sender data.
 527                 */
 528                if (tcph->syn && tcph->ack) {
 529                        /*
 530                         * Outgoing SYN-ACK in reply to a SYN.
 531                         */
 532                        sender->td_end =
 533                        sender->td_maxend = end;
 534                        sender->td_maxwin = (win == 0 ? 1 : win);
 535
 536                        tcp_options(skb, dataoff, tcph, sender);
 537                        /*
 538                         * RFC 1323:
 539                         * Both sides must send the Window Scale option
 540                         * to enable window scaling in either direction.
 541                         */
 542                        if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
 543                              && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
 544                                sender->td_scale =
 545                                receiver->td_scale = 0;
 546                } else {
 547                        /*
 548                         * We are in the middle of a connection,
 549                         * its history is lost for us.
 550                         * Let's try to use the data from the packet.
 551                         */
 552                        sender->td_end = end;
 553                        sender->td_maxwin = (win == 0 ? 1 : win);
 554                        sender->td_maxend = end + sender->td_maxwin;
 555                }
 556        } else if (((state->state == TCP_CONNTRACK_SYN_SENT
 557                     && dir == IP_CT_DIR_ORIGINAL)
 558                   || (state->state == TCP_CONNTRACK_SYN_RECV
 559                     && dir == IP_CT_DIR_REPLY))
 560                   && after(end, sender->td_end)) {
 561                /*
 562                 * RFC 793: "if a TCP is reinitialized ... then it need
 563                 * not wait at all; it must only be sure to use sequence
 564                 * numbers larger than those recently used."
 565                 */
 566                sender->td_end =
 567                sender->td_maxend = end;
 568                sender->td_maxwin = (win == 0 ? 1 : win);
 569
 570                tcp_options(skb, dataoff, tcph, sender);
 571        }
 572
 573        if (!(tcph->ack)) {
 574                /*
 575                 * If there is no ACK, just pretend it was set and OK.
 576                 */
 577                ack = sack = receiver->td_end;
 578        } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
 579                    (TCP_FLAG_ACK|TCP_FLAG_RST))
 580                   && (ack == 0)) {
 581                /*
 582                 * Broken TCP stacks, that set ACK in RST packets as well
 583                 * with zero ack value.
 584                 */
 585                ack = sack = receiver->td_end;
 586        }
 587
 588        if (seq == end
 589            && (!tcph->rst
 590                || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
 591                /*
 592                 * Packets contains no data: we assume it is valid
 593                 * and check the ack value only.
 594                 * However RST segments are always validated by their
 595                 * SEQ number, except when seq == 0 (reset sent answering
 596                 * SYN.
 597                 */
 598                seq = end = sender->td_end;
 599
 600        pr_debug("tcp_in_window: ");
 601        nf_ct_dump_tuple(tuple);
 602        pr_debug("seq=%u ack=%u sack =%u win=%u end=%u\n",
 603                 seq, ack, sack, win, end);
 604        pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
 605                 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
 606                 sender->td_end, sender->td_maxend, sender->td_maxwin,
 607                 sender->td_scale,
 608                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
 609                 receiver->td_scale);
 610
 611        pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
 612                 before(seq, sender->td_maxend + 1),
 613                 after(end, sender->td_end - receiver->td_maxwin - 1),
 614                 before(sack, receiver->td_end + 1),
 615                 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
 616
 617        if (before(seq, sender->td_maxend + 1) &&
 618            after(end, sender->td_end - receiver->td_maxwin - 1) &&
 619            before(sack, receiver->td_end + 1) &&
 620            after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
 621                /*
 622                 * Take into account window scaling (RFC 1323).
 623                 */
 624                if (!tcph->syn)
 625                        win <<= sender->td_scale;
 626
 627                /*
 628                 * Update sender data.
 629                 */
 630                swin = win + (sack - ack);
 631                if (sender->td_maxwin < swin)
 632                        sender->td_maxwin = swin;
 633                if (after(end, sender->td_end)) {
 634                        sender->td_end = end;
 635                        sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
 636                }
 637                if (tcph->ack) {
 638                        if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
 639                                sender->td_maxack = ack;
 640                                sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
 641                        } else if (after(ack, sender->td_maxack))
 642                                sender->td_maxack = ack;
 643                }
 644
 645                /*
 646                 * Update receiver data.
 647                 */
 648                if (after(end, sender->td_maxend))
 649                        receiver->td_maxwin += end - sender->td_maxend;
 650                if (after(sack + win, receiver->td_maxend - 1)) {
 651                        receiver->td_maxend = sack + win;
 652                        if (win == 0)
 653                                receiver->td_maxend++;
 654                }
 655                if (ack == receiver->td_end)
 656                        receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
 657
 658                /*
 659                 * Check retransmissions.
 660                 */
 661                if (index == TCP_ACK_SET) {
 662                        if (state->last_dir == dir
 663                            && state->last_seq == seq
 664                            && state->last_ack == ack
 665                            && state->last_end == end
 666                            && state->last_win == win)
 667                                state->retrans++;
 668                        else {
 669                                state->last_dir = dir;
 670                                state->last_seq = seq;
 671                                state->last_ack = ack;
 672                                state->last_end = end;
 673                                state->last_win = win;
 674                                state->retrans = 0;
 675                        }
 676                }
 677                res = true;
 678        } else {
 679                res = false;
 680                if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
 681                    nf_ct_tcp_be_liberal)
 682                        res = true;
 683                if (!res && LOG_INVALID(net, IPPROTO_TCP))
 684                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 685                        "nf_ct_tcp: %s ",
 686                        before(seq, sender->td_maxend + 1) ?
 687                        after(end, sender->td_end - receiver->td_maxwin - 1) ?
 688                        before(sack, receiver->td_end + 1) ?
 689                        after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
 690                        : "ACK is under the lower bound (possible overly delayed ACK)"
 691                        : "ACK is over the upper bound (ACKed data not seen yet)"
 692                        : "SEQ is under the lower bound (already ACKed data retransmitted)"
 693                        : "SEQ is over the upper bound (over the window of the receiver)");
 694        }
 695
 696        pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
 697                 "receiver end=%u maxend=%u maxwin=%u\n",
 698                 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
 699                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
 700
 701        return res;
 702}
 703
 704#ifdef CONFIG_NF_NAT_NEEDED
 705/* Update sender->td_end after NAT successfully mangled the packet */
 706/* Caller must linearize skb at tcp header. */
 707void nf_conntrack_tcp_update(const struct sk_buff *skb,
 708                             unsigned int dataoff,
 709                             struct nf_conn *ct,
 710                             int dir)
 711{
 712        const struct tcphdr *tcph = (const void *)skb->data + dataoff;
 713        const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[dir];
 714        const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[!dir];
 715        __u32 end;
 716
 717        end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph);
 718
 719        write_lock_bh(&tcp_lock);
 720        /*
 721         * We have to worry for the ack in the reply packet only...
 722         */
 723        if (after(end, ct->proto.tcp.seen[dir].td_end))
 724                ct->proto.tcp.seen[dir].td_end = end;
 725        ct->proto.tcp.last_end = end;
 726        write_unlock_bh(&tcp_lock);
 727        pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
 728                 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
 729                 sender->td_end, sender->td_maxend, sender->td_maxwin,
 730                 sender->td_scale,
 731                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
 732                 receiver->td_scale);
 733}
 734EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update);
 735#endif
 736
 737#define TH_FIN  0x01
 738#define TH_SYN  0x02
 739#define TH_RST  0x04
 740#define TH_PUSH 0x08
 741#define TH_ACK  0x10
 742#define TH_URG  0x20
 743#define TH_ECE  0x40
 744#define TH_CWR  0x80
 745
 746/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
 747static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
 748{
 749        [TH_SYN]                        = 1,
 750        [TH_SYN|TH_URG]                 = 1,
 751        [TH_SYN|TH_ACK]                 = 1,
 752        [TH_RST]                        = 1,
 753        [TH_RST|TH_ACK]                 = 1,
 754        [TH_FIN|TH_ACK]                 = 1,
 755        [TH_FIN|TH_ACK|TH_URG]          = 1,
 756        [TH_ACK]                        = 1,
 757        [TH_ACK|TH_URG]                 = 1,
 758};
 759
 760/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
 761static int tcp_error(struct net *net,
 762                     struct sk_buff *skb,
 763                     unsigned int dataoff,
 764                     enum ip_conntrack_info *ctinfo,
 765                     u_int8_t pf,
 766                     unsigned int hooknum)
 767{
 768        const struct tcphdr *th;
 769        struct tcphdr _tcph;
 770        unsigned int tcplen = skb->len - dataoff;
 771        u_int8_t tcpflags;
 772
 773        /* Smaller that minimal TCP header? */
 774        th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
 775        if (th == NULL) {
 776                if (LOG_INVALID(net, IPPROTO_TCP))
 777                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 778                                "nf_ct_tcp: short packet ");
 779                return -NF_ACCEPT;
 780        }
 781
 782        /* Not whole TCP header or malformed packet */
 783        if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
 784                if (LOG_INVALID(net, IPPROTO_TCP))
 785                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 786                                "nf_ct_tcp: truncated/malformed packet ");
 787                return -NF_ACCEPT;
 788        }
 789
 790        /* Checksum invalid? Ignore.
 791         * We skip checking packets on the outgoing path
 792         * because the checksum is assumed to be correct.
 793         */
 794        /* FIXME: Source route IP option packets --RR */
 795        if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 796            nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
 797                if (LOG_INVALID(net, IPPROTO_TCP))
 798                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 799                                  "nf_ct_tcp: bad TCP checksum ");
 800                return -NF_ACCEPT;
 801        }
 802
 803        /* Check TCP flags. */
 804        tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH));
 805        if (!tcp_valid_flags[tcpflags]) {
 806                if (LOG_INVALID(net, IPPROTO_TCP))
 807                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 808                                  "nf_ct_tcp: invalid TCP flag combination ");
 809                return -NF_ACCEPT;
 810        }
 811
 812        return NF_ACCEPT;
 813}
 814
/*
 * Run one packet of an already tracked connection through the TCP state
 * table and the window check, then refresh the conntrack timeout.
 *
 * The candidate state is tcp_conntracks[dir][index][old_state], looked up
 * and applied under tcp_lock.  Every return path drops the lock first.
 *
 * Returns verdict for packet:
 *   NF_ACCEPT   - packet accepted (state updated, or packet ignored but
 *                 let through, or the entry was killed by a lone RST)
 *   -NF_ACCEPT  - packet is invalid (invalid state transition, invalid
 *                 RST, or rejected by tcp_in_window())
 *   -NF_REPEAT  - a SYN reopened a closed/aborted connection and the old
 *                 entry was killed here; the lookup should be repeated
 *   NF_DROP     - the old entry could not be killed here, or an
 *                 out-of-sync session was killed and its SYN/ACK blocked
 */
static int tcp_packet(struct nf_conn *ct,
                      const struct sk_buff *skb,
                      unsigned int dataoff,
                      enum ip_conntrack_info ctinfo,
                      u_int8_t pf,
                      unsigned int hooknum)
{
        struct net *net = nf_ct_net(ct);
        struct nf_conntrack_tuple *tuple;
        enum tcp_conntrack new_state, old_state;
        enum ip_conntrack_dir dir;
        const struct tcphdr *th;
        struct tcphdr _tcph;
        unsigned long timeout;
        unsigned int index;

        /* NOTE(review): presumably tcp_error() has already verified that
         * the header is present, hence BUG_ON rather than an error return
         * - confirm against the caller. */
        th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
        BUG_ON(th == NULL);

        write_lock_bh(&tcp_lock);
        old_state = ct->proto.tcp.state;
        dir = CTINFO2DIR(ctinfo);
        index = get_conntrack_index(th);
        new_state = tcp_conntracks[dir][index][old_state];
        tuple = &ct->tuplehash[dir].tuple;

        switch (new_state) {
        case TCP_CONNTRACK_SYN_SENT:
                if (old_state < TCP_CONNTRACK_TIME_WAIT)
                        break;
                /* RFC 1122: "When a connection is closed actively,
                 * it MUST linger in TIME-WAIT state for a time 2xMSL
                 * (Maximum Segment Lifetime). However, it MAY accept
                 * a new SYN from the remote TCP to reopen the connection
                 * directly from TIME-WAIT state, if..."
                 * We ignore the conditions because we are in the
                 * TIME-WAIT state anyway.
                 *
                 * Handle aborted connections: we and the server
                 * think there is an existing connection but the client
                 * aborts it and starts a new one.
                 */
                if (((ct->proto.tcp.seen[dir].flags
                      | ct->proto.tcp.seen[!dir].flags)
                     & IP_CT_TCP_FLAG_CLOSE_INIT)
                    || (ct->proto.tcp.last_dir == dir
                        && ct->proto.tcp.last_index == TCP_RST_SET)) {
                        /* Attempt to reopen a closed/aborted connection.
                         * Delete this connection and look up again. */
                        write_unlock_bh(&tcp_lock);

                        /* Only repeat if we can actually remove the timer.
                         * Destruction may already be in progress in process
                         * context and we must give it a chance to terminate.
                         */
                        if (nf_ct_kill(ct))
                                return -NF_REPEAT;
                        return NF_DROP;
                }
                /* Fall through */
        case TCP_CONNTRACK_IGNORE:
                /* Ignored packets:
                 *
                 * Our connection entry may be out of sync, so ignore
                 * packets which may signal the real connection between
                 * the client and the server.
                 *
                 * a) SYN in ORIGINAL
                 * b) SYN/ACK in REPLY
                 * c) ACK in reply direction after initial SYN in original.
                 *
                 * If the ignored packet is invalid, the receiver will send
                 * a RST we'll catch below.
                 */
                if (index == TCP_SYNACK_SET
                    && ct->proto.tcp.last_index == TCP_SYN_SET
                    && ct->proto.tcp.last_dir != dir
                    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
                        /* b) This SYN/ACK acknowledges a SYN that we earlier
                         * ignored as invalid. This means that the client and
                         * the server are both in sync, while the firewall is
                         * not. We kill this session and block the SYN/ACK so
                         * that the client cannot but retransmit its SYN and
                         * thus initiate a clean new session.
                         */
                        write_unlock_bh(&tcp_lock);
                        if (LOG_INVALID(net, IPPROTO_TCP))
                                nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
                                          "nf_ct_tcp: killing out of sync session ");
                        nf_ct_kill(ct);
                        return NF_DROP;
                }
                /* Remember what was ignored (index, direction, seq and end
                 * of segment) so that a later SYN/ACK or RST answering it
                 * can be recognised by the checks above and below. */
                ct->proto.tcp.last_index = index;
                ct->proto.tcp.last_dir = dir;
                ct->proto.tcp.last_seq = ntohl(th->seq);
                ct->proto.tcp.last_end =
                    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);

                write_unlock_bh(&tcp_lock);
                if (LOG_INVALID(net, IPPROTO_TCP))
                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
                                  "nf_ct_tcp: invalid packet ignored ");
                return NF_ACCEPT;
        case TCP_CONNTRACK_MAX:
                /* Invalid packet */
                pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
                         dir, get_conntrack_index(th), old_state);
                write_unlock_bh(&tcp_lock);
                if (LOG_INVALID(net, IPPROTO_TCP))
                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
                                  "nf_ct_tcp: invalid state ");
                return -NF_ACCEPT;
        case TCP_CONNTRACK_CLOSE:
                /* A RST whose sequence number lies before the highest ack
                 * recorded for the other direction is rejected. */
                if (index == TCP_RST_SET
                    && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
                    && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
                        /* Invalid RST  */
                        write_unlock_bh(&tcp_lock);
                        if (LOG_INVALID(net, IPPROTO_TCP))
                                nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
                                          "nf_ct_tcp: invalid RST ");
                        return -NF_ACCEPT;
                }
                if (index == TCP_RST_SET
                    && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
                         && ct->proto.tcp.last_index == TCP_SYN_SET)
                        || (!test_bit(IPS_ASSURED_BIT, &ct->status)
                            && ct->proto.tcp.last_index == TCP_ACK_SET))
                    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
                        /* RST sent to invalid SYN or ACK we had let through
                         * at a) and c) above:
                         *
                         * a) SYN was in window then
                         * c) we hold a half-open connection.
                         *
                         * Delete our connection entry.
                         * We skip window checking, because packet might ACK
                         * segments we ignored. */
                        goto in_window;
                }
                /* Just fall through */
        default:
                /* Keep compilers happy. */
                break;
        }

        if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
                           skb, dataoff, th, pf)) {
                write_unlock_bh(&tcp_lock);
                return -NF_ACCEPT;
        }
     in_window:
        /* From now on we have got in-window packets */
        ct->proto.tcp.last_index = index;
        ct->proto.tcp.last_dir = dir;

        pr_debug("tcp_conntracks: ");
        nf_ct_dump_tuple(tuple);
        pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
                 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
                 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
                 old_state, new_state);

        ct->proto.tcp.state = new_state;
        /* Mark the direction whose packet moved the connection into
         * FIN_WAIT; the SYN_SENT case above tests this flag. */
        if (old_state != new_state
            && new_state == TCP_CONNTRACK_FIN_WAIT)
                ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;

        /* Pick the timeout: the per-state value, capped by the shorter
         * max_retrans timeout once too many retransmissions were counted,
         * or else by the unacknowledged timeout while either direction
         * has unacknowledged data. */
        if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans &&
            tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans)
                timeout = nf_ct_tcp_timeout_max_retrans;
        else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
                 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
                 tcp_timeouts[new_state] > nf_ct_tcp_timeout_unacknowledged)
                timeout = nf_ct_tcp_timeout_unacknowledged;
        else
                timeout = tcp_timeouts[new_state];
        write_unlock_bh(&tcp_lock);

        nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
        if (new_state != old_state)
                nf_conntrack_event_cache(IPCT_PROTOINFO, ct);

        if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
                /* If only reply is a RST, we can consider ourselves not to
                   have an established connection: this is a fairly common
                   problem case, so we can delete the conntrack
                   immediately.  --RR */
                if (th->rst) {
                        nf_ct_kill_acct(ct, ctinfo, skb);
                        return NF_ACCEPT;
                }
        } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
                   && (old_state == TCP_CONNTRACK_SYN_RECV
                       || old_state == TCP_CONNTRACK_ESTABLISHED)
                   && new_state == TCP_CONNTRACK_ESTABLISHED) {
                /* Set ASSURED if we see a valid ack in ESTABLISHED
                   after SYN_RECV or a valid answer for a picked up
                   connection. */
                set_bit(IPS_ASSURED_BIT, &ct->status);
                nf_conntrack_event_cache(IPCT_STATUS, ct);
        }
        nf_ct_refresh_acct(ct, ctinfo, skb, timeout);

        return NF_ACCEPT;
}
1022
1023/* Called when a new connection for this protocol found. */
1024static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1025                    unsigned int dataoff)
1026{
1027        enum tcp_conntrack new_state;
1028        const struct tcphdr *th;
1029        struct tcphdr _tcph;
1030        const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1031        const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1032
1033        th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1034        BUG_ON(th == NULL);
1035
1036        /* Don't need lock here: this conntrack not in circulation yet */
1037        new_state
1038                = tcp_conntracks[0][get_conntrack_index(th)]
1039                [TCP_CONNTRACK_NONE];
1040
1041        /* Invalid: delete conntrack */
1042        if (new_state >= TCP_CONNTRACK_MAX) {
1043                pr_debug("nf_ct_tcp: invalid new deleting.\n");
1044                return false;
1045        }
1046
1047        if (new_state == TCP_CONNTRACK_SYN_SENT) {
1048                /* SYN packet */
1049                ct->proto.tcp.seen[0].td_end =
1050                        segment_seq_plus_len(ntohl(th->seq), skb->len,
1051                                             dataoff, th);
1052                ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1053                if (ct->proto.tcp.seen[0].td_maxwin == 0)
1054                        ct->proto.tcp.seen[0].td_maxwin = 1;
1055                ct->proto.tcp.seen[0].td_maxend =
1056                        ct->proto.tcp.seen[0].td_end;
1057
1058                tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1059                ct->proto.tcp.seen[1].flags = 0;
1060        } else if (nf_ct_tcp_loose == 0) {
1061                /* Don't try to pick up connections. */
1062                return false;
1063        } else {
1064                /*
1065                 * We are in the middle of a connection,
1066                 * its history is lost for us.
1067                 * Let's try to use the data from the packet.
1068                 */
1069                ct->proto.tcp.seen[0].td_end =
1070                        segment_seq_plus_len(ntohl(th->seq), skb->len,
1071                                             dataoff, th);
1072                ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1073                if (ct->proto.tcp.seen[0].td_maxwin == 0)
1074                        ct->proto.tcp.seen[0].td_maxwin = 1;
1075                ct->proto.tcp.seen[0].td_maxend =
1076                        ct->proto.tcp.seen[0].td_end +
1077                        ct->proto.tcp.seen[0].td_maxwin;
1078                ct->proto.tcp.seen[0].td_scale = 0;
1079
1080                /* We assume SACK and liberal window checking to handle
1081                 * window scaling */
1082                ct->proto.tcp.seen[0].flags =
1083                ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1084                                              IP_CT_TCP_FLAG_BE_LIBERAL;
1085        }
1086
1087        ct->proto.tcp.seen[1].td_end = 0;
1088        ct->proto.tcp.seen[1].td_maxend = 0;
1089        ct->proto.tcp.seen[1].td_maxwin = 1;
1090        ct->proto.tcp.seen[1].td_scale = 0;
1091
1092        /* tcp_packet will set them */
1093        ct->proto.tcp.state = TCP_CONNTRACK_NONE;
1094        ct->proto.tcp.last_index = TCP_NONE_SET;
1095
1096        pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1097                 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1098                 sender->td_end, sender->td_maxend, sender->td_maxwin,
1099                 sender->td_scale,
1100                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1101                 receiver->td_scale);
1102        return true;
1103}
1104
1105#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
1106
1107#include <linux/netfilter/nfnetlink.h>
1108#include <linux/netfilter/nfnetlink_conntrack.h>
1109
1110static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1111                         const struct nf_conn *ct)
1112{
1113        struct nlattr *nest_parms;
1114        struct nf_ct_tcp_flags tmp = {};
1115
1116        read_lock_bh(&tcp_lock);
1117        nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1118        if (!nest_parms)
1119                goto nla_put_failure;
1120
1121        NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state);
1122
1123        NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1124                   ct->proto.tcp.seen[0].td_scale);
1125
1126        NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1127                   ct->proto.tcp.seen[1].td_scale);
1128
1129        tmp.flags = ct->proto.tcp.seen[0].flags;
1130        NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1131                sizeof(struct nf_ct_tcp_flags), &tmp);
1132
1133        tmp.flags = ct->proto.tcp.seen[1].flags;
1134        NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1135                sizeof(struct nf_ct_tcp_flags), &tmp);
1136        read_unlock_bh(&tcp_lock);
1137
1138        nla_nest_end(skb, nest_parms);
1139
1140        return 0;
1141
1142nla_put_failure:
1143        read_unlock_bh(&tcp_lock);
1144        return -1;
1145}
1146
1147static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1148        [CTA_PROTOINFO_TCP_STATE]           = { .type = NLA_U8 },
1149        [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1150        [CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
1151        [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
1152        [CTA_PROTOINFO_TCP_FLAGS_REPLY]     = { .len =  sizeof(struct nf_ct_tcp_flags) },
1153};
1154
1155static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1156{
1157        struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1158        struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1159        int err;
1160
1161        /* updates could not contain anything about the private
1162         * protocol info, in that case skip the parsing */
1163        if (!pattr)
1164                return 0;
1165
1166        err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
1167        if (err < 0)
1168                return err;
1169
1170        if (tb[CTA_PROTOINFO_TCP_STATE] &&
1171            nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1172                return -EINVAL;
1173
1174        write_lock_bh(&tcp_lock);
1175        if (tb[CTA_PROTOINFO_TCP_STATE])
1176                ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1177
1178        if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1179                struct nf_ct_tcp_flags *attr =
1180                        nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1181                ct->proto.tcp.seen[0].flags &= ~attr->mask;
1182                ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1183        }
1184
1185        if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1186                struct nf_ct_tcp_flags *attr =
1187                        nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1188                ct->proto.tcp.seen[1].flags &= ~attr->mask;
1189                ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1190        }
1191
1192        if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1193            tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1194            ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1195            ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1196                ct->proto.tcp.seen[0].td_scale =
1197                        nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1198                ct->proto.tcp.seen[1].td_scale =
1199                        nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1200        }
1201        write_unlock_bh(&tcp_lock);
1202
1203        return 0;
1204}
1205
1206static int tcp_nlattr_size(void)
1207{
1208        return nla_total_size(0)           /* CTA_PROTOINFO_TCP */
1209                + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1210}
1211
1212static int tcp_nlattr_tuple_size(void)
1213{
1214        return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1215}
1216#endif
1217
1218#ifdef CONFIG_SYSCTL
1219static unsigned int tcp_sysctl_table_users;
1220static struct ctl_table_header *tcp_sysctl_header;
1221static struct ctl_table tcp_sysctl_table[] = {
1222        {
1223                .procname       = "nf_conntrack_tcp_timeout_syn_sent",
1224                .data           = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
1225                .maxlen         = sizeof(unsigned int),
1226                .mode           = 0644,
1227                .proc_handler   = proc_dointvec_jiffies,
1228        },
1229        {
1230                .procname       = "nf_conntrack_tcp_timeout_syn_recv",
1231                .data           = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
1232                .maxlen         = sizeof(unsigned int),
1233                .mode           = 0644,
1234                .proc_handler   = proc_dointvec_jiffies,
1235        },
1236        {
1237                .procname       = "nf_conntrack_tcp_timeout_established",
1238                .data           = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
1239                .maxlen         = sizeof(unsigned int),
1240                .mode           = 0644,
1241                .proc_handler   = proc_dointvec_jiffies,
1242        },
1243        {
1244                .procname       = "nf_conntrack_tcp_timeout_fin_wait",
1245                .data           = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
1246                .maxlen         = sizeof(unsigned int),
1247                .mode           = 0644,
1248                .proc_handler   = proc_dointvec_jiffies,
1249        },
1250        {
1251                .procname       = "nf_conntrack_tcp_timeout_close_wait",
1252                .data           = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
1253                .maxlen         = sizeof(unsigned int),
1254                .mode           = 0644,
1255                .proc_handler   = proc_dointvec_jiffies,
1256        },
1257        {
1258                .procname       = "nf_conntrack_tcp_timeout_last_ack",
1259                .data           = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
1260                .maxlen         = sizeof(unsigned int),
1261                .mode           = 0644,
1262                .proc_handler   = proc_dointvec_jiffies,
1263        },
1264        {
1265                .procname       = "nf_conntrack_tcp_timeout_time_wait",
1266                .data           = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
1267                .maxlen         = sizeof(unsigned int),
1268                .mode           = 0644,
1269                .proc_handler   = proc_dointvec_jiffies,
1270        },
1271        {
1272                .procname       = "nf_conntrack_tcp_timeout_close",
1273                .data           = &tcp_timeouts[TCP_CONNTRACK_CLOSE],
1274                .maxlen         = sizeof(unsigned int),
1275                .mode           = 0644,
1276                .proc_handler   = proc_dointvec_jiffies,
1277        },
1278        {
1279                .procname       = "nf_conntrack_tcp_timeout_max_retrans",
1280                .data           = &nf_ct_tcp_timeout_max_retrans,
1281                .maxlen         = sizeof(unsigned int),
1282                .mode           = 0644,
1283                .proc_handler   = proc_dointvec_jiffies,
1284        },
1285        {
1286                .procname       = "nf_conntrack_tcp_timeout_unacknowledged",
1287                .data           = &nf_ct_tcp_timeout_unacknowledged,
1288                .maxlen         = sizeof(unsigned int),
1289                .mode           = 0644,
1290                .proc_handler   = proc_dointvec_jiffies,
1291        },
1292        {
1293                .ctl_name       = NET_NF_CONNTRACK_TCP_LOOSE,
1294                .procname       = "nf_conntrack_tcp_loose",
1295                .data           = &nf_ct_tcp_loose,
1296                .maxlen         = sizeof(unsigned int),
1297                .mode           = 0644,
1298                .proc_handler   = proc_dointvec,
1299        },
1300        {
1301                .ctl_name       = NET_NF_CONNTRACK_TCP_BE_LIBERAL,
1302                .procname       = "nf_conntrack_tcp_be_liberal",
1303                .data           = &nf_ct_tcp_be_liberal,
1304                .maxlen         = sizeof(unsigned int),
1305                .mode           = 0644,
1306                .proc_handler   = proc_dointvec,
1307        },
1308        {
1309                .ctl_name       = NET_NF_CONNTRACK_TCP_MAX_RETRANS,
1310                .procname       = "nf_conntrack_tcp_max_retrans",
1311                .data           = &nf_ct_tcp_max_retrans,
1312                .maxlen         = sizeof(unsigned int),
1313                .mode           = 0644,
1314                .proc_handler   = proc_dointvec,
1315        },
1316        {
1317                .ctl_name       = 0
1318        }
1319};
1320
1321#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1322static struct ctl_table tcp_compat_sysctl_table[] = {
1323        {
1324                .procname       = "ip_conntrack_tcp_timeout_syn_sent",
1325                .data           = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
1326                .maxlen         = sizeof(unsigned int),
1327                .mode           = 0644,
1328                .proc_handler   = proc_dointvec_jiffies,
1329        },
1330        {
1331                .procname       = "ip_conntrack_tcp_timeout_syn_recv",
1332                .data           = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
1333                .maxlen         = sizeof(unsigned int),
1334                .mode           = 0644,
1335                .proc_handler   = proc_dointvec_jiffies,
1336        },
1337        {
1338                .procname       = "ip_conntrack_tcp_timeout_established",
1339                .data           = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
1340                .maxlen         = sizeof(unsigned int),
1341                .mode           = 0644,
1342                .proc_handler   = proc_dointvec_jiffies,
1343        },
1344        {
1345                .procname       = "ip_conntrack_tcp_timeout_fin_wait",
1346                .data           = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
1347                .maxlen         = sizeof(unsigned int),
1348                .mode           = 0644,
1349                .proc_handler   = proc_dointvec_jiffies,
1350        },
1351        {
1352                .procname       = "ip_conntrack_tcp_timeout_close_wait",
1353                .data           = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
1354                .maxlen         = sizeof(unsigned int),
1355                .mode           = 0644,
1356                .proc_handler   = proc_dointvec_jiffies,
1357        },
1358        {
1359                .procname       = "ip_conntrack_tcp_timeout_last_ack",
1360                .data           = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
1361                .maxlen         = sizeof(unsigned int),
1362                .mode           = 0644,
1363                .proc_handler   = proc_dointvec_jiffies,
1364        },
1365        {
1366                .procname       = "ip_conntrack_tcp_timeout_time_wait",
1367                .data           = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
1368                .maxlen         = sizeof(unsigned int),
1369                .mode           = 0644,
1370                .proc_handler   = proc_dointvec_jiffies,
1371        },
1372        {
1373                .procname       = "ip_conntrack_tcp_timeout_close",
1374                .data           = &tcp_timeouts[TCP_CONNTRACK_CLOSE],
1375                .maxlen         = sizeof(unsigned int),
1376                .mode           = 0644,
1377                .proc_handler   = proc_dointvec_jiffies,
1378        },
1379        {
1380                .procname       = "ip_conntrack_tcp_timeout_max_retrans",
1381                .data           = &nf_ct_tcp_timeout_max_retrans,
1382                .maxlen         = sizeof(unsigned int),
1383                .mode           = 0644,
1384                .proc_handler   = proc_dointvec_jiffies,
1385        },
1386        {
1387                .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
1388                .procname       = "ip_conntrack_tcp_loose",
1389                .data           = &nf_ct_tcp_loose,
1390                .maxlen         = sizeof(unsigned int),
1391                .mode           = 0644,
1392                .proc_handler   = proc_dointvec,
1393        },
1394        {
1395                .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
1396                .procname       = "ip_conntrack_tcp_be_liberal",
1397                .data           = &nf_ct_tcp_be_liberal,
1398                .maxlen         = sizeof(unsigned int),
1399                .mode           = 0644,
1400                .proc_handler   = proc_dointvec,
1401        },
1402        {
1403                .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
1404                .procname       = "ip_conntrack_tcp_max_retrans",
1405                .data           = &nf_ct_tcp_max_retrans,
1406                .maxlen         = sizeof(unsigned int),
1407                .mode           = 0644,
1408                .proc_handler   = proc_dointvec,
1409        },
1410        {
1411                .ctl_name       = 0
1412        }
1413};
1414#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
1415#endif /* CONFIG_SYSCTL */
1416
1417struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
1418{
1419        .l3proto                = PF_INET,
1420        .l4proto                = IPPROTO_TCP,
1421        .name                   = "tcp",
1422        .pkt_to_tuple           = tcp_pkt_to_tuple,
1423        .invert_tuple           = tcp_invert_tuple,
1424        .print_tuple            = tcp_print_tuple,
1425        .print_conntrack        = tcp_print_conntrack,
1426        .packet                 = tcp_packet,
1427        .new                    = tcp_new,
1428        .error                  = tcp_error,
1429#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
1430        .to_nlattr              = tcp_to_nlattr,
1431        .nlattr_size            = tcp_nlattr_size,
1432        .from_nlattr            = nlattr_to_tcp,
1433        .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1434        .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1435        .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1436        .nla_policy             = nf_ct_port_nla_policy,
1437#endif
1438#ifdef CONFIG_SYSCTL
1439        .ctl_table_users        = &tcp_sysctl_table_users,
1440        .ctl_table_header       = &tcp_sysctl_header,
1441        .ctl_table              = tcp_sysctl_table,
1442#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1443        .ctl_compat_table       = tcp_compat_sysctl_table,
1444#endif
1445#endif
1446};
1447EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1448
1449struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
1450{
1451        .l3proto                = PF_INET6,
1452        .l4proto                = IPPROTO_TCP,
1453        .name                   = "tcp",
1454        .pkt_to_tuple           = tcp_pkt_to_tuple,
1455        .invert_tuple           = tcp_invert_tuple,
1456        .print_tuple            = tcp_print_tuple,
1457        .print_conntrack        = tcp_print_conntrack,
1458        .packet                 = tcp_packet,
1459        .new                    = tcp_new,
1460        .error                  = tcp_error,
1461#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
1462        .to_nlattr              = tcp_to_nlattr,
1463        .nlattr_size            = tcp_nlattr_size,
1464        .from_nlattr            = nlattr_to_tcp,
1465        .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1466        .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1467        .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1468        .nla_policy             = nf_ct_port_nla_policy,
1469#endif
1470#ifdef CONFIG_SYSCTL
1471        .ctl_table_users        = &tcp_sysctl_table_users,
1472        .ctl_table_header       = &tcp_sysctl_header,
1473        .ctl_table              = tcp_sysctl_table,
1474#endif
1475};
1476EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);
1477