linux-old/net/ipv4/tcp_ipv4.c
<<
>>
Prefs
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              Implementation of the Transmission Control Protocol(TCP).
   7 *
   8 * Version:     $Id: tcp_ipv4.c,v 1.171 1999/03/28 10:18:26 davem Exp $
   9 *
  10 *              IPv4 specific functions
  11 *
  12 *
  13 *              code split from:
  14 *              linux/ipv4/tcp.c
  15 *              linux/ipv4/tcp_input.c
  16 *              linux/ipv4/tcp_output.c
  17 *
  18 *              See tcp.c for author information
  19 *
  20 *      This program is free software; you can redistribute it and/or
  21 *      modify it under the terms of the GNU General Public License
  22 *      as published by the Free Software Foundation; either version
  23 *      2 of the License, or (at your option) any later version.
  24 */
  25
  26/*
  27 * Changes:
  28 *              David S. Miller :       New socket lookup architecture.
  29 *                                      This code is dedicated to John Dyson.
  30 *              David S. Miller :       Change semantics of established hash,
  31 *                                      half is devoted to TIME_WAIT sockets
  32 *                                      and the rest go in the other half.
  33 *              Andi Kleen :            Add support for syncookies and fixed
  34 *                                      some bugs: ip options weren't passed to
  35 *                                      the TCP layer, missed a check for an ACK bit.
  36 *              Andi Kleen :            Implemented fast path mtu discovery.
  37 *                                      Fixed many serious bugs in the
  38 *                                      open_request handling and moved
  39 *                                      most of it into the af independent code.
  40 *                                      Added tail drop and some other bugfixes.
   41 *                                      Added new listen semantics.
  42 *              Mike McLagan    :       Routing by source
  43 *      Juan Jose Ciarlante:            ip_dynaddr bits
  44 *              Andi Kleen:             various fixes.
  45 *      Vitaly E. Lavrov        :       Transparent proxy revived after year coma.
  46 *      Andi Kleen              :       Fix new listen.
  47 *      Andi Kleen              :       Fix accept error reporting.
  48 */
  49
  50#include <linux/config.h>
  51#include <linux/types.h>
  52#include <linux/fcntl.h>
  53#include <linux/random.h>
  54#include <linux/init.h>
  55#include <linux/ipsec.h>
  56
  57#include <net/icmp.h>
  58#include <net/tcp.h>
  59#include <net/ipv6.h>
  60
  61#include <asm/segment.h>
  62
  63#include <linux/inet.h>
  64#include <linux/stddef.h>
  65
  66extern int sysctl_tcp_timestamps;
  67extern int sysctl_tcp_window_scaling;
  68extern int sysctl_tcp_sack;
  69extern int sysctl_tcp_syncookies;
  70extern int sysctl_ip_dynaddr;
  71extern __u32 sysctl_wmem_max;
  72extern __u32 sysctl_rmem_max;
  73
  74/* Check TCP sequence numbers in ICMP packets. */
  75#define ICMP_MIN_LENGTH 8
  76
  77/* Socket used for sending RSTs */      
  78struct inode tcp_inode;
  79struct socket *tcp_socket=&tcp_inode.u.socket_i;
  80
  81static void tcp_v4_send_reset(struct sk_buff *skb);
  82
  83void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, 
  84                       struct sk_buff *skb);
  85
  86/* This is for sockets with full identity only.  Sockets here will always
  87 * be without wildcards and will have the following invariant:
  88 *          TCP_ESTABLISHED <= sk->state < TCP_CLOSE
  89 *
  90 * First half of the table is for sockets not in TIME_WAIT, second half
  91 * is for TIME_WAIT sockets only.
  92 */
  93struct sock *tcp_established_hash[TCP_HTABLE_SIZE];
  94
  95/* Ok, let's try this, I give up, we do need a local binding
  96 * TCP hash as well as the others for fast bind/connect.
  97 */
  98struct tcp_bind_bucket *tcp_bound_hash[TCP_BHTABLE_SIZE];
  99
 100/* All sockets in TCP_LISTEN state will be in here.  This is the only table
 101 * where wildcard'd TCP sockets can exist.  Hash function here is just local
 102 * port number.
 103 */
 104struct sock *tcp_listening_hash[TCP_LHTABLE_SIZE];
 105
 106/* Register cache. */
 107struct sock *tcp_regs[TCP_NUM_REGS];
 108
  109/*
  110 * This array holds the first and last local port number.
  111 * For high-usage systems, use sysctl to change this to
  112 * 32768-61000
  113 */
  114int sysctl_local_port_range[2] = { 1024, 4999 };
     /* Port most recently tried by tcp_good_socknum(); its next search
      * resumes at tcp_port_rover + 1 (wrapping back into the range).
      */
  115int tcp_port_rover = (1024 - 1);
 116
 117static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport,
 118                                 __u32 faddr, __u16 fport)
 119{
 120        return ((laddr ^ lport) ^ (faddr ^ fport)) & ((TCP_HTABLE_SIZE/2) - 1);
 121}
 122
 123static __inline__ int tcp_sk_hashfn(struct sock *sk)
 124{
 125        __u32 laddr = sk->rcv_saddr;
 126        __u16 lport = sk->num;
 127        __u32 faddr = sk->daddr;
 128        __u16 fport = sk->dport;
 129
 130        return tcp_hashfn(laddr, lport, faddr, fport);
 131}
 132
 133/* Invariant, sk->num is non-zero. */
 134void tcp_bucket_unlock(struct sock *sk)
 135{
 136        struct tcp_bind_bucket *tb;
 137        unsigned short snum = sk->num;
 138
 139        SOCKHASH_LOCK();
 140        for(tb = tcp_bound_hash[tcp_bhashfn(snum)]; tb; tb = tb->next) {
 141                if(tb->port == snum) {
 142                        if(tb->owners == NULL &&
 143                           (tb->flags & TCPB_FLAG_LOCKED)) {
 144                                tb->flags &= ~(TCPB_FLAG_LOCKED |
 145                                               TCPB_FLAG_FASTREUSE);
 146                                tcp_inc_slow_timer(TCP_SLT_BUCKETGC);
 147                        }
 148                        break;
 149                }
 150        }
 151        SOCKHASH_UNLOCK();
 152}
 153
 154struct tcp_bind_bucket *tcp_bucket_create(unsigned short snum)
 155{
 156        struct tcp_bind_bucket *tb;
 157
 158        tb = kmem_cache_alloc(tcp_bucket_cachep, SLAB_ATOMIC);
 159        if(tb != NULL) {
 160                struct tcp_bind_bucket **head =
 161                        &tcp_bound_hash[tcp_bhashfn(snum)];
 162                tb->port = snum;
 163                tb->flags = TCPB_FLAG_LOCKED;
 164                tb->owners = NULL;
 165                if((tb->next = *head) != NULL)
 166                        tb->next->pprev = &tb->next;
 167                *head = tb;
 168                tb->pprev = head;
 169        }
 170        return tb;
 171}
 172
 173#ifdef CONFIG_IP_TRANSPARENT_PROXY
 174/* Ensure that the bound bucket for the port exists.
 175 * Return 0 on success.
 176 */
 177static __inline__ int tcp_bucket_check(unsigned short snum)
 178{
 179        struct tcp_bind_bucket *tb = tcp_bound_hash[tcp_bhashfn(snum)];
 180        for( ; (tb && (tb->port != snum)); tb = tb->next)
 181                ;
 182        if(tb == NULL && tcp_bucket_create(snum) == NULL)
 183                return 1;
 184        else
 185                return 0;
 186}
 187#endif
 188
     /* Check whether sk may use local port snum, adjusting the port's bind
      * bucket as needed.  The whole check runs under SOCKHASH_LOCK().
      *
      * Returns 0 when no conflict was found (a bucket is created if the port
      * had none).  Returns 1 when an existing owner conflicts, when a new
      * bucket could not be allocated, or when an owner-less bucket is locked
      * by someone this socket cannot share the port with.
      */
  189static int tcp_v4_verify_bind(struct sock *sk, unsigned short snum)
  190{
  191        struct tcp_bind_bucket *tb;
  192        int result = 0;
  193
  194        SOCKHASH_LOCK();
             /* Find the bucket for snum, if there is one. */
  195        for(tb = tcp_bound_hash[tcp_bhashfn(snum)];
  196            (tb && (tb->port != snum));
  197            tb = tb->next)
  198                ;
  199        if(tb && tb->owners) {
  200                /* Fast path for reuse ports, see include/net/tcp.h for a very
  201                 * detailed description of why this works, and why it is worth
  202                 * the effort at all. -DaveM
  203                 */
  204                if((tb->flags & TCPB_FLAG_FASTREUSE)    &&
  205                   (sk->reuse != 0)) {
                             /* Leaves with result == 0 and the bucket untouched. */
  206                        goto go_like_smoke;
  207                } else {
  208                        struct sock *sk2;
  209                        int sk_reuse = sk->reuse;
  210
  211                        /* We must walk the whole port owner list in this case. -DaveM */
                             /* An owner conflicts when it is on the same bound device,
                              * reuse does not apply (either side lacks it, or the owner
                              * is listening), and the local addresses overlap (either
                              * one is the wildcard, or they are equal).
                              */
  212                        for(sk2 = tb->owners; sk2; sk2 = sk2->bind_next) {
  213                                if (sk->bound_dev_if == sk2->bound_dev_if) {
  214                                        if(!sk_reuse || !sk2->reuse || sk2->state == TCP_LISTEN) {
  215                                                if(!sk2->rcv_saddr              ||
  216                                                   !sk->rcv_saddr               ||
  217                                                   (sk2->rcv_saddr == sk->rcv_saddr))
  218                                                        break;
  219                                        }
  220                                }
  221                        }
                             /* The loop only stops early on a conflicting owner. */
  222                        if(sk2 != NULL)
  223                                result = 1;
  224                }
  225        }
  226        if(result == 0) {
  227                if(tb == NULL) {
                             /* First user of this port: create the bucket, and mark
                              * it fast-reusable if sk wants reuse and is not listening.
                              */
  228                        if((tb = tcp_bucket_create(snum)) == NULL)
  229                                result = 1;
  230                        else if (sk->reuse && sk->state != TCP_LISTEN)
  231                                tb->flags |= TCPB_FLAG_FASTREUSE;
  232                } else {
  233                        /* It could be pending garbage collection, this
  234                         * kills the race and prevents it from disappearing
  235                         * out from under us by the time we use it.  -DaveM
  236                         */
  237                        if(tb->owners == NULL) {
  238                                if (!(tb->flags & TCPB_FLAG_LOCKED)) {
                                             /* Unlocked and owner-less: take it over and
                                              * undo the slow-timer count for bucket GC.
                                              */
  239                                        tb->flags = (TCPB_FLAG_LOCKED |
  240                                                     ((sk->reuse &&
  241                                                       sk->state != TCP_LISTEN) ?
  242                                                      TCPB_FLAG_FASTREUSE : 0));
  243                                        tcp_dec_slow_timer(TCP_SLT_BUCKETGC);
  244                                } else if (!(tb->flags & TCPB_FLAG_GOODSOCKNUM)) {
  245                                        /* Someone is in between the bind
  246                                         * and the actual connect or listen.
  247                                         * See if it was a legitimate reuse
  248                                         * and we are as well, else punt.
  249                                         */
  250                                        if (sk->reuse == 0 ||
  251                                            !(tb->flags & TCPB_FLAG_FASTREUSE))
  252                                                result = 1;
  253                                } else
                                             /* Locked bucket carrying the flag set by
                                              * tcp_good_socknum(): clear the flag.
                                              */
  254                                        tb->flags &= ~TCPB_FLAG_GOODSOCKNUM;
  255                        }
  256                }
  257        }
  258go_like_smoke:
  259        SOCKHASH_UNLOCK();
  260        return result;
  261}
 262
 263unsigned short tcp_good_socknum(void)
 264{
 265        struct tcp_bind_bucket *tb;
 266        int low = sysctl_local_port_range[0];
 267        int high = sysctl_local_port_range[1];
 268        int remaining = (high - low) + 1;
 269        int rover;
 270
 271        SOCKHASH_LOCK();
 272        rover = tcp_port_rover;
 273        do {
 274                rover += 1;
 275                if((rover < low) || (rover > high))
 276                        rover = low;
 277                tb = tcp_bound_hash[tcp_bhashfn(rover)];
 278                for( ; tb; tb = tb->next) {
 279                        if(tb->port == rover)
 280                                goto next;
 281                }
 282                break;
 283        next:
 284        } while(--remaining > 0);
 285        tcp_port_rover = rover;
 286        tb = NULL;
 287        if((remaining <= 0) || ((tb = tcp_bucket_create(rover)) == NULL))
 288                rover = 0;
 289        if (tb != NULL)
 290                tb->flags |= TCPB_FLAG_GOODSOCKNUM;
 291        SOCKHASH_UNLOCK();
 292
 293        return rover;
 294}
 295
 296static void tcp_v4_hash(struct sock *sk)
 297{
 298        if (sk->state != TCP_CLOSE) {
 299                struct sock **skp;
 300
 301                SOCKHASH_LOCK();
 302                skp = &tcp_established_hash[(sk->hashent = tcp_sk_hashfn(sk))];
 303                if((sk->next = *skp) != NULL)
 304                        (*skp)->pprev = &sk->next;
 305                *skp = sk;
 306                sk->pprev = skp;
 307                tcp_sk_bindify(sk);
 308                SOCKHASH_UNLOCK();
 309        }
 310}
 311
 312static void tcp_v4_unhash(struct sock *sk)
 313{
 314        SOCKHASH_LOCK();
 315        if(sk->pprev) {
 316                if(sk->next)
 317                        sk->next->pprev = sk->pprev;
 318                *sk->pprev = sk->next;
 319                sk->pprev = NULL;
 320                tcp_reg_zap(sk);
 321                tcp_sk_unbindify(sk);
 322        }
 323        SOCKHASH_UNLOCK();
 324}
 325
 326static void tcp_v4_rehash(struct sock *sk)
 327{
 328        unsigned char state;
 329
 330        SOCKHASH_LOCK();
 331        state = sk->state;
 332        if(sk->pprev != NULL) {
 333                if(sk->next)
 334                        sk->next->pprev = sk->pprev;
 335                *sk->pprev = sk->next;
 336                sk->pprev = NULL;
 337                tcp_reg_zap(sk);
 338        }
 339        if(state != TCP_CLOSE) {
 340                struct sock **skp;
 341
 342                if(state == TCP_LISTEN)
 343                        skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
 344                else
 345                        skp = &tcp_established_hash[(sk->hashent = tcp_sk_hashfn(sk))];
 346
 347                if((sk->next = *skp) != NULL)
 348                        (*skp)->pprev = &sk->next;
 349                *skp = sk;
 350                sk->pprev = skp;
 351                if(state == TCP_LISTEN)
 352                        tcp_sk_bindify(sk);
 353        }
 354        SOCKHASH_UNLOCK();
 355}
 356
 357/* Don't inline this cruft.  Here are some nice properties to
 358 * exploit here.  The BSD API does not allow a listening TCP
 359 * to specify the remote port nor the remote address for the
 360 * connection.  So always assume those are both wildcarded
 361 * during the search since they can never be otherwise.
 362 */
 363static struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum, int dif)
 364{
 365        struct sock *sk;
 366        struct sock *result = NULL;
 367        int score, hiscore;
 368
 369        hiscore=0;
 370        for(sk = tcp_listening_hash[tcp_lhashfn(hnum)]; sk; sk = sk->next) {
 371                if(sk->num == hnum) {
 372                        __u32 rcv_saddr = sk->rcv_saddr;
 373
 374                        score = 1;
 375                        if(rcv_saddr) {
 376                                if (rcv_saddr != daddr)
 377                                        continue;
 378                                score++;
 379                        }
 380                        if (sk->bound_dev_if) {
 381                                if (sk->bound_dev_if != dif)
 382                                        continue;
 383                                score++;
 384                        }
 385                        if (score == 3)
 386                                return sk;
 387                        if (score > hiscore) {
 388                                hiscore = score;
 389                                result = sk;
 390                        }
 391                }
 392        }
 393        return result;
 394}
 395
  396/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
  397 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
  398 * It is assumed that this code only gets called from within NET_BH.
  399 */
     /* Find the socket for the connection (saddr, sport) -> (daddr, dport).
      *
      * dport is converted with ntohs() before use, so it must be supplied
      * in network byte order; sport is handed to the hash and match macros
      * unchanged.  th is not referenced in the body.
      *
      * Lookup order: the TCP_RHASH(sport) register cache, the established
      * half of tcp_established_hash, the TIME_WAIT half (same index plus
      * TCP_HTABLE_SIZE/2), and finally the listening sockets.  Returns the
      * socket found, or whatever tcp_v4_lookup_listener() returns (NULL if
      * no listener matches).
      */
  400static inline struct sock *__tcp_v4_lookup(struct tcphdr *th,
  401                                           u32 saddr, u16 sport,
  402                                           u32 daddr, u16 dport, int dif)
  403{
             /* Macro invocation without a trailing semicolon; its result is
              * referenced below as `acookie`.
              */
  404        TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
  405        __u16 hnum = ntohs(dport);
  406        __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
  407        struct sock *sk;
  408        int hash;
  409
  410        /* Check TCP register quick cache first. */
  411        sk = TCP_RHASH(sport);
  412        if(sk && TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
  413                goto hit;
  414
  415        /* Optimize here for direct hit, only listening connections can
  416         * have wildcards anyways.
  417         */
  418        hash = tcp_hashfn(daddr, hnum, saddr, sport);
  419        for(sk = tcp_established_hash[hash]; sk; sk = sk->next) {
  420                if(TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) {
                             /* Only fully established sockets are remembered
                              * in the register cache.
                              */
  421                        if (sk->state == TCP_ESTABLISHED)
  422                                TCP_RHASH(sport) = sk;
  423                        goto hit; /* You sunk my battleship! */
  424                }
  425        }
  426        /* Must check for a TIME_WAIT'er before going to listener hash. */
  427        for(sk = tcp_established_hash[hash+(TCP_HTABLE_SIZE/2)]; sk; sk = sk->next)
  428                if(TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
  429                        goto hit;
  430        sk = tcp_v4_lookup_listener(daddr, hnum, dif);
  431hit:
  432        return sk;
  433}
 434
 435__inline__ struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
 436{
 437        return __tcp_v4_lookup(0, saddr, sport, daddr, dport, dif);
 438}
 439
 440#ifdef CONFIG_IP_TRANSPARENT_PROXY
  441/* Cleaned up a little and adapted to new bind bucket scheme.
  442 * Oddly, this should increase performance here for
  443 * transparent proxy, as tests within the inner loop have
  444 * been eliminated. -DaveM
  445 */
     /* Socket lookup for transparent proxying.
      *
      * num and pnum are converted with ntohs() before use (hnum, hpnum).
      * paddr is the ifa_local of the first address on dev, or 0 when dev
      * has none.
      *
      * Two passes over bind-bucket owner lists are made: first the bucket
      * for hnum, then (once) the bucket for hpnum.  Each candidate earns one
      * point per non-wildcard field (rcv_saddr, daddr, dport, bound_dev_if)
      * that matches; a non-wildcard field that does not match rejects it.
      * A candidate on port hnum with all four points is returned at once,
      * otherwise the best-scoring acceptable candidate is returned, or NULL.
      */
  446static struct sock *tcp_v4_proxy_lookup(unsigned short num, unsigned long raddr,
  447                                        unsigned short rnum, unsigned long laddr,
  448                                        struct device *dev, unsigned short pnum,
  449                                        int dif)
  450{
  451        struct sock *s, *result = NULL;
  452        int badness = -1;
  453        u32 paddr = 0;
  454        unsigned short hnum = ntohs(num);
  455        unsigned short hpnum = ntohs(pnum);
  456        int firstpass = 1;
  457
  458        if(dev && dev->ip_ptr) {
  459                struct in_device *idev = dev->ip_ptr;
  460
  461                if(idev->ifa_list)
  462                        paddr = idev->ifa_list->ifa_local;
  463        }
  464
  465        /* This code must run only from NET_BH. */
  466        {
  467                struct tcp_bind_bucket *tb = tcp_bound_hash[tcp_bhashfn(hnum)];
  468                for( ; (tb && tb->port != hnum); tb = tb->next)
  469                        ;
                     /* No bucket for hnum: go straight to the hpnum pass. */
  470                if(tb == NULL)
  471                        goto next;
  472                s = tb->owners;
  473        }
  474pass2:
  475        for(; s; s = s->bind_next) {
  476                int score = 0;
  477                if(s->rcv_saddr) {
                             /* A bound local address must be either the proxy
                              * pair (hpnum, paddr) or the real pair (hnum, laddr).
                              */
  478                        if((s->num != hpnum || s->rcv_saddr != paddr) &&
  479                           (s->num != hnum || s->rcv_saddr != laddr))
  480                                continue;
  481                        score++;
  482                }
  483                if(s->daddr) {
  484                        if(s->daddr != raddr)
  485                                continue;
  486                        score++;
  487                }
  488                if(s->dport) {
  489                        if(s->dport != rnum)
  490                                continue;
  491                        score++;
  492                }
  493                if(s->bound_dev_if) {
  494                        if(s->bound_dev_if != dif)
  495                                continue;
  496                        score++;
  497                }
  498                if(score == 4 && s->num == hnum) {
  499                        result = s;
  500                        goto gotit;
  501                } else if(score > badness && (s->num == hpnum || s->rcv_saddr)) {
  502                        result = s;
  503                        badness = score;
  504                }
  505        }
  506next:
             /* firstpass is 1 only the first time control reaches here, so
              * the hpnum bucket is scanned at most once.
              */
  507        if(firstpass--) {
  508                struct tcp_bind_bucket *tb = tcp_bound_hash[tcp_bhashfn(hpnum)];
  509                for( ; (tb && tb->port != hpnum); tb = tb->next)
  510                        ;
  511                if(tb) {
  512                        s = tb->owners;
  513                        goto pass2;
  514                }
  515        }
  516gotit:
  517        return result;
  518}
 519#endif /* CONFIG_IP_TRANSPARENT_PROXY */
 520
 521static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
 522{
 523        return secure_tcp_sequence_number(sk->saddr, sk->daddr,
 524                                          skb->h.th->dest,
 525                                          skb->h.th->source);
 526}
 527
 528/* Check that a TCP address is unique, don't allow multiple
 529 * connects to/from the same address.  Actually we can optimize
 530 * quite a bit, since the socket about to connect is still
 531 * in TCP_CLOSE, a tcp_bind_bucket for the local port he will
 532 * use will exist, with a NULL owners list.  So check for that.
 533 * The good_socknum and verify_bind scheme we use makes this
 534 * work.
 535 */
 536static int tcp_v4_unique_address(struct sock *sk)
 537{
 538        struct tcp_bind_bucket *tb;
 539        unsigned short snum = sk->num;
 540        int retval = 1;
 541
 542        /* Freeze the hash while we snoop around. */
 543        SOCKHASH_LOCK();
 544        tb = tcp_bound_hash[tcp_bhashfn(snum)];
 545        for(; tb; tb = tb->next) {
 546                if(tb->port == snum && tb->owners != NULL) {
 547                        /* Almost certainly the re-use port case, search the real hashes
 548                         * so it actually scales.
 549                         */
 550                        sk = __tcp_v4_lookup(NULL, sk->daddr, sk->dport,
 551                                             sk->rcv_saddr, snum, sk->bound_dev_if);
 552                        if((sk != NULL) && (sk->state != TCP_LISTEN))
 553                                retval = 0;
 554                        break;
 555                }
 556        }
 557        SOCKHASH_UNLOCK();
 558        return retval;
 559}
 560
 561/* This will initiate an outgoing connection. */
 562int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 563{
 564        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 565        struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
 566        struct sk_buff *buff;
 567        struct rtable *rt;
 568        u32 daddr, nexthop;
 569        int tmp;
 570
 571        if (sk->state != TCP_CLOSE) 
 572                return(-EISCONN);
 573
 574        /* Don't allow a double connect. */
 575        if (sk->daddr)
 576                return -EINVAL;
 577
 578        if (addr_len < sizeof(struct sockaddr_in))
 579                return(-EINVAL);
 580
 581        if (usin->sin_family != AF_INET) {
 582                static int complained;
 583                if (usin->sin_family)
 584                        return(-EAFNOSUPPORT);
 585                if (!complained++)
 586                        printk(KERN_DEBUG "%s forgot to set AF_INET in " __FUNCTION__ "\n", current->comm);
 587        }
 588
 589        nexthop = daddr = usin->sin_addr.s_addr;
 590        if (sk->opt && sk->opt->srr) {
 591                if (daddr == 0)
 592                        return -EINVAL;
 593                nexthop = sk->opt->faddr;
 594        }
 595
 596        tmp = ip_route_connect(&rt, nexthop, sk->saddr,
 597                               RT_TOS(sk->ip_tos)|RTO_CONN|sk->localroute, sk->bound_dev_if);
 598        if (tmp < 0)
 599                return tmp;
 600
 601        if (rt->rt_flags&(RTCF_MULTICAST|RTCF_BROADCAST)) {
 602                ip_rt_put(rt);
 603                return -ENETUNREACH;
 604        }
 605
 606        dst_release(xchg(&sk->dst_cache, rt));
 607
 608        buff = sock_wmalloc(sk, (MAX_HEADER + sk->prot->max_header),
 609                            0, GFP_KERNEL);
 610
 611        if (buff == NULL)
 612                return -ENOBUFS;
 613
 614        /* Socket has no identity, so lock_sock() is useless.  Also
 615         * since state==TCP_CLOSE (checked above) the socket cannot
 616         * possibly be in the hashes.  TCP hash locking is only
 617         * needed while checking quickly for a unique address.
 618         * However, the socket does need to be (and is) locked
 619         * in tcp_connect().
 620         * Perhaps this addresses all of ANK's concerns. 8-)  -DaveM
 621         */
 622        sk->dport = usin->sin_port;
 623        sk->daddr = rt->rt_dst;
 624        if (sk->opt && sk->opt->srr)
 625                sk->daddr = daddr;
 626        if (!sk->saddr)
 627                sk->saddr = rt->rt_src;
 628        sk->rcv_saddr = sk->saddr;
 629
 630        if (!tcp_v4_unique_address(sk)) {
 631                kfree_skb(buff);
 632                return -EADDRNOTAVAIL;
 633        }
 634
 635        tp->write_seq = secure_tcp_sequence_number(sk->saddr, sk->daddr,
 636                                                   sk->sport, usin->sin_port);
 637
 638        tp->ext_header_len = 0;
 639        if (sk->opt)
 640                tp->ext_header_len = sk->opt->optlen;
 641
 642        /* Reset mss clamp */
 643        tp->mss_clamp = ~0;
 644
 645        if (!ip_dont_fragment(sk, &rt->u.dst) &&
 646            rt->u.dst.pmtu > 576 && rt->rt_dst != rt->rt_gateway) {
 647                /* Clamp mss at maximum of 536 and user_mss.
 648                   Probably, user ordered to override tiny segment size
 649                   in gatewayed case.
 650                 */
 651                tp->mss_clamp = max(tp->user_mss, 536);
 652        }
 653
 654        tcp_connect(sk, buff, rt->u.dst.pmtu);
 655        return 0;
 656}
 657
 658static int tcp_v4_sendmsg(struct sock *sk, struct msghdr *msg, int len)
 659{
 660        int retval = -EINVAL;
 661
 662        /* Do sanity checking for sendmsg/sendto/send. */
 663        if (msg->msg_flags & ~(MSG_OOB|MSG_DONTROUTE|MSG_DONTWAIT|MSG_NOSIGNAL))
 664                goto out;
 665        if (msg->msg_name) {
 666                struct sockaddr_in *addr=(struct sockaddr_in *)msg->msg_name;
 667
 668                if (msg->msg_namelen < sizeof(*addr))
 669                        goto out;
 670                if (addr->sin_family && addr->sin_family != AF_INET)
 671                        goto out;
 672                retval = -ENOTCONN;
 673                if(sk->state == TCP_CLOSE)
 674                        goto out;
 675                retval = -EISCONN;
 676                if (addr->sin_port != sk->dport)
 677                        goto out;
 678                if (addr->sin_addr.s_addr != sk->daddr)
 679                        goto out;
 680        }
 681        retval = tcp_do_sendmsg(sk, msg);
 682
 683out:
 684        return retval;
 685}
 686
 687
 688/*
 689 * Do a linear search in the socket open_request list. 
 690 * This should be replaced with a global hash table.
 691 */
 692static struct open_request *tcp_v4_search_req(struct tcp_opt *tp, 
 693                                              struct iphdr *iph,
 694                                              struct tcphdr *th,
 695                                              struct open_request **prevp)
 696{
 697        struct open_request *req, *prev;  
 698        __u16 rport = th->source; 
 699
 700        /*      assumption: the socket is not in use.
 701         *      as we checked the user count on tcp_rcv and we're
 702         *      running from a soft interrupt.
 703         */
 704        prev = (struct open_request *) (&tp->syn_wait_queue); 
 705        for (req = prev->dl_next; req; req = req->dl_next) {
 706                if (req->af.v4_req.rmt_addr == iph->saddr &&
 707                    req->af.v4_req.loc_addr == iph->daddr &&
 708                    req->rmt_port == rport
 709#ifdef CONFIG_IP_TRANSPARENT_PROXY
 710                    && req->lcl_port == th->dest
 711#endif
 712                    ) {
 713                        *prevp = prev; 
 714                        return req; 
 715                }
 716                prev = req; 
 717        }
 718        return NULL; 
 719}
 720
 721
 722/* 
 723 * This routine does path mtu discovery as defined in RFC1191.
 724 */
 725static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *ip)
 726{
 727        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 728
 729        if (atomic_read(&sk->sock_readers))
 730                return;
 731
 732        /* Don't interested in TCP_LISTEN and open_requests (SYN-ACKs
 733         * send out by Linux are always <576bytes so they should go through
 734         * unfragmented).
 735         */
 736        if (sk->state == TCP_LISTEN)
 737                return; 
 738
 739        /* We don't check in the destentry if pmtu discovery is forbidden
 740         * on this route. We just assume that no packet_to_big packets
 741         * are send back when pmtu discovery is not active.
 742         * There is a small race when the user changes this flag in the
 743         * route, but I think that's acceptable.
 744         */
 745        if (sk->dst_cache &&
 746            sk->ip_pmtudisc != IP_PMTUDISC_DONT &&
 747            tp->pmtu_cookie > sk->dst_cache->pmtu) {
 748                tcp_sync_mss(sk, sk->dst_cache->pmtu);
 749
 750                /* Resend the TCP packet because it's  
 751                 * clear that the old packet has been
 752                 * dropped. This is the new "fast" path mtu
 753                 * discovery.
 754                 */
 755                tcp_simple_retransmit(sk);
 756        } /* else let the usual retransmit timer handle it */
 757}
 758
/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition for a TCP packet we sent.
 *
 *   skb - the received ICMP message; the ICMP type and code are read
 *         from skb->h.icmph.
 *   dp  - start of the IP header of the offending packet as quoted in
 *         the ICMP payload.  The quoted TCP header is located right
 *         behind that IP header (dp + ihl*4) and is used to find the
 *         appropriate socket.
 *   len - number of quoted bytes available at dp.
 *
 * Nothing is returned; the outcome is recorded in the socket
 * (sk->err / sk->err_soft), in the congestion state (source quench),
 * in the pending open_request queue, or only in the statistics counters.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 * sk->err and sk->err_soft should be atomic_t.
 */

void tcp_v4_err(struct sk_buff *skb, unsigned char *dp, int len)
{
        struct iphdr *iph = (struct iphdr*)dp;
        struct tcphdr *th; 
        struct tcp_opt *tp;
        int type = skb->h.icmph->type;
        int code = skb->h.icmph->code;
        /* no_flags is a real variable only when the minimum quoted length
         * may be too short to contain the TCP flag bits; otherwise it is
         * the constant 0 and every "!no_flags" test below folds away.
         */
#if ICMP_MIN_LENGTH < 14
        int no_flags = 0;
#else
#define no_flags 0
#endif
        struct sock *sk;
        __u32 seq;
        int err;

        /* Too short to hold the quoted IP header plus the minimum payload. */
        if (len < (iph->ihl << 2) + ICMP_MIN_LENGTH) { 
                icmp_statistics.IcmpInErrors++; 
                return;
        }
#if ICMP_MIN_LENGTH < 14
        /* Fewer than 14 bytes of TCP header quoted: do not trust th->syn
         * and th->ack below.
         */
        if (len < (iph->ihl << 2) + 14)
                no_flags = 1;
#endif

        th = (struct tcphdr*)(dp+(iph->ihl<<2));

        /* The quoted packet is one we sent, so its destination is the
         * remote end and its source is the local end.
         */
        sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source, skb->dev->ifindex);
        if (sk == NULL || sk->state == TCP_TIME_WAIT) {
                icmp_statistics.IcmpInErrors++;
                return; 
        }

        tp = &sk->tp_pinfo.af_tcp;
        seq = ntohl(th->seq);
        /* For non-listening sockets the quoted sequence number must lie
         * between snd_una and snd_nxt; listening sockets are checked
         * against the open_request further down.
         */
        if (sk->state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) {
                net_statistics.OutOfWindowIcmps++;
                return; 
        }

        /* Translate the ICMP type/code into an errno value, or handle the
         * message completely and return.
         */
        switch (type) {
        case ICMP_SOURCE_QUENCH:
#ifndef OLD_SOURCE_QUENCH /* This is deprecated */
                /* Halve the congestion window (but not below 2). */
                tp->snd_ssthresh = max(tp->snd_cwnd >> 1, 2);
                tp->snd_cwnd = tp->snd_ssthresh;
                tp->snd_cwnd_cnt = 0;
                tp->high_seq = tp->snd_nxt;
#endif
                return;
        case ICMP_PARAMETERPROB:
                err = EPROTO;
                break; 
        case ICMP_DEST_UNREACH:
                if (code > NR_ICMP_UNREACH)
                        return;

                if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
                        do_pmtu_discovery(sk, iph); 
                        return;
                }

                err = icmp_err_convert[code].errno;
                break;
        case ICMP_TIME_EXCEEDED:
                err = EHOSTUNREACH;
                break;
        default:
                return;
        }

        /* States not listed in this switch fall through to the common
         * error reporting at the end of the function.
         */
        switch (sk->state) {
                struct open_request *req, *prev;
        case TCP_LISTEN:
                /* Prevent race conditions with accept() - 
                 * ICMP is unreliable. 
                 */
                if (atomic_read(&sk->sock_readers)) {
                        net_statistics.LockDroppedIcmps++;
                         /* If too many ICMPs get dropped on busy
                          * servers this needs to be solved differently.
                          */
                        return;
                }

                /* The final ACK of the handshake should be already 
                 * handled in the new socket context, not here.
                 * Strictly speaking - an ICMP error for the final
                 * ACK should set the opening flag, but that is too
                 * complicated right now. 
                 */ 
                if (!no_flags && !th->syn && !th->ack)
                        return;

                req = tcp_v4_search_req(tp, iph, th, &prev); 
                if (!req)
                        return;
                /* The quoted segment must carry the ISN we sent for this request. */
                if (seq != req->snt_isn) {
                        net_statistics.OutOfWindowIcmps++;
                        return;
                }
                if (req->sk) {  
                        /* 
                         * Already in ESTABLISHED and a big socket is created,
                         * set error code there.
                         * The error will _not_ be reported in the accept(),
                         * but only with the next operation on the socket after
                         * accept. 
                         */
                        sk = req->sk;
                } else {
                        /* 
                         * Still in SYN_RECV, just remove it silently.
                         * There is no good way to pass the error to the newly
                         * created socket, and POSIX does not want network
                         * errors returned from accept(). 
                         */ 
                        tp->syn_backlog--;
                        tcp_synq_unlink(tp, req, prev);
                        req->class->destructor(req);
                        tcp_openreq_free(req);
                        return; 
                }
                break;
        case TCP_SYN_SENT:
        case TCP_SYN_RECV:  /* Cannot happen */ 
                /* Only an error quoting our SYN aborts the connection attempt. */
                if (!no_flags && !th->syn)
                        return;
                tcp_statistics.TcpAttemptFails++;
                sk->err = err;
                sk->zapped = 1;
                mb();
                sk->error_report(sk);
                return;
        }

        /* If we've already connected we will keep trying
         * until we time out, or the user gives up.
         *
         * rfc1122 4.2.3.9 allows to consider as hard errors
         * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
         * but it is obsoleted by pmtu discovery).
         *
         * Note, that in modern internet, where routing is unreliable
         * and in each dark corner broken firewalls sit, sending random
         * errors ordered by their masters even this two messages finally lose
         * their original sense (even Linux sends invalid PORT_UNREACHs)
         *
         * Now we are in compliance with RFCs.
         *                                                      --ANK (980905)
         */

        if (sk->ip_recverr) {
                /* This code isn't serialized with the socket code */
                /* ANK (980927) ... which is harmless now,
                   sk->err's may be safely lost.
                 */
                sk->err = err;
                mb(); 
                sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
        } else  { /* Only an error on timeout */
                sk->err_soft = err;
                mb(); 
        }
}
 940
 941/* This routine computes an IPv4 TCP checksum. */
 942void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, 
 943                       struct sk_buff *skb)
 944{
 945        th->check = 0;
 946        th->check = tcp_v4_check(th, len, sk->saddr, sk->daddr,
 947                                 csum_partial((char *)th, th->doff<<2, skb->csum));
 948}
 949
 950/*
 951 *      This routine will send an RST to the other tcp.
 952 *
 953 *      Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 954 *                    for reset.
 955 *      Answer: if a packet caused RST, it is not for a socket
 956 *              existing in our system, if it is matched to a socket,
 957 *              it is just duplicate segment or bug in other side's TCP.
 958 *              So that we build reply only basing on parameters
 959 *              arrived with segment.
 960 *      Exception: precedence violation. We do not implement it in any case.
 961 */
 962
 963static void tcp_v4_send_reset(struct sk_buff *skb)
 964{
 965        struct tcphdr *th = skb->h.th;
 966        struct tcphdr rth;
 967        struct ip_reply_arg arg;
 968
 969        /* Never send a reset in response to a reset. */
 970        if (th->rst)
 971                return;
 972
 973        if (((struct rtable*)skb->dst)->rt_type != RTN_LOCAL) {
 974#ifdef CONFIG_IP_TRANSPARENT_PROXY
 975                if (((struct rtable*)skb->dst)->rt_type == RTN_UNICAST)
 976                        icmp_send(skb, ICMP_DEST_UNREACH,
 977                                  ICMP_PORT_UNREACH, 0);
 978#endif
 979                return;
 980        }
 981
 982        /* Swap the send and the receive. */
 983        memset(&rth, 0, sizeof(struct tcphdr)); 
 984        rth.dest = th->source;
 985        rth.source = th->dest; 
 986        rth.doff = sizeof(struct tcphdr)/4;
 987        rth.rst = 1;
 988
 989        if (th->ack) {
 990                rth.seq = th->ack_seq;
 991        } else {
 992                rth.ack = 1;
 993                rth.ack_seq = th->syn ? htonl(ntohl(th->seq)+1) : th->seq;
 994        }
 995
 996        memset(&arg, 0, sizeof arg); 
 997        arg.iov[0].iov_base = (unsigned char *)&rth; 
 998        arg.iov[0].iov_len  = sizeof rth;
 999        arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, 
1000                                      skb->nh.iph->saddr, /*XXX*/
1001                                      sizeof(struct tcphdr),
1002                                      IPPROTO_TCP,
1003                                      0); 
1004        arg.n_iov = 1;
1005        arg.csumoffset = offsetof(struct tcphdr, check) / 2; 
1006
1007        ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);
1008
1009        tcp_statistics.TcpOutSegs++;
1010        tcp_statistics.TcpOutRsts++;
1011}
1012
1013#ifdef CONFIG_IP_TRANSPARENT_PROXY
1014
1015/*
1016   Seems, I never wrote nothing more stupid.
1017   I hope Gods will forgive me, but I cannot forgive myself 8)
1018                                                --ANK (981001)
1019 */
1020
1021static struct sock *tcp_v4_search_proxy_openreq(struct sk_buff *skb)
1022{
1023        struct iphdr *iph = skb->nh.iph;
1024        struct tcphdr *th = (struct tcphdr *)(skb->nh.raw + iph->ihl*4);
1025        struct sock *sk;
1026        int i;
1027
1028        for (i=0; i<TCP_LHTABLE_SIZE; i++) {
1029                for(sk = tcp_listening_hash[i]; sk; sk = sk->next) {
1030                        struct open_request *dummy;
1031                        if (tcp_v4_search_req(&sk->tp_pinfo.af_tcp, iph,
1032                                              th, &dummy) &&
1033                            (!sk->bound_dev_if ||
1034                             sk->bound_dev_if == skb->dev->ifindex))
1035                                return sk;
1036                }
1037        }
1038        return NULL;
1039}
1040
1041/*
1042 *      Check whether a received TCP packet might be for one of our
1043 *      connections.
1044 */
1045
1046int tcp_chkaddr(struct sk_buff *skb)
1047{
1048        struct iphdr *iph = skb->nh.iph;
1049        struct tcphdr *th = (struct tcphdr *)(skb->nh.raw + iph->ihl*4);
1050        struct sock *sk;
1051
1052        sk = tcp_v4_lookup(iph->saddr, th->source, iph->daddr,
1053                           th->dest, skb->dev->ifindex);
1054
1055        if (!sk)
1056                return tcp_v4_search_proxy_openreq(skb) != NULL;
1057
1058        if (sk->state == TCP_LISTEN) {
1059                struct open_request *dummy;
1060                if (tcp_v4_search_req(&sk->tp_pinfo.af_tcp, skb->nh.iph,
1061                                      th, &dummy) &&
1062                    (!sk->bound_dev_if ||
1063                     sk->bound_dev_if == skb->dev->ifindex))
1064                        return 1;
1065        }
1066
1067        /* 0 means accept all LOCAL addresses here, not all the world... */
1068
1069        if (sk->rcv_saddr == 0)
1070                return 0;
1071
1072        return 1;
1073}
1074#endif
1075
1076/*
1077 *      Send a SYN-ACK after having received an ACK. 
1078 *      This still operates on a open_request only, not on a big
1079 *      socket.
1080 */ 
1081static void tcp_v4_send_synack(struct sock *sk, struct open_request *req)
1082{
1083        struct rtable *rt;
1084        struct ip_options *opt;
1085        struct sk_buff * skb;
1086        int mss;
1087
1088        /* First, grab a route. */
1089        opt = req->af.v4_req.opt;
1090        if(ip_route_output(&rt, ((opt && opt->srr) ?
1091                                 opt->faddr :
1092                                 req->af.v4_req.rmt_addr),
1093                           req->af.v4_req.loc_addr,
1094                           RT_TOS(sk->ip_tos) | RTO_CONN | sk->localroute,
1095                           sk->bound_dev_if)) {
1096                ip_statistics.IpOutNoRoutes++;
1097                return;
1098        }
1099        if(opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
1100                ip_rt_put(rt);
1101                ip_statistics.IpOutNoRoutes++;
1102                return;
1103        }
1104
1105        mss = rt->u.dst.pmtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
1106
1107        skb = tcp_make_synack(sk, &rt->u.dst, req, mss);
1108        if (skb) {
1109                struct tcphdr *th = skb->h.th;
1110
1111#ifdef CONFIG_IP_TRANSPARENT_PROXY
1112                th->source = req->lcl_port; /* LVE */
1113#endif
1114
1115                th->check = tcp_v4_check(th, skb->len,
1116                                         req->af.v4_req.loc_addr, req->af.v4_req.rmt_addr,
1117                                         csum_partial((char *)th, skb->len, skb->csum));
1118
1119                ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr,
1120                                      req->af.v4_req.rmt_addr, req->af.v4_req.opt);
1121        }
1122        ip_rt_put(rt);
1123}
1124
1125/*
1126 *      IPv4 open_request destructor.
1127 */ 
1128static void tcp_v4_or_free(struct open_request *req)
1129{
1130        if(!req->sk && req->af.v4_req.opt)
1131                kfree_s(req->af.v4_req.opt, optlength(req->af.v4_req.opt));
1132}
1133
1134static inline void syn_flood_warning(struct sk_buff *skb)
1135{
1136        static unsigned long warntime;
1137        
1138        if (jiffies - warntime > HZ*60) {
1139                warntime = jiffies;
1140                printk(KERN_INFO 
1141                       "possible SYN flooding on port %d. Sending cookies.\n",  
1142                       ntohs(skb->h.th->dest));
1143        }
1144}
1145
1146/* 
1147 * Save and compile IPv4 options into the open_request if needed. 
1148 */
1149static inline struct ip_options * 
1150tcp_v4_save_options(struct sock *sk, struct sk_buff *skb)
1151{
1152        struct ip_options *opt = &(IPCB(skb)->opt);
1153        struct ip_options *dopt = NULL; 
1154
1155        if (opt && opt->optlen) {
1156                int opt_size = optlength(opt); 
1157                dopt = kmalloc(opt_size, GFP_ATOMIC);
1158                if (dopt) {
1159                        if (ip_options_echo(dopt, skb)) {
1160                                kfree_s(dopt, opt_size);
1161                                dopt = NULL;
1162                        }
1163                }
1164        }
1165        return dopt;
1166}
1167
/* 
 * Maximum number of SYN_RECV sockets in queue per LISTEN socket.
 * One SYN_RECV socket costs about 80 bytes on a 32bit machine.
 * It would be better to replace it with a global counter for all sockets
 * but then some measure against one socket starving all other sockets
 * would be needed.
 */
int sysctl_max_syn_backlog = 128; 

/* IPv4 operations attached to every open_request through req->class.
 * The initializer is positional: SYN-ACK sender, destructor, reset
 * sender.  NOTE(review): the member names of struct or_calltable are
 * declared elsewhere - confirm the order against that declaration.
 */
struct or_calltable or_ipv4 = {
        tcp_v4_send_synack,
        tcp_v4_or_free,
        tcp_v4_send_reset
};

/* Number of pending open_requests of a listening socket. */
#define BACKLOG(sk) ((sk)->tp_pinfo.af_tcp.syn_backlog) /* lvalue! */
/* Backlog limit; currently the global sysctl, the argument is ignored. */
#define BACKLOGMAX(sk) sysctl_max_syn_backlog
1185
1186int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, __u32 isn)
1187{
1188        struct tcp_opt tp;
1189        struct open_request *req;
1190        struct tcphdr *th = skb->h.th;
1191        __u32 saddr = skb->nh.iph->saddr;
1192        __u32 daddr = skb->nh.iph->daddr;
1193#ifdef CONFIG_SYN_COOKIES
1194        int want_cookie = 0;
1195#else
1196#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1197#endif
1198
1199        /* If the socket is dead, don't accept the connection.  */
1200        if (sk->dead) 
1201                goto dead; 
1202
1203        /* Never answer to SYNs send to broadcast or multicast */
1204        if (((struct rtable *)skb->dst)->rt_flags & 
1205            (RTCF_BROADCAST|RTCF_MULTICAST))
1206                goto drop; 
1207
1208        /* XXX: Check against a global syn pool counter. */
1209        if (BACKLOG(sk) > BACKLOGMAX(sk)) {
1210#ifdef CONFIG_SYN_COOKIES
1211                if (sysctl_tcp_syncookies) {
1212                        syn_flood_warning(skb);
1213                        want_cookie = 1; 
1214                } else
1215#endif
1216                goto drop;
1217        } else { 
1218                if (isn == 0)
1219                        isn = tcp_v4_init_sequence(sk, skb);
1220                BACKLOG(sk)++;
1221        }
1222
1223        req = tcp_openreq_alloc();
1224        if (req == NULL) {
1225                goto dropbacklog;
1226        }
1227
1228        req->rcv_wnd = 0;               /* So that tcp_send_synack() knows! */
1229
1230        req->rcv_isn = TCP_SKB_CB(skb)->seq;
1231        tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0;
1232
1233        tp.mss_clamp = 65535;
1234        tcp_parse_options(NULL, th, &tp, want_cookie);
1235        if (tp.mss_clamp == 65535)
1236                tp.mss_clamp = 576 - sizeof(struct iphdr) - sizeof(struct iphdr);
1237
1238        if (sk->tp_pinfo.af_tcp.user_mss && sk->tp_pinfo.af_tcp.user_mss < tp.mss_clamp)
1239                tp.mss_clamp = sk->tp_pinfo.af_tcp.user_mss;
1240        req->mss = tp.mss_clamp;
1241
1242        if (tp.saw_tstamp)
1243                req->ts_recent = tp.rcv_tsval;
1244        req->tstamp_ok = tp.tstamp_ok;
1245        req->sack_ok = tp.sack_ok;
1246        req->snd_wscale = tp.snd_wscale;
1247        req->wscale_ok = tp.wscale_ok;
1248        req->rmt_port = th->source;
1249#ifdef CONFIG_IP_TRANSPARENT_PROXY
1250        req->lcl_port = th->dest ; /* LVE */
1251#endif
1252        req->af.v4_req.loc_addr = daddr;
1253        req->af.v4_req.rmt_addr = saddr;
1254
1255        /* Note that we ignore the isn passed from the TIME_WAIT
1256         * state here. That's the price we pay for cookies.
1257         */
1258        if (want_cookie)
1259                isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1260
1261        req->snt_isn = isn;
1262
1263        req->af.v4_req.opt = tcp_v4_save_options(sk, skb);
1264
1265        req->class = &or_ipv4;
1266        req->retrans = 0;
1267        req->sk = NULL;
1268
1269        tcp_v4_send_synack(sk, req);
1270
1271        if (want_cookie) {
1272                if (req->af.v4_req.opt)
1273                        kfree(req->af.v4_req.opt);
1274                tcp_v4_or_free(req); 
1275                tcp_openreq_free(req); 
1276        } else {
1277                req->expires = jiffies + TCP_TIMEOUT_INIT;
1278                tcp_inc_slow_timer(TCP_SLT_SYNACK);
1279                tcp_synq_queue(&sk->tp_pinfo.af_tcp, req);
1280        }
1281
1282        return 0;
1283
1284dead:
1285        SOCK_DEBUG(sk, "Reset on %p: Connect on dead socket.\n",sk);
1286        tcp_statistics.TcpAttemptFails++;
1287        return -ENOTCONN; /* send reset */
1288
1289dropbacklog:
1290        if (!want_cookie) 
1291                BACKLOG(sk)--;
1292drop:
1293        tcp_statistics.TcpAttemptFails++;
1294        return 0;
1295}
1296
1297/* This is not only more efficient than what we used to do, it eliminates
1298 * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
1299 *
1300 * This function wants to be moved to a common for IPv[46] file. --ANK
1301 */
1302struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, struct sk_buff *skb)
1303{
1304        struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, 0);
1305
1306        if(newsk != NULL) {
1307                struct tcp_opt *newtp;
1308#ifdef CONFIG_FILTER
1309                struct sk_filter *filter;
1310#endif
1311
1312                memcpy(newsk, sk, sizeof(*newsk));
1313                newsk->sklist_next = NULL;
1314                newsk->state = TCP_SYN_RECV;
1315
1316                /* Clone the TCP header template */
1317                newsk->dport = req->rmt_port;
1318
1319                atomic_set(&newsk->sock_readers, 0);
1320                atomic_set(&newsk->rmem_alloc, 0);
1321                skb_queue_head_init(&newsk->receive_queue);
1322                atomic_set(&newsk->wmem_alloc, 0);
1323                skb_queue_head_init(&newsk->write_queue);
1324                atomic_set(&newsk->omem_alloc, 0);
1325
1326                newsk->done = 0;
1327                newsk->proc = 0;
1328                newsk->pair = NULL;
1329                skb_queue_head_init(&newsk->back_log);
1330                skb_queue_head_init(&newsk->error_queue);
1331#ifdef CONFIG_FILTER
1332                if ((filter = newsk->filter) != NULL)
1333                        sk_filter_charge(newsk, filter);
1334#endif
1335
1336                /* Now setup tcp_opt */
1337                newtp = &(newsk->tp_pinfo.af_tcp);
1338                newtp->pred_flags = 0;
1339                newtp->rcv_nxt = req->rcv_isn + 1;
1340                newtp->snd_nxt = req->snt_isn + 1;
1341                newtp->snd_una = req->snt_isn + 1;
1342                newtp->srtt = 0;
1343                newtp->ato = 0;
1344                newtp->snd_wl1 = req->rcv_isn;
1345                newtp->snd_wl2 = req->snt_isn;
1346
1347                /* RFC1323: The window in SYN & SYN/ACK segments
1348                 * is never scaled.
1349                 */
1350                newtp->snd_wnd = ntohs(skb->h.th->window);
1351
1352                newtp->max_window = newtp->snd_wnd;
1353                newtp->pending = 0;
1354                newtp->retransmits = 0;
1355                newtp->last_ack_sent = req->rcv_isn + 1;
1356                newtp->backoff = 0;
1357                newtp->mdev = TCP_TIMEOUT_INIT;
1358                newtp->snd_cwnd = 1;
1359                newtp->rto = TCP_TIMEOUT_INIT;
1360                newtp->packets_out = 0;
1361                newtp->fackets_out = 0;
1362                newtp->retrans_out = 0;
1363                newtp->high_seq = 0;
1364                newtp->snd_ssthresh = 0x7fffffff;
1365                newtp->snd_cwnd_cnt = 0;
1366                newtp->dup_acks = 0;
1367                newtp->delayed_acks = 0;
1368                init_timer(&newtp->retransmit_timer);
1369                newtp->retransmit_timer.function = &tcp_retransmit_timer;
1370                newtp->retransmit_timer.data = (unsigned long) newsk;
1371                init_timer(&newtp->delack_timer);
1372                newtp->delack_timer.function = &tcp_delack_timer;
1373                newtp->delack_timer.data = (unsigned long) newsk;
1374                skb_queue_head_init(&newtp->out_of_order_queue);
1375                newtp->send_head = newtp->retrans_head = NULL;
1376                newtp->rcv_wup = req->rcv_isn + 1;
1377                newtp->write_seq = req->snt_isn + 1;
1378                newtp->copied_seq = req->rcv_isn + 1;
1379
1380                newtp->saw_tstamp = 0;
1381                newtp->mss_clamp = req->mss;
1382
1383                init_timer(&newtp->probe_timer);
1384                newtp->probe_timer.function = &tcp_probe_timer;
1385                newtp->probe_timer.data = (unsigned long) newsk;
1386                newtp->probes_out = 0;
1387                newtp->syn_seq = req->rcv_isn;
1388                newtp->fin_seq = req->rcv_isn;
1389                newtp->urg_data = 0;
1390                tcp_synq_init(newtp);
1391                newtp->syn_backlog = 0;
1392                if (skb->len >= 536)
1393                        newtp->last_seg_size = skb->len; 
1394
1395                /* Back to base struct sock members. */
1396                newsk->err = 0;
1397                newsk->ack_backlog = 0;
1398                newsk->max_ack_backlog = SOMAXCONN;
1399                newsk->priority = 0;
1400
1401                /* IP layer stuff */
1402                newsk->timeout = 0;
1403                init_timer(&newsk->timer);
1404                newsk->timer.function = &net_timer;
1405                newsk->timer.data = (unsigned long) newsk;
1406                newsk->socket = NULL;
1407
1408                newtp->tstamp_ok = req->tstamp_ok;
1409                if((newtp->sack_ok = req->sack_ok) != 0)
1410                        newtp->num_sacks = 0;
1411                newtp->window_clamp = req->window_clamp;
1412                newtp->rcv_wnd = req->rcv_wnd;
1413                newtp->wscale_ok = req->wscale_ok;
1414                if (newtp->wscale_ok) {
1415                        newtp->snd_wscale = req->snd_wscale;
1416                        newtp->rcv_wscale = req->rcv_wscale;
1417                } else {
1418                        newtp->snd_wscale = newtp->rcv_wscale = 0;
1419                        newtp->window_clamp = min(newtp->window_clamp,65535);
1420                }
1421                if (newtp->tstamp_ok) {
1422                        newtp->ts_recent = req->ts_recent;
1423                        newtp->ts_recent_stamp = jiffies;
1424                        newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
1425                } else {
1426                        newtp->tcp_header_len = sizeof(struct tcphdr);
1427                }
1428        }
1429        return newsk;
1430}
1431
1432/* 
1433 * The three way handshake has completed - we got a valid synack - 
1434 * now create the new socket. 
1435 */
1436struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1437                                   struct open_request *req,
1438                                   struct dst_entry *dst)
1439{
1440        struct ip_options *opt = req->af.v4_req.opt;
1441        struct tcp_opt *newtp;
1442        struct sock *newsk;
1443
1444        if (sk->ack_backlog > sk->max_ack_backlog)
1445                goto exit; /* head drop */
1446        if (dst == NULL) { 
1447                struct rtable *rt;
1448                
1449                if (ip_route_output(&rt,
1450                        opt && opt->srr ? opt->faddr : req->af.v4_req.rmt_addr,
1451                        req->af.v4_req.loc_addr, sk->ip_tos|RTO_CONN, 0))
1452                        return NULL;
1453                dst = &rt->u.dst;
1454        }
1455#ifdef CONFIG_IP_TRANSPARENT_PROXY
1456        /* The new socket created for transparent proxy may fall
1457         * into a non-existed bind bucket because sk->num != newsk->num.
1458         * Ensure existance of the bucket now. The placement of the check
1459         * later will require to destroy just created newsk in the case of fail.
1460         * 1998/04/22 Andrey V. Savochkin <saw@msu.ru>
1461         */
1462        if (tcp_bucket_check(ntohs(skb->h.th->dest)))
1463                goto exit;
1464#endif
1465
1466        newsk = tcp_create_openreq_child(sk, req, skb);
1467        if (!newsk) 
1468                goto exit;
1469
1470        sk->tp_pinfo.af_tcp.syn_backlog--;
1471        sk->ack_backlog++;
1472
1473        newsk->dst_cache = dst;
1474
1475        newtp = &(newsk->tp_pinfo.af_tcp);
1476        newsk->daddr = req->af.v4_req.rmt_addr;
1477        newsk->saddr = req->af.v4_req.loc_addr;
1478        newsk->rcv_saddr = req->af.v4_req.loc_addr;
1479#ifdef CONFIG_IP_TRANSPARENT_PROXY
1480        newsk->num = ntohs(skb->h.th->dest);
1481        newsk->sport = req->lcl_port;
1482#endif
1483        newsk->opt = req->af.v4_req.opt;
1484        newtp->ext_header_len = 0;
1485        if (newsk->opt)
1486                newtp->ext_header_len = newsk->opt->optlen;
1487
1488        tcp_sync_mss(newsk, dst->pmtu);
1489        newtp->rcv_mss = newtp->mss_clamp;
1490
1491        /* It would be better to use newtp->mss_clamp here */
1492        if (newsk->rcvbuf < (3 * newtp->pmtu_cookie))
1493                newsk->rcvbuf = min ((3 * newtp->pmtu_cookie), sysctl_rmem_max);
1494        if (newsk->sndbuf < (3 * newtp->pmtu_cookie))
1495                newsk->sndbuf = min ((3 * newtp->pmtu_cookie), sysctl_wmem_max);
1496 
1497        tcp_v4_hash(newsk);
1498        add_to_prot_sklist(newsk);
1499        sk->data_ready(sk, 0); /* Deliver SIGIO */ 
1500
1501        return newsk;
1502
1503exit:
1504        dst_release(dst);
1505        return NULL;
1506}
1507
1508static void tcp_v4_rst_req(struct sock *sk, struct sk_buff *skb)
1509{
1510        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
1511        struct open_request *req, *prev;
1512
1513        req = tcp_v4_search_req(tp,skb->nh.iph, skb->h.th, &prev);
1514        if (!req)
1515                return;
1516        /* Sequence number check required by RFC793 */
1517        if (before(TCP_SKB_CB(skb)->seq, req->rcv_isn) ||
1518            after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1))
1519                return;
1520        tcp_synq_unlink(tp, req, prev);
1521        (req->sk ? sk->ack_backlog : tp->syn_backlog)--;
1522        req->class->destructor(req);
1523        tcp_openreq_free(req); 
1524
1525        net_statistics.EmbryonicRsts++;
1526}
1527
1528/* Check for embryonic sockets (open_requests) We check packets with
1529 * only the SYN bit set against the open_request queue too: This
1530 * increases connection latency a bit, but is required to detect
1531 * retransmitted SYNs.  
1532 */
1533static inline struct sock *tcp_v4_hnd_req(struct sock *sk,struct sk_buff *skb)
1534{
1535        struct tcphdr *th = skb->h.th; 
1536        u32 flg = ((u32 *)th)[3]; 
1537
1538        /* Check for RST */
1539        if (flg & __constant_htonl(0x00040000)) {
1540                tcp_v4_rst_req(sk, skb);
1541                return NULL;
1542        }
1543
1544        /* Check for SYN|ACK */
1545        if (flg & __constant_htonl(0x00120000)) {
1546                struct open_request *req, *dummy; 
1547                struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1548
1549                /* Find possible connection requests. */
1550                req = tcp_v4_search_req(tp, skb->nh.iph, th, &dummy); 
1551                if (req) {
1552                        sk = tcp_check_req(sk, skb, req);
1553                }
1554#ifdef CONFIG_SYN_COOKIES
1555                else {
1556                        sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1557                }
1558#endif
1559        }
1560        return sk; 
1561}
1562
1563int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1564{
1565#ifdef CONFIG_FILTER
1566        struct sk_filter *filter = sk->filter;
1567        if (filter && sk_filter(skb, filter))
1568                goto discard;
1569#endif /* CONFIG_FILTER */
1570
1571        /* 
1572         * This doesn't check if the socket has enough room for the packet.
1573         * Either process the packet _without_ queueing it and then free it,
1574         * or do the check later.
1575         */
1576        skb_set_owner_r(skb, sk);
1577
1578        if (sk->state == TCP_ESTABLISHED) { /* Fast path */
1579                if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1580                        goto reset;
1581                return 0; 
1582        } 
1583
1584
1585        if (sk->state == TCP_LISTEN) { 
1586                struct sock *nsk;
1587                
1588                nsk = tcp_v4_hnd_req(sk, skb);
1589                if (!nsk) 
1590                        goto discard;
1591
1592                /*
1593                 * Queue it on the new socket if the new socket is active,
1594                 * otherwise we just shortcircuit this and continue with
1595                 * the new socket..
1596                 */
1597                if (atomic_read(&nsk->sock_readers)) {
1598                        __skb_queue_tail(&nsk->back_log, skb);
1599                        return 0;
1600                }
1601                sk = nsk;
1602        }
1603        
1604        if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1605                goto reset;
1606        return 0;
1607
1608reset:
1609        tcp_v4_send_reset(skb);
1610discard:
1611        kfree_skb(skb);
1612        /* Be careful here. If this function gets more complicated and
1613         * gcc suffers from register pressure on the x86, sk (in %ebx) 
1614         * might be destroyed here. This current version compiles correctly,
1615         * but you have been warned.
1616         */
1617        return 0;
1618}
1619
1620/*
1621 *      From tcp_input.c
1622 */
1623
1624int tcp_v4_rcv(struct sk_buff *skb, unsigned short len)
1625{
1626        struct tcphdr *th;
1627        struct sock *sk;
1628
1629        if (skb->pkt_type!=PACKET_HOST)
1630                goto discard_it;
1631
1632        th = skb->h.th;
1633
1634        /* Pull up the IP header. */
1635        __skb_pull(skb, skb->h.raw - skb->data);
1636
1637        /* Count it even if it's bad */
1638        tcp_statistics.TcpInSegs++;
1639
1640        if (len < sizeof(struct tcphdr))
1641                goto bad_packet;
1642
1643        /* Try to use the device checksum if provided. */
1644        switch (skb->ip_summed) {
1645        case CHECKSUM_NONE:
1646                skb->csum = csum_partial((char *)th, len, 0);
1647        case CHECKSUM_HW:
1648                if (tcp_v4_check(th,len,skb->nh.iph->saddr,skb->nh.iph->daddr,skb->csum)) {
1649                        NETDEBUG(printk(KERN_DEBUG "TCPv4 bad checksum "
1650                                        "from %d.%d.%d.%d:%04x to %d.%d.%d.%d:%04x, "
1651                                        "len=%d/%d/%d\n",
1652                                        NIPQUAD(skb->nh.iph->saddr),
1653                                        ntohs(th->source), 
1654                                        NIPQUAD(skb->nh.iph->daddr),
1655                                        ntohs(th->dest),
1656                                        len, skb->len,
1657                                        ntohs(skb->nh.iph->tot_len)));
1658        bad_packet:             
1659                        tcp_statistics.TcpInErrs++;
1660                        goto discard_it;
1661                }
1662        default:
1663                /* CHECKSUM_UNNECESSARY */
1664        }
1665
1666#ifdef CONFIG_IP_TRANSPARENT_PROXY
1667        if (IPCB(skb)->redirport)
1668                sk = tcp_v4_proxy_lookup(th->dest, skb->nh.iph->saddr, th->source,
1669                                         skb->nh.iph->daddr, skb->dev,
1670                                         IPCB(skb)->redirport, skb->dev->ifindex);
1671        else {
1672#endif
1673                sk = __tcp_v4_lookup(th, skb->nh.iph->saddr, th->source,
1674                                     skb->nh.iph->daddr, th->dest, skb->dev->ifindex);
1675#ifdef CONFIG_IP_TRANSPARENT_PROXY
1676                if (!sk)
1677                        sk = tcp_v4_search_proxy_openreq(skb);
1678        }
1679#endif
1680        if (!sk)
1681                goto no_tcp_socket;
1682        if(!ipsec_sk_policy(sk,skb))
1683                goto discard_it;
1684
1685        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1686        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1687                                    len - th->doff*4);
1688        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1689
1690        skb->used = 0;
1691
1692        if (sk->state == TCP_TIME_WAIT)
1693                goto do_time_wait;
1694        if (!atomic_read(&sk->sock_readers))
1695                return tcp_v4_do_rcv(sk, skb);
1696
1697        __skb_queue_tail(&sk->back_log, skb);
1698        return 0;
1699
1700no_tcp_socket:
1701        tcp_v4_send_reset(skb);
1702
1703discard_it:
1704        /* Discard frame. */
1705        kfree_skb(skb);
1706        return 0;
1707
1708do_time_wait:
1709        if(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1710                                      skb, th, skb->len))
1711                goto no_tcp_socket;
1712        goto discard_it;
1713}
1714
1715int tcp_v4_rebuild_header(struct sock *sk)
1716{
1717        struct rtable *rt = (struct rtable *)sk->dst_cache;
1718        __u32 new_saddr;
1719        int want_rewrite = sysctl_ip_dynaddr && sk->state == TCP_SYN_SENT;
1720
1721        if(rt == NULL)
1722                return 0;
1723
1724        /* Force route checking if want_rewrite.
1725         * The idea is good, the implementation is disguisting.
1726         * Well, if I made bind on this socket, you cannot randomly ovewrite
1727         * its source address. --ANK
1728         */
1729        if (want_rewrite) {
1730                int tmp;
1731                struct rtable *new_rt;
1732                __u32 old_saddr = rt->rt_src;
1733
1734                /* Query new route using another rt buffer */
1735                tmp = ip_route_connect(&new_rt, rt->rt_dst, 0,
1736                                        RT_TOS(sk->ip_tos)|sk->localroute,
1737                                        sk->bound_dev_if);
1738
1739                /* Only useful if different source addrs */
1740                if (tmp == 0) {
1741                        /*
1742                         *      Only useful if different source addrs
1743                         */
1744                        if (new_rt->rt_src != old_saddr ) {
1745                                dst_release(sk->dst_cache);
1746                                sk->dst_cache = &new_rt->u.dst;
1747                                rt = new_rt;
1748                                goto do_rewrite;
1749                        } 
1750                        dst_release(&new_rt->u.dst);
1751                }
1752        }
1753        if (rt->u.dst.obsolete) {
1754                int err;
1755                err = ip_route_output(&rt, rt->rt_dst, rt->rt_src, rt->key.tos|RTO_CONN, rt->key.oif);
1756                if (err) {
1757                        sk->err_soft=-err;
1758                        sk->error_report(sk);
1759                        return -1;
1760                }
1761                dst_release(xchg(&sk->dst_cache, &rt->u.dst));
1762        }
1763
1764        return 0;
1765
1766do_rewrite:
1767        new_saddr = rt->rt_src;
1768                
1769        /* Ouch!, this should not happen. */
1770        if (!sk->saddr || !sk->rcv_saddr) {
1771                printk(KERN_WARNING "tcp_v4_rebuild_header(): not valid sock addrs: "
1772                       "saddr=%08lX rcv_saddr=%08lX\n",
1773                       ntohl(sk->saddr), 
1774                       ntohl(sk->rcv_saddr));
1775                return 0;
1776        }
1777
1778        if (new_saddr != sk->saddr) {
1779                if (sysctl_ip_dynaddr > 1) {
1780                        printk(KERN_INFO "tcp_v4_rebuild_header(): shifting sk->saddr "
1781                               "from %d.%d.%d.%d to %d.%d.%d.%d\n",
1782                               NIPQUAD(sk->saddr), 
1783                               NIPQUAD(new_saddr));
1784                }
1785
1786                sk->saddr = new_saddr;
1787                sk->rcv_saddr = new_saddr;
1788                tcp_v4_rehash(sk);
1789        } 
1790        
1791        return 0;
1792}
1793
1794static struct sock * tcp_v4_get_sock(struct sk_buff *skb, struct tcphdr *th)
1795{
1796        return tcp_v4_lookup(skb->nh.iph->saddr, th->source,
1797                             skb->nh.iph->daddr, th->dest, skb->dev->ifindex);
1798}
1799
1800static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1801{
1802        struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
1803
1804        sin->sin_family         = AF_INET;
1805        sin->sin_addr.s_addr    = sk->daddr;
1806        sin->sin_port           = sk->dport;
1807}
1808
1809struct tcp_func ipv4_specific = {
1810        ip_queue_xmit,
1811        tcp_v4_send_check,
1812        tcp_v4_rebuild_header,
1813        tcp_v4_conn_request,
1814        tcp_v4_syn_recv_sock,
1815        tcp_v4_get_sock,
1816        sizeof(struct iphdr),
1817
1818        ip_setsockopt,
1819        ip_getsockopt,
1820        v4_addr2sockaddr,
1821        sizeof(struct sockaddr_in)
1822};
1823
1824/* NOTE: A lot of things set to zero explicitly by call to
1825 *       sk_alloc() so need not be done here.
1826 */
1827static int tcp_v4_init_sock(struct sock *sk)
1828{
1829        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1830
1831        skb_queue_head_init(&tp->out_of_order_queue);
1832        tcp_init_xmit_timers(sk);
1833
1834        tp->rto  = TCP_TIMEOUT_INIT;            /*TCP_WRITE_TIME*/
1835        tp->mdev = TCP_TIMEOUT_INIT;
1836        tp->mss_clamp = ~0;
1837      
1838        /* See draft-stevens-tcpca-spec-01 for discussion of the
1839         * initialization of these values.
1840         */
1841        tp->snd_cwnd = 1;
1842        tp->snd_cwnd_cnt = 0;
1843        tp->snd_ssthresh = 0x7fffffff;  /* Infinity */
1844
1845        sk->state = TCP_CLOSE;
1846        sk->max_ack_backlog = SOMAXCONN;
1847        tp->rcv_mss = 536; 
1848
1849        sk->write_space = tcp_write_space; 
1850
1851        /* Init SYN queue. */
1852        tcp_synq_init(tp);
1853
1854        sk->tp_pinfo.af_tcp.af_specific = &ipv4_specific;
1855
1856        return 0;
1857}
1858
1859static int tcp_v4_destroy_sock(struct sock *sk)
1860{
1861        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1862        struct sk_buff *skb;
1863
1864        tcp_clear_xmit_timers(sk);
1865
1866        if (sk->keepopen)
1867                tcp_dec_slow_timer(TCP_SLT_KEEPALIVE);
1868
1869        /* Cleanup up the write buffer. */
1870        while((skb = __skb_dequeue(&sk->write_queue)) != NULL)
1871                kfree_skb(skb);
1872
1873        /* Cleans up our, hopefuly empty, out_of_order_queue. */
1874        while((skb = __skb_dequeue(&tp->out_of_order_queue)) != NULL)
1875                kfree_skb(skb);
1876
1877        /* Clean up a locked TCP bind bucket, this only happens if a
1878         * port is allocated for a socket, but it never fully connects.
1879         * In which case we will find num to be non-zero and daddr to
1880         * be zero.
1881         */
1882        if(sk->daddr == 0 && sk->num != 0)
1883                tcp_bucket_unlock(sk);
1884
1885        return 0;
1886}
1887
1888struct proto tcp_prot = {
1889        (struct sock *)&tcp_prot,       /* sklist_next */
1890        (struct sock *)&tcp_prot,       /* sklist_prev */
1891        tcp_close,                      /* close */
1892        tcp_v4_connect,                 /* connect */
1893        tcp_accept,                     /* accept */
1894        NULL,                           /* retransmit */
1895        tcp_write_wakeup,               /* write_wakeup */
1896        tcp_read_wakeup,                /* read_wakeup */
1897        tcp_poll,                       /* poll */
1898        tcp_ioctl,                      /* ioctl */
1899        tcp_v4_init_sock,               /* init */
1900        tcp_v4_destroy_sock,            /* destroy */
1901        tcp_shutdown,                   /* shutdown */
1902        tcp_setsockopt,                 /* setsockopt */
1903        tcp_getsockopt,                 /* getsockopt */
1904        tcp_v4_sendmsg,                 /* sendmsg */
1905        tcp_recvmsg,                    /* recvmsg */
1906        NULL,                           /* bind */
1907        tcp_v4_do_rcv,                  /* backlog_rcv */
1908        tcp_v4_hash,                    /* hash */
1909        tcp_v4_unhash,                  /* unhash */
1910        tcp_v4_rehash,                  /* rehash */
1911        tcp_good_socknum,               /* good_socknum */
1912        tcp_v4_verify_bind,             /* verify_bind */
1913        128,                            /* max_header */
1914        0,                              /* retransmits */
1915        "TCP",                          /* name */
1916        0,                              /* inuse */
1917        0                               /* highestinuse */
1918};
1919
1920
1921
1922__initfunc(void tcp_v4_init(struct net_proto_family *ops))
1923{
1924        int err;
1925
1926        tcp_inode.i_mode = S_IFSOCK;
1927        tcp_inode.i_sock = 1;
1928        tcp_inode.i_uid = 0;
1929        tcp_inode.i_gid = 0;
1930
1931        tcp_socket->inode = &tcp_inode;
1932        tcp_socket->state = SS_UNCONNECTED;
1933        tcp_socket->type=SOCK_RAW;
1934
1935        if ((err=ops->create(tcp_socket, IPPROTO_TCP))<0)
1936                panic("Failed to create the TCP control socket.\n");
1937        tcp_socket->sk->allocation=GFP_ATOMIC;
1938        tcp_socket->sk->num = 256;              /* Don't receive any data */
1939        tcp_socket->sk->ip_ttl = MAXTTL;
1940}
1941
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.