linux/net/ipv4/tcp_ipv4.c
<<
>>
Prefs
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              Implementation of the Transmission Control Protocol(TCP).
   7 *
   8 *              IPv4 specific functions
   9 *
  10 *
  11 *              code split from:
  12 *              linux/ipv4/tcp.c
  13 *              linux/ipv4/tcp_input.c
  14 *              linux/ipv4/tcp_output.c
  15 *
  16 *              See tcp.c for author information
  17 *
  18 *      This program is free software; you can redistribute it and/or
  19 *      modify it under the terms of the GNU General Public License
  20 *      as published by the Free Software Foundation; either version
  21 *      2 of the License, or (at your option) any later version.
  22 */
  23
  24/*
  25 * Changes:
  26 *              David S. Miller :       New socket lookup architecture.
  27 *                                      This code is dedicated to John Dyson.
  28 *              David S. Miller :       Change semantics of established hash,
  29 *                                      half is devoted to TIME_WAIT sockets
  30 *                                      and the rest go in the other half.
  31 *              Andi Kleen :            Add support for syncookies and fixed
  32 *                                      some bugs: ip options weren't passed to
  33 *                                      the TCP layer, missed a check for an
  34 *                                      ACK bit.
  35 *              Andi Kleen :            Implemented fast path mtu discovery.
  36 *                                      Fixed many serious bugs in the
  37 *                                      request_sock handling and moved
  38 *                                      most of it into the af independent code.
  39 *                                      Added tail drop and some other bugfixes.
  40 *                                      Added new listen semantics.
  41 *              Mike McLagan    :       Routing by source
  42 *      Juan Jose Ciarlante:            ip_dynaddr bits
  43 *              Andi Kleen:             various fixes.
  44 *      Vitaly E. Lavrov        :       Transparent proxy revived after year
  45 *                                      coma.
  46 *      Andi Kleen              :       Fix new listen.
  47 *      Andi Kleen              :       Fix accept error reporting.
  48 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
  49 *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
  50 *                                      a single port at the same time.
  51 */
  52
  53
  54#include <linux/bottom_half.h>
  55#include <linux/types.h>
  56#include <linux/fcntl.h>
  57#include <linux/module.h>
  58#include <linux/random.h>
  59#include <linux/cache.h>
  60#include <linux/jhash.h>
  61#include <linux/init.h>
  62#include <linux/times.h>
  63
  64#include <net/net_namespace.h>
  65#include <net/icmp.h>
  66#include <net/inet_hashtables.h>
  67#include <net/tcp.h>
  68#include <net/transp_v6.h>
  69#include <net/ipv6.h>
  70#include <net/inet_common.h>
  71#include <net/timewait_sock.h>
  72#include <net/xfrm.h>
  73#include <net/netdma.h>
  74
  75#include <linux/inet.h>
  76#include <linux/ipv6.h>
  77#include <linux/stddef.h>
  78#include <linux/proc_fs.h>
  79#include <linux/seq_file.h>
  80
  81#include <linux/crypto.h>
  82#include <linux/scatterlist.h>
  83
  84int sysctl_tcp_tw_reuse __read_mostly;
  85int sysctl_tcp_low_latency __read_mostly;
  86
  87
  88#ifdef CONFIG_TCP_MD5SIG
  89static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
  90                                                   __be32 addr);
  91static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
  92                               __be32 daddr, __be32 saddr, struct tcphdr *th);
  93#else
  94static inline
  95struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
  96{
  97        return NULL;
  98}
  99#endif
 100
 101struct inet_hashinfo tcp_hashinfo;
 102
 103static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
 104{
 105        return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
 106                                          ip_hdr(skb)->saddr,
 107                                          tcp_hdr(skb)->dest,
 108                                          tcp_hdr(skb)->source);
 109}
 110
 111int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 112{
 113        const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
 114        struct tcp_sock *tp = tcp_sk(sk);
 115
 116        /* With PAWS, it is safe from the viewpoint
 117           of data integrity. Even without PAWS it is safe provided sequence
 118           spaces do not overlap i.e. at data rates <= 80Mbit/sec.
 119
 120           Actually, the idea is close to VJ's one, only timestamp cache is
 121           held not per host, but per port pair and TW bucket is used as state
 122           holder.
 123
 124           If TW bucket has been already destroyed we fall back to VJ's scheme
 125           and use initial timestamp retrieved from peer table.
 126         */
 127        if (tcptw->tw_ts_recent_stamp &&
 128            (twp == NULL || (sysctl_tcp_tw_reuse &&
 129                             get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
 130                tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 131                if (tp->write_seq == 0)
 132                        tp->write_seq = 1;
 133                tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
 134                tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
 135                sock_hold(sktw);
 136                return 1;
 137        }
 138
 139        return 0;
 140}
 141
 142EXPORT_SYMBOL_GPL(tcp_twsk_unique);
 143
 144/* This will initiate an outgoing connection. */
 145int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 146{
 147        struct inet_sock *inet = inet_sk(sk);
 148        struct tcp_sock *tp = tcp_sk(sk);
 149        struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
 150        struct rtable *rt;
 151        __be32 daddr, nexthop;
 152        int tmp;
 153        int err;
 154
 155        if (addr_len < sizeof(struct sockaddr_in))
 156                return -EINVAL;
 157
 158        if (usin->sin_family != AF_INET)
 159                return -EAFNOSUPPORT;
 160
 161        nexthop = daddr = usin->sin_addr.s_addr;
 162        if (inet->opt && inet->opt->srr) {
 163                if (!daddr)
 164                        return -EINVAL;
 165                nexthop = inet->opt->faddr;
 166        }
 167
 168        tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
 169                               RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 170                               IPPROTO_TCP,
 171                               inet->inet_sport, usin->sin_port, sk, 1);
 172        if (tmp < 0) {
 173                if (tmp == -ENETUNREACH)
 174                        IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 175                return tmp;
 176        }
 177
 178        if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
 179                ip_rt_put(rt);
 180                return -ENETUNREACH;
 181        }
 182
 183        if (!inet->opt || !inet->opt->srr)
 184                daddr = rt->rt_dst;
 185
 186        if (!inet->inet_saddr)
 187                inet->inet_saddr = rt->rt_src;
 188        inet->inet_rcv_saddr = inet->inet_saddr;
 189
 190        if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
 191                /* Reset inherited state */
 192                tp->rx_opt.ts_recent       = 0;
 193                tp->rx_opt.ts_recent_stamp = 0;
 194                tp->write_seq              = 0;
 195        }
 196
 197        if (tcp_death_row.sysctl_tw_recycle &&
 198            !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
 199                struct inet_peer *peer = rt_get_peer(rt);
 200                /*
 201                 * VJ's idea. We save last timestamp seen from
 202                 * the destination in peer table, when entering state
 203                 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
 204                 * when trying new connection.
 205                 */
 206                if (peer != NULL &&
 207                    (u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
 208                        tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
 209                        tp->rx_opt.ts_recent = peer->tcp_ts;
 210                }
 211        }
 212
 213        inet->inet_dport = usin->sin_port;
 214        inet->inet_daddr = daddr;
 215
 216        inet_csk(sk)->icsk_ext_hdr_len = 0;
 217        if (inet->opt)
 218                inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
 219
 220        tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
 221
 222        /* Socket identity is still unknown (sport may be zero).
 223         * However we set state to SYN-SENT and not releasing socket
 224         * lock select source port, enter ourselves into the hash tables and
 225         * complete initialization after this.
 226         */
 227        tcp_set_state(sk, TCP_SYN_SENT);
 228        err = inet_hash_connect(&tcp_death_row, sk);
 229        if (err)
 230                goto failure;
 231
 232        err = ip_route_newports(&rt, IPPROTO_TCP,
 233                                inet->inet_sport, inet->inet_dport, sk);
 234        if (err)
 235                goto failure;
 236
 237        /* OK, now commit destination to socket.  */
 238        sk->sk_gso_type = SKB_GSO_TCPV4;
 239        sk_setup_caps(sk, &rt->u.dst);
 240
 241        if (!tp->write_seq)
 242                tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
 243                                                           inet->inet_daddr,
 244                                                           inet->inet_sport,
 245                                                           usin->sin_port);
 246
 247        inet->inet_id = tp->write_seq ^ jiffies;
 248
 249        err = tcp_connect(sk);
 250        rt = NULL;
 251        if (err)
 252                goto failure;
 253
 254        return 0;
 255
 256failure:
 257        /*
 258         * This unhashes the socket and releases the local port,
 259         * if necessary.
 260         */
 261        tcp_set_state(sk, TCP_CLOSE);
 262        ip_rt_put(rt);
 263        sk->sk_route_caps = 0;
 264        inet->inet_dport = 0;
 265        return err;
 266}
 267
 268/*
 269 * This routine does path mtu discovery as defined in RFC1191.
 270 */
 271static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
 272{
 273        struct dst_entry *dst;
 274        struct inet_sock *inet = inet_sk(sk);
 275
 276        /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
 277         * send out by Linux are always <576bytes so they should go through
 278         * unfragmented).
 279         */
 280        if (sk->sk_state == TCP_LISTEN)
 281                return;
 282
 283        /* We don't check in the destentry if pmtu discovery is forbidden
 284         * on this route. We just assume that no packet_to_big packets
 285         * are send back when pmtu discovery is not active.
 286         * There is a small race when the user changes this flag in the
 287         * route, but I think that's acceptable.
 288         */
 289        if ((dst = __sk_dst_check(sk, 0)) == NULL)
 290                return;
 291
 292        dst->ops->update_pmtu(dst, mtu);
 293
 294        /* Something is about to be wrong... Remember soft error
 295         * for the case, if this connection will not able to recover.
 296         */
 297        if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
 298                sk->sk_err_soft = EMSGSIZE;
 299
 300        mtu = dst_mtu(dst);
 301
 302        if (inet->pmtudisc != IP_PMTUDISC_DONT &&
 303            inet_csk(sk)->icsk_pmtu_cookie > mtu) {
 304                tcp_sync_mss(sk, mtu);
 305
 306                /* Resend the TCP packet because it's
 307                 * clear that the old packet has been
 308                 * dropped. This is the new "fast" path mtu
 309                 * discovery.
 310                 */
 311                tcp_simple_retransmit(sk);
 312        } /* else let the usual retransmit timer handle it */
 313}
 314
 315/*
 316 * This routine is called by the ICMP module when it gets some
 317 * sort of error condition.  If err < 0 then the socket should
 318 * be closed and the error returned to the user.  If err > 0
 319 * it's just the icmp type << 8 | icmp code.  After adjustment
 320 * header points to the first 8 bytes of the tcp header.  We need
 321 * to find the appropriate port.
 322 *
 323 * The locking strategy used here is very "optimistic". When
 324 * someone else accesses the socket the ICMP is just dropped
 325 * and for some paths there is no check at all.
 326 * A more general error queue to queue errors for later handling
 327 * is probably better.
 328 *
 329 */
 330
 331void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 332{
 333        struct iphdr *iph = (struct iphdr *)icmp_skb->data;
 334        struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
 335        struct inet_connection_sock *icsk;
 336        struct tcp_sock *tp;
 337        struct inet_sock *inet;
 338        const int type = icmp_hdr(icmp_skb)->type;
 339        const int code = icmp_hdr(icmp_skb)->code;
 340        struct sock *sk;
 341        struct sk_buff *skb;
 342        __u32 seq;
 343        __u32 remaining;
 344        int err;
 345        struct net *net = dev_net(icmp_skb->dev);
 346
 347        if (icmp_skb->len < (iph->ihl << 2) + 8) {
 348                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 349                return;
 350        }
 351
 352        sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
 353                        iph->saddr, th->source, inet_iif(icmp_skb));
 354        if (!sk) {
 355                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 356                return;
 357        }
 358        if (sk->sk_state == TCP_TIME_WAIT) {
 359                inet_twsk_put(inet_twsk(sk));
 360                return;
 361        }
 362
 363        bh_lock_sock(sk);
 364        /* If too many ICMPs get dropped on busy
 365         * servers this needs to be solved differently.
 366         */
 367        if (sock_owned_by_user(sk))
 368                NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
 369
 370        if (sk->sk_state == TCP_CLOSE)
 371                goto out;
 372
 373        icsk = inet_csk(sk);
 374        tp = tcp_sk(sk);
 375        seq = ntohl(th->seq);
 376        if (sk->sk_state != TCP_LISTEN &&
 377            !between(seq, tp->snd_una, tp->snd_nxt)) {
 378                NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 379                goto out;
 380        }
 381
 382        switch (type) {
 383        case ICMP_SOURCE_QUENCH:
 384                /* Just silently ignore these. */
 385                goto out;
 386        case ICMP_PARAMETERPROB:
 387                err = EPROTO;
 388                break;
 389        case ICMP_DEST_UNREACH:
 390                if (code > NR_ICMP_UNREACH)
 391                        goto out;
 392
 393                if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
 394                        if (!sock_owned_by_user(sk))
 395                                do_pmtu_discovery(sk, iph, info);
 396                        goto out;
 397                }
 398
 399                err = icmp_err_convert[code].errno;
 400                /* check if icmp_skb allows revert of backoff
 401                 * (see draft-zimmermann-tcp-lcd) */
 402                if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
 403                        break;
 404                if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
 405                    !icsk->icsk_backoff)
 406                        break;
 407
 408                icsk->icsk_backoff--;
 409                inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
 410                                         icsk->icsk_backoff;
 411                tcp_bound_rto(sk);
 412
 413                skb = tcp_write_queue_head(sk);
 414                BUG_ON(!skb);
 415
 416                remaining = icsk->icsk_rto - min(icsk->icsk_rto,
 417                                tcp_time_stamp - TCP_SKB_CB(skb)->when);
 418
 419                if (remaining) {
 420                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 421                                                  remaining, TCP_RTO_MAX);
 422                } else if (sock_owned_by_user(sk)) {
 423                        /* RTO revert clocked out retransmission,
 424                         * but socket is locked. Will defer. */
 425                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 426                                                  HZ/20, TCP_RTO_MAX);
 427                } else {
 428                        /* RTO revert clocked out retransmission.
 429                         * Will retransmit now */
 430                        tcp_retransmit_timer(sk);
 431                }
 432
 433                break;
 434        case ICMP_TIME_EXCEEDED:
 435                err = EHOSTUNREACH;
 436                break;
 437        default:
 438                goto out;
 439        }
 440
 441        switch (sk->sk_state) {
 442                struct request_sock *req, **prev;
 443        case TCP_LISTEN:
 444                if (sock_owned_by_user(sk))
 445                        goto out;
 446
 447                req = inet_csk_search_req(sk, &prev, th->dest,
 448                                          iph->daddr, iph->saddr);
 449                if (!req)
 450                        goto out;
 451
 452                /* ICMPs are not backlogged, hence we cannot get
 453                   an established socket here.
 454                 */
 455                WARN_ON(req->sk);
 456
 457                if (seq != tcp_rsk(req)->snt_isn) {
 458                        NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 459                        goto out;
 460                }
 461
 462                /*
 463                 * Still in SYN_RECV, just remove it silently.
 464                 * There is no good way to pass the error to the newly
 465                 * created socket, and POSIX does not want network
 466                 * errors returned from accept().
 467                 */
 468                inet_csk_reqsk_queue_drop(sk, req, prev);
 469                goto out;
 470
 471        case TCP_SYN_SENT:
 472        case TCP_SYN_RECV:  /* Cannot happen.
 473                               It can f.e. if SYNs crossed.
 474                             */
 475                if (!sock_owned_by_user(sk)) {
 476                        sk->sk_err = err;
 477
 478                        sk->sk_error_report(sk);
 479
 480                        tcp_done(sk);
 481                } else {
 482                        sk->sk_err_soft = err;
 483                }
 484                goto out;
 485        }
 486
 487        /* If we've already connected we will keep trying
 488         * until we time out, or the user gives up.
 489         *
 490         * rfc1122 4.2.3.9 allows to consider as hard errors
 491         * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
 492         * but it is obsoleted by pmtu discovery).
 493         *
 494         * Note, that in modern internet, where routing is unreliable
 495         * and in each dark corner broken firewalls sit, sending random
 496         * errors ordered by their masters even this two messages finally lose
 497         * their original sense (even Linux sends invalid PORT_UNREACHs)
 498         *
 499         * Now we are in compliance with RFCs.
 500         *                                                      --ANK (980905)
 501         */
 502
 503        inet = inet_sk(sk);
 504        if (!sock_owned_by_user(sk) && inet->recverr) {
 505                sk->sk_err = err;
 506                sk->sk_error_report(sk);
 507        } else  { /* Only an error on timeout */
 508                sk->sk_err_soft = err;
 509        }
 510
 511out:
 512        bh_unlock_sock(sk);
 513        sock_put(sk);
 514}
 515
 516/* This routine computes an IPv4 TCP checksum. */
 517void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
 518{
 519        struct inet_sock *inet = inet_sk(sk);
 520        struct tcphdr *th = tcp_hdr(skb);
 521
 522        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 523                th->check = ~tcp_v4_check(len, inet->inet_saddr,
 524                                          inet->inet_daddr, 0);
 525                skb->csum_start = skb_transport_header(skb) - skb->head;
 526                skb->csum_offset = offsetof(struct tcphdr, check);
 527        } else {
 528                th->check = tcp_v4_check(len, inet->inet_saddr,
 529                                         inet->inet_daddr,
 530                                         csum_partial(th,
 531                                                      th->doff << 2,
 532                                                      skb->csum));
 533        }
 534}
 535
 536int tcp_v4_gso_send_check(struct sk_buff *skb)
 537{
 538        const struct iphdr *iph;
 539        struct tcphdr *th;
 540
 541        if (!pskb_may_pull(skb, sizeof(*th)))
 542                return -EINVAL;
 543
 544        iph = ip_hdr(skb);
 545        th = tcp_hdr(skb);
 546
 547        th->check = 0;
 548        th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
 549        skb->csum_start = skb_transport_header(skb) - skb->head;
 550        skb->csum_offset = offsetof(struct tcphdr, check);
 551        skb->ip_summed = CHECKSUM_PARTIAL;
 552        return 0;
 553}
 554
 555/*
 556 *      This routine will send an RST to the other tcp.
 557 *
 558 *      Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 559 *                    for reset.
 560 *      Answer: if a packet caused RST, it is not for a socket
 561 *              existing in our system, if it is matched to a socket,
 562 *              it is just duplicate segment or bug in other side's TCP.
 563 *              So that we build reply only basing on parameters
 564 *              arrived with segment.
 565 *      Exception: precedence violation. We do not implement it in any case.
 566 */
 567
 568static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 569{
 570        struct tcphdr *th = tcp_hdr(skb);
 571        struct {
 572                struct tcphdr th;
 573#ifdef CONFIG_TCP_MD5SIG
 574                __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
 575#endif
 576        } rep;
 577        struct ip_reply_arg arg;
 578#ifdef CONFIG_TCP_MD5SIG
 579        struct tcp_md5sig_key *key;
 580#endif
 581        struct net *net;
 582
 583        /* Never send a reset in response to a reset. */
 584        if (th->rst)
 585                return;
 586
 587        if (skb_rtable(skb)->rt_type != RTN_LOCAL)
 588                return;
 589
 590        /* Swap the send and the receive. */
 591        memset(&rep, 0, sizeof(rep));
 592        rep.th.dest   = th->source;
 593        rep.th.source = th->dest;
 594        rep.th.doff   = sizeof(struct tcphdr) / 4;
 595        rep.th.rst    = 1;
 596
 597        if (th->ack) {
 598                rep.th.seq = th->ack_seq;
 599        } else {
 600                rep.th.ack = 1;
 601                rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
 602                                       skb->len - (th->doff << 2));
 603        }
 604
 605        memset(&arg, 0, sizeof(arg));
 606        arg.iov[0].iov_base = (unsigned char *)&rep;
 607        arg.iov[0].iov_len  = sizeof(rep.th);
 608
 609#ifdef CONFIG_TCP_MD5SIG
 610        key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
 611        if (key) {
 612                rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 613                                   (TCPOPT_NOP << 16) |
 614                                   (TCPOPT_MD5SIG << 8) |
 615                                   TCPOLEN_MD5SIG);
 616                /* Update length and the length the header thinks exists */
 617                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 618                rep.th.doff = arg.iov[0].iov_len / 4;
 619
 620                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
 621                                     key, ip_hdr(skb)->saddr,
 622                                     ip_hdr(skb)->daddr, &rep.th);
 623        }
 624#endif
 625        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 626                                      ip_hdr(skb)->saddr, /* XXX */
 627                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 628        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 629        arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
 630
 631        net = dev_net(skb_dst(skb)->dev);
 632        ip_send_reply(net->ipv4.tcp_sock, skb,
 633                      &arg, arg.iov[0].iov_len);
 634
 635        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 636        TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
 637}
 638
 639/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
 640   outside socket context is ugly, certainly. What can I do?
 641 */
 642
 643static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 644                            u32 win, u32 ts, int oif,
 645                            struct tcp_md5sig_key *key,
 646                            int reply_flags)
 647{
 648        struct tcphdr *th = tcp_hdr(skb);
 649        struct {
 650                struct tcphdr th;
 651                __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
 652#ifdef CONFIG_TCP_MD5SIG
 653                           + (TCPOLEN_MD5SIG_ALIGNED >> 2)
 654#endif
 655                        ];
 656        } rep;
 657        struct ip_reply_arg arg;
 658        struct net *net = dev_net(skb_dst(skb)->dev);
 659
 660        memset(&rep.th, 0, sizeof(struct tcphdr));
 661        memset(&arg, 0, sizeof(arg));
 662
 663        arg.iov[0].iov_base = (unsigned char *)&rep;
 664        arg.iov[0].iov_len  = sizeof(rep.th);
 665        if (ts) {
 666                rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 667                                   (TCPOPT_TIMESTAMP << 8) |
 668                                   TCPOLEN_TIMESTAMP);
 669                rep.opt[1] = htonl(tcp_time_stamp);
 670                rep.opt[2] = htonl(ts);
 671                arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
 672        }
 673
 674        /* Swap the send and the receive. */
 675        rep.th.dest    = th->source;
 676        rep.th.source  = th->dest;
 677        rep.th.doff    = arg.iov[0].iov_len / 4;
 678        rep.th.seq     = htonl(seq);
 679        rep.th.ack_seq = htonl(ack);
 680        rep.th.ack     = 1;
 681        rep.th.window  = htons(win);
 682
 683#ifdef CONFIG_TCP_MD5SIG
 684        if (key) {
 685                int offset = (ts) ? 3 : 0;
 686
 687                rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
 688                                          (TCPOPT_NOP << 16) |
 689                                          (TCPOPT_MD5SIG << 8) |
 690                                          TCPOLEN_MD5SIG);
 691                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 692                rep.th.doff = arg.iov[0].iov_len/4;
 693
 694                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
 695                                    key, ip_hdr(skb)->saddr,
 696                                    ip_hdr(skb)->daddr, &rep.th);
 697        }
 698#endif
 699        arg.flags = reply_flags;
 700        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 701                                      ip_hdr(skb)->saddr, /* XXX */
 702                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 703        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 704        if (oif)
 705                arg.bound_dev_if = oif;
 706
 707        ip_send_reply(net->ipv4.tcp_sock, skb,
 708                      &arg, arg.iov[0].iov_len);
 709
 710        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 711}
 712
 713static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 714{
 715        struct inet_timewait_sock *tw = inet_twsk(sk);
 716        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 717
 718        tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 719                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 720                        tcptw->tw_ts_recent,
 721                        tw->tw_bound_dev_if,
 722                        tcp_twsk_md5_key(tcptw),
 723                        tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
 724                        );
 725
 726        inet_twsk_put(tw);
 727}
 728
 729static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 730                                  struct request_sock *req)
 731{
 732        tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
 733                        tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
 734                        req->ts_recent,
 735                        0,
 736                        tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
 737                        inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
 738}
 739
 740/*
 741 *      Send a SYN-ACK after having received a SYN.
 742 *      This still operates on a request_sock only, not on a big
 743 *      socket.
 744 */
 745static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 746                                struct request_sock *req,
 747                                struct request_values *rvp)
 748{
 749        const struct inet_request_sock *ireq = inet_rsk(req);
 750        int err = -1;
 751        struct sk_buff * skb;
 752
 753        /* First, grab a route. */
 754        if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
 755                return -1;
 756
 757        skb = tcp_make_synack(sk, dst, req, rvp);
 758
 759        if (skb) {
 760                struct tcphdr *th = tcp_hdr(skb);
 761
 762                th->check = tcp_v4_check(skb->len,
 763                                         ireq->loc_addr,
 764                                         ireq->rmt_addr,
 765                                         csum_partial(th, skb->len,
 766                                                      skb->csum));
 767
 768                err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
 769                                            ireq->rmt_addr,
 770                                            ireq->opt);
 771                err = net_xmit_eval(err);
 772        }
 773
 774        dst_release(dst);
 775        return err;
 776}
 777
 778static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
 779                              struct request_values *rvp)
 780{
 781        return __tcp_v4_send_synack(sk, NULL, req, rvp);
 782}
 783
 784/*
 785 *      IPv4 request_sock destructor.
 786 */
 787static void tcp_v4_reqsk_destructor(struct request_sock *req)
 788{
 789        kfree(inet_rsk(req)->opt);
 790}
 791
 792#ifdef CONFIG_SYN_COOKIES
 793static void syn_flood_warning(struct sk_buff *skb)
 794{
 795        static unsigned long warntime;
 796
 797        if (time_after(jiffies, (warntime + HZ * 60))) {
 798                warntime = jiffies;
 799                printk(KERN_INFO
 800                       "possible SYN flooding on port %d. Sending cookies.\n",
 801                       ntohs(tcp_hdr(skb)->dest));
 802        }
 803}
 804#endif
 805
 806/*
 807 * Save and compile IPv4 options into the request_sock if needed.
 808 */
 809static struct ip_options *tcp_v4_save_options(struct sock *sk,
 810                                              struct sk_buff *skb)
 811{
 812        struct ip_options *opt = &(IPCB(skb)->opt);
 813        struct ip_options *dopt = NULL;
 814
 815        if (opt && opt->optlen) {
 816                int opt_size = optlength(opt);
 817                dopt = kmalloc(opt_size, GFP_ATOMIC);
 818                if (dopt) {
 819                        if (ip_options_echo(dopt, skb)) {
 820                                kfree(dopt);
 821                                dopt = NULL;
 822                        }
 823                }
 824        }
 825        return dopt;
 826}
 827
 828#ifdef CONFIG_TCP_MD5SIG
 829/*
 830 * RFC2385 MD5 checksumming requires a mapping of
 831 * IP address->MD5 Key.
 832 * We need to maintain these in the sk structure.
 833 */
 834
 835/* Find the Key structure for an address.  */
 836static struct tcp_md5sig_key *
 837                        tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
 838{
 839        struct tcp_sock *tp = tcp_sk(sk);
 840        int i;
 841
 842        if (!tp->md5sig_info || !tp->md5sig_info->entries4)
 843                return NULL;
 844        for (i = 0; i < tp->md5sig_info->entries4; i++) {
 845                if (tp->md5sig_info->keys4[i].addr == addr)
 846                        return &tp->md5sig_info->keys4[i].base;
 847        }
 848        return NULL;
 849}
 850
 851struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
 852                                         struct sock *addr_sk)
 853{
 854        return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
 855}
 856
 857EXPORT_SYMBOL(tcp_v4_md5_lookup);
 858
 859static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
 860                                                      struct request_sock *req)
 861{
 862        return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
 863}
 864
 865/* This can be called on a newly created socket, from other files */
 866int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
 867                      u8 *newkey, u8 newkeylen)
 868{
 869        /* Add Key to the list */
 870        struct tcp_md5sig_key *key;
 871        struct tcp_sock *tp = tcp_sk(sk);
 872        struct tcp4_md5sig_key *keys;
 873
 874        key = tcp_v4_md5_do_lookup(sk, addr);
 875        if (key) {
 876                /* Pre-existing entry - just update that one. */
 877                kfree(key->key);
 878                key->key = newkey;
 879                key->keylen = newkeylen;
 880        } else {
 881                struct tcp_md5sig_info *md5sig;
 882
 883                if (!tp->md5sig_info) {
 884                        tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
 885                                                  GFP_ATOMIC);
 886                        if (!tp->md5sig_info) {
 887                                kfree(newkey);
 888                                return -ENOMEM;
 889                        }
 890                        sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
 891                }
 892                if (tcp_alloc_md5sig_pool(sk) == NULL) {
 893                        kfree(newkey);
 894                        return -ENOMEM;
 895                }
 896                md5sig = tp->md5sig_info;
 897
 898                if (md5sig->alloced4 == md5sig->entries4) {
 899                        keys = kmalloc((sizeof(*keys) *
 900                                        (md5sig->entries4 + 1)), GFP_ATOMIC);
 901                        if (!keys) {
 902                                kfree(newkey);
 903                                tcp_free_md5sig_pool();
 904                                return -ENOMEM;
 905                        }
 906
 907                        if (md5sig->entries4)
 908                                memcpy(keys, md5sig->keys4,
 909                                       sizeof(*keys) * md5sig->entries4);
 910
 911                        /* Free old key list, and reference new one */
 912                        kfree(md5sig->keys4);
 913                        md5sig->keys4 = keys;
 914                        md5sig->alloced4++;
 915                }
 916                md5sig->entries4++;
 917                md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
 918                md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
 919                md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
 920        }
 921        return 0;
 922}
 923
 924EXPORT_SYMBOL(tcp_v4_md5_do_add);
 925
 926static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
 927                               u8 *newkey, u8 newkeylen)
 928{
 929        return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
 930                                 newkey, newkeylen);
 931}
 932
 933int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
 934{
 935        struct tcp_sock *tp = tcp_sk(sk);
 936        int i;
 937
 938        for (i = 0; i < tp->md5sig_info->entries4; i++) {
 939                if (tp->md5sig_info->keys4[i].addr == addr) {
 940                        /* Free the key */
 941                        kfree(tp->md5sig_info->keys4[i].base.key);
 942                        tp->md5sig_info->entries4--;
 943
 944                        if (tp->md5sig_info->entries4 == 0) {
 945                                kfree(tp->md5sig_info->keys4);
 946                                tp->md5sig_info->keys4 = NULL;
 947                                tp->md5sig_info->alloced4 = 0;
 948                        } else if (tp->md5sig_info->entries4 != i) {
 949                                /* Need to do some manipulation */
 950                                memmove(&tp->md5sig_info->keys4[i],
 951                                        &tp->md5sig_info->keys4[i+1],
 952                                        (tp->md5sig_info->entries4 - i) *
 953                                         sizeof(struct tcp4_md5sig_key));
 954                        }
 955                        tcp_free_md5sig_pool();
 956                        return 0;
 957                }
 958        }
 959        return -ENOENT;
 960}
 961
 962EXPORT_SYMBOL(tcp_v4_md5_do_del);
 963
 964static void tcp_v4_clear_md5_list(struct sock *sk)
 965{
 966        struct tcp_sock *tp = tcp_sk(sk);
 967
 968        /* Free each key, then the set of key keys,
 969         * the crypto element, and then decrement our
 970         * hold on the last resort crypto.
 971         */
 972        if (tp->md5sig_info->entries4) {
 973                int i;
 974                for (i = 0; i < tp->md5sig_info->entries4; i++)
 975                        kfree(tp->md5sig_info->keys4[i].base.key);
 976                tp->md5sig_info->entries4 = 0;
 977                tcp_free_md5sig_pool();
 978        }
 979        if (tp->md5sig_info->keys4) {
 980                kfree(tp->md5sig_info->keys4);
 981                tp->md5sig_info->keys4 = NULL;
 982                tp->md5sig_info->alloced4  = 0;
 983        }
 984}
 985
 986static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
 987                                 int optlen)
 988{
 989        struct tcp_md5sig cmd;
 990        struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
 991        u8 *newkey;
 992
 993        if (optlen < sizeof(cmd))
 994                return -EINVAL;
 995
 996        if (copy_from_user(&cmd, optval, sizeof(cmd)))
 997                return -EFAULT;
 998
 999        if (sin->sin_family != AF_INET)
1000                return -EINVAL;
1001
1002        if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
1003                if (!tcp_sk(sk)->md5sig_info)
1004                        return -ENOENT;
1005                return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
1006        }
1007
1008        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1009                return -EINVAL;
1010
1011        if (!tcp_sk(sk)->md5sig_info) {
1012                struct tcp_sock *tp = tcp_sk(sk);
1013                struct tcp_md5sig_info *p;
1014
1015                p = kzalloc(sizeof(*p), sk->sk_allocation);
1016                if (!p)
1017                        return -EINVAL;
1018
1019                tp->md5sig_info = p;
1020                sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1021        }
1022
1023        newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
1024        if (!newkey)
1025                return -ENOMEM;
1026        return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1027                                 newkey, cmd.tcpm_keylen);
1028}
1029
1030static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1031                                        __be32 daddr, __be32 saddr, int nbytes)
1032{
1033        struct tcp4_pseudohdr *bp;
1034        struct scatterlist sg;
1035
1036        bp = &hp->md5_blk.ip4;
1037
1038        /*
1039         * 1. the TCP pseudo-header (in the order: source IP address,
1040         * destination IP address, zero-padded protocol number, and
1041         * segment length)
1042         */
1043        bp->saddr = saddr;
1044        bp->daddr = daddr;
1045        bp->pad = 0;
1046        bp->protocol = IPPROTO_TCP;
1047        bp->len = cpu_to_be16(nbytes);
1048
1049        sg_init_one(&sg, bp, sizeof(*bp));
1050        return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1051}
1052
1053static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
1054                               __be32 daddr, __be32 saddr, struct tcphdr *th)
1055{
1056        struct tcp_md5sig_pool *hp;
1057        struct hash_desc *desc;
1058
1059        hp = tcp_get_md5sig_pool();
1060        if (!hp)
1061                goto clear_hash_noput;
1062        desc = &hp->md5_desc;
1063
1064        if (crypto_hash_init(desc))
1065                goto clear_hash;
1066        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1067                goto clear_hash;
1068        if (tcp_md5_hash_header(hp, th))
1069                goto clear_hash;
1070        if (tcp_md5_hash_key(hp, key))
1071                goto clear_hash;
1072        if (crypto_hash_final(desc, md5_hash))
1073                goto clear_hash;
1074
1075        tcp_put_md5sig_pool();
1076        return 0;
1077
1078clear_hash:
1079        tcp_put_md5sig_pool();
1080clear_hash_noput:
1081        memset(md5_hash, 0, 16);
1082        return 1;
1083}
1084
1085int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1086                        struct sock *sk, struct request_sock *req,
1087                        struct sk_buff *skb)
1088{
1089        struct tcp_md5sig_pool *hp;
1090        struct hash_desc *desc;
1091        struct tcphdr *th = tcp_hdr(skb);
1092        __be32 saddr, daddr;
1093
1094        if (sk) {
1095                saddr = inet_sk(sk)->inet_saddr;
1096                daddr = inet_sk(sk)->inet_daddr;
1097        } else if (req) {
1098                saddr = inet_rsk(req)->loc_addr;
1099                daddr = inet_rsk(req)->rmt_addr;
1100        } else {
1101                const struct iphdr *iph = ip_hdr(skb);
1102                saddr = iph->saddr;
1103                daddr = iph->daddr;
1104        }
1105
1106        hp = tcp_get_md5sig_pool();
1107        if (!hp)
1108                goto clear_hash_noput;
1109        desc = &hp->md5_desc;
1110
1111        if (crypto_hash_init(desc))
1112                goto clear_hash;
1113
1114        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1115                goto clear_hash;
1116        if (tcp_md5_hash_header(hp, th))
1117                goto clear_hash;
1118        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1119                goto clear_hash;
1120        if (tcp_md5_hash_key(hp, key))
1121                goto clear_hash;
1122        if (crypto_hash_final(desc, md5_hash))
1123                goto clear_hash;
1124
1125        tcp_put_md5sig_pool();
1126        return 0;
1127
1128clear_hash:
1129        tcp_put_md5sig_pool();
1130clear_hash_noput:
1131        memset(md5_hash, 0, 16);
1132        return 1;
1133}
1134
1135EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1136
1137static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
1138{
1139        /*
1140         * This gets called for each TCP segment that arrives
1141         * so we want to be efficient.
1142         * We have 3 drop cases:
1143         * o No MD5 hash and one expected.
1144         * o MD5 hash and we're not expecting one.
1145         * o MD5 hash and its wrong.
1146         */
1147        __u8 *hash_location = NULL;
1148        struct tcp_md5sig_key *hash_expected;
1149        const struct iphdr *iph = ip_hdr(skb);
1150        struct tcphdr *th = tcp_hdr(skb);
1151        int genhash;
1152        unsigned char newhash[16];
1153
1154        hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1155        hash_location = tcp_parse_md5sig_option(th);
1156
1157        /* We've parsed the options - do we have a hash? */
1158        if (!hash_expected && !hash_location)
1159                return 0;
1160
1161        if (hash_expected && !hash_location) {
1162                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1163                return 1;
1164        }
1165
1166        if (!hash_expected && hash_location) {
1167                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1168                return 1;
1169        }
1170
1171        /* Okay, so this is hash_expected and hash_location -
1172         * so we need to calculate the checksum.
1173         */
1174        genhash = tcp_v4_md5_hash_skb(newhash,
1175                                      hash_expected,
1176                                      NULL, NULL, skb);
1177
1178        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1179                if (net_ratelimit()) {
1180                        printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1181                               &iph->saddr, ntohs(th->source),
1182                               &iph->daddr, ntohs(th->dest),
1183                               genhash ? " tcp_v4_calc_md5_hash failed" : "");
1184                }
1185                return 1;
1186        }
1187        return 0;
1188}
1189
1190#endif
1191
1192struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1193        .family         =       PF_INET,
1194        .obj_size       =       sizeof(struct tcp_request_sock),
1195        .rtx_syn_ack    =       tcp_v4_send_synack,
1196        .send_ack       =       tcp_v4_reqsk_send_ack,
1197        .destructor     =       tcp_v4_reqsk_destructor,
1198        .send_reset     =       tcp_v4_send_reset,
1199};
1200
#ifdef CONFIG_TCP_MD5SIG
/* MD5 signature helpers used for IPv4 connection requests. */
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
        .md5_lookup    = tcp_v4_reqsk_md5_lookup,
        .calc_md5_hash = tcp_v4_md5_hash_skb,
};
#endif
1207
1208static struct timewait_sock_ops tcp_timewait_sock_ops = {
1209        .twsk_obj_size  = sizeof(struct tcp_timewait_sock),
1210        .twsk_unique    = tcp_twsk_unique,
1211        .twsk_destructor= tcp_twsk_destructor,
1212};
1213
/*
 * Process a connection request (SYN) received on socket @sk.
 *
 * Allocates a request_sock, parses the TCP options of @skb into it,
 * records the addresses and saved IP options, chooses the initial
 * sequence number (a syncookie, the value already present in
 * TCP_SKB_CB(skb)->when, or tcp_v4_init_sequence()), sends the SYN-ACK
 * and, unless syncookies are in use, adds the request to the listener's
 * request queue with TCP_TIMEOUT_INIT.
 *
 * Every path returns 0, including the ones that drop the request.
 */
1214int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1215{
1216        struct tcp_extend_values tmp_ext;
1217        struct tcp_options_received tmp_opt;
1218        u8 *hash_location;
1219        struct request_sock *req;
1220        struct inet_request_sock *ireq;
1221        struct tcp_sock *tp = tcp_sk(sk);
1222        struct dst_entry *dst = NULL;
1223        __be32 saddr = ip_hdr(skb)->saddr;
1224        __be32 daddr = ip_hdr(skb)->daddr;
1225        __u32 isn = TCP_SKB_CB(skb)->when;
1226#ifdef CONFIG_SYN_COOKIES
1227        int want_cookie = 0;
1228#else
1229#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1230#endif
1231
1232        /* Never answer SYNs sent to broadcast or multicast */
1233        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1234                goto drop;
1235
1236        /* TW buckets are converted to open requests without
1237         * limitations, they conserve resources and peer is
1238         * evidently real one.
1239         */
1240        if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1241#ifdef CONFIG_SYN_COOKIES
1242                if (sysctl_tcp_syncookies) {
1243                        want_cookie = 1;
1244                } else
1245#endif
1246                goto drop;
1247        }
1248
1249        /* Accept backlog is full. If we have already queued enough
1250         * of warm entries in syn queue, drop request. It is better than
1251         * clogging syn queue with openreqs with exponentially increasing
1252         * timeout.
1253         */
1254        if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1255                goto drop;
1256
1257        req = inet_reqsk_alloc(&tcp_request_sock_ops);
1258        if (!req)
1259                goto drop;
1260
1261#ifdef CONFIG_TCP_MD5SIG
1262        tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1263#endif
1264
1265        tcp_clear_options(&tmp_opt);
1266        tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1267        tmp_opt.user_mss  = tp->rx_opt.user_mss;
1268        tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
1269
            /*
             * Cookie extension: if the SYN carried a cookie option and
             * cookies are enabled, mix both addresses and the initiator's
             * cookie bytes into tmp_ext.cookie_bakery.
             */
1270        if (tmp_opt.cookie_plus > 0 &&
1271            tmp_opt.saw_tstamp &&
1272            !tp->rx_opt.cookie_out_never &&
1273            (sysctl_tcp_cookie_size > 0 ||
1274             (tp->cookie_values != NULL &&
1275              tp->cookie_values->cookie_desired > 0))) {
1276                u8 *c;
1277                u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1278                int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1279
1280                if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1281                        goto drop_and_release;
1282
1283                /* Secret recipe starts with IP addresses */
1284                *mess++ ^= daddr;
1285                *mess++ ^= saddr;
1286
1287                /* plus variable length Initiator Cookie */
1288                c = (u8 *)mess;
1289                while (l-- > 0)
1290                        *c++ ^= *hash_location++;
1291
1292#ifdef CONFIG_SYN_COOKIES
1293                want_cookie = 0;        /* not our kind of cookie */
1294#endif
1295                tmp_ext.cookie_out_never = 0; /* false */
1296                tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1297        } else if (!tp->rx_opt.cookie_in_always) {
1298                /* redundant indications, but ensure initialization. */
1299                tmp_ext.cookie_out_never = 1; /* true */
1300                tmp_ext.cookie_plus = 0;
1301        } else {
                    /* Socket insists on cookies (cookie_in_always) but none usable. */
1302                goto drop_and_release;
1303        }
1304        tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1305
1306        if (want_cookie && !tmp_opt.saw_tstamp)
1307                tcp_clear_options(&tmp_opt);
1308
1309        tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1310        tcp_openreq_init(req, &tmp_opt, skb);
1311
            /* The request's local address is the SYN's destination, and vice versa. */
1312        ireq = inet_rsk(req);
1313        ireq->loc_addr = daddr;
1314        ireq->rmt_addr = saddr;
1315        ireq->no_srccheck = inet_sk(sk)->transparent;
1316        ireq->opt = tcp_v4_save_options(sk, skb);
1317
1318        if (security_inet_conn_request(sk, skb, req))
1319                goto drop_and_free;
1320
1321        if (!want_cookie)
1322                TCP_ECN_create_request(req, tcp_hdr(skb));
1323
1324        if (want_cookie) {
1325#ifdef CONFIG_SYN_COOKIES
1326                syn_flood_warning(skb);
1327                req->cookie_ts = tmp_opt.tstamp_ok;
1328#endif
1329                isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1330        } else if (!isn) {
1331                struct inet_peer *peer = NULL;
1332
1333                /* VJ's idea. We save last timestamp seen
1334                 * from the destination in peer table, when entering
1335                 * state TIME-WAIT, and check against it before
1336                 * accepting new connection request.
1337                 *
1338                 * If "isn" is not zero, this request hit alive
1339                 * timewait bucket, so that all the necessary checks
1340                 * are made in the function processing timewait state.
1341                 */
1342                if (tmp_opt.saw_tstamp &&
1343                    tcp_death_row.sysctl_tw_recycle &&
1344                    (dst = inet_csk_route_req(sk, req)) != NULL &&
1345                    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1346                    peer->v4daddr == saddr) {
1347                        if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1348                            (s32)(peer->tcp_ts - req->ts_recent) >
1349                                                        TCP_PAWS_WINDOW) {
1350                                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1351                                goto drop_and_release;
1352                        }
1353                }
1354                /* Kill the following clause, if you dislike this way. */
1355                else if (!sysctl_tcp_syncookies &&
1356                         (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1357                          (sysctl_max_syn_backlog >> 2)) &&
1358                         (!peer || !peer->tcp_ts_stamp) &&
1359                         (!dst || !dst_metric(dst, RTAX_RTT))) {
1360                        /* Without syncookies last quarter of
1361                         * backlog is filled with destinations,
1362                         * proven to be alive.
1363                         * It means that we continue to communicate
1364                         * to destinations, already remembered
1365                         * to the moment of synflood.
1366                         */
1367                        LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1368                                       &saddr, ntohs(tcp_hdr(skb)->source));
1369                        goto drop_and_release;
1370                }
1371
1372                isn = tcp_v4_init_sequence(skb);
1373        }
1374        tcp_rsk(req)->snt_isn = isn;
1375
            /*
             * __tcp_v4_send_synack() releases dst itself, so the failure
             * path goes to drop_and_free rather than drop_and_release.
             * With want_cookie set the request is never queued: it is
             * freed as soon as the SYN-ACK has been sent.
             */
1376        if (__tcp_v4_send_synack(sk, dst, req,
1377                                 (struct request_values *)&tmp_ext) ||
1378            want_cookie)
1379                goto drop_and_free;
1380
1381        inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1382        return 0;
1383
            /* Error exits: each label falls through to the ones below it. */
1384drop_and_release:
1385        dst_release(dst);
1386drop_and_free:
1387        reqsk_free(req);
1388drop:
1389        return 0;
1390}
1391
1392
1393/*
1394 * The three way handshake has completed - we got a valid synack -
1395 * now create the new socket.
1396 */
1397struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1398                                  struct request_sock *req,
1399                                  struct dst_entry *dst)
1400{
1401        struct inet_request_sock *ireq;
1402        struct inet_sock *newinet;
1403        struct tcp_sock *newtp;
1404        struct sock *newsk;
1405#ifdef CONFIG_TCP_MD5SIG
1406        struct tcp_md5sig_key *key;
1407#endif
1408
1409        if (sk_acceptq_is_full(sk))
1410                goto exit_overflow;
1411
1412        if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1413                goto exit;
1414
1415        newsk = tcp_create_openreq_child(sk, req, skb);
1416        if (!newsk)
1417                goto exit;
1418
1419        newsk->sk_gso_type = SKB_GSO_TCPV4;
1420        sk_setup_caps(newsk, dst);
1421
1422        newtp                 = tcp_sk(newsk);
1423        newinet               = inet_sk(newsk);
1424        ireq                  = inet_rsk(req);
1425        newinet->inet_daddr   = ireq->rmt_addr;
1426        newinet->inet_rcv_saddr = ireq->loc_addr;
1427        newinet->inet_saddr           = ireq->loc_addr;
1428        newinet->opt          = ireq->opt;
1429        ireq->opt             = NULL;
1430        newinet->mc_index     = inet_iif(skb);
1431        newinet->mc_ttl       = ip_hdr(skb)->ttl;
1432        inet_csk(newsk)->icsk_ext_hdr_len = 0;
1433        if (newinet->opt)
1434                inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
1435        newinet->inet_id = newtp->write_seq ^ jiffies;
1436
1437        tcp_mtup_init(newsk);
1438        tcp_sync_mss(newsk, dst_mtu(dst));
1439        newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1440        if (tcp_sk(sk)->rx_opt.user_mss &&
1441            tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1442                newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1443
1444        tcp_initialize_rcv_mss(newsk);
1445
1446#ifdef CONFIG_TCP_MD5SIG
1447        /* Copy over the MD5 key from the original socket */
1448        key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
1449        if (key != NULL) {
1450                /*
1451                 * We're using one, so create a matching key
1452                 * on the newsk structure. If we fail to get
1453                 * memory, then we end up not copying the key
1454                 * across. Shucks.
1455                 */
1456                char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1457                if (newkey != NULL)
1458                        tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
1459                                          newkey, key->keylen);
1460                newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1461        }
1462#endif
1463
1464        __inet_hash_nolisten(newsk, NULL);
1465        __inet_inherit_port(sk, newsk);
1466
1467        return newsk;
1468
1469exit_overflow:
1470        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1471exit:
1472        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1473        dst_release(dst);
1474        return NULL;
1475}
1476
1477static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1478{
1479        struct tcphdr *th = tcp_hdr(skb);
1480        const struct iphdr *iph = ip_hdr(skb);
1481        struct sock *nsk;
1482        struct request_sock **prev;
1483        /* Find possible connection requests. */
1484        struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1485                                                       iph->saddr, iph->daddr);
1486        if (req)
1487                return tcp_check_req(sk, skb, req, prev);
1488
1489        nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1490                        th->source, iph->daddr, th->dest, inet_iif(skb));
1491
1492        if (nsk) {
1493                if (nsk->sk_state != TCP_TIME_WAIT) {
1494                        bh_lock_sock(nsk);
1495                        return nsk;
1496                }
1497                inet_twsk_put(inet_twsk(nsk));
1498                return NULL;
1499        }
1500
1501#ifdef CONFIG_SYN_COOKIES
1502        if (!th->rst && !th->syn && th->ack)
1503                sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1504#endif
1505        return sk;
1506}
1507
1508static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1509{
1510        const struct iphdr *iph = ip_hdr(skb);
1511
1512        if (skb->ip_summed == CHECKSUM_COMPLETE) {
1513                if (!tcp_v4_check(skb->len, iph->saddr,
1514                                  iph->daddr, skb->csum)) {
1515                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1516                        return 0;
1517                }
1518        }
1519
1520        skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1521                                       skb->len, IPPROTO_TCP, 0);
1522
1523        if (skb->len <= 76) {
1524                return __skb_checksum_complete(skb);
1525        }
1526        return 0;
1527}
1528
1529
1530/* The socket must have its spinlock held when we get
1531 * here.
1532 *
1533 * We have a potential double-lock case here, so even when
1534 * doing backlog processing we use the BH locking scheme.
1535 * This is because we cannot sleep with the original spinlock
1536 * held.
     *
     * Dispatches @skb by socket state: established sockets go to
     * tcp_rcv_established(), listening sockets first resolve the
     * target socket through tcp_v4_hnd_req(), and everything else goes
     * to tcp_rcv_state_process().  A non-zero result from any of these
     * handlers sends a reset and frees the skb.  Returns 0 on every
     * path.
1537 */
1538int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1539{
1540        struct sock *rsk;
1541#ifdef CONFIG_TCP_MD5SIG
1542        /*
1543         * We really want to reject the packet as early as possible
1544         * if:
1545         *  o We're expecting an MD5'd packet and this is no MD5 tcp option
1546         *  o There is an MD5 option and we're not expecting one
1547         */
1548        if (tcp_v4_inbound_md5_hash(sk, skb))
1549                goto discard;
1550#endif
1551
1552        if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1553                TCP_CHECK_TIMER(sk);
1554                if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1555                        rsk = sk;
1556                        goto reset;
1557                }
1558                TCP_CHECK_TIMER(sk);
1559                return 0;
1560        }
1561
            /* Slow path: check header length and checksum before going on. */
1562        if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1563                goto csum_err;
1564
1565        if (sk->sk_state == TCP_LISTEN) {
1566                struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1567                if (!nsk)
1568                        goto discard;
1569
                    /*
                     * A socket other than the listener was found: let it
                     * process the segment via tcp_child_process().
                     */
1570                if (nsk != sk) {
1571                        if (tcp_child_process(sk, nsk, skb)) {
1572                                rsk = nsk;
1573                                goto reset;
1574                        }
1575                        return 0;
1576                }
1577        }
1578
1579        TCP_CHECK_TIMER(sk);
1580        if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1581                rsk = sk;
1582                goto reset;
1583        }
1584        TCP_CHECK_TIMER(sk);
1585        return 0;
1586
            /* rsk is the socket whose handler failed; reset falls into discard. */
1587reset:
1588        tcp_v4_send_reset(rsk, skb);
1589discard:
1590        kfree_skb(skb);
1591        /* Be careful here. If this function gets more complicated and
1592         * gcc suffers from register pressure on the x86, sk (in %ebx)
1593         * might be destroyed here. This current version compiles correctly,
1594         * but you have been warned.
1595         */
1596        return 0;
1597
1598csum_err:
1599        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1600        goto discard;
1601}
1602
1603/*
1604 *      From tcp_input.c
1605 */
1606
/*
 * Receive entry point for one incoming IPv4 TCP segment.
 *
 * Validates the TCP header, fills in the per-skb control block, looks up
 * the owning socket and then either processes the segment directly, hands
 * it to the prequeue, or appends it to the socket backlog when a user
 * context currently owns the socket.  Segments that match a TIME_WAIT
 * socket, or no socket at all, take the dedicated paths at the bottom.
 *
 * Returns 0 on every drop path and when the segment was prequeued or
 * backlogged; otherwise returns whatever tcp_v4_do_rcv() returned.
 */
1607int tcp_v4_rcv(struct sk_buff *skb)
1608{
1609        const struct iphdr *iph;
1610        struct tcphdr *th;
1611        struct sock *sk;
1612        int ret;
1613        struct net *net = dev_net(skb->dev);
1614
            /* Only segments addressed to this host are handled. */
1615        if (skb->pkt_type != PACKET_HOST)
1616                goto discard_it;
1617
1618        /* Count it even if it's bad */
1619        TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1620
1621        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1622                goto discard_it;
1623
1624        th = tcp_hdr(skb);
1625
            /*
             * doff is in 32-bit words: reject headers shorter than the fixed
             * TCP header, then make sure the whole header (with options) can
             * be pulled.
             */
1626        if (th->doff < sizeof(struct tcphdr) / 4)
1627                goto bad_packet;
1628        if (!pskb_may_pull(skb, th->doff * 4))
1629                goto discard_it;
1630
1631        /* An explanation is required here, I think.
1632         * Packet length and doff are validated by header prediction,
1633         * provided case of th->doff==0 is eliminated.
1634         * So, we defer the checks. */
1635        if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1636                goto bad_packet;
1637
            /*
             * Header pointers are fetched again after the pull/checksum calls
             * above, then the sequence-space fields are cached in the skb
             * control block.  SYN and FIN each add one to end_seq.
             */
1638        th = tcp_hdr(skb);
1639        iph = ip_hdr(skb);
1640        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1641        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1642                                    skb->len - th->doff * 4);
1643        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1644        TCP_SKB_CB(skb)->when    = 0;
            /* The IP TOS byte is stashed in the control block's flags field. */
1645        TCP_SKB_CB(skb)->flags   = iph->tos;
1646        TCP_SKB_CB(skb)->sacked  = 0;
1647
1648        sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1649        if (!sk)
1650                goto no_tcp_socket;
1651
        /* Also re-entered from the TIME_WAIT path with sk set to a listener. */
1652process:
1653        if (sk->sk_state == TCP_TIME_WAIT)
1654                goto do_time_wait;
1655
1656        if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1657                goto discard_and_relse;
1658        nf_reset(skb);
1659
1660        if (sk_filter(sk, skb))
1661                goto discard_and_relse;
1662
1663        skb->dev = NULL;
1664
            /*
             * With the socket's bottom-half lock held: if no user context owns
             * the socket, process now (or prequeue); otherwise defer the
             * segment to the socket backlog.
             */
1665        bh_lock_sock_nested(sk);
1666        ret = 0;
1667        if (!sock_owned_by_user(sk)) {
1668#ifdef CONFIG_NET_DMA
1669                struct tcp_sock *tp = tcp_sk(sk);
1670                if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1671                        tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
                    /* With a DMA channel available the prequeue is bypassed. */
1672                if (tp->ucopy.dma_chan)
1673                        ret = tcp_v4_do_rcv(sk, skb);
1674                else
1675#endif
1676                {
                            /* A zero return from tcp_prequeue() means the
                             * segment must be processed here; non-zero
                             * presumably means the prequeue took it. */
1677                        if (!tcp_prequeue(sk, skb))
1678                                ret = tcp_v4_do_rcv(sk, skb);
1679                }
1680        } else
1681                sk_add_backlog(sk, skb);
1682        bh_unlock_sock(sk);
1683
            /* Drop the socket reference (presumably taken by the lookup above). */
1684        sock_put(sk);
1685
1686        return ret;
1687
        /*
         * No matching socket: a truncated or bad-checksum segment is only
         * counted as an input error, anything else is answered with a reset.
         * The bad_packet label inside the branch is also the jump target for
         * the header validation failures above.
         */
1688no_tcp_socket:
1689        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1690                goto discard_it;
1691
1692        if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1693bad_packet:
1694                TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1695        } else {
1696                tcp_v4_send_reset(NULL, skb);
1697        }
1698
1699discard_it:
1700        /* Discard frame. */
1701        kfree_skb(skb);
1702        return 0;
1703
        /* Drop path for segments rejected after a socket was found. */
1704discard_and_relse:
1705        sock_put(sk);
1706        goto discard_it;
1707
        /*
         * The lookup returned a TIME_WAIT socket.  Every exit from this path
         * that does not continue with the timewait socket releases it with
         * inet_twsk_put().
         */
1708do_time_wait:
1709        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1710                inet_twsk_put(inet_twsk(sk));
1711                goto discard_it;
1712        }
1713
1714        if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1715                TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1716                inet_twsk_put(inet_twsk(sk));
1717                goto discard_it;
1718        }
1719        switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1720        case TCP_TW_SYN: {
                    /*
                     * If a listener exists for the destination, retire the
                     * timewait socket and restart processing with the listener
                     * as the target socket.
                     */
1721                struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1722                                                        &tcp_hashinfo,
1723                                                        iph->daddr, th->dest,
1724                                                        inet_iif(skb));
1725                if (sk2) {
1726                        inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1727                        inet_twsk_put(inet_twsk(sk));
1728                        sk = sk2;
1729                        goto process;
1730                }
1731                /* Fall through to ACK */
1732        }
1733        case TCP_TW_ACK:
1734                tcp_v4_timewait_ack(sk, skb);
1735                break;
1736        case TCP_TW_RST:
1737                goto no_tcp_socket;
1738        case TCP_TW_SUCCESS:;
1739        }
1740        goto discard_it;
1741}
1742
1743/* VJ's idea. Save last timestamp seen from this destination
1744 * and hold it at least for normal timewait interval to use for duplicate
1745 * segment detection in subsequent connections, before they enter synchronized
1746 * state.
1747 */
1748
1749int tcp_v4_remember_stamp(struct sock *sk)
1750{
1751        struct inet_sock *inet = inet_sk(sk);
1752        struct tcp_sock *tp = tcp_sk(sk);
1753        struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1754        struct inet_peer *peer = NULL;
1755        int release_it = 0;
1756
1757        if (!rt || rt->rt_dst != inet->inet_daddr) {
1758                peer = inet_getpeer(inet->inet_daddr, 1);
1759                release_it = 1;
1760        } else {
1761                if (!rt->peer)
1762                        rt_bind_peer(rt, 1);
1763                peer = rt->peer;
1764        }
1765
1766        if (peer) {
1767                if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1768                    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
1769                     peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
1770                        peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
1771                        peer->tcp_ts = tp->rx_opt.ts_recent;
1772                }
1773                if (release_it)
1774                        inet_putpeer(peer);
1775                return 1;
1776        }
1777
1778        return 0;
1779}
1780
1781int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1782{
1783        struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
1784
1785        if (peer) {
1786                const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1787
1788                if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1789                    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
1790                     peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
1791                        peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
1792                        peer->tcp_ts       = tcptw->tw_ts_recent;
1793                }
1794                inet_putpeer(peer);
1795                return 1;
1796        }
1797
1798        return 0;
1799}
1800
/*
 * IPv4 address-family operations for connection sockets.
 * tcp_v4_init_sock() installs this table as icsk->icsk_af_ops.
 * The compat_* socket-option hooks exist only with CONFIG_COMPAT.
 */
1801const struct inet_connection_sock_af_ops ipv4_specific = {
1802        .queue_xmit        = ip_queue_xmit,
1803        .send_check        = tcp_v4_send_check,
1804        .rebuild_header    = inet_sk_rebuild_header,
1805        .conn_request      = tcp_v4_conn_request,
1806        .syn_recv_sock     = tcp_v4_syn_recv_sock,
1807        .remember_stamp    = tcp_v4_remember_stamp,
1808        .net_header_len    = sizeof(struct iphdr),
1809        .setsockopt        = ip_setsockopt,
1810        .getsockopt        = ip_getsockopt,
1811        .addr2sockaddr     = inet_csk_addr2sockaddr,
1812        .sockaddr_len      = sizeof(struct sockaddr_in),
1813        .bind_conflict     = inet_csk_bind_conflict,
1814#ifdef CONFIG_COMPAT
1815        .compat_setsockopt = compat_ip_setsockopt,
1816        .compat_getsockopt = compat_ip_getsockopt,
1817#endif
1818};
1819
1820#ifdef CONFIG_TCP_MD5SIG
/*
 * IPv4 TCP MD5 signature operations.
 * tcp_v4_init_sock() installs this table as tp->af_specific.
 */
1821static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1822        .md5_lookup             = tcp_v4_md5_lookup,
1823        .calc_md5_hash          = tcp_v4_md5_hash_skb,
1824        .md5_add                = tcp_v4_md5_add_func,
1825        .md5_parse              = tcp_v4_parse_md5_keys,
1826};
1827#endif
1828
1829/* NOTE: A lot of things set to zero explicitly by call to
1830 *       sk_alloc() so need not be done here.
1831 */
1832static int tcp_v4_init_sock(struct sock *sk)
1833{
1834        struct inet_connection_sock *icsk = inet_csk(sk);
1835        struct tcp_sock *tp = tcp_sk(sk);
1836
1837        skb_queue_head_init(&tp->out_of_order_queue);
1838        tcp_init_xmit_timers(sk);
1839        tcp_prequeue_init(tp);
1840
1841        icsk->icsk_rto = TCP_TIMEOUT_INIT;
1842        tp->mdev = TCP_TIMEOUT_INIT;
1843
1844        /* So many TCP implementations out there (incorrectly) count the
1845         * initial SYN frame in their delayed-ACK and congestion control
1846         * algorithms that we must have the following bandaid to talk
1847         * efficiently to them.  -DaveM
1848         */
1849        tp->snd_cwnd = 2;
1850
1851        /* See draft-stevens-tcpca-spec-01 for discussion of the
1852         * initialization of these values.
1853         */
1854        tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1855        tp->snd_cwnd_clamp = ~0;
1856        tp->mss_cache = TCP_MSS_DEFAULT;
1857
1858        tp->reordering = sysctl_tcp_reordering;
1859        icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1860
1861        sk->sk_state = TCP_CLOSE;
1862
1863        sk->sk_write_space = sk_stream_write_space;
1864        sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1865
1866        icsk->icsk_af_ops = &ipv4_specific;
1867        icsk->icsk_sync_mss = tcp_sync_mss;
1868#ifdef CONFIG_TCP_MD5SIG
1869        tp->af_specific = &tcp_sock_ipv4_specific;
1870#endif
1871
1872        /* TCP Cookie Transactions */
1873        if (sysctl_tcp_cookie_size > 0) {
1874                /* Default, cookies without s_data_payload. */
1875                tp->cookie_values =
1876                        kzalloc(sizeof(*tp->cookie_values),
1877                                sk->sk_allocation);
1878                if (tp->cookie_values != NULL)
1879                        kref_init(&tp->cookie_values->kref);
1880        }
1881        /* Presumed zeroed, in order of appearance:
1882         *      cookie_in_always, cookie_out_never,
1883         *      s_data_constant, s_data_in, s_data_out
1884         */
1885        sk->sk_sndbuf = sysctl_tcp_wmem[1];
1886        sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1887
1888        local_bh_disable();
1889        percpu_counter_inc(&tcp_sockets_allocated);
1890        local_bh_enable();
1891
1892        return 0;
1893}
1894
1895void tcp_v4_destroy_sock(struct sock *sk)
1896{
1897        struct tcp_sock *tp = tcp_sk(sk);
1898
1899        tcp_clear_xmit_timers(sk);
1900
1901        tcp_cleanup_congestion_control(sk);
1902
1903        /* Cleanup up the write buffer. */
1904        tcp_write_queue_purge(sk);
1905
1906        /* Cleans up our, hopefully empty, out_of_order_queue. */
1907        __skb_queue_purge(&tp->out_of_order_queue);
1908
1909#ifdef CONFIG_TCP_MD5SIG
1910        /* Clean up the MD5 key list, if any */
1911        if (tp->md5sig_info) {
1912                tcp_v4_clear_md5_list(sk);
1913                kfree(tp->md5sig_info);
1914                tp->md5sig_info = NULL;
1915        }
1916#endif
1917
1918#ifdef CONFIG_NET_DMA
1919        /* Cleans up our sk_async_wait_queue */
1920        __skb_queue_purge(&sk->sk_async_wait_queue);
1921#endif
1922
1923        /* Clean prequeue, it must be empty really */
1924        __skb_queue_purge(&tp->ucopy.prequeue);
1925
1926        /* Clean up a referenced TCP bind bucket. */
1927        if (inet_csk(sk)->icsk_bind_hash)
1928                inet_put_port(sk);
1929
1930        /*
1931         * If sendmsg cached page exists, toss it.
1932         */
1933        if (sk->sk_sndmsg_page) {
1934                __free_page(sk->sk_sndmsg_page);
1935                sk->sk_sndmsg_page = NULL;
1936        }
1937
1938        /* TCP Cookie Transactions */
1939        if (tp->cookie_values != NULL) {
1940                kref_put(&tp->cookie_values->kref,
1941                         tcp_cookie_values_release);
1942                tp->cookie_values = NULL;
1943        }
1944
1945        percpu_counter_dec(&tcp_sockets_allocated);
1946}
1947
1948EXPORT_SYMBOL(tcp_v4_destroy_sock);
1949
1950#ifdef CONFIG_PROC_FS
1951/* Proc filesystem TCP sock list dumping. */
1952
1953static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1954{
1955        return hlist_nulls_empty(head) ? NULL :
1956                list_entry(head->first, struct inet_timewait_sock, tw_node);
1957}
1958
1959static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1960{
1961        return !is_a_nulls(tw->tw_node.next) ?
1962                hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1963}
1964
/*
 * Step the /proc iterator through the listening hash table.
 *
 * @cur is the entry returned by the previous step: a listening socket, or
 * a request_sock while st->state is TCP_SEQ_STATE_OPENREQ.  A NULL @cur
 * (re)starts the walk at bucket 0.
 *
 * Returns the next listening socket whose family and network namespace
 * match, or the next pending request whose family matches st->family, or
 * NULL once every listening bucket has been walked.
 *
 * Locking: when a non-NULL entry is returned, the lock of listening bucket
 * st->bucket is still held.  When that entry is a request, the
 * syn_wait_lock of st->syn_wait_sk is read-locked as well.  Neither is
 * released here on that path; tcp_seq_stop() drops them.
 */
1965static void *listening_get_next(struct seq_file *seq, void *cur)
1966{
1967        struct inet_connection_sock *icsk;
1968        struct hlist_nulls_node *node;
1969        struct sock *sk = cur;
1970        struct inet_listen_hashbucket *ilb;
1971        struct tcp_iter_state *st = seq->private;
1972        struct net *net = seq_file_net(seq);
1973
            /* Fresh walk: lock the first bucket and scan from its head. */
1974        if (!sk) {
1975                st->bucket = 0;
1976                ilb = &tcp_hashinfo.listening_hash[0];
1977                spin_lock_bh(&ilb->lock);
1978                sk = sk_nulls_head(&ilb->head);
1979                goto get_sk;
1980        }
1981        ilb = &tcp_hashinfo.listening_hash[st->bucket];
            /* Running entry index; the show routine prints it as the slot. */
1982        ++st->num;
1983
1984        if (st->state == TCP_SEQ_STATE_OPENREQ) {
                    /* cur is a request: keep walking the SYN table of
                     * st->syn_wait_sk, hash chain by hash chain. */
1985                struct request_sock *req = cur;
1986
1987                icsk = inet_csk(st->syn_wait_sk);
1988                req = req->dl_next;
1989                while (1) {
1990                        while (req) {
1991                                if (req->rsk_ops->family == st->family) {
1992                                        cur = req;
1993                                        goto out;
1994                                }
1995                                req = req->dl_next;
1996                        }
1997                        if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1998                                break;
            /* Entered from start_req below with st->sbucket reset to 0. */
1999get_req:
2000                        req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2001                }
                    /* SYN table exhausted: go back to listing sockets,
                     * starting after the listener, and drop its lock. */
2002                sk        = sk_next(st->syn_wait_sk);
2003                st->state = TCP_SEQ_STATE_LISTENING;
2004                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2005        } else {
                    /* cur is a listening socket: report its pending
                     * requests, if any, before moving to the next socket. */
2006                icsk = inet_csk(sk);
2007                read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2008                if (reqsk_queue_len(&icsk->icsk_accept_queue))
2009                        goto start_req;
2010                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2011                sk = sk_next(sk);
2012        }
2013get_sk:
2014        sk_nulls_for_each_from(sk, node) {
2015                if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
2016                        cur = sk;
2017                        goto out;
2018                }
                    /* The socket itself is skipped, but its request queue
                     * is still inspected. */
2019                icsk = inet_csk(sk);
2020                read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2021                if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2022start_req:
2023                        st->uid         = sock_i_uid(sk);
2024                        st->syn_wait_sk = sk;
2025                        st->state       = TCP_SEQ_STATE_OPENREQ;
2026                        st->sbucket     = 0;
2027                        goto get_req;
2028                }
2029                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2030        }
            /* Bucket finished: release it and continue with the next one. */
2031        spin_unlock_bh(&ilb->lock);
2032        if (++st->bucket < INET_LHTABLE_SIZE) {
2033                ilb = &tcp_hashinfo.listening_hash[st->bucket];
2034                spin_lock_bh(&ilb->lock);
2035                sk = sk_nulls_head(&ilb->head);
2036                goto get_sk;
2037        }
2038        cur = NULL;
2039out:
2040        return cur;
2041}
2042
2043static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2044{
2045        void *rc = listening_get_next(seq, NULL);
2046
2047        while (rc && *pos) {
2048                rc = listening_get_next(seq, rc);
2049                --*pos;
2050        }
2051        return rc;
2052}
2053
2054static inline int empty_bucket(struct tcp_iter_state *st)
2055{
2056        return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2057                hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2058}
2059
2060static void *established_get_first(struct seq_file *seq)
2061{
2062        struct tcp_iter_state *st = seq->private;
2063        struct net *net = seq_file_net(seq);
2064        void *rc = NULL;
2065
2066        for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2067                struct sock *sk;
2068                struct hlist_nulls_node *node;
2069                struct inet_timewait_sock *tw;
2070                spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2071
2072                /* Lockless fast path for the common case of empty buckets */
2073                if (empty_bucket(st))
2074                        continue;
2075
2076                spin_lock_bh(lock);
2077                sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2078                        if (sk->sk_family != st->family ||
2079                            !net_eq(sock_net(sk), net)) {
2080                                continue;
2081                        }
2082                        rc = sk;
2083                        goto out;
2084                }
2085                st->state = TCP_SEQ_STATE_TIME_WAIT;
2086                inet_twsk_for_each(tw, node,
2087                                   &tcp_hashinfo.ehash[st->bucket].twchain) {
2088                        if (tw->tw_family != st->family ||
2089                            !net_eq(twsk_net(tw), net)) {
2090                                continue;
2091                        }
2092                        rc = tw;
2093                        goto out;
2094                }
2095                spin_unlock_bh(lock);
2096                st->state = TCP_SEQ_STATE_ESTABLISHED;
2097        }
2098out:
2099        return rc;
2100}
2101
/*
 * Advance the /proc iterator within the established hash table.
 *
 * @cur is the previously returned entry: a timewait socket when st->state
 * is TCP_SEQ_STATE_TIME_WAIT, otherwise a regular socket.  Within a bucket
 * the regular chain is walked first and the timewait chain second.  Once
 * both are exhausted the bucket lock is dropped and the walk resumes at
 * the next non-empty bucket.
 *
 * Returns the next entry matching the iterator's family and network
 * namespace, or NULL past the last bucket.  On a non-NULL return the lock
 * of bucket st->bucket is left held.
 */
2102static void *established_get_next(struct seq_file *seq, void *cur)
2103{
2104        struct sock *sk = cur;
2105        struct inet_timewait_sock *tw;
2106        struct hlist_nulls_node *node;
2107        struct tcp_iter_state *st = seq->private;
2108        struct net *net = seq_file_net(seq);
2109
            /* Running entry index; the show routine prints it as the slot. */
2110        ++st->num;
2111
2112        if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2113                tw = cur;
2114                tw = tw_next(tw);
            /* Also entered from below once the regular chain is exhausted. */
2115get_tw:
2116                while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2117                        tw = tw_next(tw);
2118                }
2119                if (tw) {
2120                        cur = tw;
2121                        goto out;
2122                }
                    /* Timewait chain finished: this bucket is done. */
2123                spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2124                st->state = TCP_SEQ_STATE_ESTABLISHED;
2125
2126                /* Look for next non empty bucket */
2127                while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2128                                empty_bucket(st))
2129                        ;
2130                if (st->bucket > tcp_hashinfo.ehash_mask)
2131                        return NULL;
2132
2133                spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2134                sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2135        } else
2136                sk = sk_nulls_next(sk);
2137
2138        sk_nulls_for_each_from(sk, node) {
2139                if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2140                        goto found;
2141        }
2142
            /* Regular chain exhausted: switch to this bucket's timewait chain. */
2143        st->state = TCP_SEQ_STATE_TIME_WAIT;
2144        tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2145        goto get_tw;
2146found:
2147        cur = sk;
2148out:
2149        return cur;
2150}
2151
2152static void *established_get_idx(struct seq_file *seq, loff_t pos)
2153{
2154        void *rc = established_get_first(seq);
2155
2156        while (rc && pos) {
2157                rc = established_get_next(seq, rc);
2158                --pos;
2159        }
2160        return rc;
2161}
2162
2163static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2164{
2165        void *rc;
2166        struct tcp_iter_state *st = seq->private;
2167
2168        st->state = TCP_SEQ_STATE_LISTENING;
2169        rc        = listening_get_idx(seq, &pos);
2170
2171        if (!rc) {
2172                st->state = TCP_SEQ_STATE_ESTABLISHED;
2173                rc        = established_get_idx(seq, pos);
2174        }
2175
2176        return rc;
2177}
2178
2179static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2180{
2181        struct tcp_iter_state *st = seq->private;
2182        st->state = TCP_SEQ_STATE_LISTENING;
2183        st->num = 0;
2184        return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2185}
2186
2187static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2188{
2189        void *rc = NULL;
2190        struct tcp_iter_state *st;
2191
2192        if (v == SEQ_START_TOKEN) {
2193                rc = tcp_get_idx(seq, 0);
2194                goto out;
2195        }
2196        st = seq->private;
2197
2198        switch (st->state) {
2199        case TCP_SEQ_STATE_OPENREQ:
2200        case TCP_SEQ_STATE_LISTENING:
2201                rc = listening_get_next(seq, v);
2202                if (!rc) {
2203                        st->state = TCP_SEQ_STATE_ESTABLISHED;
2204                        rc        = established_get_first(seq);
2205                }
2206                break;
2207        case TCP_SEQ_STATE_ESTABLISHED:
2208        case TCP_SEQ_STATE_TIME_WAIT:
2209                rc = established_get_next(seq, v);
2210                break;
2211        }
2212out:
2213        ++*pos;
2214        return rc;
2215}
2216
2217static void tcp_seq_stop(struct seq_file *seq, void *v)
2218{
2219        struct tcp_iter_state *st = seq->private;
2220
2221        switch (st->state) {
2222        case TCP_SEQ_STATE_OPENREQ:
2223                if (v) {
2224                        struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2225                        read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2226                }
2227        case TCP_SEQ_STATE_LISTENING:
2228                if (v != SEQ_START_TOKEN)
2229                        spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2230                break;
2231        case TCP_SEQ_STATE_TIME_WAIT:
2232        case TCP_SEQ_STATE_ESTABLISHED:
2233                if (v)
2234                        spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2235                break;
2236        }
2237}
2238
2239static int tcp_seq_open(struct inode *inode, struct file *file)
2240{
2241        struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2242        struct tcp_iter_state *s;
2243        int err;
2244
2245        err = seq_open_net(inode, file, &afinfo->seq_ops,
2246                          sizeof(struct tcp_iter_state));
2247        if (err < 0)
2248                return err;
2249
2250        s = ((struct seq_file *)file->private_data)->private;
2251        s->family               = afinfo->family;
2252        return 0;
2253}
2254
2255int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2256{
2257        int rc = 0;
2258        struct proc_dir_entry *p;
2259
2260        afinfo->seq_fops.open           = tcp_seq_open;
2261        afinfo->seq_fops.read           = seq_read;
2262        afinfo->seq_fops.llseek         = seq_lseek;
2263        afinfo->seq_fops.release        = seq_release_net;
2264
2265        afinfo->seq_ops.start           = tcp_seq_start;
2266        afinfo->seq_ops.next            = tcp_seq_next;
2267        afinfo->seq_ops.stop            = tcp_seq_stop;
2268
2269        p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2270                             &afinfo->seq_fops, afinfo);
2271        if (!p)
2272                rc = -ENOMEM;
2273        return rc;
2274}
2275
/* Remove the /proc entry named afinfo->name from @net. */
2276void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2277{
2278        proc_net_remove(net, afinfo->name);
2279}
2280
2281static void get_openreq4(struct sock *sk, struct request_sock *req,
2282                         struct seq_file *f, int i, int uid, int *len)
2283{
2284        const struct inet_request_sock *ireq = inet_rsk(req);
2285        int ttd = req->expires - jiffies;
2286
2287        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2288                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
2289                i,
2290                ireq->loc_addr,
2291                ntohs(inet_sk(sk)->inet_sport),
2292                ireq->rmt_addr,
2293                ntohs(ireq->rmt_port),
2294                TCP_SYN_RECV,
2295                0, 0, /* could print option size, but that is af dependent. */
2296                1,    /* timers active (only the expire timer) */
2297                jiffies_to_clock_t(ttd),
2298                req->retrans,
2299                uid,
2300                0,  /* non standard timer */
2301                0, /* open_requests have no inode */
2302                atomic_read(&sk->sk_refcnt),
2303                req,
2304                len);
2305}
2306
2307static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2308{
2309        int timer_active;
2310        unsigned long timer_expires;
2311        struct tcp_sock *tp = tcp_sk(sk);
2312        const struct inet_connection_sock *icsk = inet_csk(sk);
2313        struct inet_sock *inet = inet_sk(sk);
2314        __be32 dest = inet->inet_daddr;
2315        __be32 src = inet->inet_rcv_saddr;
2316        __u16 destp = ntohs(inet->inet_dport);
2317        __u16 srcp = ntohs(inet->inet_sport);
2318        int rx_queue;
2319
2320        if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2321                timer_active    = 1;
2322                timer_expires   = icsk->icsk_timeout;
2323        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2324                timer_active    = 4;
2325                timer_expires   = icsk->icsk_timeout;
2326        } else if (timer_pending(&sk->sk_timer)) {
2327                timer_active    = 2;
2328                timer_expires   = sk->sk_timer.expires;
2329        } else {
2330                timer_active    = 0;
2331                timer_expires = jiffies;
2332        }
2333
2334        if (sk->sk_state == TCP_LISTEN)
2335                rx_queue = sk->sk_ack_backlog;
2336        else
2337                /*
2338                 * because we dont lock socket, we might find a transient negative value
2339                 */
2340                rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2341
2342        seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2343                        "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
2344                i, src, srcp, dest, destp, sk->sk_state,
2345                tp->write_seq - tp->snd_una,
2346                rx_queue,
2347                timer_active,
2348                jiffies_to_clock_t(timer_expires - jiffies),
2349                icsk->icsk_retransmits,
2350                sock_i_uid(sk),
2351                icsk->icsk_probes_out,
2352                sock_i_ino(sk),
2353                atomic_read(&sk->sk_refcnt), sk,
2354                jiffies_to_clock_t(icsk->icsk_rto),
2355                jiffies_to_clock_t(icsk->icsk_ack.ato),
2356                (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2357                tp->snd_cwnd,
2358                tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2359                len);
2360}
2361
2362static void get_timewait4_sock(struct inet_timewait_sock *tw,
2363                               struct seq_file *f, int i, int *len)
2364{
2365        __be32 dest, src;
2366        __u16 destp, srcp;
2367        int ttd = tw->tw_ttd - jiffies;
2368
2369        if (ttd < 0)
2370                ttd = 0;
2371
2372        dest  = tw->tw_daddr;
2373        src   = tw->tw_rcv_saddr;
2374        destp = ntohs(tw->tw_dport);
2375        srcp  = ntohs(tw->tw_sport);
2376
2377        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2378                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
2379                i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2380                3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2381                atomic_read(&tw->tw_refcnt), tw, len);
2382}
2383
2384#define TMPSZ 150
2385
2386static int tcp4_seq_show(struct seq_file *seq, void *v)
2387{
2388        struct tcp_iter_state *st;
2389        int len;
2390
2391        if (v == SEQ_START_TOKEN) {
2392                seq_printf(seq, "%-*s\n", TMPSZ - 1,
2393                           "  sl  local_address rem_address   st tx_queue "
2394                           "rx_queue tr tm->when retrnsmt   uid  timeout "
2395                           "inode");
2396                goto out;
2397        }
2398        st = seq->private;
2399
2400        switch (st->state) {
2401        case TCP_SEQ_STATE_LISTENING:
2402        case TCP_SEQ_STATE_ESTABLISHED:
2403                get_tcp4_sock(v, seq, st->num, &len);
2404                break;
2405        case TCP_SEQ_STATE_OPENREQ:
2406                get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2407                break;
2408        case TCP_SEQ_STATE_TIME_WAIT:
2409                get_timewait4_sock(v, seq, st->num, &len);
2410                break;
2411        }
2412        seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2413out:
2414        return 0;
2415}
2416
/*
 * Description of the IPv4 "tcp" /proc file.  Only the owner and the show
 * callback are set here; tcp_proc_register() fills in the remaining file
 * and seq operations.
 */
2417static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2418        .name           = "tcp",
2419        .family         = AF_INET,
2420        .seq_fops       = {
2421                .owner          = THIS_MODULE,
2422        },
2423        .seq_ops        = {
2424                .show           = tcp4_seq_show,
2425        },
2426};
2427
/* Per-namespace init hook: register the IPv4 "tcp" /proc file in @net. */
2428static int tcp4_proc_init_net(struct net *net)
2429{
2430        return tcp_proc_register(net, &tcp4_seq_afinfo);
2431}
2432
/* Per-namespace exit hook: remove the IPv4 "tcp" /proc file from @net. */
2433static void tcp4_proc_exit_net(struct net *net)
2434{
2435        tcp_proc_unregister(net, &tcp4_seq_afinfo);
2436}
2437
/* Per-namespace hooks that create and remove the IPv4 "tcp" /proc file. */
2438static struct pernet_operations tcp4_net_ops = {
2439        .init = tcp4_proc_init_net,
2440        .exit = tcp4_proc_exit_net,
2441};
2442
/*
 * Register the per-namespace /proc hooks.
 * Returns the result of register_pernet_subsys().
 */
2443int __init tcp4_proc_init(void)
2444{
2445        return register_pernet_subsys(&tcp4_net_ops);
2446}
2447
/* Unregister the per-namespace /proc hooks. */
2448void tcp4_proc_exit(void)
2449{
2450        unregister_pernet_subsys(&tcp4_net_ops);
2451}
2452#endif /* CONFIG_PROC_FS */
2453
2454struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2455{
2456        struct iphdr *iph = skb_gro_network_header(skb);
2457
2458        switch (skb->ip_summed) {
2459        case CHECKSUM_COMPLETE:
2460                if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2461                                  skb->csum)) {
2462                        skb->ip_summed = CHECKSUM_UNNECESSARY;
2463                        break;
2464                }
2465
2466                /* fall through */
2467        case CHECKSUM_NONE:
2468                NAPI_GRO_CB(skb)->flush = 1;
2469                return NULL;
2470        }
2471
2472        return tcp_gro_receive(head, skb);
2473}
2474EXPORT_SYMBOL(tcp4_gro_receive);
2475
2476int tcp4_gro_complete(struct sk_buff *skb)
2477{
2478        struct iphdr *iph = ip_hdr(skb);
2479        struct tcphdr *th = tcp_hdr(skb);
2480
2481        th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2482                                  iph->saddr, iph->daddr, 0);
2483        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2484
2485        return tcp_gro_complete(skb);
2486}
2487EXPORT_SYMBOL(tcp4_gro_complete);
2488
/*
 * Protocol descriptor for TCP over IPv4.  It binds the generic socket
 * operations to their TCP implementations and points at the shared TCP
 * accounting variables, sysctl tables and hash table.
 * The compat_* socket-option hooks exist only with CONFIG_COMPAT.
 */
2489struct proto tcp_prot = {
2490        .name                   = "TCP",
2491        .owner                  = THIS_MODULE,
2492        .close                  = tcp_close,
2493        .connect                = tcp_v4_connect,
2494        .disconnect             = tcp_disconnect,
2495        .accept                 = inet_csk_accept,
2496        .ioctl                  = tcp_ioctl,
2497        .init                   = tcp_v4_init_sock,
2498        .destroy                = tcp_v4_destroy_sock,
2499        .shutdown               = tcp_shutdown,
2500        .setsockopt             = tcp_setsockopt,
2501        .getsockopt             = tcp_getsockopt,
2502        .recvmsg                = tcp_recvmsg,
            /* Backlogged segments are replayed through tcp_v4_do_rcv(). */
2503        .backlog_rcv            = tcp_v4_do_rcv,
2504        .hash                   = inet_hash,
2505        .unhash                 = inet_unhash,
2506        .get_port               = inet_csk_get_port,
2507        .enter_memory_pressure  = tcp_enter_memory_pressure,
2508        .sockets_allocated      = &tcp_sockets_allocated,
2509        .orphan_count           = &tcp_orphan_count,
2510        .memory_allocated       = &tcp_memory_allocated,
2511        .memory_pressure        = &tcp_memory_pressure,
2512        .sysctl_mem             = sysctl_tcp_mem,
2513        .sysctl_wmem            = sysctl_tcp_wmem,
2514        .sysctl_rmem            = sysctl_tcp_rmem,
2515        .max_header             = MAX_TCP_HEADER,
2516        .obj_size               = sizeof(struct tcp_sock),
2517        .slab_flags             = SLAB_DESTROY_BY_RCU,
2518        .twsk_prot              = &tcp_timewait_sock_ops,
2519        .rsk_prot               = &tcp_request_sock_ops,
2520        .h.hashinfo             = &tcp_hashinfo,
2521#ifdef CONFIG_COMPAT
2522        .compat_setsockopt      = compat_tcp_setsockopt,
2523        .compat_getsockopt      = compat_tcp_getsockopt,
2524#endif
2525};
2526
2527
2528static int __net_init tcp_sk_init(struct net *net)
2529{
2530        return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2531                                    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2532}
2533
2534static void __net_exit tcp_sk_exit(struct net *net)
2535{
2536        inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2537}
2538
2539static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2540{
2541        inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2542}
2543
2544static struct pernet_operations __net_initdata tcp_sk_ops = {
2545       .init       = tcp_sk_init,
2546       .exit       = tcp_sk_exit,
2547       .exit_batch = tcp_sk_exit_batch,
2548};
2549
2550void __init tcp_v4_init(void)
2551{
2552        inet_hashinfo_init(&tcp_hashinfo);
2553        if (register_pernet_subsys(&tcp_sk_ops))
2554                panic("Failed to create the TCP control socket.\n");
2555}
2556
/* Symbols from this file exported for use outside it. */
EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_remember_stamp);
EXPORT_SYMBOL(tcp_v4_send_check);
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

/* The proc registration helpers are only exported with CONFIG_PROC_FS. */
#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(tcp_proc_register);
EXPORT_SYMBOL(tcp_proc_unregister);
#endif

EXPORT_SYMBOL(sysctl_tcp_low_latency);
2572
2573
/* Listing footer: lxr.linux.no is kindly hosted by Redpill Linpro AS, a provider of Linux consulting and operations services since 1995. */