linux/net/ipv4/tcp_ipv4.c
<<
>>
Prefs
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              Implementation of the Transmission Control Protocol(TCP).
   7 *
   8 *              IPv4 specific functions
   9 *
  10 *
  11 *              code split from:
  12 *              linux/ipv4/tcp.c
  13 *              linux/ipv4/tcp_input.c
  14 *              linux/ipv4/tcp_output.c
  15 *
  16 *              See tcp.c for author information
  17 *
  18 *      This program is free software; you can redistribute it and/or
  19 *      modify it under the terms of the GNU General Public License
  20 *      as published by the Free Software Foundation; either version
  21 *      2 of the License, or (at your option) any later version.
  22 */
  23
  24/*
  25 * Changes:
  26 *              David S. Miller :       New socket lookup architecture.
  27 *                                      This code is dedicated to John Dyson.
  28 *              David S. Miller :       Change semantics of established hash,
  29 *                                      half is devoted to TIME_WAIT sockets
  30 *                                      and the rest go in the other half.
  31 *              Andi Kleen :            Add support for syncookies and fixed
  32 *                                      some bugs: ip options weren't passed to
  33 *                                      the TCP layer, missed a check for an
  34 *                                      ACK bit.
  35 *              Andi Kleen :            Implemented fast path mtu discovery.
  36 *                                      Fixed many serious bugs in the
  37 *                                      request_sock handling and moved
  38 *                                      most of it into the af independent code.
  39 *                                      Added tail drop and some other bugfixes.
  40 *                                      Added new listen semantics.
  41 *              Mike McLagan    :       Routing by source
  42 *      Juan Jose Ciarlante:            ip_dynaddr bits
  43 *              Andi Kleen:             various fixes.
  44 *      Vitaly E. Lavrov        :       Transparent proxy revived after year
  45 *                                      coma.
  46 *      Andi Kleen              :       Fix new listen.
  47 *      Andi Kleen              :       Fix accept error reporting.
  48 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
  49 *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
  50 *                                      a single port at the same time.
  51 */
  52
  53
  54#include <linux/bottom_half.h>
  55#include <linux/types.h>
  56#include <linux/fcntl.h>
  57#include <linux/module.h>
  58#include <linux/random.h>
  59#include <linux/cache.h>
  60#include <linux/jhash.h>
  61#include <linux/init.h>
  62#include <linux/times.h>
  63#include <linux/slab.h>
  64
  65#include <net/net_namespace.h>
  66#include <net/icmp.h>
  67#include <net/inet_hashtables.h>
  68#include <net/tcp.h>
  69#include <net/transp_v6.h>
  70#include <net/ipv6.h>
  71#include <net/inet_common.h>
  72#include <net/timewait_sock.h>
  73#include <net/xfrm.h>
  74#include <net/netdma.h>
  75#include <net/secure_seq.h>
  76
  77#include <linux/inet.h>
  78#include <linux/ipv6.h>
  79#include <linux/stddef.h>
  80#include <linux/proc_fs.h>
  81#include <linux/seq_file.h>
  82
  83#include <linux/crypto.h>
  84#include <linux/scatterlist.h>
  85
  86int sysctl_tcp_tw_reuse __read_mostly;
  87int sysctl_tcp_low_latency __read_mostly;
  88EXPORT_SYMBOL(sysctl_tcp_low_latency);
  89
  90
  91#ifdef CONFIG_TCP_MD5SIG
  92static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
  93                                                   __be32 addr);
  94static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
  95                               __be32 daddr, __be32 saddr, const struct tcphdr *th);
  96#else
  97static inline
  98struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
  99{
 100        return NULL;
 101}
 102#endif
 103
 104struct inet_hashinfo tcp_hashinfo;
 105EXPORT_SYMBOL(tcp_hashinfo);
 106
 107static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
 108{
 109        return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
 110                                          ip_hdr(skb)->saddr,
 111                                          tcp_hdr(skb)->dest,
 112                                          tcp_hdr(skb)->source);
 113}
 114
 115int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 116{
 117        const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
 118        struct tcp_sock *tp = tcp_sk(sk);
 119
 120        /* With PAWS, it is safe from the viewpoint
 121           of data integrity. Even without PAWS it is safe provided sequence
 122           spaces do not overlap i.e. at data rates <= 80Mbit/sec.
 123
 124           Actually, the idea is close to VJ's one, only timestamp cache is
 125           held not per host, but per port pair and TW bucket is used as state
 126           holder.
 127
 128           If TW bucket has been already destroyed we fall back to VJ's scheme
 129           and use initial timestamp retrieved from peer table.
 130         */
 131        if (tcptw->tw_ts_recent_stamp &&
 132            (twp == NULL || (sysctl_tcp_tw_reuse &&
 133                             get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
 134                tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 135                if (tp->write_seq == 0)
 136                        tp->write_seq = 1;
 137                tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
 138                tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
 139                sock_hold(sktw);
 140                return 1;
 141        }
 142
 143        return 0;
 144}
 145EXPORT_SYMBOL_GPL(tcp_twsk_unique);
 146
 147/* This will initiate an outgoing connection. */
 148int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 149{
 150        struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
 151        struct inet_sock *inet = inet_sk(sk);
 152        struct tcp_sock *tp = tcp_sk(sk);
 153        __be16 orig_sport, orig_dport;
 154        __be32 daddr, nexthop;
 155        struct flowi4 *fl4;
 156        struct rtable *rt;
 157        int err;
 158        struct ip_options_rcu *inet_opt;
 159
 160        if (addr_len < sizeof(struct sockaddr_in))
 161                return -EINVAL;
 162
 163        if (usin->sin_family != AF_INET)
 164                return -EAFNOSUPPORT;
 165
 166        nexthop = daddr = usin->sin_addr.s_addr;
 167        inet_opt = rcu_dereference_protected(inet->inet_opt,
 168                                             sock_owned_by_user(sk));
 169        if (inet_opt && inet_opt->opt.srr) {
 170                if (!daddr)
 171                        return -EINVAL;
 172                nexthop = inet_opt->opt.faddr;
 173        }
 174
 175        orig_sport = inet->inet_sport;
 176        orig_dport = usin->sin_port;
 177        fl4 = &inet->cork.fl.u.ip4;
 178        rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
 179                              RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 180                              IPPROTO_TCP,
 181                              orig_sport, orig_dport, sk, true);
 182        if (IS_ERR(rt)) {
 183                err = PTR_ERR(rt);
 184                if (err == -ENETUNREACH)
 185                        IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 186                return err;
 187        }
 188
 189        if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
 190                ip_rt_put(rt);
 191                return -ENETUNREACH;
 192        }
 193
 194        if (!inet_opt || !inet_opt->opt.srr)
 195                daddr = fl4->daddr;
 196
 197        if (!inet->inet_saddr)
 198                inet->inet_saddr = fl4->saddr;
 199        inet->inet_rcv_saddr = inet->inet_saddr;
 200
 201        if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
 202                /* Reset inherited state */
 203                tp->rx_opt.ts_recent       = 0;
 204                tp->rx_opt.ts_recent_stamp = 0;
 205                tp->write_seq              = 0;
 206        }
 207
 208        if (tcp_death_row.sysctl_tw_recycle &&
 209            !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
 210                struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
 211                /*
 212                 * VJ's idea. We save last timestamp seen from
 213                 * the destination in peer table, when entering state
 214                 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
 215                 * when trying new connection.
 216                 */
 217                if (peer) {
 218                        inet_peer_refcheck(peer);
 219                        if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
 220                                tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
 221                                tp->rx_opt.ts_recent = peer->tcp_ts;
 222                        }
 223                }
 224        }
 225
 226        inet->inet_dport = usin->sin_port;
 227        inet->inet_daddr = daddr;
 228
 229        inet_csk(sk)->icsk_ext_hdr_len = 0;
 230        if (inet_opt)
 231                inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
 232
 233        tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
 234
 235        /* Socket identity is still unknown (sport may be zero).
 236         * However we set state to SYN-SENT and not releasing socket
 237         * lock select source port, enter ourselves into the hash tables and
 238         * complete initialization after this.
 239         */
 240        tcp_set_state(sk, TCP_SYN_SENT);
 241        err = inet_hash_connect(&tcp_death_row, sk);
 242        if (err)
 243                goto failure;
 244
 245        rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
 246                               inet->inet_sport, inet->inet_dport, sk);
 247        if (IS_ERR(rt)) {
 248                err = PTR_ERR(rt);
 249                rt = NULL;
 250                goto failure;
 251        }
 252        /* OK, now commit destination to socket.  */
 253        sk->sk_gso_type = SKB_GSO_TCPV4;
 254        sk_setup_caps(sk, &rt->dst);
 255
 256        if (!tp->write_seq)
 257                tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
 258                                                           inet->inet_daddr,
 259                                                           inet->inet_sport,
 260                                                           usin->sin_port);
 261
 262        inet->inet_id = tp->write_seq ^ jiffies;
 263
 264        err = tcp_connect(sk);
 265        rt = NULL;
 266        if (err)
 267                goto failure;
 268
 269        return 0;
 270
 271failure:
 272        /*
 273         * This unhashes the socket and releases the local port,
 274         * if necessary.
 275         */
 276        tcp_set_state(sk, TCP_CLOSE);
 277        ip_rt_put(rt);
 278        sk->sk_route_caps = 0;
 279        inet->inet_dport = 0;
 280        return err;
 281}
 282EXPORT_SYMBOL(tcp_v4_connect);
 283
 284/*
 285 * This routine does path mtu discovery as defined in RFC1191.
 286 */
 287static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
 288{
 289        struct dst_entry *dst;
 290        struct inet_sock *inet = inet_sk(sk);
 291
 292        /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
 293         * send out by Linux are always <576bytes so they should go through
 294         * unfragmented).
 295         */
 296        if (sk->sk_state == TCP_LISTEN)
 297                return;
 298
 299        /* We don't check in the destentry if pmtu discovery is forbidden
 300         * on this route. We just assume that no packet_to_big packets
 301         * are send back when pmtu discovery is not active.
 302         * There is a small race when the user changes this flag in the
 303         * route, but I think that's acceptable.
 304         */
 305        if ((dst = __sk_dst_check(sk, 0)) == NULL)
 306                return;
 307
 308        dst->ops->update_pmtu(dst, mtu);
 309
 310        /* Something is about to be wrong... Remember soft error
 311         * for the case, if this connection will not able to recover.
 312         */
 313        if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
 314                sk->sk_err_soft = EMSGSIZE;
 315
 316        mtu = dst_mtu(dst);
 317
 318        if (inet->pmtudisc != IP_PMTUDISC_DONT &&
 319            inet_csk(sk)->icsk_pmtu_cookie > mtu) {
 320                tcp_sync_mss(sk, mtu);
 321
 322                /* Resend the TCP packet because it's
 323                 * clear that the old packet has been
 324                 * dropped. This is the new "fast" path mtu
 325                 * discovery.
 326                 */
 327                tcp_simple_retransmit(sk);
 328        } /* else let the usual retransmit timer handle it */
 329}
 330
 331/*
 332 * This routine is called by the ICMP module when it gets some
 333 * sort of error condition.  If err < 0 then the socket should
 334 * be closed and the error returned to the user.  If err > 0
 335 * it's just the icmp type << 8 | icmp code.  After adjustment
 336 * header points to the first 8 bytes of the tcp header.  We need
 337 * to find the appropriate port.
 338 *
 339 * The locking strategy used here is very "optimistic". When
 340 * someone else accesses the socket the ICMP is just dropped
 341 * and for some paths there is no check at all.
 342 * A more general error queue to queue errors for later handling
 343 * is probably better.
 344 *
 345 */
 346
 347void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 348{
 349        const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
 350        struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
 351        struct inet_connection_sock *icsk;
 352        struct tcp_sock *tp;
 353        struct inet_sock *inet;
 354        const int type = icmp_hdr(icmp_skb)->type;
 355        const int code = icmp_hdr(icmp_skb)->code;
 356        struct sock *sk;
 357        struct sk_buff *skb;
 358        __u32 seq;
 359        __u32 remaining;
 360        int err;
 361        struct net *net = dev_net(icmp_skb->dev);
 362
 363        if (icmp_skb->len < (iph->ihl << 2) + 8) {
 364                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 365                return;
 366        }
 367
 368        sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
 369                        iph->saddr, th->source, inet_iif(icmp_skb));
 370        if (!sk) {
 371                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 372                return;
 373        }
 374        if (sk->sk_state == TCP_TIME_WAIT) {
 375                inet_twsk_put(inet_twsk(sk));
 376                return;
 377        }
 378
 379        bh_lock_sock(sk);
 380        /* If too many ICMPs get dropped on busy
 381         * servers this needs to be solved differently.
 382         */
 383        if (sock_owned_by_user(sk))
 384                NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
 385
 386        if (sk->sk_state == TCP_CLOSE)
 387                goto out;
 388
 389        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
 390                NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
 391                goto out;
 392        }
 393
 394        icsk = inet_csk(sk);
 395        tp = tcp_sk(sk);
 396        seq = ntohl(th->seq);
 397        if (sk->sk_state != TCP_LISTEN &&
 398            !between(seq, tp->snd_una, tp->snd_nxt)) {
 399                NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 400                goto out;
 401        }
 402
 403        switch (type) {
 404        case ICMP_SOURCE_QUENCH:
 405                /* Just silently ignore these. */
 406                goto out;
 407        case ICMP_PARAMETERPROB:
 408                err = EPROTO;
 409                break;
 410        case ICMP_DEST_UNREACH:
 411                if (code > NR_ICMP_UNREACH)
 412                        goto out;
 413
 414                if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
 415                        if (!sock_owned_by_user(sk))
 416                                do_pmtu_discovery(sk, iph, info);
 417                        goto out;
 418                }
 419
 420                err = icmp_err_convert[code].errno;
 421                /* check if icmp_skb allows revert of backoff
 422                 * (see draft-zimmermann-tcp-lcd) */
 423                if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
 424                        break;
 425                if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
 426                    !icsk->icsk_backoff)
 427                        break;
 428
 429                if (sock_owned_by_user(sk))
 430                        break;
 431
 432                icsk->icsk_backoff--;
 433                inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
 434                        TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
 435                tcp_bound_rto(sk);
 436
 437                skb = tcp_write_queue_head(sk);
 438                BUG_ON(!skb);
 439
 440                remaining = icsk->icsk_rto - min(icsk->icsk_rto,
 441                                tcp_time_stamp - TCP_SKB_CB(skb)->when);
 442
 443                if (remaining) {
 444                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 445                                                  remaining, TCP_RTO_MAX);
 446                } else {
 447                        /* RTO revert clocked out retransmission.
 448                         * Will retransmit now */
 449                        tcp_retransmit_timer(sk);
 450                }
 451
 452                break;
 453        case ICMP_TIME_EXCEEDED:
 454                err = EHOSTUNREACH;
 455                break;
 456        default:
 457                goto out;
 458        }
 459
 460        switch (sk->sk_state) {
 461                struct request_sock *req, **prev;
 462        case TCP_LISTEN:
 463                if (sock_owned_by_user(sk))
 464                        goto out;
 465
 466                req = inet_csk_search_req(sk, &prev, th->dest,
 467                                          iph->daddr, iph->saddr);
 468                if (!req)
 469                        goto out;
 470
 471                /* ICMPs are not backlogged, hence we cannot get
 472                   an established socket here.
 473                 */
 474                WARN_ON(req->sk);
 475
 476                if (seq != tcp_rsk(req)->snt_isn) {
 477                        NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 478                        goto out;
 479                }
 480
 481                /*
 482                 * Still in SYN_RECV, just remove it silently.
 483                 * There is no good way to pass the error to the newly
 484                 * created socket, and POSIX does not want network
 485                 * errors returned from accept().
 486                 */
 487                inet_csk_reqsk_queue_drop(sk, req, prev);
 488                goto out;
 489
 490        case TCP_SYN_SENT:
 491        case TCP_SYN_RECV:  /* Cannot happen.
 492                               It can f.e. if SYNs crossed.
 493                             */
 494                if (!sock_owned_by_user(sk)) {
 495                        sk->sk_err = err;
 496
 497                        sk->sk_error_report(sk);
 498
 499                        tcp_done(sk);
 500                } else {
 501                        sk->sk_err_soft = err;
 502                }
 503                goto out;
 504        }
 505
 506        /* If we've already connected we will keep trying
 507         * until we time out, or the user gives up.
 508         *
 509         * rfc1122 4.2.3.9 allows to consider as hard errors
 510         * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
 511         * but it is obsoleted by pmtu discovery).
 512         *
 513         * Note, that in modern internet, where routing is unreliable
 514         * and in each dark corner broken firewalls sit, sending random
 515         * errors ordered by their masters even this two messages finally lose
 516         * their original sense (even Linux sends invalid PORT_UNREACHs)
 517         *
 518         * Now we are in compliance with RFCs.
 519         *                                                      --ANK (980905)
 520         */
 521
 522        inet = inet_sk(sk);
 523        if (!sock_owned_by_user(sk) && inet->recverr) {
 524                sk->sk_err = err;
 525                sk->sk_error_report(sk);
 526        } else  { /* Only an error on timeout */
 527                sk->sk_err_soft = err;
 528        }
 529
 530out:
 531        bh_unlock_sock(sk);
 532        sock_put(sk);
 533}
 534
 535static void __tcp_v4_send_check(struct sk_buff *skb,
 536                                __be32 saddr, __be32 daddr)
 537{
 538        struct tcphdr *th = tcp_hdr(skb);
 539
 540        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 541                th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
 542                skb->csum_start = skb_transport_header(skb) - skb->head;
 543                skb->csum_offset = offsetof(struct tcphdr, check);
 544        } else {
 545                th->check = tcp_v4_check(skb->len, saddr, daddr,
 546                                         csum_partial(th,
 547                                                      th->doff << 2,
 548                                                      skb->csum));
 549        }
 550}
 551
 552/* This routine computes an IPv4 TCP checksum. */
 553void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
 554{
 555        const struct inet_sock *inet = inet_sk(sk);
 556
 557        __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
 558}
 559EXPORT_SYMBOL(tcp_v4_send_check);
 560
 561int tcp_v4_gso_send_check(struct sk_buff *skb)
 562{
 563        const struct iphdr *iph;
 564        struct tcphdr *th;
 565
 566        if (!pskb_may_pull(skb, sizeof(*th)))
 567                return -EINVAL;
 568
 569        iph = ip_hdr(skb);
 570        th = tcp_hdr(skb);
 571
 572        th->check = 0;
 573        skb->ip_summed = CHECKSUM_PARTIAL;
 574        __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
 575        return 0;
 576}
 577
 578/*
 579 *      This routine will send an RST to the other tcp.
 580 *
 581 *      Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 582 *                    for reset.
 583 *      Answer: if a packet caused RST, it is not for a socket
 584 *              existing in our system, if it is matched to a socket,
 585 *              it is just duplicate segment or bug in other side's TCP.
 586 *              So that we build reply only basing on parameters
 587 *              arrived with segment.
 588 *      Exception: precedence violation. We do not implement it in any case.
 589 */
 590
 591static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 592{
 593        const struct tcphdr *th = tcp_hdr(skb);
 594        struct {
 595                struct tcphdr th;
 596#ifdef CONFIG_TCP_MD5SIG
 597                __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
 598#endif
 599        } rep;
 600        struct ip_reply_arg arg;
 601#ifdef CONFIG_TCP_MD5SIG
 602        struct tcp_md5sig_key *key;
 603#endif
 604        struct net *net;
 605
 606        /* Never send a reset in response to a reset. */
 607        if (th->rst)
 608                return;
 609
 610        if (skb_rtable(skb)->rt_type != RTN_LOCAL)
 611                return;
 612
 613        /* Swap the send and the receive. */
 614        memset(&rep, 0, sizeof(rep));
 615        rep.th.dest   = th->source;
 616        rep.th.source = th->dest;
 617        rep.th.doff   = sizeof(struct tcphdr) / 4;
 618        rep.th.rst    = 1;
 619
 620        if (th->ack) {
 621                rep.th.seq = th->ack_seq;
 622        } else {
 623                rep.th.ack = 1;
 624                rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
 625                                       skb->len - (th->doff << 2));
 626        }
 627
 628        memset(&arg, 0, sizeof(arg));
 629        arg.iov[0].iov_base = (unsigned char *)&rep;
 630        arg.iov[0].iov_len  = sizeof(rep.th);
 631
 632#ifdef CONFIG_TCP_MD5SIG
 633        key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL;
 634        if (key) {
 635                rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 636                                   (TCPOPT_NOP << 16) |
 637                                   (TCPOPT_MD5SIG << 8) |
 638                                   TCPOLEN_MD5SIG);
 639                /* Update length and the length the header thinks exists */
 640                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 641                rep.th.doff = arg.iov[0].iov_len / 4;
 642
 643                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
 644                                     key, ip_hdr(skb)->saddr,
 645                                     ip_hdr(skb)->daddr, &rep.th);
 646        }
 647#endif
 648        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 649                                      ip_hdr(skb)->saddr, /* XXX */
 650                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 651        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 652        arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
 653
 654        net = dev_net(skb_dst(skb)->dev);
 655        arg.tos = ip_hdr(skb)->tos;
 656        ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
 657                      &arg, arg.iov[0].iov_len);
 658
 659        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 660        TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
 661}
 662
 663/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
 664   outside socket context is ugly, certainly. What can I do?
 665 */
 666
 667static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 668                            u32 win, u32 ts, int oif,
 669                            struct tcp_md5sig_key *key,
 670                            int reply_flags, u8 tos)
 671{
 672        const struct tcphdr *th = tcp_hdr(skb);
 673        struct {
 674                struct tcphdr th;
 675                __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
 676#ifdef CONFIG_TCP_MD5SIG
 677                           + (TCPOLEN_MD5SIG_ALIGNED >> 2)
 678#endif
 679                        ];
 680        } rep;
 681        struct ip_reply_arg arg;
 682        struct net *net = dev_net(skb_dst(skb)->dev);
 683
 684        memset(&rep.th, 0, sizeof(struct tcphdr));
 685        memset(&arg, 0, sizeof(arg));
 686
 687        arg.iov[0].iov_base = (unsigned char *)&rep;
 688        arg.iov[0].iov_len  = sizeof(rep.th);
 689        if (ts) {
 690                rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 691                                   (TCPOPT_TIMESTAMP << 8) |
 692                                   TCPOLEN_TIMESTAMP);
 693                rep.opt[1] = htonl(tcp_time_stamp);
 694                rep.opt[2] = htonl(ts);
 695                arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
 696        }
 697
 698        /* Swap the send and the receive. */
 699        rep.th.dest    = th->source;
 700        rep.th.source  = th->dest;
 701        rep.th.doff    = arg.iov[0].iov_len / 4;
 702        rep.th.seq     = htonl(seq);
 703        rep.th.ack_seq = htonl(ack);
 704        rep.th.ack     = 1;
 705        rep.th.window  = htons(win);
 706
 707#ifdef CONFIG_TCP_MD5SIG
 708        if (key) {
 709                int offset = (ts) ? 3 : 0;
 710
 711                rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
 712                                          (TCPOPT_NOP << 16) |
 713                                          (TCPOPT_MD5SIG << 8) |
 714                                          TCPOLEN_MD5SIG);
 715                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 716                rep.th.doff = arg.iov[0].iov_len/4;
 717
 718                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
 719                                    key, ip_hdr(skb)->saddr,
 720                                    ip_hdr(skb)->daddr, &rep.th);
 721        }
 722#endif
 723        arg.flags = reply_flags;
 724        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 725                                      ip_hdr(skb)->saddr, /* XXX */
 726                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 727        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 728        if (oif)
 729                arg.bound_dev_if = oif;
 730        arg.tos = tos;
 731        ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
 732                      &arg, arg.iov[0].iov_len);
 733
 734        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 735}
 736
 737static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 738{
 739        struct inet_timewait_sock *tw = inet_twsk(sk);
 740        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 741
 742        tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 743                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 744                        tcptw->tw_ts_recent,
 745                        tw->tw_bound_dev_if,
 746                        tcp_twsk_md5_key(tcptw),
 747                        tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
 748                        tw->tw_tos
 749                        );
 750
 751        inet_twsk_put(tw);
 752}
 753
 754static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 755                                  struct request_sock *req)
 756{
 757        tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
 758                        tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
 759                        req->ts_recent,
 760                        0,
 761                        tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
 762                        inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
 763                        ip_hdr(skb)->tos);
 764}
 765
 766/*
 767 *      Send a SYN-ACK after having received a SYN.
 768 *      This still operates on a request_sock only, not on a big
 769 *      socket.
 770 */
 771static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 772                              struct request_sock *req,
 773                              struct request_values *rvp)
 774{
 775        const struct inet_request_sock *ireq = inet_rsk(req);
 776        struct flowi4 fl4;
 777        int err = -1;
 778        struct sk_buff * skb;
 779
 780        /* First, grab a route. */
 781        if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 782                return -1;
 783
 784        skb = tcp_make_synack(sk, dst, req, rvp);
 785
 786        if (skb) {
 787                __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
 788
 789                err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
 790                                            ireq->rmt_addr,
 791                                            ireq->opt);
 792                err = net_xmit_eval(err);
 793        }
 794
 795        dst_release(dst);
 796        return err;
 797}
 798
 799static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
 800                              struct request_values *rvp)
 801{
 802        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
 803        return tcp_v4_send_synack(sk, NULL, req, rvp);
 804}
 805
 806/*
 807 *      IPv4 request_sock destructor.
 808 */
 809static void tcp_v4_reqsk_destructor(struct request_sock *req)
 810{
 811        kfree(inet_rsk(req)->opt);
 812}
 813
 814/*
 815 * Return 1 if a syncookie should be sent
 816 */
 817int tcp_syn_flood_action(struct sock *sk,
 818                         const struct sk_buff *skb,
 819                         const char *proto)
 820{
 821        const char *msg = "Dropping request";
 822        int want_cookie = 0;
 823        struct listen_sock *lopt;
 824
 825
 826
 827#ifdef CONFIG_SYN_COOKIES
 828        if (sysctl_tcp_syncookies) {
 829                msg = "Sending cookies";
 830                want_cookie = 1;
 831                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
 832        } else
 833#endif
 834                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
 835
 836        lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
 837        if (!lopt->synflood_warned) {
 838                lopt->synflood_warned = 1;
 839                pr_info("%s: Possible SYN flooding on port %d. %s. "
 840                        " Check SNMP counters.\n",
 841                        proto, ntohs(tcp_hdr(skb)->dest), msg);
 842        }
 843        return want_cookie;
 844}
 845EXPORT_SYMBOL(tcp_syn_flood_action);
 846
 847/*
 848 * Save and compile IPv4 options into the request_sock if needed.
 849 */
 850static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
 851                                                  struct sk_buff *skb)
 852{
 853        const struct ip_options *opt = &(IPCB(skb)->opt);
 854        struct ip_options_rcu *dopt = NULL;
 855
 856        if (opt && opt->optlen) {
 857                int opt_size = sizeof(*dopt) + opt->optlen;
 858
 859                dopt = kmalloc(opt_size, GFP_ATOMIC);
 860                if (dopt) {
 861                        if (ip_options_echo(&dopt->opt, skb)) {
 862                                kfree(dopt);
 863                                dopt = NULL;
 864                        }
 865                }
 866        }
 867        return dopt;
 868}
 869
 870#ifdef CONFIG_TCP_MD5SIG
 871/*
 872 * RFC2385 MD5 checksumming requires a mapping of
 873 * IP address->MD5 Key.
 874 * We need to maintain these in the sk structure.
 875 */
 876
 877/* Find the Key structure for an address.  */
 878static struct tcp_md5sig_key *
 879                        tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
 880{
 881        struct tcp_sock *tp = tcp_sk(sk);
 882        int i;
 883
 884        if (!tp->md5sig_info || !tp->md5sig_info->entries4)
 885                return NULL;
 886        for (i = 0; i < tp->md5sig_info->entries4; i++) {
 887                if (tp->md5sig_info->keys4[i].addr == addr)
 888                        return &tp->md5sig_info->keys4[i].base;
 889        }
 890        return NULL;
 891}
 892
 893struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
 894                                         struct sock *addr_sk)
 895{
 896        return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
 897}
 898EXPORT_SYMBOL(tcp_v4_md5_lookup);
 899
 900static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
 901                                                      struct request_sock *req)
 902{
 903        return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
 904}
 905
 906/* This can be called on a newly created socket, from other files */
 907int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
 908                      u8 *newkey, u8 newkeylen)
 909{
 910        /* Add Key to the list */
 911        struct tcp_md5sig_key *key;
 912        struct tcp_sock *tp = tcp_sk(sk);
 913        struct tcp4_md5sig_key *keys;
 914
 915        key = tcp_v4_md5_do_lookup(sk, addr);
 916        if (key) {
 917                /* Pre-existing entry - just update that one. */
 918                kfree(key->key);
 919                key->key = newkey;
 920                key->keylen = newkeylen;
 921        } else {
 922                struct tcp_md5sig_info *md5sig;
 923
 924                if (!tp->md5sig_info) {
 925                        tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
 926                                                  GFP_ATOMIC);
 927                        if (!tp->md5sig_info) {
 928                                kfree(newkey);
 929                                return -ENOMEM;
 930                        }
 931                        sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 932                }
 933
 934                md5sig = tp->md5sig_info;
 935                if (md5sig->entries4 == 0 &&
 936                    tcp_alloc_md5sig_pool(sk) == NULL) {
 937                        kfree(newkey);
 938                        return -ENOMEM;
 939                }
 940
 941                if (md5sig->alloced4 == md5sig->entries4) {
 942                        keys = kmalloc((sizeof(*keys) *
 943                                        (md5sig->entries4 + 1)), GFP_ATOMIC);
 944                        if (!keys) {
 945                                kfree(newkey);
 946                                if (md5sig->entries4 == 0)
 947                                        tcp_free_md5sig_pool();
 948                                return -ENOMEM;
 949                        }
 950
 951                        if (md5sig->entries4)
 952                                memcpy(keys, md5sig->keys4,
 953                                       sizeof(*keys) * md5sig->entries4);
 954
 955                        /* Free old key list, and reference new one */
 956                        kfree(md5sig->keys4);
 957                        md5sig->keys4 = keys;
 958                        md5sig->alloced4++;
 959                }
 960                md5sig->entries4++;
 961                md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
 962                md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
 963                md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
 964        }
 965        return 0;
 966}
 967EXPORT_SYMBOL(tcp_v4_md5_do_add);
 968
 969static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
 970                               u8 *newkey, u8 newkeylen)
 971{
 972        return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
 973                                 newkey, newkeylen);
 974}
 975
 976int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
 977{
 978        struct tcp_sock *tp = tcp_sk(sk);
 979        int i;
 980
 981        for (i = 0; i < tp->md5sig_info->entries4; i++) {
 982                if (tp->md5sig_info->keys4[i].addr == addr) {
 983                        /* Free the key */
 984                        kfree(tp->md5sig_info->keys4[i].base.key);
 985                        tp->md5sig_info->entries4--;
 986
 987                        if (tp->md5sig_info->entries4 == 0) {
 988                                kfree(tp->md5sig_info->keys4);
 989                                tp->md5sig_info->keys4 = NULL;
 990                                tp->md5sig_info->alloced4 = 0;
 991                                tcp_free_md5sig_pool();
 992                        } else if (tp->md5sig_info->entries4 != i) {
 993                                /* Need to do some manipulation */
 994                                memmove(&tp->md5sig_info->keys4[i],
 995                                        &tp->md5sig_info->keys4[i+1],
 996                                        (tp->md5sig_info->entries4 - i) *
 997                                         sizeof(struct tcp4_md5sig_key));
 998                        }
 999                        return 0;
1000                }
1001        }
1002        return -ENOENT;
1003}
1004EXPORT_SYMBOL(tcp_v4_md5_do_del);
1005
1006static void tcp_v4_clear_md5_list(struct sock *sk)
1007{
1008        struct tcp_sock *tp = tcp_sk(sk);
1009
1010        /* Free each key, then the set of key keys,
1011         * the crypto element, and then decrement our
1012         * hold on the last resort crypto.
1013         */
1014        if (tp->md5sig_info->entries4) {
1015                int i;
1016                for (i = 0; i < tp->md5sig_info->entries4; i++)
1017                        kfree(tp->md5sig_info->keys4[i].base.key);
1018                tp->md5sig_info->entries4 = 0;
1019                tcp_free_md5sig_pool();
1020        }
1021        if (tp->md5sig_info->keys4) {
1022                kfree(tp->md5sig_info->keys4);
1023                tp->md5sig_info->keys4 = NULL;
1024                tp->md5sig_info->alloced4  = 0;
1025        }
1026}
1027
1028static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1029                                 int optlen)
1030{
1031        struct tcp_md5sig cmd;
1032        struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1033        u8 *newkey;
1034
1035        if (optlen < sizeof(cmd))
1036                return -EINVAL;
1037
1038        if (copy_from_user(&cmd, optval, sizeof(cmd)))
1039                return -EFAULT;
1040
1041        if (sin->sin_family != AF_INET)
1042                return -EINVAL;
1043
1044        if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
1045                if (!tcp_sk(sk)->md5sig_info)
1046                        return -ENOENT;
1047                return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
1048        }
1049
1050        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1051                return -EINVAL;
1052
1053        if (!tcp_sk(sk)->md5sig_info) {
1054                struct tcp_sock *tp = tcp_sk(sk);
1055                struct tcp_md5sig_info *p;
1056
1057                p = kzalloc(sizeof(*p), sk->sk_allocation);
1058                if (!p)
1059                        return -EINVAL;
1060
1061                tp->md5sig_info = p;
1062                sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1063        }
1064
1065        newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
1066        if (!newkey)
1067                return -ENOMEM;
1068        return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1069                                 newkey, cmd.tcpm_keylen);
1070}
1071
1072static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1073                                        __be32 daddr, __be32 saddr, int nbytes)
1074{
1075        struct tcp4_pseudohdr *bp;
1076        struct scatterlist sg;
1077
1078        bp = &hp->md5_blk.ip4;
1079
1080        /*
1081         * 1. the TCP pseudo-header (in the order: source IP address,
1082         * destination IP address, zero-padded protocol number, and
1083         * segment length)
1084         */
1085        bp->saddr = saddr;
1086        bp->daddr = daddr;
1087        bp->pad = 0;
1088        bp->protocol = IPPROTO_TCP;
1089        bp->len = cpu_to_be16(nbytes);
1090
1091        sg_init_one(&sg, bp, sizeof(*bp));
1092        return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1093}
1094
1095static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
1096                               __be32 daddr, __be32 saddr, const struct tcphdr *th)
1097{
1098        struct tcp_md5sig_pool *hp;
1099        struct hash_desc *desc;
1100
1101        hp = tcp_get_md5sig_pool();
1102        if (!hp)
1103                goto clear_hash_noput;
1104        desc = &hp->md5_desc;
1105
1106        if (crypto_hash_init(desc))
1107                goto clear_hash;
1108        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1109                goto clear_hash;
1110        if (tcp_md5_hash_header(hp, th))
1111                goto clear_hash;
1112        if (tcp_md5_hash_key(hp, key))
1113                goto clear_hash;
1114        if (crypto_hash_final(desc, md5_hash))
1115                goto clear_hash;
1116
1117        tcp_put_md5sig_pool();
1118        return 0;
1119
1120clear_hash:
1121        tcp_put_md5sig_pool();
1122clear_hash_noput:
1123        memset(md5_hash, 0, 16);
1124        return 1;
1125}
1126
1127int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1128                        const struct sock *sk, const struct request_sock *req,
1129                        const struct sk_buff *skb)
1130{
1131        struct tcp_md5sig_pool *hp;
1132        struct hash_desc *desc;
1133        const struct tcphdr *th = tcp_hdr(skb);
1134        __be32 saddr, daddr;
1135
1136        if (sk) {
1137                saddr = inet_sk(sk)->inet_saddr;
1138                daddr = inet_sk(sk)->inet_daddr;
1139        } else if (req) {
1140                saddr = inet_rsk(req)->loc_addr;
1141                daddr = inet_rsk(req)->rmt_addr;
1142        } else {
1143                const struct iphdr *iph = ip_hdr(skb);
1144                saddr = iph->saddr;
1145                daddr = iph->daddr;
1146        }
1147
1148        hp = tcp_get_md5sig_pool();
1149        if (!hp)
1150                goto clear_hash_noput;
1151        desc = &hp->md5_desc;
1152
1153        if (crypto_hash_init(desc))
1154                goto clear_hash;
1155
1156        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1157                goto clear_hash;
1158        if (tcp_md5_hash_header(hp, th))
1159                goto clear_hash;
1160        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1161                goto clear_hash;
1162        if (tcp_md5_hash_key(hp, key))
1163                goto clear_hash;
1164        if (crypto_hash_final(desc, md5_hash))
1165                goto clear_hash;
1166
1167        tcp_put_md5sig_pool();
1168        return 0;
1169
1170clear_hash:
1171        tcp_put_md5sig_pool();
1172clear_hash_noput:
1173        memset(md5_hash, 0, 16);
1174        return 1;
1175}
1176EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1177
1178static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1179{
1180        /*
1181         * This gets called for each TCP segment that arrives
1182         * so we want to be efficient.
1183         * We have 3 drop cases:
1184         * o No MD5 hash and one expected.
1185         * o MD5 hash and we're not expecting one.
1186         * o MD5 hash and its wrong.
1187         */
1188        const __u8 *hash_location = NULL;
1189        struct tcp_md5sig_key *hash_expected;
1190        const struct iphdr *iph = ip_hdr(skb);
1191        const struct tcphdr *th = tcp_hdr(skb);
1192        int genhash;
1193        unsigned char newhash[16];
1194
1195        hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1196        hash_location = tcp_parse_md5sig_option(th);
1197
1198        /* We've parsed the options - do we have a hash? */
1199        if (!hash_expected && !hash_location)
1200                return 0;
1201
1202        if (hash_expected && !hash_location) {
1203                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1204                return 1;
1205        }
1206
1207        if (!hash_expected && hash_location) {
1208                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1209                return 1;
1210        }
1211
1212        /* Okay, so this is hash_expected and hash_location -
1213         * so we need to calculate the checksum.
1214         */
1215        genhash = tcp_v4_md5_hash_skb(newhash,
1216                                      hash_expected,
1217                                      NULL, NULL, skb);
1218
1219        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1220                if (net_ratelimit()) {
1221                        printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1222                               &iph->saddr, ntohs(th->source),
1223                               &iph->daddr, ntohs(th->dest),
1224                               genhash ? " tcp_v4_calc_md5_hash failed" : "");
1225                }
1226                return 1;
1227        }
1228        return 0;
1229}
1230
1231#endif
1232
/*
 * request_sock operations for IPv4 TCP.  Wires the generic request_sock
 * hooks to the IPv4 helpers defined in this file (SYN-ACK retransmit,
 * ACK, destructor, reset) plus the SYN-ACK timeout handler.
 */
1233struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1234        .family         =       PF_INET,
1235        .obj_size       =       sizeof(struct tcp_request_sock),
1236        .rtx_syn_ack    =       tcp_v4_rtx_synack,
1237        .send_ack       =       tcp_v4_reqsk_send_ack,
1238        .destructor     =       tcp_v4_reqsk_destructor,
1239        .send_reset     =       tcp_v4_send_reset,
1240        .syn_ack_timeout =      tcp_syn_ack_timeout,
1241};
1242
1243#ifdef CONFIG_TCP_MD5SIG
/*
 * IPv4 MD5 hooks for request_socks: key lookup by the request's remote
 * address and full-segment hash calculation.  Installed as af_specific
 * on each new request in tcp_v4_conn_request().
 */
1244static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1245        .md5_lookup     =       tcp_v4_reqsk_md5_lookup,
1246        .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1247};
1248#endif
1249
/*
 * Process an incoming connection request (SYN) carried by @skb for @sk.
 *
 * Allocates a request_sock, parses the TCP options, records addresses
 * and IP options, chooses an initial sequence number (syncookie, value
 * carried in TCP_SKB_CB(skb)->when, or freshly generated), sends the
 * SYN-ACK and, unless a syncookie was used, queues the request.
 *
 * Always returns 0; every failure path silently drops the request.
 */
1250int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1251{
1252        struct tcp_extend_values tmp_ext;
1253        struct tcp_options_received tmp_opt;
1254        const u8 *hash_location;
1255        struct request_sock *req;
1256        struct inet_request_sock *ireq;
1257        struct tcp_sock *tp = tcp_sk(sk);
1258        struct dst_entry *dst = NULL;
1259        __be32 saddr = ip_hdr(skb)->saddr;
1260        __be32 daddr = ip_hdr(skb)->daddr;
        /* Non-zero when the request hit a live timewait bucket (see below). */
1261        __u32 isn = TCP_SKB_CB(skb)->when;
1262        int want_cookie = 0;
1263
1264        /* Never answer to SYNs sent to broadcast or multicast */
1265        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1266                goto drop;
1267
1268        /* TW buckets are converted to open requests without
1269         * limitations, they conserve resources and peer is
1270         * evidently real one.
1271         */
1272        if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1273                want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1274                if (!want_cookie)
1275                        goto drop;
1276        }
1277
1278        /* Accept backlog is full. If we have already queued enough
1279         * of warm entries in syn queue, drop request. It is better than
1280         * clogging syn queue with openreqs with exponentially increasing
1281         * timeout.
1282         */
1283        if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1284                goto drop;
1285
1286        req = inet_reqsk_alloc(&tcp_request_sock_ops);
1287        if (!req)
1288                goto drop;
1289
1290#ifdef CONFIG_TCP_MD5SIG
1291        tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1292#endif
1293
1294        tcp_clear_options(&tmp_opt);
1295        tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1296        tmp_opt.user_mss  = tp->rx_opt.user_mss;
1297        tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
1298
        /*
         * Cookie option handling: either mix the addresses and the
         * option bytes at hash_location into tmp_ext.cookie_bakery, or
         * mark cookies as unused, or drop if cookie_in_always is set
         * and the cookie conditions are not met.
         */
1299        if (tmp_opt.cookie_plus > 0 &&
1300            tmp_opt.saw_tstamp &&
1301            !tp->rx_opt.cookie_out_never &&
1302            (sysctl_tcp_cookie_size > 0 ||
1303             (tp->cookie_values != NULL &&
1304              tp->cookie_values->cookie_desired > 0))) {
1305                u8 *c;
1306                u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1307                int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1308
1309                if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1310                        goto drop_and_release;
1311
1312                /* Secret recipe starts with IP addresses */
1313                *mess++ ^= (__force u32)daddr;
1314                *mess++ ^= (__force u32)saddr;
1315
1316                /* plus variable length Initiator Cookie */
1317                c = (u8 *)mess;
1318                while (l-- > 0)
1319                        *c++ ^= *hash_location++;
1320
1321                want_cookie = 0;        /* not our kind of cookie */
1322                tmp_ext.cookie_out_never = 0; /* false */
1323                tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1324        } else if (!tp->rx_opt.cookie_in_always) {
1325                /* redundant indications, but ensure initialization. */
1326                tmp_ext.cookie_out_never = 1; /* true */
1327                tmp_ext.cookie_plus = 0;
1328        } else {
1329                goto drop_and_release;
1330        }
1331        tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1332
        /* Syncookie without a timestamp: discard all parsed options. */
1333        if (want_cookie && !tmp_opt.saw_tstamp)
1334                tcp_clear_options(&tmp_opt);
1335
1336        tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1337        tcp_openreq_init(req, &tmp_opt, skb);
1338
1339        ireq = inet_rsk(req);
1340        ireq->loc_addr = daddr;
1341        ireq->rmt_addr = saddr;
1342        ireq->no_srccheck = inet_sk(sk)->transparent;
1343        ireq->opt = tcp_v4_save_options(sk, skb);
1344
1345        if (security_inet_conn_request(sk, skb, req))
1346                goto drop_and_free;
1347
1348        if (!want_cookie || tmp_opt.tstamp_ok)
1349                TCP_ECN_create_request(req, tcp_hdr(skb));
1350
1351        if (want_cookie) {
1352                isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1353                req->cookie_ts = tmp_opt.tstamp_ok;
1354        } else if (!isn) {
1355                struct inet_peer *peer = NULL;
1356                struct flowi4 fl4;
1357
1358                /* VJ's idea. We save last timestamp seen
1359                 * from the destination in peer table, when entering
1360                 * state TIME-WAIT, and check against it before
1361                 * accepting new connection request.
1362                 *
1363                 * If "isn" is not zero, this request hit alive
1364                 * timewait bucket, so that all the necessary checks
1365                 * are made in the function processing timewait state.
1366                 */
1367                if (tmp_opt.saw_tstamp &&
1368                    tcp_death_row.sysctl_tw_recycle &&
1369                    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1370                    fl4.daddr == saddr &&
1371                    (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
1372                        inet_peer_refcheck(peer);
1373                        if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1374                            (s32)(peer->tcp_ts - req->ts_recent) >
1375                                                        TCP_PAWS_WINDOW) {
1376                                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1377                                goto drop_and_release;
1378                        }
1379                }
1380                /* Kill the following clause, if you dislike this way. */
1381                else if (!sysctl_tcp_syncookies &&
1382                         (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1383                          (sysctl_max_syn_backlog >> 2)) &&
1384                         (!peer || !peer->tcp_ts_stamp) &&
1385                         (!dst || !dst_metric(dst, RTAX_RTT))) {
1386                        /* Without syncookies last quarter of
1387                         * backlog is filled with destinations,
1388                         * proven to be alive.
1389                         * It means that we continue to communicate
1390                         * to destinations, already remembered
1391                         * to the moment of synflood.
1392                         */
1393                        LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1394                                       &saddr, ntohs(tcp_hdr(skb)->source));
1395                        goto drop_and_release;
1396                }
1397
1398                isn = tcp_v4_init_sequence(skb);
1399        }
1400        tcp_rsk(req)->snt_isn = isn;
1401        tcp_rsk(req)->snt_synack = tcp_time_stamp;
1402
        /*
         * tcp_v4_send_synack() drops the dst reference itself, so a
         * failure here jumps to drop_and_free, not drop_and_release.
         * With a syncookie the request is never queued: it is freed
         * right after the SYN-ACK has been sent.
         */
1403        if (tcp_v4_send_synack(sk, dst, req,
1404                               (struct request_values *)&tmp_ext) ||
1405            want_cookie)
1406                goto drop_and_free;
1407
1408        inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1409        return 0;
1410
        /* dst may still be NULL on some paths that reach this label. */
1411drop_and_release:
1412        dst_release(dst);
1413drop_and_free:
1414        reqsk_free(req);
1415drop:
1416        return 0;
1417}
1418EXPORT_SYMBOL(tcp_v4_conn_request);
1419
1420
1421/*
1422 * The three way handshake has completed - we got a valid synack -
1423 * now create the new socket.
 *
 * @sk:  parent socket whose accept queue and settings are consulted
 * @skb: segment that completed the handshake
 * @req: the request_sock describing the connection
 * @dst: route to use, or NULL to have one looked up here
 *
 * Returns the new child socket, or NULL on failure.  Every failure
 * bumps LINUX_MIB_LISTENDROPS; a full accept queue also bumps
 * LINUX_MIB_LISTENOVERFLOWS.
1424 */
1425struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1426                                  struct request_sock *req,
1427                                  struct dst_entry *dst)
1428{
1429        struct inet_request_sock *ireq;
1430        struct inet_sock *newinet;
1431        struct tcp_sock *newtp;
1432        struct sock *newsk;
1433#ifdef CONFIG_TCP_MD5SIG
1434        struct tcp_md5sig_key *key;
1435#endif
1436        struct ip_options_rcu *inet_opt;
1437
1438        if (sk_acceptq_is_full(sk))
1439                goto exit_overflow;
1440
1441        newsk = tcp_create_openreq_child(sk, req, skb);
1442        if (!newsk)
1443                goto exit_nonewsk;
1444
1445        newsk->sk_gso_type = SKB_GSO_TCPV4;
1446
1447        newtp                 = tcp_sk(newsk);
1448        newinet               = inet_sk(newsk);
1449        ireq                  = inet_rsk(req);
1450        newinet->inet_daddr   = ireq->rmt_addr;
1451        newinet->inet_rcv_saddr = ireq->loc_addr;
1452        newinet->inet_saddr           = ireq->loc_addr;
        /* Hand the saved IP options over to the child; req no longer owns them. */
1453        inet_opt              = ireq->opt;
1454        rcu_assign_pointer(newinet->inet_opt, inet_opt);
1455        ireq->opt             = NULL;
1456        newinet->mc_index     = inet_iif(skb);
1457        newinet->mc_ttl       = ip_hdr(skb)->ttl;
1458        inet_csk(newsk)->icsk_ext_hdr_len = 0;
1459        if (inet_opt)
1460                inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1461        newinet->inet_id = newtp->write_seq ^ jiffies;
1462
1463        if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
1464                goto put_and_exit;
1465
1466        sk_setup_caps(newsk, dst);
1467
1468        tcp_mtup_init(newsk);
1469        tcp_sync_mss(newsk, dst_mtu(dst));
1470        newtp->advmss = dst_metric_advmss(dst);
        /* A smaller user_mss on the parent caps the advertised MSS. */
1471        if (tcp_sk(sk)->rx_opt.user_mss &&
1472            tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1473                newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1474
1475        tcp_initialize_rcv_mss(newsk);
        /* Feed the time since the SYN-ACK was sent in as an RTT sample. */
1476        if (tcp_rsk(req)->snt_synack)
1477                tcp_valid_rtt_meas(newsk,
1478                    tcp_time_stamp - tcp_rsk(req)->snt_synack);
1479        newtp->total_retrans = req->retrans;
1480
1481#ifdef CONFIG_TCP_MD5SIG
1482        /* Copy over the MD5 key from the original socket */
1483        key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
1484        if (key != NULL) {
1485                /*
1486                 * We're using one, so create a matching key
1487                 * on the newsk structure. If we fail to get
1488                 * memory, then we end up not copying the key
1489                 * across. Shucks.
1490                 */
1491                char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1492                if (newkey != NULL)
1493                        tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
1494                                          newkey, key->keylen);
1495                sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1496        }
1497#endif
1498
1499        if (__inet_inherit_port(sk, newsk) < 0)
1500                goto put_and_exit;
1501        __inet_hash_nolisten(newsk, NULL);
1502
1503        return newsk;
1504
1505exit_overflow:
1506        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1507exit_nonewsk:
1508        dst_release(dst);
1509exit:
1510        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1511        return NULL;
        /*
         * The child exists but cannot be used: stop its timers, unlock
         * it and drop our reference.  NOTE(review): dst is not released
         * here; presumably it is NULL or already attached to newsk by
         * sk_setup_caps() - confirm.
         */
1512put_and_exit:
1513        tcp_clear_xmit_timers(newsk);
1514        bh_unlock_sock(newsk);
1515        sock_put(newsk);
1516        goto exit;
1517}
1518EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1519
1520static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1521{
1522        struct tcphdr *th = tcp_hdr(skb);
1523        const struct iphdr *iph = ip_hdr(skb);
1524        struct sock *nsk;
1525        struct request_sock **prev;
1526        /* Find possible connection requests. */
1527        struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1528                                                       iph->saddr, iph->daddr);
1529        if (req)
1530                return tcp_check_req(sk, skb, req, prev);
1531
1532        nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1533                        th->source, iph->daddr, th->dest, inet_iif(skb));
1534
1535        if (nsk) {
1536                if (nsk->sk_state != TCP_TIME_WAIT) {
1537                        bh_lock_sock(nsk);
1538                        return nsk;
1539                }
1540                inet_twsk_put(inet_twsk(nsk));
1541                return NULL;
1542        }
1543
1544#ifdef CONFIG_SYN_COOKIES
1545        if (!th->syn)
1546                sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1547#endif
1548        return sk;
1549}
1550
1551static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1552{
1553        const struct iphdr *iph = ip_hdr(skb);
1554
1555        if (skb->ip_summed == CHECKSUM_COMPLETE) {
1556                if (!tcp_v4_check(skb->len, iph->saddr,
1557                                  iph->daddr, skb->csum)) {
1558                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1559                        return 0;
1560                }
1561        }
1562
1563        skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1564                                       skb->len, IPPROTO_TCP, 0);
1565
1566        if (skb->len <= 76) {
1567                return __skb_checksum_complete(skb);
1568        }
1569        return 0;
1570}
1571
1572
1573/* The socket must have its spinlock held when we get
1574 * here.
1575 *
1576 * We have a potential double-lock case here, so even when
1577 * doing backlog processing we use the BH locking scheme.
1578 * This is because we cannot sleep with the original spinlock
1579 * held.
 *
 * Dispatches @skb according to the state of @sk: the established fast
 * path, request/child handling for sockets in TCP_LISTEN, or the
 * generic state machine.  Always returns 0.  The skb is freed here on
 * the reset, discard and checksum-error paths.
1580 */
1581int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1582{
        /* Socket on whose behalf a reset is sent; set before each "goto reset". */
1583        struct sock *rsk;
1584#ifdef CONFIG_TCP_MD5SIG
1585        /*
1586         * We really want to reject the packet as early as possible
1587         * if:
1588         *  o We're expecting an MD5'd packet and this is no MD5 tcp option
1589         *  o There is an MD5 option and we're not expecting one
1590         */
1591        if (tcp_v4_inbound_md5_hash(sk, skb))
1592                goto discard;
1593#endif
1594
1595        if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1596                sock_rps_save_rxhash(sk, skb);
1597                if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1598                        rsk = sk;
1599                        goto reset;
1600                }
1601                return 0;
1602        }
1603
        /* Slow path: validate header length and checksum first. */
1604        if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1605                goto csum_err;
1606
1607        if (sk->sk_state == TCP_LISTEN) {
1608                struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1609                if (!nsk)
1610                        goto discard;
1611
                /* A different socket was found: let it process the segment. */
1612                if (nsk != sk) {
1613                        sock_rps_save_rxhash(nsk, skb);
1614                        if (tcp_child_process(sk, nsk, skb)) {
1615                                rsk = nsk;
1616                                goto reset;
1617                        }
1618                        return 0;
1619                }
1620        } else
1621                sock_rps_save_rxhash(sk, skb);
1622
1623        if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1624                rsk = sk;
1625                goto reset;
1626        }
1627        return 0;
1628
1629reset:
1630        tcp_v4_send_reset(rsk, skb);
1631discard:
1632        kfree_skb(skb);
1633        /* Be careful here. If this function gets more complicated and
1634         * gcc suffers from register pressure on the x86, sk (in %ebx)
1635         * might be destroyed here. This current version compiles correctly,
1636         * but you have been warned.
1637         */
1638        return 0;
1639
1640csum_err:
1641        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1642        goto discard;
1643}
1644EXPORT_SYMBOL(tcp_v4_do_rcv);
1645
1646/*
1647 *      From tcp_input.c
1648 */
1649
    /*
     * Receive entry point for an IPv4 TCP segment.
     *
     * Checks the TCP header length, runs tcp_v4_checksum_init() unless the
     * checksum is already known to be unnecessary, records the sequence
     * information in TCP_SKB_CB(skb) and looks up the owning socket.  The
     * segment is then processed through tcp_v4_do_rcv(), taken by
     * tcp_prequeue(), or appended to the socket backlog when the socket is
     * owned by user context.
     *
     * Returns the result of tcp_v4_do_rcv() when that is called and 0 on
     * every other path.  Segments dropped in this function are freed at
     * discard_it.
     */
1650int tcp_v4_rcv(struct sk_buff *skb)
1651{
1652        const struct iphdr *iph;
1653        const struct tcphdr *th;
1654        struct sock *sk;
1655        int ret;
1656        struct net *net = dev_net(skb->dev);
1657
            /* Only segments addressed to this host are handled. */
1658        if (skb->pkt_type != PACKET_HOST)
1659                goto discard_it;
1660
1661        /* Count it even if it's bad */
1662        TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1663
1664        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1665                goto discard_it;
1666
1667        th = tcp_hdr(skb);
1668
            /* doff counts 32-bit words; reject headers shorter than struct tcphdr. */
1669        if (th->doff < sizeof(struct tcphdr) / 4)
1670                goto bad_packet;
1671        if (!pskb_may_pull(skb, th->doff * 4))
1672                goto discard_it;
1673
1674        /* An explanation is required here, I think.
1675         * Packet length and doff are validated by header prediction,
1676         * provided case of th->doff==0 is eliminated.
1677         * So, we defer the checks. */
1678        if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1679                goto bad_packet;
1680
            /* Re-read the header pointers after the pulls above.
             * NOTE(review): presumably pskb_may_pull() can relocate the
             * header data - confirm. */
1681        th = tcp_hdr(skb);
1682        iph = ip_hdr(skb);
1683        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1684        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1685                                    skb->len - th->doff * 4);
1686        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1687        TCP_SKB_CB(skb)->when    = 0;
1688        TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1689        TCP_SKB_CB(skb)->sacked  = 0;
1690
1691        sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1692        if (!sk)
1693                goto no_tcp_socket;
1694
    /* Also re-entered from do_time_wait with sk replaced by a listener. */
1695process:
1696        if (sk->sk_state == TCP_TIME_WAIT)
1697                goto do_time_wait;
1698
1699        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1700                NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1701                goto discard_and_relse;
1702        }
1703
1704        if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1705                goto discard_and_relse;
1706        nf_reset(skb);
1707
1708        if (sk_filter(sk, skb))
1709                goto discard_and_relse;
1710
1711        skb->dev = NULL;
1712
            /* Socket not owned by user context: process the segment now unless
             * tcp_prequeue() takes it.  Otherwise append it to the backlog. */
1713        bh_lock_sock_nested(sk);
1714        ret = 0;
1715        if (!sock_owned_by_user(sk)) {
1716#ifdef CONFIG_NET_DMA
1717                struct tcp_sock *tp = tcp_sk(sk);
1718                if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1719                        tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1720                if (tp->ucopy.dma_chan)
1721                        ret = tcp_v4_do_rcv(sk, skb);
1722                else
1723#endif
1724                {
1725                        if (!tcp_prequeue(sk, skb))
1726                                ret = tcp_v4_do_rcv(sk, skb);
1727                }
1728        } else if (unlikely(sk_add_backlog(sk, skb))) {
                    /* Backlog refused the skb: count the drop and free it. */
1729                bh_unlock_sock(sk);
1730                NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1731                goto discard_and_relse;
1732        }
1733        bh_unlock_sock(sk);
1734
1735        sock_put(sk);
1736
1737        return ret;
1738
    /* No socket found (also reached for TCP_TW_RST below): answer with a
     * reset unless the segment is too short or fails its checksum. */
1739no_tcp_socket:
1740        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1741                goto discard_it;
1742
1743        if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
    /* bad_packet is also entered by goto from the header and checksum
     * checks at the top of the function, landing inside this if-body. */
1744bad_packet:
1745                TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1746        } else {
1747                tcp_v4_send_reset(NULL, skb);
1748        }
1749
1750discard_it:
1751        /* Discard frame. */
1752        kfree_skb(skb);
1753        return 0;
1754
    /* Drop the socket reference taken by the lookup, then discard. */
1755discard_and_relse:
1756        sock_put(sk);
1757        goto discard_it;
1758
    /* sk is in TCP_TIME_WAIT here; it is accessed through inet_twsk() and
     * its reference is dropped with inet_twsk_put(). */
1759do_time_wait:
1760        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1761                inet_twsk_put(inet_twsk(sk));
1762                goto discard_it;
1763        }
1764
1765        if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1766                TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1767                inet_twsk_put(inet_twsk(sk));
1768                goto discard_it;
1769        }
1770        switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1771        case TCP_TW_SYN: {
1772                struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1773                                                        &tcp_hashinfo,
1774                                                        iph->daddr, th->dest,
1775                                                        inet_iif(skb));
                    /* A listener exists for this SYN: retire the timewait
                     * socket and restart processing with the listener. */
1776                if (sk2) {
1777                        inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1778                        inet_twsk_put(inet_twsk(sk));
1779                        sk = sk2;
1780                        goto process;
1781                }
1782                /* Fall through to ACK */
1783        }
1784        case TCP_TW_ACK:
1785                tcp_v4_timewait_ack(sk, skb);
1786                break;
1787        case TCP_TW_RST:
1788                goto no_tcp_socket;
1789        case TCP_TW_SUCCESS:;
1790        }
1791        goto discard_it;
1792}
1793
1794struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1795{
1796        struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
1797        struct inet_sock *inet = inet_sk(sk);
1798        struct inet_peer *peer;
1799
1800        if (!rt ||
1801            inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
1802                peer = inet_getpeer_v4(inet->inet_daddr, 1);
1803                *release_it = true;
1804        } else {
1805                if (!rt->peer)
1806                        rt_bind_peer(rt, inet->inet_daddr, 1);
1807                peer = rt->peer;
1808                *release_it = false;
1809        }
1810
1811        return peer;
1812}
1813EXPORT_SYMBOL(tcp_v4_get_peer);
1814
1815void *tcp_v4_tw_get_peer(struct sock *sk)
1816{
1817        const struct inet_timewait_sock *tw = inet_twsk(sk);
1818
1819        return inet_getpeer_v4(tw->tw_daddr, 1);
1820}
1821EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1822
1823static struct timewait_sock_ops tcp_timewait_sock_ops = {
1824        .twsk_obj_size  = sizeof(struct tcp_timewait_sock),
1825        .twsk_unique    = tcp_twsk_unique,
1826        .twsk_destructor= tcp_twsk_destructor,
1827        .twsk_getpeer   = tcp_v4_tw_get_peer,
1828};
1829
1830const struct inet_connection_sock_af_ops ipv4_specific = {
1831        .queue_xmit        = ip_queue_xmit,
1832        .send_check        = tcp_v4_send_check,
1833        .rebuild_header    = inet_sk_rebuild_header,
1834        .conn_request      = tcp_v4_conn_request,
1835        .syn_recv_sock     = tcp_v4_syn_recv_sock,
1836        .get_peer          = tcp_v4_get_peer,
1837        .net_header_len    = sizeof(struct iphdr),
1838        .setsockopt        = ip_setsockopt,
1839        .getsockopt        = ip_getsockopt,
1840        .addr2sockaddr     = inet_csk_addr2sockaddr,
1841        .sockaddr_len      = sizeof(struct sockaddr_in),
1842        .bind_conflict     = inet_csk_bind_conflict,
1843#ifdef CONFIG_COMPAT
1844        .compat_setsockopt = compat_ip_setsockopt,
1845        .compat_getsockopt = compat_ip_getsockopt,
1846#endif
1847};
1848EXPORT_SYMBOL(ipv4_specific);
1849
#ifdef CONFIG_TCP_MD5SIG
/*
 * IPv4 TCP MD5 signature operations; tcp_v4_init_sock() installs this
 * table as tp->af_specific.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_add	= tcp_v4_md5_add_func,
	.md5_parse	= tcp_v4_parse_md5_keys,
};
#endif
1858
1859/* NOTE: A lot of things set to zero explicitly by call to
1860 *       sk_alloc() so need not be done here.
1861 */
1862static int tcp_v4_init_sock(struct sock *sk)
1863{
1864        struct inet_connection_sock *icsk = inet_csk(sk);
1865        struct tcp_sock *tp = tcp_sk(sk);
1866
1867        skb_queue_head_init(&tp->out_of_order_queue);
1868        tcp_init_xmit_timers(sk);
1869        tcp_prequeue_init(tp);
1870
1871        icsk->icsk_rto = TCP_TIMEOUT_INIT;
1872        tp->mdev = TCP_TIMEOUT_INIT;
1873
1874        /* So many TCP implementations out there (incorrectly) count the
1875         * initial SYN frame in their delayed-ACK and congestion control
1876         * algorithms that we must have the following bandaid to talk
1877         * efficiently to them.  -DaveM
1878         */
1879        tp->snd_cwnd = TCP_INIT_CWND;
1880
1881        /* See draft-stevens-tcpca-spec-01 for discussion of the
1882         * initialization of these values.
1883         */
1884        tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1885        tp->snd_cwnd_clamp = ~0;
1886        tp->mss_cache = TCP_MSS_DEFAULT;
1887
1888        tp->reordering = sysctl_tcp_reordering;
1889        icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1890
1891        sk->sk_state = TCP_CLOSE;
1892
1893        sk->sk_write_space = sk_stream_write_space;
1894        sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1895
1896        icsk->icsk_af_ops = &ipv4_specific;
1897        icsk->icsk_sync_mss = tcp_sync_mss;
1898#ifdef CONFIG_TCP_MD5SIG
1899        tp->af_specific = &tcp_sock_ipv4_specific;
1900#endif
1901
1902        /* TCP Cookie Transactions */
1903        if (sysctl_tcp_cookie_size > 0) {
1904                /* Default, cookies without s_data_payload. */
1905                tp->cookie_values =
1906                        kzalloc(sizeof(*tp->cookie_values),
1907                                sk->sk_allocation);
1908                if (tp->cookie_values != NULL)
1909                        kref_init(&tp->cookie_values->kref);
1910        }
1911        /* Presumed zeroed, in order of appearance:
1912         *      cookie_in_always, cookie_out_never,
1913         *      s_data_constant, s_data_in, s_data_out
1914         */
1915        sk->sk_sndbuf = sysctl_tcp_wmem[1];
1916        sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1917
1918        local_bh_disable();
1919        percpu_counter_inc(&tcp_sockets_allocated);
1920        local_bh_enable();
1921
1922        return 0;
1923}
1924
1925void tcp_v4_destroy_sock(struct sock *sk)
1926{
1927        struct tcp_sock *tp = tcp_sk(sk);
1928
1929        tcp_clear_xmit_timers(sk);
1930
1931        tcp_cleanup_congestion_control(sk);
1932
1933        /* Cleanup up the write buffer. */
1934        tcp_write_queue_purge(sk);
1935
1936        /* Cleans up our, hopefully empty, out_of_order_queue. */
1937        __skb_queue_purge(&tp->out_of_order_queue);
1938
1939#ifdef CONFIG_TCP_MD5SIG
1940        /* Clean up the MD5 key list, if any */
1941        if (tp->md5sig_info) {
1942                tcp_v4_clear_md5_list(sk);
1943                kfree(tp->md5sig_info);
1944                tp->md5sig_info = NULL;
1945        }
1946#endif
1947
1948#ifdef CONFIG_NET_DMA
1949        /* Cleans up our sk_async_wait_queue */
1950        __skb_queue_purge(&sk->sk_async_wait_queue);
1951#endif
1952
1953        /* Clean prequeue, it must be empty really */
1954        __skb_queue_purge(&tp->ucopy.prequeue);
1955
1956        /* Clean up a referenced TCP bind bucket. */
1957        if (inet_csk(sk)->icsk_bind_hash)
1958                inet_put_port(sk);
1959
1960        /*
1961         * If sendmsg cached page exists, toss it.
1962         */
1963        if (sk->sk_sndmsg_page) {
1964                __free_page(sk->sk_sndmsg_page);
1965                sk->sk_sndmsg_page = NULL;
1966        }
1967
1968        /* TCP Cookie Transactions */
1969        if (tp->cookie_values != NULL) {
1970                kref_put(&tp->cookie_values->kref,
1971                         tcp_cookie_values_release);
1972                tp->cookie_values = NULL;
1973        }
1974
1975        percpu_counter_dec(&tcp_sockets_allocated);
1976}
1977EXPORT_SYMBOL(tcp_v4_destroy_sock);
1978
1979#ifdef CONFIG_PROC_FS
1980/* Proc filesystem TCP sock list dumping. */
1981
1982static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1983{
1984        return hlist_nulls_empty(head) ? NULL :
1985                list_entry(head->first, struct inet_timewait_sock, tw_node);
1986}
1987
1988static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1989{
1990        return !is_a_nulls(tw->tw_node.next) ?
1991                hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1992}
1993
1994/*
1995 * Get the next listener socket following cur.  If cur is NULL, get the first
1996 * socket starting from bucket given in st->bucket; when st->bucket is zero the
1997 * very first socket in the hash table is returned.
     *
     * While st->state is TCP_SEQ_STATE_OPENREQ, cur is a request_sock taken
     * from the SYN table of st->syn_wait_sk rather than a listener, and the
     * walk continues through that table before moving to the next listener.
     *
     * On a non-NULL return the listening bucket lock (ilb->lock) is held, and
     * in the OPENREQ state the listener's syn_wait_lock is read-held as well.
     * NULL is returned, with the bucket lock dropped, once every bucket has
     * been visited.
1998 */
1999static void *listening_get_next(struct seq_file *seq, void *cur)
2000{
2001        struct inet_connection_sock *icsk;
2002        struct hlist_nulls_node *node;
2003        struct sock *sk = cur;
2004        struct inet_listen_hashbucket *ilb;
2005        struct tcp_iter_state *st = seq->private;
2006        struct net *net = seq_file_net(seq);
2007
2008        if (!sk) {
                    /* Fresh start: lock the current bucket and begin at its head. */
2009                ilb = &tcp_hashinfo.listening_hash[st->bucket];
2010                spin_lock_bh(&ilb->lock);
2011                sk = sk_nulls_head(&ilb->head);
2012                st->offset = 0;
2013                goto get_sk;
2014        }
2015        ilb = &tcp_hashinfo.listening_hash[st->bucket];
2016        ++st->num;
2017        ++st->offset;
2018
2019        if (st->state == TCP_SEQ_STATE_OPENREQ) {
                    /* Continue walking the pending-request table of
                     * st->syn_wait_sk, one hash slot (st->sbucket) at a time. */
2020                struct request_sock *req = cur;
2021
2022                icsk = inet_csk(st->syn_wait_sk);
2023                req = req->dl_next;
2024                while (1) {
2025                        while (req) {
2026                                if (req->rsk_ops->family == st->family) {
2027                                        cur = req;
2028                                        goto out;
2029                                }
2030                                req = req->dl_next;
2031                        }
2032                        if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2033                                break;
    /* Also entered from start_req below, with st->sbucket reset to 0. */
2034get_req:
2035                        req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2036                }
                    /* SYN table exhausted: resume with the listener after
                     * syn_wait_sk and release its syn_wait_lock. */
2037                sk        = sk_nulls_next(st->syn_wait_sk);
2038                st->state = TCP_SEQ_STATE_LISTENING;
2039                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2040        } else {
                    /* cur was a listener: visit its pending requests, if it has
                     * any, before moving on to the next listener. */
2041                icsk = inet_csk(sk);
2042                read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2043                if (reqsk_queue_len(&icsk->icsk_accept_queue))
2044                        goto start_req;
2045                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2046                sk = sk_nulls_next(sk);
2047        }
2048get_sk:
2049        sk_nulls_for_each_from(sk, node) {
2050                if (!net_eq(sock_net(sk), net))
2051                        continue;
2052                if (sk->sk_family == st->family) {
2053                        cur = sk;
2054                        goto out;
2055                }
                    /* Listener of another family: its request queue is still
                     * scanned.  NOTE(review): presumably so that requests of
                     * st->family queued on such a listener are shown - confirm. */
2056                icsk = inet_csk(sk);
2057                read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2058                if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2059start_req:
2060                        st->uid         = sock_i_uid(sk);
2061                        st->syn_wait_sk = sk;
2062                        st->state       = TCP_SEQ_STATE_OPENREQ;
2063                        st->sbucket     = 0;
2064                        goto get_req;
2065                }
2066                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2067        }
            /* Bucket exhausted: drop its lock and advance to the next one, if any. */
2068        spin_unlock_bh(&ilb->lock);
2069        st->offset = 0;
2070        if (++st->bucket < INET_LHTABLE_SIZE) {
2071                ilb = &tcp_hashinfo.listening_hash[st->bucket];
2072                spin_lock_bh(&ilb->lock);
2073                sk = sk_nulls_head(&ilb->head);
2074                goto get_sk;
2075        }
2076        cur = NULL;
2077out:
2078        return cur;
2079}
2080
2081static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2082{
2083        struct tcp_iter_state *st = seq->private;
2084        void *rc;
2085
2086        st->bucket = 0;
2087        st->offset = 0;
2088        rc = listening_get_next(seq, NULL);
2089
2090        while (rc && *pos) {
2091                rc = listening_get_next(seq, rc);
2092                --*pos;
2093        }
2094        return rc;
2095}
2096
2097static inline int empty_bucket(struct tcp_iter_state *st)
2098{
2099        return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2100                hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2101}
2102
2103/*
2104 * Get first established socket starting from bucket given in st->bucket.
2105 * If st->bucket is zero, the very first socket in the hash is returned.
2106 */
2107static void *established_get_first(struct seq_file *seq)
2108{
2109        struct tcp_iter_state *st = seq->private;
2110        struct net *net = seq_file_net(seq);
2111        void *rc = NULL;
2112
2113        st->offset = 0;
2114        for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2115                struct sock *sk;
2116                struct hlist_nulls_node *node;
2117                struct inet_timewait_sock *tw;
2118                spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2119
2120                /* Lockless fast path for the common case of empty buckets */
2121                if (empty_bucket(st))
2122                        continue;
2123
2124                spin_lock_bh(lock);
2125                sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2126                        if (sk->sk_family != st->family ||
2127                            !net_eq(sock_net(sk), net)) {
2128                                continue;
2129                        }
2130                        rc = sk;
2131                        goto out;
2132                }
2133                st->state = TCP_SEQ_STATE_TIME_WAIT;
2134                inet_twsk_for_each(tw, node,
2135                                   &tcp_hashinfo.ehash[st->bucket].twchain) {
2136                        if (tw->tw_family != st->family ||
2137                            !net_eq(twsk_net(tw), net)) {
2138                                continue;
2139                        }
2140                        rc = tw;
2141                        goto out;
2142                }
2143                spin_unlock_bh(lock);
2144                st->state = TCP_SEQ_STATE_ESTABLISHED;
2145        }
2146out:
2147        return rc;
2148}
2149
2150static void *established_get_next(struct seq_file *seq, void *cur)
2151{
2152        struct sock *sk = cur;
2153        struct inet_timewait_sock *tw;
2154        struct hlist_nulls_node *node;
2155        struct tcp_iter_state *st = seq->private;
2156        struct net *net = seq_file_net(seq);
2157
2158        ++st->num;
2159        ++st->offset;
2160
2161        if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2162                tw = cur;
2163                tw = tw_next(tw);
2164get_tw:
2165                while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2166                        tw = tw_next(tw);
2167                }
2168                if (tw) {
2169                        cur = tw;
2170                        goto out;
2171                }
2172                spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2173                st->state = TCP_SEQ_STATE_ESTABLISHED;
2174
2175                /* Look for next non empty bucket */
2176                st->offset = 0;
2177                while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2178                                empty_bucket(st))
2179                        ;
2180                if (st->bucket > tcp_hashinfo.ehash_mask)
2181                        return NULL;
2182
2183                spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2184                sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2185        } else
2186                sk = sk_nulls_next(sk);
2187
2188        sk_nulls_for_each_from(sk, node) {
2189                if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2190                        goto found;
2191        }
2192
2193        st->state = TCP_SEQ_STATE_TIME_WAIT;
2194        tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2195        goto get_tw;
2196found:
2197        cur = sk;
2198out:
2199        return cur;
2200}
2201
2202static void *established_get_idx(struct seq_file *seq, loff_t pos)
2203{
2204        struct tcp_iter_state *st = seq->private;
2205        void *rc;
2206
2207        st->bucket = 0;
2208        rc = established_get_first(seq);
2209
2210        while (rc && pos) {
2211                rc = established_get_next(seq, rc);
2212                --pos;
2213        }
2214        return rc;
2215}
2216
2217static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2218{
2219        void *rc;
2220        struct tcp_iter_state *st = seq->private;
2221
2222        st->state = TCP_SEQ_STATE_LISTENING;
2223        rc        = listening_get_idx(seq, &pos);
2224
2225        if (!rc) {
2226                st->state = TCP_SEQ_STATE_ESTABLISHED;
2227                rc        = established_get_idx(seq, pos);
2228        }
2229
2230        return rc;
2231}
2232
2233static void *tcp_seek_last_pos(struct seq_file *seq)
2234{
2235        struct tcp_iter_state *st = seq->private;
2236        int offset = st->offset;
2237        int orig_num = st->num;
2238        void *rc = NULL;
2239
2240        switch (st->state) {
2241        case TCP_SEQ_STATE_OPENREQ:
2242        case TCP_SEQ_STATE_LISTENING:
2243                if (st->bucket >= INET_LHTABLE_SIZE)
2244                        break;
2245                st->state = TCP_SEQ_STATE_LISTENING;
2246                rc = listening_get_next(seq, NULL);
2247                while (offset-- && rc)
2248                        rc = listening_get_next(seq, rc);
2249                if (rc)
2250                        break;
2251                st->bucket = 0;
2252                /* Fallthrough */
2253        case TCP_SEQ_STATE_ESTABLISHED:
2254        case TCP_SEQ_STATE_TIME_WAIT:
2255                st->state = TCP_SEQ_STATE_ESTABLISHED;
2256                if (st->bucket > tcp_hashinfo.ehash_mask)
2257                        break;
2258                rc = established_get_first(seq);
2259                while (offset-- && rc)
2260                        rc = established_get_next(seq, rc);
2261        }
2262
2263        st->num = orig_num;
2264
2265        return rc;
2266}
2267
2268static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2269{
2270        struct tcp_iter_state *st = seq->private;
2271        void *rc;
2272
2273        if (*pos && *pos == st->last_pos) {
2274                rc = tcp_seek_last_pos(seq);
2275                if (rc)
2276                        goto out;
2277        }
2278
2279        st->state = TCP_SEQ_STATE_LISTENING;
2280        st->num = 0;
2281        st->bucket = 0;
2282        st->offset = 0;
2283        rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2284
2285out:
2286        st->last_pos = *pos;
2287        return rc;
2288}
2289
2290static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2291{
2292        struct tcp_iter_state *st = seq->private;
2293        void *rc = NULL;
2294
2295        if (v == SEQ_START_TOKEN) {
2296                rc = tcp_get_idx(seq, 0);
2297                goto out;
2298        }
2299
2300        switch (st->state) {
2301        case TCP_SEQ_STATE_OPENREQ:
2302        case TCP_SEQ_STATE_LISTENING:
2303                rc = listening_get_next(seq, v);
2304                if (!rc) {
2305                        st->state = TCP_SEQ_STATE_ESTABLISHED;
2306                        st->bucket = 0;
2307                        st->offset = 0;
2308                        rc        = established_get_first(seq);
2309                }
2310                break;
2311        case TCP_SEQ_STATE_ESTABLISHED:
2312        case TCP_SEQ_STATE_TIME_WAIT:
2313                rc = established_get_next(seq, v);
2314                break;
2315        }
2316out:
2317        ++*pos;
2318        st->last_pos = *pos;
2319        return rc;
2320}
2321
2322static void tcp_seq_stop(struct seq_file *seq, void *v)
2323{
2324        struct tcp_iter_state *st = seq->private;
2325
2326        switch (st->state) {
2327        case TCP_SEQ_STATE_OPENREQ:
2328                if (v) {
2329                        struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2330                        read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2331                }
2332        case TCP_SEQ_STATE_LISTENING:
2333                if (v != SEQ_START_TOKEN)
2334                        spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2335                break;
2336        case TCP_SEQ_STATE_TIME_WAIT:
2337        case TCP_SEQ_STATE_ESTABLISHED:
2338                if (v)
2339                        spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2340                break;
2341        }
2342}
2343
2344int tcp_seq_open(struct inode *inode, struct file *file)
2345{
2346        struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2347        struct tcp_iter_state *s;
2348        int err;
2349
2350        err = seq_open_net(inode, file, &afinfo->seq_ops,
2351                          sizeof(struct tcp_iter_state));
2352        if (err < 0)
2353                return err;
2354
2355        s = ((struct seq_file *)file->private_data)->private;
2356        s->family               = afinfo->family;
2357        s->last_pos             = 0;
2358        return 0;
2359}
2360EXPORT_SYMBOL(tcp_seq_open);
2361
2362int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2363{
2364        int rc = 0;
2365        struct proc_dir_entry *p;
2366
2367        afinfo->seq_ops.start           = tcp_seq_start;
2368        afinfo->seq_ops.next            = tcp_seq_next;
2369        afinfo->seq_ops.stop            = tcp_seq_stop;
2370
2371        p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2372                             afinfo->seq_fops, afinfo);
2373        if (!p)
2374                rc = -ENOMEM;
2375        return rc;
2376}
2377EXPORT_SYMBOL(tcp_proc_register);
2378
2379void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2380{
2381        proc_net_remove(net, afinfo->name);
2382}
2383EXPORT_SYMBOL(tcp_proc_unregister);
2384
2385static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2386                         struct seq_file *f, int i, int uid, int *len)
2387{
2388        const struct inet_request_sock *ireq = inet_rsk(req);
2389        int ttd = req->expires - jiffies;
2390
2391        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2392                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
2393                i,
2394                ireq->loc_addr,
2395                ntohs(inet_sk(sk)->inet_sport),
2396                ireq->rmt_addr,
2397                ntohs(ireq->rmt_port),
2398                TCP_SYN_RECV,
2399                0, 0, /* could print option size, but that is af dependent. */
2400                1,    /* timers active (only the expire timer) */
2401                jiffies_to_clock_t(ttd),
2402                req->retrans,
2403                uid,
2404                0,  /* non standard timer */
2405                0, /* open_requests have no inode */
2406                atomic_read(&sk->sk_refcnt),
2407                req,
2408                len);
2409}
2410
2411static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2412{
2413        int timer_active;
2414        unsigned long timer_expires;
2415        const struct tcp_sock *tp = tcp_sk(sk);
2416        const struct inet_connection_sock *icsk = inet_csk(sk);
2417        const struct inet_sock *inet = inet_sk(sk);
2418        __be32 dest = inet->inet_daddr;
2419        __be32 src = inet->inet_rcv_saddr;
2420        __u16 destp = ntohs(inet->inet_dport);
2421        __u16 srcp = ntohs(inet->inet_sport);
2422        int rx_queue;
2423
2424        if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2425                timer_active    = 1;
2426                timer_expires   = icsk->icsk_timeout;
2427        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2428                timer_active    = 4;
2429                timer_expires   = icsk->icsk_timeout;
2430        } else if (timer_pending(&sk->sk_timer)) {
2431                timer_active    = 2;
2432                timer_expires   = sk->sk_timer.expires;
2433        } else {
2434                timer_active    = 0;
2435                timer_expires = jiffies;
2436        }
2437
2438        if (sk->sk_state == TCP_LISTEN)
2439                rx_queue = sk->sk_ack_backlog;
2440        else
2441                /*
2442                 * because we dont lock socket, we might find a transient negative value
2443                 */
2444                rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2445
2446        seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2447                        "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
2448                i, src, srcp, dest, destp, sk->sk_state,
2449                tp->write_seq - tp->snd_una,
2450                rx_queue,
2451                timer_active,
2452                jiffies_to_clock_t(timer_expires - jiffies),
2453                icsk->icsk_retransmits,
2454                sock_i_uid(sk),
2455                icsk->icsk_probes_out,
2456                sock_i_ino(sk),
2457                atomic_read(&sk->sk_refcnt), sk,
2458                jiffies_to_clock_t(icsk->icsk_rto),
2459                jiffies_to_clock_t(icsk->icsk_ack.ato),
2460                (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2461                tp->snd_cwnd,
2462                tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2463                len);
2464}
2465
2466static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2467                               struct seq_file *f, int i, int *len)
2468{
2469        __be32 dest, src;
2470        __u16 destp, srcp;
2471        int ttd = tw->tw_ttd - jiffies;
2472
2473        if (ttd < 0)
2474                ttd = 0;
2475
2476        dest  = tw->tw_daddr;
2477        src   = tw->tw_rcv_saddr;
2478        destp = ntohs(tw->tw_dport);
2479        srcp  = ntohs(tw->tw_sport);
2480
2481        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2482                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
2483                i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2484                3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2485                atomic_read(&tw->tw_refcnt), tw, len);
2486}
2487
2488#define TMPSZ 150
2489
2490static int tcp4_seq_show(struct seq_file *seq, void *v)
2491{
2492        struct tcp_iter_state *st;
2493        int len;
2494
2495        if (v == SEQ_START_TOKEN) {
2496                seq_printf(seq, "%-*s\n", TMPSZ - 1,
2497                           "  sl  local_address rem_address   st tx_queue "
2498                           "rx_queue tr tm->when retrnsmt   uid  timeout "
2499                           "inode");
2500                goto out;
2501        }
2502        st = seq->private;
2503
2504        switch (st->state) {
2505        case TCP_SEQ_STATE_LISTENING:
2506        case TCP_SEQ_STATE_ESTABLISHED:
2507                get_tcp4_sock(v, seq, st->num, &len);
2508                break;
2509        case TCP_SEQ_STATE_OPENREQ:
2510                get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2511                break;
2512        case TCP_SEQ_STATE_TIME_WAIT:
2513                get_timewait4_sock(v, seq, st->num, &len);
2514                break;
2515        }
2516        seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2517out:
2518        return 0;
2519}
2520
2521static const struct file_operations tcp_afinfo_seq_fops = {
2522        .owner   = THIS_MODULE,
2523        .open    = tcp_seq_open,
2524        .read    = seq_read,
2525        .llseek  = seq_lseek,
2526        .release = seq_release_net
2527};
2528
2529static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2530        .name           = "tcp",
2531        .family         = AF_INET,
2532        .seq_fops       = &tcp_afinfo_seq_fops,
2533        .seq_ops        = {
2534                .show           = tcp4_seq_show,
2535        },
2536};
2537
2538static int __net_init tcp4_proc_init_net(struct net *net)
2539{
2540        return tcp_proc_register(net, &tcp4_seq_afinfo);
2541}
2542
2543static void __net_exit tcp4_proc_exit_net(struct net *net)
2544{
2545        tcp_proc_unregister(net, &tcp4_seq_afinfo);
2546}
2547
2548static struct pernet_operations tcp4_net_ops = {
2549        .init = tcp4_proc_init_net,
2550        .exit = tcp4_proc_exit_net,
2551};
2552
2553int __init tcp4_proc_init(void)
2554{
2555        return register_pernet_subsys(&tcp4_net_ops);
2556}
2557
2558void tcp4_proc_exit(void)
2559{
2560        unregister_pernet_subsys(&tcp4_net_ops);
2561}
2562#endif /* CONFIG_PROC_FS */
2563
2564struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2565{
2566        const struct iphdr *iph = skb_gro_network_header(skb);
2567
2568        switch (skb->ip_summed) {
2569        case CHECKSUM_COMPLETE:
2570                if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2571                                  skb->csum)) {
2572                        skb->ip_summed = CHECKSUM_UNNECESSARY;
2573                        break;
2574                }
2575
2576                /* fall through */
2577        case CHECKSUM_NONE:
2578                NAPI_GRO_CB(skb)->flush = 1;
2579                return NULL;
2580        }
2581
2582        return tcp_gro_receive(head, skb);
2583}
2584
2585int tcp4_gro_complete(struct sk_buff *skb)
2586{
2587        const struct iphdr *iph = ip_hdr(skb);
2588        struct tcphdr *th = tcp_hdr(skb);
2589
2590        th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2591                                  iph->saddr, iph->daddr, 0);
2592        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2593
2594        return tcp_gro_complete(skb);
2595}
2596
2597struct proto tcp_prot = {
2598        .name                   = "TCP",
2599        .owner                  = THIS_MODULE,
2600        .close                  = tcp_close,
2601        .connect                = tcp_v4_connect,
2602        .disconnect             = tcp_disconnect,
2603        .accept                 = inet_csk_accept,
2604        .ioctl                  = tcp_ioctl,
2605        .init                   = tcp_v4_init_sock,
2606        .destroy                = tcp_v4_destroy_sock,
2607        .shutdown               = tcp_shutdown,
2608        .setsockopt             = tcp_setsockopt,
2609        .getsockopt             = tcp_getsockopt,
2610        .recvmsg                = tcp_recvmsg,
2611        .sendmsg                = tcp_sendmsg,
2612        .sendpage               = tcp_sendpage,
2613        .backlog_rcv            = tcp_v4_do_rcv,
2614        .hash                   = inet_hash,
2615        .unhash                 = inet_unhash,
2616        .get_port               = inet_csk_get_port,
2617        .enter_memory_pressure  = tcp_enter_memory_pressure,
2618        .sockets_allocated      = &tcp_sockets_allocated,
2619        .orphan_count           = &tcp_orphan_count,
2620        .memory_allocated       = &tcp_memory_allocated,
2621        .memory_pressure        = &tcp_memory_pressure,
2622        .sysctl_mem             = sysctl_tcp_mem,
2623        .sysctl_wmem            = sysctl_tcp_wmem,
2624        .sysctl_rmem            = sysctl_tcp_rmem,
2625        .max_header             = MAX_TCP_HEADER,
2626        .obj_size               = sizeof(struct tcp_sock),
2627        .slab_flags             = SLAB_DESTROY_BY_RCU,
2628        .twsk_prot              = &tcp_timewait_sock_ops,
2629        .rsk_prot               = &tcp_request_sock_ops,
2630        .h.hashinfo             = &tcp_hashinfo,
2631        .no_autobind            = true,
2632#ifdef CONFIG_COMPAT
2633        .compat_setsockopt      = compat_tcp_setsockopt,
2634        .compat_getsockopt      = compat_tcp_getsockopt,
2635#endif
2636};
2637EXPORT_SYMBOL(tcp_prot);
2638
2639
2640static int __net_init tcp_sk_init(struct net *net)
2641{
2642        return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2643                                    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2644}
2645
2646static void __net_exit tcp_sk_exit(struct net *net)
2647{
2648        inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2649}
2650
2651static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2652{
2653        inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2654}
2655
2656static struct pernet_operations __net_initdata tcp_sk_ops = {
2657       .init       = tcp_sk_init,
2658       .exit       = tcp_sk_exit,
2659       .exit_batch = tcp_sk_exit_batch,
2660};
2661
2662void __init tcp_v4_init(void)
2663{
2664        inet_hashinfo_init(&tcp_hashinfo);
2665        if (register_pernet_subsys(&tcp_sk_ops))
2666                panic("Failed to create the TCP control socket.\n");
2667}
2668
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.