linux/net/ipv4/tcp_ipv4.c
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              Implementation of the Transmission Control Protocol(TCP).
   7 *
   8 *              IPv4 specific functions
   9 *
  10 *
  11 *              code split from:
  12 *              linux/ipv4/tcp.c
  13 *              linux/ipv4/tcp_input.c
  14 *              linux/ipv4/tcp_output.c
  15 *
  16 *              See tcp.c for author information
  17 *
  18 *      This program is free software; you can redistribute it and/or
  19 *      modify it under the terms of the GNU General Public License
  20 *      as published by the Free Software Foundation; either version
  21 *      2 of the License, or (at your option) any later version.
  22 */
  23
  24/*
  25 * Changes:
  26 *              David S. Miller :       New socket lookup architecture.
  27 *                                      This code is dedicated to John Dyson.
  28 *              David S. Miller :       Change semantics of established hash,
  29 *                                      half is devoted to TIME_WAIT sockets
  30 *                                      and the rest go in the other half.
  31 *              Andi Kleen :            Add support for syncookies and fixed
  32 *                                      some bugs: ip options weren't passed to
  33 *                                      the TCP layer, missed a check for an
  34 *                                      ACK bit.
  35 *              Andi Kleen :            Implemented fast path mtu discovery.
  36 *                                      Fixed many serious bugs in the
  37 *                                      request_sock handling and moved
  38 *                                      most of it into the af independent code.
  39 *                                      Added tail drop and some other bugfixes.
  40 *                                      Added new listen semantics.
  41 *              Mike McLagan    :       Routing by source
  42 *      Juan Jose Ciarlante:            ip_dynaddr bits
  43 *              Andi Kleen:             various fixes.
  44 *      Vitaly E. Lavrov        :       Transparent proxy revived after year
  45 *                                      coma.
  46 *      Andi Kleen              :       Fix new listen.
  47 *      Andi Kleen              :       Fix accept error reporting.
  48 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
   49 *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
  50 *                                      a single port at the same time.
  51 */
  52
  53#define pr_fmt(fmt) "TCP: " fmt
  54
  55#include <linux/bottom_half.h>
  56#include <linux/types.h>
  57#include <linux/fcntl.h>
  58#include <linux/module.h>
  59#include <linux/random.h>
  60#include <linux/cache.h>
  61#include <linux/jhash.h>
  62#include <linux/init.h>
  63#include <linux/times.h>
  64#include <linux/slab.h>
  65
  66#include <net/net_namespace.h>
  67#include <net/icmp.h>
  68#include <net/inet_hashtables.h>
  69#include <net/tcp.h>
  70#include <net/transp_v6.h>
  71#include <net/ipv6.h>
  72#include <net/inet_common.h>
  73#include <net/timewait_sock.h>
  74#include <net/xfrm.h>
  75#include <net/secure_seq.h>
  76#include <net/busy_poll.h>
  77
  78#include <linux/inet.h>
  79#include <linux/ipv6.h>
  80#include <linux/stddef.h>
  81#include <linux/proc_fs.h>
  82#include <linux/seq_file.h>
  83
  84#include <crypto/hash.h>
  85#include <linux/scatterlist.h>
  86
  87int sysctl_tcp_tw_reuse __read_mostly;
  88int sysctl_tcp_low_latency __read_mostly;
  89
  90#ifdef CONFIG_TCP_MD5SIG
  91static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
  92                               __be32 daddr, __be32 saddr, const struct tcphdr *th);
  93#endif
  94
  95struct inet_hashinfo tcp_hashinfo;
  96EXPORT_SYMBOL(tcp_hashinfo);
  97
  98static  __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
  99{
 100        return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
 101                                          ip_hdr(skb)->saddr,
 102                                          tcp_hdr(skb)->dest,
 103                                          tcp_hdr(skb)->source);
 104}
 105
 106int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 107{
 108        const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
 109        struct tcp_sock *tp = tcp_sk(sk);
 110
 111        /* With PAWS, it is safe from the viewpoint
 112           of data integrity. Even without PAWS it is safe provided sequence
 113           spaces do not overlap i.e. at data rates <= 80Mbit/sec.
 114
 115           Actually, the idea is close to VJ's one, only timestamp cache is
 116           held not per host, but per port pair and TW bucket is used as state
 117           holder.
 118
 119           If TW bucket has been already destroyed we fall back to VJ's scheme
 120           and use initial timestamp retrieved from peer table.
 121         */
 122        if (tcptw->tw_ts_recent_stamp &&
 123            (!twp || (sysctl_tcp_tw_reuse &&
 124                             get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
 125                tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 126                if (tp->write_seq == 0)
 127                        tp->write_seq = 1;
 128                tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
 129                tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
 130                sock_hold(sktw);
 131                return 1;
 132        }
 133
 134        return 0;
 135}
 136EXPORT_SYMBOL_GPL(tcp_twsk_unique);
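
/* Editor's note: whether the twp != NULL caller above may recycle a TIME_WAIT
 * bucket for a new outgoing connection is gated by sysctl_tcp_tw_reuse
 * (declared near the top of this file, exposed as net.ipv4.tcp_tw_reuse) plus
 * a recent timestamp more than one second old. A minimal sketch of enabling
 * that sysctl from a C program by writing the procfs file, equivalent to
 * "sysctl -w net.ipv4.tcp_tw_reuse=1" (needs root):
 *
 *     #include <fcntl.h>
 *     #include <unistd.h>
 *
 *     static int enable_tw_reuse(void)
 *     {
 *             int fd = open("/proc/sys/net/ipv4/tcp_tw_reuse", O_WRONLY);
 *             int ret = -1;
 *
 *             if (fd >= 0) {
 *                     ret = (write(fd, "1", 1) == 1) ? 0 : -1;
 *                     close(fd);
 *             }
 *             return ret;
 *     }
 */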
 137
 138/* This will initiate an outgoing connection. */
 139int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 140{
 141        struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
 142        struct inet_sock *inet = inet_sk(sk);
 143        struct tcp_sock *tp = tcp_sk(sk);
 144        __be16 orig_sport, orig_dport;
 145        __be32 daddr, nexthop;
 146        struct flowi4 *fl4;
 147        struct rtable *rt;
 148        int err;
 149        struct ip_options_rcu *inet_opt;
 150
 151        if (addr_len < sizeof(struct sockaddr_in))
 152                return -EINVAL;
 153
 154        if (usin->sin_family != AF_INET)
 155                return -EAFNOSUPPORT;
 156
 157        nexthop = daddr = usin->sin_addr.s_addr;
 158        inet_opt = rcu_dereference_protected(inet->inet_opt,
 159                                             lockdep_sock_is_held(sk));
 160        if (inet_opt && inet_opt->opt.srr) {
 161                if (!daddr)
 162                        return -EINVAL;
 163                nexthop = inet_opt->opt.faddr;
 164        }
 165
 166        orig_sport = inet->inet_sport;
 167        orig_dport = usin->sin_port;
 168        fl4 = &inet->cork.fl.u.ip4;
 169        rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
 170                              RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 171                              IPPROTO_TCP,
 172                              orig_sport, orig_dport, sk);
 173        if (IS_ERR(rt)) {
 174                err = PTR_ERR(rt);
 175                if (err == -ENETUNREACH)
 176                        IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 177                return err;
 178        }
 179
 180        if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
 181                ip_rt_put(rt);
 182                return -ENETUNREACH;
 183        }
 184
 185        if (!inet_opt || !inet_opt->opt.srr)
 186                daddr = fl4->daddr;
 187
 188        if (!inet->inet_saddr)
 189                inet->inet_saddr = fl4->saddr;
 190        sk_rcv_saddr_set(sk, inet->inet_saddr);
 191
 192        if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
 193                /* Reset inherited state */
 194                tp->rx_opt.ts_recent       = 0;
 195                tp->rx_opt.ts_recent_stamp = 0;
 196                if (likely(!tp->repair))
 197                        tp->write_seq      = 0;
 198        }
 199
 200        if (tcp_death_row.sysctl_tw_recycle &&
 201            !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
 202                tcp_fetch_timewait_stamp(sk, &rt->dst);
 203
 204        inet->inet_dport = usin->sin_port;
 205        sk_daddr_set(sk, daddr);
 206
 207        inet_csk(sk)->icsk_ext_hdr_len = 0;
 208        if (inet_opt)
 209                inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
 210
 211        tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
 212
  213        /* Socket identity is still unknown (sport may be zero).
  214         * However, we set the state to SYN-SENT and, without releasing the
  215         * socket lock, select a source port, enter ourselves into the hash
  216         * tables and complete initialization after this.
  217         */
 218        tcp_set_state(sk, TCP_SYN_SENT);
 219        err = inet_hash_connect(&tcp_death_row, sk);
 220        if (err)
 221                goto failure;
 222
 223        sk_set_txhash(sk);
 224
 225        rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
 226                               inet->inet_sport, inet->inet_dport, sk);
 227        if (IS_ERR(rt)) {
 228                err = PTR_ERR(rt);
 229                rt = NULL;
 230                goto failure;
 231        }
 232        /* OK, now commit destination to socket.  */
 233        sk->sk_gso_type = SKB_GSO_TCPV4;
 234        sk_setup_caps(sk, &rt->dst);
 235
 236        if (!tp->write_seq && likely(!tp->repair))
 237                tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
 238                                                           inet->inet_daddr,
 239                                                           inet->inet_sport,
 240                                                           usin->sin_port);
 241
 242        inet->inet_id = tp->write_seq ^ jiffies;
 243
 244        err = tcp_connect(sk);
 245
 246        rt = NULL;
 247        if (err)
 248                goto failure;
 249
 250        return 0;
 251
 252failure:
 253        /*
 254         * This unhashes the socket and releases the local port,
 255         * if necessary.
 256         */
 257        tcp_set_state(sk, TCP_CLOSE);
 258        ip_rt_put(rt);
 259        sk->sk_route_caps = 0;
 260        inet->inet_dport = 0;
 261        return err;
 262}
 263EXPORT_SYMBOL(tcp_v4_connect);
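
/* Editor's note: tcp_v4_connect() is reached from the connect(2) system call
 * through the AF_INET stream protocol ops. A minimal user-space sketch of the
 * client side that exercises this path (plain POSIX sockets, error handling
 * trimmed; the destination address and port are only examples):
 *
 *     #include <arpa/inet.h>
 *     #include <netinet/in.h>
 *     #include <sys/socket.h>
 *     #include <unistd.h>
 *
 *     static int connect_example(void)
 *     {
 *             struct sockaddr_in dst = { .sin_family = AF_INET,
 *                                        .sin_port = htons(80) };
 *             int fd = socket(AF_INET, SOCK_STREAM, 0);
 *
 *             inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);
 *             if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0) {
 *                     close(fd);
 *                     return -1;
 *             }
 *             return fd;     // connected socket; the SYN was sent via tcp_v4_connect()
 *     }
 */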
 264
 265/*
 266 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 267 * It can be called through tcp_release_cb() if socket was owned by user
 268 * at the time tcp_v4_err() was called to handle ICMP message.
 269 */
 270void tcp_v4_mtu_reduced(struct sock *sk)
 271{
 272        struct dst_entry *dst;
 273        struct inet_sock *inet = inet_sk(sk);
 274        u32 mtu = tcp_sk(sk)->mtu_info;
 275
 276        dst = inet_csk_update_pmtu(sk, mtu);
 277        if (!dst)
 278                return;
 279
  280        /* Something is about to go wrong... Remember the soft error
  281         * in case this connection is not able to recover.
  282         */
 283        if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
 284                sk->sk_err_soft = EMSGSIZE;
 285
 286        mtu = dst_mtu(dst);
 287
 288        if (inet->pmtudisc != IP_PMTUDISC_DONT &&
 289            ip_sk_accept_pmtu(sk) &&
 290            inet_csk(sk)->icsk_pmtu_cookie > mtu) {
 291                tcp_sync_mss(sk, mtu);
 292
 293                /* Resend the TCP packet because it's
 294                 * clear that the old packet has been
 295                 * dropped. This is the new "fast" path mtu
 296                 * discovery.
 297                 */
 298                tcp_simple_retransmit(sk);
 299        } /* else let the usual retransmit timer handle it */
 300}
 301EXPORT_SYMBOL(tcp_v4_mtu_reduced);
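
/* Editor's note: the pmtudisc test above means a socket that opted out of path
 * MTU discovery is left alone here and the normal retransmit timer recovers
 * instead. A minimal user-space sketch of opting out, using the standard Linux
 * IP_MTU_DISCOVER option (assumes the libc headers expose IP_PMTUDISC_DONT):
 *
 *     #include <netinet/in.h>
 *     #include <sys/socket.h>
 *
 *     static int disable_pmtu_discovery(int fd)
 *     {
 *             int val = IP_PMTUDISC_DONT;
 *
 *             return setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER,
 *                               &val, sizeof(val));
 *     }
 */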
 302
 303static void do_redirect(struct sk_buff *skb, struct sock *sk)
 304{
 305        struct dst_entry *dst = __sk_dst_check(sk, 0);
 306
 307        if (dst)
 308                dst->ops->redirect(dst, sk, skb);
 309}
 310
 311
 312/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
 313void tcp_req_err(struct sock *sk, u32 seq, bool abort)
 314{
 315        struct request_sock *req = inet_reqsk(sk);
 316        struct net *net = sock_net(sk);
 317
 318        /* ICMPs are not backlogged, hence we cannot get
 319         * an established socket here.
 320         */
 321        if (seq != tcp_rsk(req)->snt_isn) {
 322                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
 323        } else if (abort) {
 324                /*
 325                 * Still in SYN_RECV, just remove it silently.
 326                 * There is no good way to pass the error to the newly
 327                 * created socket, and POSIX does not want network
 328                 * errors returned from accept().
 329                 */
 330                inet_csk_reqsk_queue_drop(req->rsk_listener, req);
 331                tcp_listendrop(req->rsk_listener);
 332        }
 333        reqsk_put(req);
 334}
 335EXPORT_SYMBOL(tcp_req_err);
 336
 337/*
 338 * This routine is called by the ICMP module when it gets some
 339 * sort of error condition.  If err < 0 then the socket should
 340 * be closed and the error returned to the user.  If err > 0
 341 * it's just the icmp type << 8 | icmp code.  After adjustment
 342 * header points to the first 8 bytes of the tcp header.  We need
 343 * to find the appropriate port.
 344 *
 345 * The locking strategy used here is very "optimistic". When
 346 * someone else accesses the socket the ICMP is just dropped
 347 * and for some paths there is no check at all.
 348 * A more general error queue to queue errors for later handling
 349 * is probably better.
 350 *
 351 */
 352
 353void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 354{
 355        const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
 356        struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
 357        struct inet_connection_sock *icsk;
 358        struct tcp_sock *tp;
 359        struct inet_sock *inet;
 360        const int type = icmp_hdr(icmp_skb)->type;
 361        const int code = icmp_hdr(icmp_skb)->code;
 362        struct sock *sk;
 363        struct sk_buff *skb;
 364        struct request_sock *fastopen;
 365        __u32 seq, snd_una;
 366        __u32 remaining;
 367        int err;
 368        struct net *net = dev_net(icmp_skb->dev);
 369
 370        sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
 371                                       th->dest, iph->saddr, ntohs(th->source),
 372                                       inet_iif(icmp_skb));
 373        if (!sk) {
 374                __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
 375                return;
 376        }
 377        if (sk->sk_state == TCP_TIME_WAIT) {
 378                inet_twsk_put(inet_twsk(sk));
 379                return;
 380        }
 381        seq = ntohl(th->seq);
 382        if (sk->sk_state == TCP_NEW_SYN_RECV)
 383                return tcp_req_err(sk, seq,
 384                                  type == ICMP_PARAMETERPROB ||
 385                                  type == ICMP_TIME_EXCEEDED ||
 386                                  (type == ICMP_DEST_UNREACH &&
 387                                   (code == ICMP_NET_UNREACH ||
 388                                    code == ICMP_HOST_UNREACH)));
 389
 390        bh_lock_sock(sk);
 391        /* If too many ICMPs get dropped on busy
 392         * servers this needs to be solved differently.
  393         * We do take care of the PMTU discovery (RFC1191) special case:
  394         * we can receive locally generated ICMP messages while the socket is held.
 395         */
 396        if (sock_owned_by_user(sk)) {
 397                if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
 398                        __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
 399        }
 400        if (sk->sk_state == TCP_CLOSE)
 401                goto out;
 402
 403        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
 404                __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
 405                goto out;
 406        }
 407
 408        icsk = inet_csk(sk);
 409        tp = tcp_sk(sk);
  410        /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
 411        fastopen = tp->fastopen_rsk;
 412        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
 413        if (sk->sk_state != TCP_LISTEN &&
 414            !between(seq, snd_una, tp->snd_nxt)) {
 415                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
 416                goto out;
 417        }
 418
 419        switch (type) {
 420        case ICMP_REDIRECT:
 421                do_redirect(icmp_skb, sk);
 422                goto out;
 423        case ICMP_SOURCE_QUENCH:
 424                /* Just silently ignore these. */
 425                goto out;
 426        case ICMP_PARAMETERPROB:
 427                err = EPROTO;
 428                break;
 429        case ICMP_DEST_UNREACH:
 430                if (code > NR_ICMP_UNREACH)
 431                        goto out;
 432
 433                if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
 434                        /* We are not interested in TCP_LISTEN and open_requests
  435                         * (SYN-ACKs sent out by Linux are always < 576 bytes so
 436                         * they should go through unfragmented).
 437                         */
 438                        if (sk->sk_state == TCP_LISTEN)
 439                                goto out;
 440
 441                        tp->mtu_info = info;
 442                        if (!sock_owned_by_user(sk)) {
 443                                tcp_v4_mtu_reduced(sk);
 444                        } else {
 445                                if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
 446                                        sock_hold(sk);
 447                        }
 448                        goto out;
 449                }
 450
 451                err = icmp_err_convert[code].errno;
 452                /* check if icmp_skb allows revert of backoff
 453                 * (see draft-zimmermann-tcp-lcd) */
 454                if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
 455                        break;
 456                if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
 457                    !icsk->icsk_backoff || fastopen)
 458                        break;
 459
 460                if (sock_owned_by_user(sk))
 461                        break;
 462
 463                icsk->icsk_backoff--;
 464                icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
 465                                               TCP_TIMEOUT_INIT;
 466                icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
 467
 468                skb = tcp_write_queue_head(sk);
 469                BUG_ON(!skb);
 470
 471                remaining = icsk->icsk_rto -
 472                            min(icsk->icsk_rto,
 473                                tcp_time_stamp - tcp_skb_timestamp(skb));
 474
 475                if (remaining) {
 476                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 477                                                  remaining, TCP_RTO_MAX);
 478                } else {
  479                        /* The reverted RTO has already expired for this
  480                         * retransmission, so retransmit now. */
 481                        tcp_retransmit_timer(sk);
 482                }
 483
 484                break;
 485        case ICMP_TIME_EXCEEDED:
 486                err = EHOSTUNREACH;
 487                break;
 488        default:
 489                goto out;
 490        }
 491
 492        switch (sk->sk_state) {
 493        case TCP_SYN_SENT:
 494        case TCP_SYN_RECV:
  495                /* Only in fast or simultaneous open. If a fast open socket
  496                 * is already accepted it is treated as a connected one below.
 497                 */
 498                if (fastopen && !fastopen->sk)
 499                        break;
 500
 501                if (!sock_owned_by_user(sk)) {
 502                        sk->sk_err = err;
 503
 504                        sk->sk_error_report(sk);
 505
 506                        tcp_done(sk);
 507                } else {
 508                        sk->sk_err_soft = err;
 509                }
 510                goto out;
 511        }
 512
 513        /* If we've already connected we will keep trying
 514         * until we time out, or the user gives up.
 515         *
  516         * rfc1122 4.2.3.9 allows only PROTO_UNREACH and PORT_UNREACH to be
  517         * treated as hard errors (well, FRAG_FAILED too, but it is
  518         * obsoleted by pmtu discovery).
  519         *
  520         * Note that in the modern internet, where routing is unreliable
  521         * and broken firewalls sit in every dark corner sending random
  522         * errors ordered by their masters, even these two messages have
  523         * finally lost their original sense (even Linux sends invalid PORT_UNREACHs).
  524         *
  525         * Now we are in compliance with the RFCs.
 526         *                                                      --ANK (980905)
 527         */
 528
 529        inet = inet_sk(sk);
 530        if (!sock_owned_by_user(sk) && inet->recverr) {
 531                sk->sk_err = err;
 532                sk->sk_error_report(sk);
 533        } else  { /* Only an error on timeout */
 534                sk->sk_err_soft = err;
 535        }
 536
 537out:
 538        bh_unlock_sock(sk);
 539        sock_put(sk);
 540}
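
/* Editor's note: as the recverr branch above shows, ICMP-derived errors on a
 * connection owned by the user (or with IP_RECVERR off) are only recorded in
 * sk_err_soft, so the application sees them on timeout at the earliest. A
 * minimal sketch of requesting immediate error reporting with the standard
 * Linux IP_RECVERR option (see ip(7)):
 *
 *     #include <netinet/in.h>
 *     #include <sys/socket.h>
 *
 *     static int enable_recverr(int fd)
 *     {
 *             int on = 1;
 *
 *             return setsockopt(fd, IPPROTO_IP, IP_RECVERR, &on, sizeof(on));
 *     }
 */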
 541
 542void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 543{
 544        struct tcphdr *th = tcp_hdr(skb);
 545
 546        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 547                th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
 548                skb->csum_start = skb_transport_header(skb) - skb->head;
 549                skb->csum_offset = offsetof(struct tcphdr, check);
 550        } else {
 551                th->check = tcp_v4_check(skb->len, saddr, daddr,
 552                                         csum_partial(th,
 553                                                      th->doff << 2,
 554                                                      skb->csum));
 555        }
 556}
 557
 558/* This routine computes an IPv4 TCP checksum. */
 559void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
 560{
 561        const struct inet_sock *inet = inet_sk(sk);
 562
 563        __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
 564}
 565EXPORT_SYMBOL(tcp_v4_send_check);
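
/* Editor's note: with CHECKSUM_PARTIAL the NIC finishes the checksum, so
 * __tcp_v4_send_check() only seeds th->check with the pseudo-header sum and
 * records csum_start/csum_offset for the device; otherwise the full checksum
 * is computed in software. For reference, a minimal, self-contained sketch of
 * the RFC 1071 ones'-complement sum this is built on (illustrative only, not
 * the kernel's optimized implementation):
 *
 *     #include <stddef.h>
 *     #include <stdint.h>
 *
 *     static uint16_t inet_csum_sketch(const void *data, size_t len)
 *     {
 *             const uint8_t *p = data;
 *             uint32_t sum = 0;
 *
 *             while (len > 1) {             // add 16-bit big-endian words
 *                     sum += (uint32_t)p[0] << 8 | p[1];
 *                     p += 2;
 *                     len -= 2;
 *             }
 *             if (len)                      // pad a trailing odd byte
 *                     sum += (uint32_t)p[0] << 8;
 *             while (sum >> 16)             // fold carries back in
 *                     sum = (sum & 0xffff) + (sum >> 16);
 *             return (uint16_t)~sum;        // ones'-complement of the sum
 *     }
 */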
 566
 567/*
 568 *      This routine will send an RST to the other tcp.
 569 *
  570 *      Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
  571 *                    for the reset?
  572 *      Answer: if a packet caused an RST, it is not for a socket
  573 *              existing in our system; if it is matched to a socket,
  574 *              it is just a duplicate segment or a bug in the other side's TCP.
  575 *              So we build the reply based only on parameters that
  576 *              arrived with the segment.
 577 *      Exception: precedence violation. We do not implement it in any case.
 578 */
 579
 580static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 581{
 582        const struct tcphdr *th = tcp_hdr(skb);
 583        struct {
 584                struct tcphdr th;
 585#ifdef CONFIG_TCP_MD5SIG
 586                __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
 587#endif
 588        } rep;
 589        struct ip_reply_arg arg;
 590#ifdef CONFIG_TCP_MD5SIG
 591        struct tcp_md5sig_key *key = NULL;
 592        const __u8 *hash_location = NULL;
 593        unsigned char newhash[16];
 594        int genhash;
 595        struct sock *sk1 = NULL;
 596#endif
 597        struct net *net;
 598
 599        /* Never send a reset in response to a reset. */
 600        if (th->rst)
 601                return;
 602
  603        /* If sk is not NULL, it means we did a successful lookup and the incoming
  604         * route had to be correct. prequeue might have dropped our dst.
  605         */
 606        if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
 607                return;
 608
 609        /* Swap the send and the receive. */
 610        memset(&rep, 0, sizeof(rep));
 611        rep.th.dest   = th->source;
 612        rep.th.source = th->dest;
 613        rep.th.doff   = sizeof(struct tcphdr) / 4;
 614        rep.th.rst    = 1;
 615
 616        if (th->ack) {
 617                rep.th.seq = th->ack_seq;
 618        } else {
 619                rep.th.ack = 1;
 620                rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
 621                                       skb->len - (th->doff << 2));
 622        }
 623
 624        memset(&arg, 0, sizeof(arg));
 625        arg.iov[0].iov_base = (unsigned char *)&rep;
 626        arg.iov[0].iov_len  = sizeof(rep.th);
 627
 628        net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
 629#ifdef CONFIG_TCP_MD5SIG
 630        rcu_read_lock();
 631        hash_location = tcp_parse_md5sig_option(th);
 632        if (sk && sk_fullsock(sk)) {
 633                key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
 634                                        &ip_hdr(skb)->saddr, AF_INET);
 635        } else if (hash_location) {
  636                /*
  637                 * The active side is lost. Try to find the listening socket through
  638                 * the source port, and then find the md5 key through that socket.
  639                 * We do not lose any security here:
  640                 * the incoming packet is checked against the md5 hash of the key we find;
  641                 * no RST is generated if the md5 hash doesn't match.
  642                 */
 643                sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
 644                                             ip_hdr(skb)->saddr,
 645                                             th->source, ip_hdr(skb)->daddr,
 646                                             ntohs(th->source), inet_iif(skb));
 647                /* don't send rst if it can't find key */
 648                if (!sk1)
 649                        goto out;
 650
 651                key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
 652                                        &ip_hdr(skb)->saddr, AF_INET);
 653                if (!key)
 654                        goto out;
 655
 656
 657                genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
 658                if (genhash || memcmp(hash_location, newhash, 16) != 0)
 659                        goto out;
 660
 661        }
 662
 663        if (key) {
 664                rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 665                                   (TCPOPT_NOP << 16) |
 666                                   (TCPOPT_MD5SIG << 8) |
 667                                   TCPOLEN_MD5SIG);
 668                /* Update length and the length the header thinks exists */
 669                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 670                rep.th.doff = arg.iov[0].iov_len / 4;
 671
 672                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
 673                                     key, ip_hdr(skb)->saddr,
 674                                     ip_hdr(skb)->daddr, &rep.th);
 675        }
 676#endif
 677        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 678                                      ip_hdr(skb)->saddr, /* XXX */
 679                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 680        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 681        arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;
 682
  683        /* When the socket is gone, all its binding information is lost and
  684         * routing might fail in this case. No choice here: if we choose to force
  685         * the input interface, we will misroute in case of an asymmetric route.
  686         */
 687        if (sk)
 688                arg.bound_dev_if = sk->sk_bound_dev_if;
 689
 690        BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
 691                     offsetof(struct inet_timewait_sock, tw_bound_dev_if));
 692
 693        arg.tos = ip_hdr(skb)->tos;
 694        local_bh_disable();
 695        ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
 696                              skb, &TCP_SKB_CB(skb)->header.h4.opt,
 697                              ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
 698                              &arg, arg.iov[0].iov_len);
 699
 700        __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
 701        __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
 702        local_bh_enable();
 703
 704#ifdef CONFIG_TCP_MD5SIG
 705out:
 706        rcu_read_unlock();
 707#endif
 708}
 709
  710/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
  711   outside of socket context, is certainly ugly. What can I do?
  712 */
 713
 714static void tcp_v4_send_ack(struct net *net,
 715                            struct sk_buff *skb, u32 seq, u32 ack,
 716                            u32 win, u32 tsval, u32 tsecr, int oif,
 717                            struct tcp_md5sig_key *key,
 718                            int reply_flags, u8 tos)
 719{
 720        const struct tcphdr *th = tcp_hdr(skb);
 721        struct {
 722                struct tcphdr th;
 723                __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
 724#ifdef CONFIG_TCP_MD5SIG
 725                           + (TCPOLEN_MD5SIG_ALIGNED >> 2)
 726#endif
 727                        ];
 728        } rep;
 729        struct ip_reply_arg arg;
 730
 731        memset(&rep.th, 0, sizeof(struct tcphdr));
 732        memset(&arg, 0, sizeof(arg));
 733
 734        arg.iov[0].iov_base = (unsigned char *)&rep;
 735        arg.iov[0].iov_len  = sizeof(rep.th);
 736        if (tsecr) {
 737                rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 738                                   (TCPOPT_TIMESTAMP << 8) |
 739                                   TCPOLEN_TIMESTAMP);
 740                rep.opt[1] = htonl(tsval);
 741                rep.opt[2] = htonl(tsecr);
 742                arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
 743        }
 744
 745        /* Swap the send and the receive. */
 746        rep.th.dest    = th->source;
 747        rep.th.source  = th->dest;
 748        rep.th.doff    = arg.iov[0].iov_len / 4;
 749        rep.th.seq     = htonl(seq);
 750        rep.th.ack_seq = htonl(ack);
 751        rep.th.ack     = 1;
 752        rep.th.window  = htons(win);
 753
 754#ifdef CONFIG_TCP_MD5SIG
 755        if (key) {
 756                int offset = (tsecr) ? 3 : 0;
 757
 758                rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
 759                                          (TCPOPT_NOP << 16) |
 760                                          (TCPOPT_MD5SIG << 8) |
 761                                          TCPOLEN_MD5SIG);
 762                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 763                rep.th.doff = arg.iov[0].iov_len/4;
 764
 765                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
 766                                    key, ip_hdr(skb)->saddr,
 767                                    ip_hdr(skb)->daddr, &rep.th);
 768        }
 769#endif
 770        arg.flags = reply_flags;
 771        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 772                                      ip_hdr(skb)->saddr, /* XXX */
 773                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 774        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 775        if (oif)
 776                arg.bound_dev_if = oif;
 777        arg.tos = tos;
 778        local_bh_disable();
 779        ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
 780                              skb, &TCP_SKB_CB(skb)->header.h4.opt,
 781                              ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
 782                              &arg, arg.iov[0].iov_len);
 783
 784        __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
 785        local_bh_enable();
 786}
 787
 788static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 789{
 790        struct inet_timewait_sock *tw = inet_twsk(sk);
 791        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 792
 793        tcp_v4_send_ack(sock_net(sk), skb,
 794                        tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 795                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 796                        tcp_time_stamp + tcptw->tw_ts_offset,
 797                        tcptw->tw_ts_recent,
 798                        tw->tw_bound_dev_if,
 799                        tcp_twsk_md5_key(tcptw),
 800                        tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
 801                        tw->tw_tos
 802                        );
 803
 804        inet_twsk_put(tw);
 805}
 806
 807static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 808                                  struct request_sock *req)
 809{
 810        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
 811         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
 812         */
 813        u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
 814                                             tcp_sk(sk)->snd_nxt;
 815
 816        /* RFC 7323 2.3
 817         * The window field (SEG.WND) of every outgoing segment, with the
 818         * exception of <SYN> segments, MUST be right-shifted by
 819         * Rcv.Wind.Shift bits:
 820         */
 821        tcp_v4_send_ack(sock_net(sk), skb, seq,
 822                        tcp_rsk(req)->rcv_nxt,
 823                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
 824                        tcp_time_stamp,
 825                        req->ts_recent,
 826                        0,
 827                        tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
 828                                          AF_INET),
 829                        inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
 830                        ip_hdr(skb)->tos);
 831}
 832
 833/*
 834 *      Send a SYN-ACK after having received a SYN.
 835 *      This still operates on a request_sock only, not on a big
 836 *      socket.
 837 */
 838static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
 839                              struct flowi *fl,
 840                              struct request_sock *req,
 841                              struct tcp_fastopen_cookie *foc,
 842                              enum tcp_synack_type synack_type)
 843{
 844        const struct inet_request_sock *ireq = inet_rsk(req);
 845        struct flowi4 fl4;
 846        int err = -1;
 847        struct sk_buff *skb;
 848
 849        /* First, grab a route. */
 850        if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 851                return -1;
 852
 853        skb = tcp_make_synack(sk, dst, req, foc, synack_type);
 854
 855        if (skb) {
 856                __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
 857
 858                err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
 859                                            ireq->ir_rmt_addr,
 860                                            ireq->opt);
 861                err = net_xmit_eval(err);
 862        }
 863
 864        return err;
 865}
 866
 867/*
 868 *      IPv4 request_sock destructor.
 869 */
 870static void tcp_v4_reqsk_destructor(struct request_sock *req)
 871{
 872        kfree(inet_rsk(req)->opt);
 873}
 874
 875#ifdef CONFIG_TCP_MD5SIG
 876/*
 877 * RFC2385 MD5 checksumming requires a mapping of
 878 * IP address->MD5 Key.
 879 * We need to maintain these in the sk structure.
 880 */
 881
 882/* Find the Key structure for an address.  */
 883struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
 884                                         const union tcp_md5_addr *addr,
 885                                         int family)
 886{
 887        const struct tcp_sock *tp = tcp_sk(sk);
 888        struct tcp_md5sig_key *key;
 889        unsigned int size = sizeof(struct in_addr);
 890        const struct tcp_md5sig_info *md5sig;
 891
 892        /* caller either holds rcu_read_lock() or socket lock */
 893        md5sig = rcu_dereference_check(tp->md5sig_info,
 894                                       lockdep_sock_is_held(sk));
 895        if (!md5sig)
 896                return NULL;
 897#if IS_ENABLED(CONFIG_IPV6)
 898        if (family == AF_INET6)
 899                size = sizeof(struct in6_addr);
 900#endif
 901        hlist_for_each_entry_rcu(key, &md5sig->head, node) {
 902                if (key->family != family)
 903                        continue;
 904                if (!memcmp(&key->addr, addr, size))
 905                        return key;
 906        }
 907        return NULL;
 908}
 909EXPORT_SYMBOL(tcp_md5_do_lookup);
 910
 911struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
 912                                         const struct sock *addr_sk)
 913{
 914        const union tcp_md5_addr *addr;
 915
 916        addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
 917        return tcp_md5_do_lookup(sk, addr, AF_INET);
 918}
 919EXPORT_SYMBOL(tcp_v4_md5_lookup);
 920
 921/* This can be called on a newly created socket, from other files */
 922int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
 923                   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
 924{
 925        /* Add Key to the list */
 926        struct tcp_md5sig_key *key;
 927        struct tcp_sock *tp = tcp_sk(sk);
 928        struct tcp_md5sig_info *md5sig;
 929
 930        key = tcp_md5_do_lookup(sk, addr, family);
 931        if (key) {
 932                /* Pre-existing entry - just update that one. */
 933                memcpy(key->key, newkey, newkeylen);
 934                key->keylen = newkeylen;
 935                return 0;
 936        }
 937
 938        md5sig = rcu_dereference_protected(tp->md5sig_info,
 939                                           lockdep_sock_is_held(sk));
 940        if (!md5sig) {
 941                md5sig = kmalloc(sizeof(*md5sig), gfp);
 942                if (!md5sig)
 943                        return -ENOMEM;
 944
 945                sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 946                INIT_HLIST_HEAD(&md5sig->head);
 947                rcu_assign_pointer(tp->md5sig_info, md5sig);
 948        }
 949
 950        key = sock_kmalloc(sk, sizeof(*key), gfp);
 951        if (!key)
 952                return -ENOMEM;
 953        if (!tcp_alloc_md5sig_pool()) {
 954                sock_kfree_s(sk, key, sizeof(*key));
 955                return -ENOMEM;
 956        }
 957
 958        memcpy(key->key, newkey, newkeylen);
 959        key->keylen = newkeylen;
 960        key->family = family;
 961        memcpy(&key->addr, addr,
 962               (family == AF_INET6) ? sizeof(struct in6_addr) :
 963                                      sizeof(struct in_addr));
 964        hlist_add_head_rcu(&key->node, &md5sig->head);
 965        return 0;
 966}
 967EXPORT_SYMBOL(tcp_md5_do_add);
 968
 969int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
 970{
 971        struct tcp_md5sig_key *key;
 972
 973        key = tcp_md5_do_lookup(sk, addr, family);
 974        if (!key)
 975                return -ENOENT;
 976        hlist_del_rcu(&key->node);
 977        atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
 978        kfree_rcu(key, rcu);
 979        return 0;
 980}
 981EXPORT_SYMBOL(tcp_md5_do_del);
 982
 983static void tcp_clear_md5_list(struct sock *sk)
 984{
 985        struct tcp_sock *tp = tcp_sk(sk);
 986        struct tcp_md5sig_key *key;
 987        struct hlist_node *n;
 988        struct tcp_md5sig_info *md5sig;
 989
 990        md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
 991
 992        hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
 993                hlist_del_rcu(&key->node);
 994                atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
 995                kfree_rcu(key, rcu);
 996        }
 997}
 998
 999static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1000                                 int optlen)
1001{
1002        struct tcp_md5sig cmd;
1003        struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1004
1005        if (optlen < sizeof(cmd))
1006                return -EINVAL;
1007
1008        if (copy_from_user(&cmd, optval, sizeof(cmd)))
1009                return -EFAULT;
1010
1011        if (sin->sin_family != AF_INET)
1012                return -EINVAL;
1013
1014        if (!cmd.tcpm_keylen)
1015                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1016                                      AF_INET);
1017
1018        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1019                return -EINVAL;
1020
1021        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1022                              AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1023                              GFP_KERNEL);
1024}
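
/* Editor's note: tcp_v4_parse_md5_keys() is the setsockopt() backend for the
 * TCP_MD5SIG option (RFC 2385 segment signatures). A minimal user-space sketch
 * of installing a key for one peer; struct tcp_md5sig comes from the uapi
 * <linux/tcp.h>, and the caller must keep keylen <= TCP_MD5SIG_MAXKEYLEN:
 *
 *     #include <linux/tcp.h>
 *     #include <netinet/in.h>
 *     #include <string.h>
 *     #include <sys/socket.h>
 *
 *     static int install_md5_key(int fd, const struct sockaddr_in *peer,
 *                                const void *key, unsigned int keylen)
 *     {
 *             struct tcp_md5sig md5;
 *
 *             memset(&md5, 0, sizeof(md5));
 *             memcpy(&md5.tcpm_addr, peer, sizeof(*peer));
 *             md5.tcpm_keylen = keylen;
 *             memcpy(md5.tcpm_key, key, keylen);
 *             return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG,
 *                               &md5, sizeof(md5));
 *     }
 */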
1025
1026static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
1027                                   __be32 daddr, __be32 saddr,
1028                                   const struct tcphdr *th, int nbytes)
1029{
1030        struct tcp4_pseudohdr *bp;
1031        struct scatterlist sg;
1032        struct tcphdr *_th;
1033
1034        bp = hp->scratch;
1035        bp->saddr = saddr;
1036        bp->daddr = daddr;
1037        bp->pad = 0;
1038        bp->protocol = IPPROTO_TCP;
1039        bp->len = cpu_to_be16(nbytes);
1040
1041        _th = (struct tcphdr *)(bp + 1);
1042        memcpy(_th, th, sizeof(*th));
1043        _th->check = 0;
1044
1045        sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
1046        ahash_request_set_crypt(hp->md5_req, &sg, NULL,
1047                                sizeof(*bp) + sizeof(*th));
1048        return crypto_ahash_update(hp->md5_req);
1049}
1050
1051static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1052                               __be32 daddr, __be32 saddr, const struct tcphdr *th)
1053{
1054        struct tcp_md5sig_pool *hp;
1055        struct ahash_request *req;
1056
1057        hp = tcp_get_md5sig_pool();
1058        if (!hp)
1059                goto clear_hash_noput;
1060        req = hp->md5_req;
1061
1062        if (crypto_ahash_init(req))
1063                goto clear_hash;
1064        if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
1065                goto clear_hash;
1066        if (tcp_md5_hash_key(hp, key))
1067                goto clear_hash;
1068        ahash_request_set_crypt(req, NULL, md5_hash, 0);
1069        if (crypto_ahash_final(req))
1070                goto clear_hash;
1071
1072        tcp_put_md5sig_pool();
1073        return 0;
1074
1075clear_hash:
1076        tcp_put_md5sig_pool();
1077clear_hash_noput:
1078        memset(md5_hash, 0, 16);
1079        return 1;
1080}
1081
1082int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
1083                        const struct sock *sk,
1084                        const struct sk_buff *skb)
1085{
1086        struct tcp_md5sig_pool *hp;
1087        struct ahash_request *req;
1088        const struct tcphdr *th = tcp_hdr(skb);
1089        __be32 saddr, daddr;
1090
1091        if (sk) { /* valid for establish/request sockets */
1092                saddr = sk->sk_rcv_saddr;
1093                daddr = sk->sk_daddr;
1094        } else {
1095                const struct iphdr *iph = ip_hdr(skb);
1096                saddr = iph->saddr;
1097                daddr = iph->daddr;
1098        }
1099
1100        hp = tcp_get_md5sig_pool();
1101        if (!hp)
1102                goto clear_hash_noput;
1103        req = hp->md5_req;
1104
1105        if (crypto_ahash_init(req))
1106                goto clear_hash;
1107
1108        if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
1109                goto clear_hash;
1110        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1111                goto clear_hash;
1112        if (tcp_md5_hash_key(hp, key))
1113                goto clear_hash;
1114        ahash_request_set_crypt(req, NULL, md5_hash, 0);
1115        if (crypto_ahash_final(req))
1116                goto clear_hash;
1117
1118        tcp_put_md5sig_pool();
1119        return 0;
1120
1121clear_hash:
1122        tcp_put_md5sig_pool();
1123clear_hash_noput:
1124        memset(md5_hash, 0, 16);
1125        return 1;
1126}
1127EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1128
1129#endif
1130
1131/* Called with rcu_read_lock() */
1132static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
1133                                    const struct sk_buff *skb)
1134{
1135#ifdef CONFIG_TCP_MD5SIG
1136        /*
1137         * This gets called for each TCP segment that arrives
1138         * so we want to be efficient.
1139         * We have 3 drop cases:
1140         * o No MD5 hash and one expected.
1141         * o MD5 hash and we're not expecting one.
 1142         * o MD5 hash and it's wrong.
1143         */
1144        const __u8 *hash_location = NULL;
1145        struct tcp_md5sig_key *hash_expected;
1146        const struct iphdr *iph = ip_hdr(skb);
1147        const struct tcphdr *th = tcp_hdr(skb);
1148        int genhash;
1149        unsigned char newhash[16];
1150
1151        hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1152                                          AF_INET);
1153        hash_location = tcp_parse_md5sig_option(th);
1154
1155        /* We've parsed the options - do we have a hash? */
1156        if (!hash_expected && !hash_location)
1157                return false;
1158
1159        if (hash_expected && !hash_location) {
1160                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1161                return true;
1162        }
1163
1164        if (!hash_expected && hash_location) {
1165                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1166                return true;
1167        }
1168
 1169        /* Okay, so we have both hash_expected and hash_location -
 1170         * we need to calculate the checksum.
 1171         */
1172        genhash = tcp_v4_md5_hash_skb(newhash,
1173                                      hash_expected,
1174                                      NULL, skb);
1175
1176        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1177                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
1178                net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1179                                     &iph->saddr, ntohs(th->source),
1180                                     &iph->daddr, ntohs(th->dest),
1181                                     genhash ? " tcp_v4_calc_md5_hash failed"
1182                                     : "");
1183                return true;
1184        }
1185        return false;
1186#endif
1187        return false;
1188}
1189
1190static void tcp_v4_init_req(struct request_sock *req,
1191                            const struct sock *sk_listener,
1192                            struct sk_buff *skb)
1193{
1194        struct inet_request_sock *ireq = inet_rsk(req);
1195
1196        sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
1197        sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
1198        ireq->opt = tcp_v4_save_options(skb);
1199}
1200
1201static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
1202                                          struct flowi *fl,
1203                                          const struct request_sock *req,
1204                                          bool *strict)
1205{
1206        struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
1207
1208        if (strict) {
1209                if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
1210                        *strict = true;
1211                else
1212                        *strict = false;
1213        }
1214
1215        return dst;
1216}
1217
1218struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1219        .family         =       PF_INET,
1220        .obj_size       =       sizeof(struct tcp_request_sock),
1221        .rtx_syn_ack    =       tcp_rtx_synack,
1222        .send_ack       =       tcp_v4_reqsk_send_ack,
1223        .destructor     =       tcp_v4_reqsk_destructor,
1224        .send_reset     =       tcp_v4_send_reset,
1225        .syn_ack_timeout =      tcp_syn_ack_timeout,
1226};
1227
1228static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1229        .mss_clamp      =       TCP_MSS_DEFAULT,
1230#ifdef CONFIG_TCP_MD5SIG
1231        .req_md5_lookup =       tcp_v4_md5_lookup,
1232        .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1233#endif
1234        .init_req       =       tcp_v4_init_req,
1235#ifdef CONFIG_SYN_COOKIES
1236        .cookie_init_seq =      cookie_v4_init_sequence,
1237#endif
1238        .route_req      =       tcp_v4_route_req,
1239        .init_seq       =       tcp_v4_init_sequence,
1240        .send_synack    =       tcp_v4_send_synack,
1241};
1242
1243int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1244{
 1245        /* Never answer SYNs sent to broadcast or multicast addresses */
1246        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1247                goto drop;
1248
1249        return tcp_conn_request(&tcp_request_sock_ops,
1250                                &tcp_request_sock_ipv4_ops, sk, skb);
1251
1252drop:
1253        tcp_listendrop(sk);
1254        return 0;
1255}
1256EXPORT_SYMBOL(tcp_v4_conn_request);
1257
1258
1259/*
1260 * The three way handshake has completed - we got a valid synack -
1261 * now create the new socket.
1262 */
1263struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1264                                  struct request_sock *req,
1265                                  struct dst_entry *dst,
1266                                  struct request_sock *req_unhash,
1267                                  bool *own_req)
1268{
1269        struct inet_request_sock *ireq;
1270        struct inet_sock *newinet;
1271        struct tcp_sock *newtp;
1272        struct sock *newsk;
1273#ifdef CONFIG_TCP_MD5SIG
1274        struct tcp_md5sig_key *key;
1275#endif
1276        struct ip_options_rcu *inet_opt;
1277
1278        if (sk_acceptq_is_full(sk))
1279                goto exit_overflow;
1280
1281        newsk = tcp_create_openreq_child(sk, req, skb);
1282        if (!newsk)
1283                goto exit_nonewsk;
1284
1285        newsk->sk_gso_type = SKB_GSO_TCPV4;
1286        inet_sk_rx_dst_set(newsk, skb);
1287
1288        newtp                 = tcp_sk(newsk);
1289        newinet               = inet_sk(newsk);
1290        ireq                  = inet_rsk(req);
1291        sk_daddr_set(newsk, ireq->ir_rmt_addr);
1292        sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
1293        newsk->sk_bound_dev_if = ireq->ir_iif;
1294        newinet->inet_saddr           = ireq->ir_loc_addr;
1295        inet_opt              = ireq->opt;
1296        rcu_assign_pointer(newinet->inet_opt, inet_opt);
1297        ireq->opt             = NULL;
1298        newinet->mc_index     = inet_iif(skb);
1299        newinet->mc_ttl       = ip_hdr(skb)->ttl;
1300        newinet->rcv_tos      = ip_hdr(skb)->tos;
1301        inet_csk(newsk)->icsk_ext_hdr_len = 0;
1302        if (inet_opt)
1303                inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1304        newinet->inet_id = newtp->write_seq ^ jiffies;
1305
1306        if (!dst) {
1307                dst = inet_csk_route_child_sock(sk, newsk, req);
1308                if (!dst)
1309                        goto put_and_exit;
1310        } else {
1311                /* syncookie case : see end of cookie_v4_check() */
1312        }
1313        sk_setup_caps(newsk, dst);
1314
1315        tcp_ca_openreq_child(newsk, dst);
1316
1317        tcp_sync_mss(newsk, dst_mtu(dst));
1318        newtp->advmss = dst_metric_advmss(dst);
1319        if (tcp_sk(sk)->rx_opt.user_mss &&
1320            tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1321                newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1322
1323        tcp_initialize_rcv_mss(newsk);
1324
1325#ifdef CONFIG_TCP_MD5SIG
1326        /* Copy over the MD5 key from the original socket */
1327        key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1328                                AF_INET);
1329        if (key) {
1330                /*
1331                 * We're using one, so create a matching key
1332                 * on the newsk structure. If we fail to get
1333                 * memory, then we end up not copying the key
1334                 * across. Shucks.
1335                 */
1336                tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1337                               AF_INET, key->key, key->keylen, GFP_ATOMIC);
1338                sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1339        }
1340#endif
1341
1342        if (__inet_inherit_port(sk, newsk) < 0)
1343                goto put_and_exit;
1344        *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1345        if (*own_req)
1346                tcp_move_syn(newtp, req);
1347
1348        return newsk;
1349
1350exit_overflow:
1351        NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1352exit_nonewsk:
1353        dst_release(dst);
1354exit:
1355        tcp_listendrop(sk);
1356        return NULL;
1357put_and_exit:
1358        inet_csk_prepare_forced_close(newsk);
1359        tcp_done(newsk);
1360        goto exit;
1361}
1362EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
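
/* Editor's note: the child socket built above is what the listener hands to
 * user space from accept(2) once the handshake (or a Fast Open SYN) completes.
 * A minimal user-space sketch of the listening side, plain POSIX sockets with
 * error handling trimmed; the port number is only an example:
 *
 *     #include <netinet/in.h>
 *     #include <sys/socket.h>
 *     #include <unistd.h>
 *
 *     static int accept_one(void)
 *     {
 *             struct sockaddr_in addr = { .sin_family = AF_INET,
 *                                         .sin_port = htons(8080) };
 *             int lfd = socket(AF_INET, SOCK_STREAM, 0);
 *             int cfd;
 *
 *             addr.sin_addr.s_addr = htonl(INADDR_ANY);
 *             bind(lfd, (struct sockaddr *)&addr, sizeof(addr));
 *             listen(lfd, 128);
 *             cfd = accept(lfd, NULL, NULL);  // the newsk created above
 *             close(lfd);
 *             return cfd;
 *     }
 */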
1363
1364static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
1365{
1366#ifdef CONFIG_SYN_COOKIES
1367        const struct tcphdr *th = tcp_hdr(skb);
1368
1369        if (!th->syn)
1370                sk = cookie_v4_check(sk, skb);
1371#endif
1372        return sk;
1373}
1374
 1375/* The socket must have its spinlock held when we get
1376 * here, unless it is a TCP_LISTEN socket.
1377 *
1378 * We have a potential double-lock case here, so even when
1379 * doing backlog processing we use the BH locking scheme.
1380 * This is because we cannot sleep with the original spinlock
1381 * held.
1382 */
1383int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1384{
1385        struct sock *rsk;
1386
1387        if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1388                struct dst_entry *dst = sk->sk_rx_dst;
1389
1390                sock_rps_save_rxhash(sk, skb);
1391                sk_mark_napi_id(sk, skb);
1392                if (dst) {
1393                        if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1394                            !dst->ops->check(dst, 0)) {
1395                                dst_release(dst);
1396                                sk->sk_rx_dst = NULL;
1397                        }
1398                }
1399                tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1400                return 0;
1401        }
1402
1403        if (tcp_checksum_complete(skb))
1404                goto csum_err;
1405
1406        if (sk->sk_state == TCP_LISTEN) {
1407                struct sock *nsk = tcp_v4_cookie_check(sk, skb);
1408
1409                if (!nsk)
1410                        goto discard;
1411                if (nsk != sk) {
1412                        sock_rps_save_rxhash(nsk, skb);
1413                        sk_mark_napi_id(nsk, skb);
1414                        if (tcp_child_process(sk, nsk, skb)) {
1415                                rsk = nsk;
1416                                goto reset;
1417                        }
1418                        return 0;
1419                }
1420        } else
1421                sock_rps_save_rxhash(sk, skb);
1422
1423        if (tcp_rcv_state_process(sk, skb)) {
1424                rsk = sk;
1425                goto reset;
1426        }
1427        return 0;
1428
1429reset:
1430        tcp_v4_send_reset(rsk, skb);
1431discard:
1432        kfree_skb(skb);
1433        /* Be careful here. If this function gets more complicated and
1434         * gcc suffers from register pressure on the x86, sk (in %ebx)
1435         * might be destroyed here. This current version compiles correctly,
1436         * but you have been warned.
1437         */
1438        return 0;
1439
1440csum_err:
1441        TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1442        TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1443        goto discard;
1444}
1445EXPORT_SYMBOL(tcp_v4_do_rcv);
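
/* Note: tcp_v4_do_rcv() is reached either directly from tcp_v4_rcv() when
 * the socket is not owned by a user context, or later through the socket
 * backlog (it is wired up as .backlog_rcv in tcp_prot further down), which
 * is drained when the owner releases the socket lock.
 */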
1446
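/* Sketch of the early-demux idea as implemented below: for PACKET_HOST
 * segments we do an early lookup in the established hash while still in the
 * IP input path, and if a socket is found we attach it (and, for full
 * sockets, its cached input route) to the skb so the main receive path can
 * skip a second socket lookup and a routing decision.
 */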
1447void tcp_v4_early_demux(struct sk_buff *skb)
1448{
1449        const struct iphdr *iph;
1450        const struct tcphdr *th;
1451        struct sock *sk;
1452
1453        if (skb->pkt_type != PACKET_HOST)
1454                return;
1455
1456        if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1457                return;
1458
1459        iph = ip_hdr(skb);
1460        th = tcp_hdr(skb);
1461
1462        if (th->doff < sizeof(struct tcphdr) / 4)
1463                return;
1464
1465        sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1466                                       iph->saddr, th->source,
1467                                       iph->daddr, ntohs(th->dest),
1468                                       skb->skb_iif);
1469        if (sk) {
1470                skb->sk = sk;
1471                skb->destructor = sock_edemux;
1472                if (sk_fullsock(sk)) {
1473                        struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1474
1475                        if (dst)
1476                                dst = dst_check(dst, 0);
1477                        if (dst &&
1478                            inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1479                                skb_dst_set_noref(skb, dst);
1480                }
1481        }
1482}
1483
1484/* Packet is added to VJ-style prequeue for processing in process
1485 * context, if a reader task is waiting. Apparently, this exciting
1486 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1487 * failed somewhere. Latency? Burstiness? Well, at least now we will
1488 * see why it failed. 8)8)                               --ANK
1489 *
1490 */
1491bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1492{
1493        struct tcp_sock *tp = tcp_sk(sk);
1494
1495        if (sysctl_tcp_low_latency || !tp->ucopy.task)
1496                return false;
1497
1498        if (skb->len <= tcp_hdrlen(skb) &&
1499            skb_queue_len(&tp->ucopy.prequeue) == 0)
1500                return false;
1501
1502        /* Before escaping RCU protected region, we need to take care of skb
1503         * dst. Prequeue is only enabled for established sockets.
1504         * For such sockets, we might need the skb dst only to set sk->sk_rx_dst.
1505         * Instead of doing a full sk_rx_dst validation here, let's perform
1506         * an optimistic check.
1507         */
1508        if (likely(sk->sk_rx_dst))
1509                skb_dst_drop(skb);
1510        else
1511                skb_dst_force_safe(skb);
1512
1513        __skb_queue_tail(&tp->ucopy.prequeue, skb);
1514        tp->ucopy.memory += skb->truesize;
1515        if (skb_queue_len(&tp->ucopy.prequeue) >= 32 ||
1516            tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
1517                struct sk_buff *skb1;
1518
1519                BUG_ON(sock_owned_by_user(sk));
1520                __NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED,
1521                                skb_queue_len(&tp->ucopy.prequeue));
1522
1523                while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
1524                        sk_backlog_rcv(sk, skb1);
1525
1526                tp->ucopy.memory = 0;
1527        } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1528                wake_up_interruptible_sync_poll(sk_sleep(sk),
1529                                           POLLIN | POLLRDNORM | POLLRDBAND);
1530                if (!inet_csk_ack_scheduled(sk))
1531                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1532                                                  (3 * tcp_rto_min(sk)) / 4,
1533                                                  TCP_RTO_MAX);
1534        }
1535        return true;
1536}
1537EXPORT_SYMBOL(tcp_prequeue);
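
/* Note on the wakeup branch above: when the first skb lands on the prequeue
 * the reader is woken, and if no ACK is already scheduled a delayed-ACK
 * timer is armed at 3/4 of the minimum RTO, so an ACK can still go out even
 * if the reader does not run promptly.
 */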
1538
1539bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
1540{
1541        u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
1542
1543        /* Only socket owner can try to collapse/prune rx queues
1544         * to reduce memory overhead, so add a little headroom here.
1545         * Only a few socket backlogs are likely to be non-empty at the same time.
1546         */
1547        limit += 64*1024;
1548
1549        /* In case all data was pulled from skb frags (in __pskb_pull_tail()),
1550         * we can fix skb->truesize to its real value to avoid future drops.
1551         * This is valid because skb is not yet charged to the socket.
1552         * It has been noticed that pure SACK packets were sometimes dropped
1553         * (if cooked by drivers without the copybreak feature).
1554         */
1555        if (!skb->data_len)
1556                skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
1557
1558        if (unlikely(sk_add_backlog(sk, skb, limit))) {
1559                bh_unlock_sock(sk);
1560                __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
1561                return true;
1562        }
1563        return false;
1564}
1565EXPORT_SYMBOL(tcp_add_backlog);
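
/* Rough arithmetic example (assuming the common defaults of sk_rcvbuf
 * ~87380 bytes and sk_sndbuf ~16384 bytes, as set from tcp_rmem[1] and
 * tcp_wmem[1]): the limit computed above is about
 * 87380 + 16384 + 65536 = 169300 bytes of skb truesize before
 * sk_add_backlog() starts refusing packets and LINUX_MIB_TCPBACKLOGDROP is
 * bumped.
 */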
1566
1567int tcp_filter(struct sock *sk, struct sk_buff *skb)
1568{
1569        struct tcphdr *th = (struct tcphdr *)skb->data;
1570        unsigned int eaten = skb->len;
1571        int err;
1572
1573        err = sk_filter_trim_cap(sk, skb, th->doff * 4);
1574        if (!err) {
1575                eaten -= skb->len;
1576                TCP_SKB_CB(skb)->end_seq -= eaten;
1577        }
1578        return err;
1579}
1580EXPORT_SYMBOL(tcp_filter);
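
/* Note: sk_filter_trim_cap() runs any attached socket filter (e.g. one set
 * with SO_ATTACH_FILTER) but caps trimming at th->doff * 4, so a filter may
 * shrink the payload yet can never cut into the TCP header; end_seq is then
 * adjusted by the number of bytes actually trimmed so sequence accounting
 * stays consistent.
 */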
1581
1582/*
1583 *      From tcp_input.c
1584 */
1585
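/* Rough outline of tcp_v4_rcv() below: validate and pull the TCP header,
 * verify the checksum, stash the parsed fields in TCP_SKB_CB(), look the
 * socket up in the hash tables, then hand the skb to the right handler:
 * request sockets go through tcp_check_req(), TIME_WAIT sockets through
 * tcp_timewait_state_process(), and full sockets through tcp_v4_do_rcv()
 * either directly, via the prequeue, or via the socket backlog.
 */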
1586int tcp_v4_rcv(struct sk_buff *skb)
1587{
1588        struct net *net = dev_net(skb->dev);
1589        const struct iphdr *iph;
1590        const struct tcphdr *th;
1591        bool refcounted;
1592        struct sock *sk;
1593        int ret;
1594
1595        if (skb->pkt_type != PACKET_HOST)
1596                goto discard_it;
1597
1598        /* Count it even if it's bad */
1599        __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1600
1601        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1602                goto discard_it;
1603
1604        th = (const struct tcphdr *)skb->data;
1605
1606        if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
1607                goto bad_packet;
1608        if (!pskb_may_pull(skb, th->doff * 4))
1609                goto discard_it;
1610
1611        /* An explanation is required here, I think.
1612         * Packet length and doff are validated by header prediction,
1613         * provided the case of th->doff == 0 is eliminated.
1614         * So, we defer the checks. */
1615
1616        if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
1617                goto csum_error;
1618
1619        th = (const struct tcphdr *)skb->data;
1620        iph = ip_hdr(skb);
1621        /* This is tricky: we move IPCB to its correct location inside TCP_SKB_CB();
1622         * barrier() makes sure the compiler won't play aliasing games.
1623         */
1624        memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
1625                sizeof(struct inet_skb_parm));
1626        barrier();
1627
1628        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1629        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1630                                    skb->len - th->doff * 4);
1631        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1632        TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1633        TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1634        TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1635        TCP_SKB_CB(skb)->sacked  = 0;
1636
1637lookup:
1638        sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
1639                               th->dest, &refcounted);
1640        if (!sk)
1641                goto no_tcp_socket;
1642
1643process:
1644        if (sk->sk_state == TCP_TIME_WAIT)
1645                goto do_time_wait;
1646
1647        if (sk->sk_state == TCP_NEW_SYN_RECV) {
1648                struct request_sock *req = inet_reqsk(sk);
1649                struct sock *nsk;
1650
1651                sk = req->rsk_listener;
1652                if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
1653                        sk_drops_add(sk, skb);
1654                        reqsk_put(req);
1655                        goto discard_it;
1656                }
1657                if (unlikely(sk->sk_state != TCP_LISTEN)) {
1658                        inet_csk_reqsk_queue_drop_and_put(sk, req);
1659                        goto lookup;
1660                }
1661                /* We own a reference on the listener; increase it again
1662                 * as we might lose it too soon.
1663                 */
1664                sock_hold(sk);
1665                refcounted = true;
1666                nsk = tcp_check_req(sk, skb, req, false);
1667                if (!nsk) {
1668                        reqsk_put(req);
1669                        goto discard_and_relse;
1670                }
1671                if (nsk == sk) {
1672                        reqsk_put(req);
1673                } else if (tcp_child_process(sk, nsk, skb)) {
1674                        tcp_v4_send_reset(nsk, skb);
1675                        goto discard_and_relse;
1676                } else {
1677                        sock_put(sk);
1678                        return 0;
1679                }
1680        }
1681        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1682                __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1683                goto discard_and_relse;
1684        }
1685
1686        if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1687                goto discard_and_relse;
1688
1689        if (tcp_v4_inbound_md5_hash(sk, skb))
1690                goto discard_and_relse;
1691
1692        nf_reset(skb);
1693
1694        if (tcp_filter(sk, skb))
1695                goto discard_and_relse;
1696        th = (const struct tcphdr *)skb->data;
1697        iph = ip_hdr(skb);
1698
1699        skb->dev = NULL;
1700
1701        if (sk->sk_state == TCP_LISTEN) {
1702                ret = tcp_v4_do_rcv(sk, skb);
1703                goto put_and_return;
1704        }
1705
1706        sk_incoming_cpu_update(sk);
1707
1708        bh_lock_sock_nested(sk);
1709        tcp_segs_in(tcp_sk(sk), skb);
1710        ret = 0;
1711        if (!sock_owned_by_user(sk)) {
1712                if (!tcp_prequeue(sk, skb))
1713                        ret = tcp_v4_do_rcv(sk, skb);
1714        } else if (tcp_add_backlog(sk, skb)) {
1715                goto discard_and_relse;
1716        }
1717        bh_unlock_sock(sk);
1718
1719put_and_return:
1720        if (refcounted)
1721                sock_put(sk);
1722
1723        return ret;
1724
1725no_tcp_socket:
1726        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1727                goto discard_it;
1728
1729        if (tcp_checksum_complete(skb)) {
1730csum_error:
1731                __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1732bad_packet:
1733                __TCP_INC_STATS(net, TCP_MIB_INERRS);
1734        } else {
1735                tcp_v4_send_reset(NULL, skb);
1736        }
1737
1738discard_it:
1739        /* Discard frame. */
1740        kfree_skb(skb);
1741        return 0;
1742
1743discard_and_relse:
1744        sk_drops_add(sk, skb);
1745        if (refcounted)
1746                sock_put(sk);
1747        goto discard_it;
1748
1749do_time_wait:
1750        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1751                inet_twsk_put(inet_twsk(sk));
1752                goto discard_it;
1753        }
1754
1755        if (tcp_checksum_complete(skb)) {
1756                inet_twsk_put(inet_twsk(sk));
1757                goto csum_error;
1758        }
1759        switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1760        case TCP_TW_SYN: {
1761                struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1762                                                        &tcp_hashinfo, skb,
1763                                                        __tcp_hdrlen(th),
1764                                                        iph->saddr, th->source,
1765                                                        iph->daddr, th->dest,
1766                                                        inet_iif(skb));
1767                if (sk2) {
1768                        inet_twsk_deschedule_put(inet_twsk(sk));
1769                        sk = sk2;
1770                        refcounted = false;
1771                        goto process;
1772                }
1773                /* Fall through to ACK */
1774        }
1775        case TCP_TW_ACK:
1776                tcp_v4_timewait_ack(sk, skb);
1777                break;
1778        case TCP_TW_RST:
1779                tcp_v4_send_reset(sk, skb);
1780                inet_twsk_deschedule_put(inet_twsk(sk));
1781                goto discard_it;
1782        case TCP_TW_SUCCESS:;
1783        }
1784        goto discard_it;
1785}
1786
1787static struct timewait_sock_ops tcp_timewait_sock_ops = {
1788        .twsk_obj_size  = sizeof(struct tcp_timewait_sock),
1789        .twsk_unique    = tcp_twsk_unique,
1790        .twsk_destructor= tcp_twsk_destructor,
1791};
1792
1793void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
1794{
1795        struct dst_entry *dst = skb_dst(skb);
1796
1797        if (dst && dst_hold_safe(dst)) {
1798                sk->sk_rx_dst = dst;
1799                inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1800        }
1801}
1802EXPORT_SYMBOL(inet_sk_rx_dst_set);
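
/* Note: inet_sk_rx_dst_set() caches the input route on the socket so the
 * established fast path in tcp_v4_do_rcv() and the early demux above can
 * reuse it; dst_hold_safe() makes sure we only keep a dst on which a
 * reference could actually be taken.
 */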
1803
1804const struct inet_connection_sock_af_ops ipv4_specific = {
1805        .queue_xmit        = ip_queue_xmit,
1806        .send_check        = tcp_v4_send_check,
1807        .rebuild_header    = inet_sk_rebuild_header,
1808        .sk_rx_dst_set     = inet_sk_rx_dst_set,
1809        .conn_request      = tcp_v4_conn_request,
1810        .syn_recv_sock     = tcp_v4_syn_recv_sock,
1811        .net_header_len    = sizeof(struct iphdr),
1812        .setsockopt        = ip_setsockopt,
1813        .getsockopt        = ip_getsockopt,
1814        .addr2sockaddr     = inet_csk_addr2sockaddr,
1815        .sockaddr_len      = sizeof(struct sockaddr_in),
1816        .bind_conflict     = inet_csk_bind_conflict,
1817#ifdef CONFIG_COMPAT
1818        .compat_setsockopt = compat_ip_setsockopt,
1819        .compat_getsockopt = compat_ip_getsockopt,
1820#endif
1821        .mtu_reduced       = tcp_v4_mtu_reduced,
1822};
1823EXPORT_SYMBOL(ipv4_specific);
1824
1825#ifdef CONFIG_TCP_MD5SIG
1826static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1827        .md5_lookup             = tcp_v4_md5_lookup,
1828        .calc_md5_hash          = tcp_v4_md5_hash_skb,
1829        .md5_parse              = tcp_v4_parse_md5_keys,
1830};
1831#endif
1832
1833/* NOTE: A lot of things set to zero explicitly by call to
1834 *       sk_alloc() so need not be done here.
1835 */
1836static int tcp_v4_init_sock(struct sock *sk)
1837{
1838        struct inet_connection_sock *icsk = inet_csk(sk);
1839
1840        tcp_init_sock(sk);
1841
1842        icsk->icsk_af_ops = &ipv4_specific;
1843
1844#ifdef CONFIG_TCP_MD5SIG
1845        tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1846#endif
1847
1848        return 0;
1849}
1850
1851void tcp_v4_destroy_sock(struct sock *sk)
1852{
1853        struct tcp_sock *tp = tcp_sk(sk);
1854
1855        tcp_clear_xmit_timers(sk);
1856
1857        tcp_cleanup_congestion_control(sk);
1858
1859        /* Clean up the write buffer. */
1860        tcp_write_queue_purge(sk);
1861
1862        /* Cleans up our, hopefully empty, out_of_order_queue. */
1863        skb_rbtree_purge(&tp->out_of_order_queue);
1864
1865#ifdef CONFIG_TCP_MD5SIG
1866        /* Clean up the MD5 key list, if any */
1867        if (tp->md5sig_info) {
1868                tcp_clear_md5_list(sk);
1869                kfree_rcu(tp->md5sig_info, rcu);
1870                tp->md5sig_info = NULL;
1871        }
1872#endif
1873
1874        /* Clean up the prequeue; it really should be empty. */
1875        __skb_queue_purge(&tp->ucopy.prequeue);
1876
1877        /* Clean up a referenced TCP bind bucket. */
1878        if (inet_csk(sk)->icsk_bind_hash)
1879                inet_put_port(sk);
1880
1881        BUG_ON(tp->fastopen_rsk);
1882
1883        /* If socket is aborted during connect operation */
1884        tcp_free_fastopen_req(tp);
1885        tcp_saved_syn_free(tp);
1886
1887        local_bh_disable();
1888        sk_sockets_allocated_dec(sk);
1889        local_bh_enable();
1890}
1891EXPORT_SYMBOL(tcp_v4_destroy_sock);
1892
1893#ifdef CONFIG_PROC_FS
1894/* Proc filesystem TCP sock list dumping. */
1895
1896/*
1897 * Get the next listener socket following cur.  If cur is NULL, get the first socket
1898 * starting from bucket given in st->bucket; when st->bucket is zero the
1899 * very first socket in the hash table is returned.
1900 */
1901static void *listening_get_next(struct seq_file *seq, void *cur)
1902{
1903        struct tcp_iter_state *st = seq->private;
1904        struct net *net = seq_file_net(seq);
1905        struct inet_listen_hashbucket *ilb;
1906        struct sock *sk = cur;
1907
1908        if (!sk) {
1909get_head:
1910                ilb = &tcp_hashinfo.listening_hash[st->bucket];
1911                spin_lock_bh(&ilb->lock);
1912                sk = sk_head(&ilb->head);
1913                st->offset = 0;
1914                goto get_sk;
1915        }
1916        ilb = &tcp_hashinfo.listening_hash[st->bucket];
1917        ++st->num;
1918        ++st->offset;
1919
1920        sk = sk_next(sk);
1921get_sk:
1922        sk_for_each_from(sk) {
1923                if (!net_eq(sock_net(sk), net))
1924                        continue;
1925                if (sk->sk_family == st->family)
1926                        return sk;
1927        }
1928        spin_unlock_bh(&ilb->lock);
1929        st->offset = 0;
1930        if (++st->bucket < INET_LHTABLE_SIZE)
1931                goto get_head;
1932        return NULL;
1933}
1934
1935static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1936{
1937        struct tcp_iter_state *st = seq->private;
1938        void *rc;
1939
1940        st->bucket = 0;
1941        st->offset = 0;
1942        rc = listening_get_next(seq, NULL);
1943
1944        while (rc && *pos) {
1945                rc = listening_get_next(seq, rc);
1946                --*pos;
1947        }
1948        return rc;
1949}
1950
1951static inline bool empty_bucket(const struct tcp_iter_state *st)
1952{
1953        return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
1954}
1955
1956/*
1957 * Get first established socket starting from bucket given in st->bucket.
1958 * If st->bucket is zero, the very first socket in the hash is returned.
1959 */
1960static void *established_get_first(struct seq_file *seq)
1961{
1962        struct tcp_iter_state *st = seq->private;
1963        struct net *net = seq_file_net(seq);
1964        void *rc = NULL;
1965
1966        st->offset = 0;
1967        for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1968                struct sock *sk;
1969                struct hlist_nulls_node *node;
1970                spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1971
1972                /* Lockless fast path for the common case of empty buckets */
1973                if (empty_bucket(st))
1974                        continue;
1975
1976                spin_lock_bh(lock);
1977                sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
1978                        if (sk->sk_family != st->family ||
1979                            !net_eq(sock_net(sk), net)) {
1980                                continue;
1981                        }
1982                        rc = sk;
1983                        goto out;
1984                }
1985                spin_unlock_bh(lock);
1986        }
1987out:
1988        return rc;
1989}
1990
1991static void *established_get_next(struct seq_file *seq, void *cur)
1992{
1993        struct sock *sk = cur;
1994        struct hlist_nulls_node *node;
1995        struct tcp_iter_state *st = seq->private;
1996        struct net *net = seq_file_net(seq);
1997
1998        ++st->num;
1999        ++st->offset;
2000
2001        sk = sk_nulls_next(sk);
2002
2003        sk_nulls_for_each_from(sk, node) {
2004                if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2005                        return sk;
2006        }
2007
2008        spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2009        ++st->bucket;
2010        return established_get_first(seq);
2011}
2012
2013static void *established_get_idx(struct seq_file *seq, loff_t pos)
2014{
2015        struct tcp_iter_state *st = seq->private;
2016        void *rc;
2017
2018        st->bucket = 0;
2019        rc = established_get_first(seq);
2020
2021        while (rc && pos) {
2022                rc = established_get_next(seq, rc);
2023                --pos;
2024        }
2025        return rc;
2026}
2027
2028static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2029{
2030        void *rc;
2031        struct tcp_iter_state *st = seq->private;
2032
2033        st->state = TCP_SEQ_STATE_LISTENING;
2034        rc        = listening_get_idx(seq, &pos);
2035
2036        if (!rc) {
2037                st->state = TCP_SEQ_STATE_ESTABLISHED;
2038                rc        = established_get_idx(seq, pos);
2039        }
2040
2041        return rc;
2042}
2043
2044static void *tcp_seek_last_pos(struct seq_file *seq)
2045{
2046        struct tcp_iter_state *st = seq->private;
2047        int offset = st->offset;
2048        int orig_num = st->num;
2049        void *rc = NULL;
2050
2051        switch (st->state) {
2052        case TCP_SEQ_STATE_LISTENING:
2053                if (st->bucket >= INET_LHTABLE_SIZE)
2054                        break;
2055                st->state = TCP_SEQ_STATE_LISTENING;
2056                rc = listening_get_next(seq, NULL);
2057                while (offset-- && rc)
2058                        rc = listening_get_next(seq, rc);
2059                if (rc)
2060                        break;
2061                st->bucket = 0;
2062                st->state = TCP_SEQ_STATE_ESTABLISHED;
2063                /* Fallthrough */
2064        case TCP_SEQ_STATE_ESTABLISHED:
2065                if (st->bucket > tcp_hashinfo.ehash_mask)
2066                        break;
2067                rc = established_get_first(seq);
2068                while (offset-- && rc)
2069                        rc = established_get_next(seq, rc);
2070        }
2071
2072        st->num = orig_num;
2073
2074        return rc;
2075}
2076
2077static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2078{
2079        struct tcp_iter_state *st = seq->private;
2080        void *rc;
2081
2082        if (*pos && *pos == st->last_pos) {
2083                rc = tcp_seek_last_pos(seq);
2084                if (rc)
2085                        goto out;
2086        }
2087
2088        st->state = TCP_SEQ_STATE_LISTENING;
2089        st->num = 0;
2090        st->bucket = 0;
2091        st->offset = 0;
2092        rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2093
2094out:
2095        st->last_pos = *pos;
2096        return rc;
2097}
2098
2099static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2100{
2101        struct tcp_iter_state *st = seq->private;
2102        void *rc = NULL;
2103
2104        if (v == SEQ_START_TOKEN) {
2105                rc = tcp_get_idx(seq, 0);
2106                goto out;
2107        }
2108
2109        switch (st->state) {
2110        case TCP_SEQ_STATE_LISTENING:
2111                rc = listening_get_next(seq, v);
2112                if (!rc) {
2113                        st->state = TCP_SEQ_STATE_ESTABLISHED;
2114                        st->bucket = 0;
2115                        st->offset = 0;
2116                        rc        = established_get_first(seq);
2117                }
2118                break;
2119        case TCP_SEQ_STATE_ESTABLISHED:
2120                rc = established_get_next(seq, v);
2121                break;
2122        }
2123out:
2124        ++*pos;
2125        st->last_pos = *pos;
2126        return rc;
2127}
2128
2129static void tcp_seq_stop(struct seq_file *seq, void *v)
2130{
2131        struct tcp_iter_state *st = seq->private;
2132
2133        switch (st->state) {
2134        case TCP_SEQ_STATE_LISTENING:
2135                if (v != SEQ_START_TOKEN)
2136                        spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2137                break;
2138        case TCP_SEQ_STATE_ESTABLISHED:
2139                if (v)
2140                        spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2141                break;
2142        }
2143}
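
/* Iteration note: the seq_file walk above first runs over the listening
 * hash, then flips st->state to TCP_SEQ_STATE_ESTABLISHED and walks the
 * ehash; whichever bucket lock is held when output pauses is dropped in
 * tcp_seq_stop(), and st->last_pos together with tcp_seek_last_pos() lets a
 * subsequent read resume near where it left off instead of rescanning from
 * the start.
 */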
2144
2145int tcp_seq_open(struct inode *inode, struct file *file)
2146{
2147        struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
2148        struct tcp_iter_state *s;
2149        int err;
2150
2151        err = seq_open_net(inode, file, &afinfo->seq_ops,
2152                          sizeof(struct tcp_iter_state));
2153        if (err < 0)
2154                return err;
2155
2156        s = ((struct seq_file *)file->private_data)->private;
2157        s->family               = afinfo->family;
2158        s->last_pos             = 0;
2159        return 0;
2160}
2161EXPORT_SYMBOL(tcp_seq_open);
2162
2163int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2164{
2165        int rc = 0;
2166        struct proc_dir_entry *p;
2167
2168        afinfo->seq_ops.start           = tcp_seq_start;
2169        afinfo->seq_ops.next            = tcp_seq_next;
2170        afinfo->seq_ops.stop            = tcp_seq_stop;
2171
2172        p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2173                             afinfo->seq_fops, afinfo);
2174        if (!p)
2175                rc = -ENOMEM;
2176        return rc;
2177}
2178EXPORT_SYMBOL(tcp_proc_register);
2179
2180void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2181{
2182        remove_proc_entry(afinfo->name, net->proc_net);
2183}
2184EXPORT_SYMBOL(tcp_proc_unregister);
2185
2186static void get_openreq4(const struct request_sock *req,
2187                         struct seq_file *f, int i)
2188{
2189        const struct inet_request_sock *ireq = inet_rsk(req);
2190        long delta = req->rsk_timer.expires - jiffies;
2191
2192        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2193                " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
2194                i,
2195                ireq->ir_loc_addr,
2196                ireq->ir_num,
2197                ireq->ir_rmt_addr,
2198                ntohs(ireq->ir_rmt_port),
2199                TCP_SYN_RECV,
2200                0, 0, /* could print option size, but that is af dependent. */
2201                1,    /* timers active (only the expire timer) */
2202                jiffies_delta_to_clock_t(delta),
2203                req->num_timeout,
2204                from_kuid_munged(seq_user_ns(f),
2205                                 sock_i_uid(req->rsk_listener)),
2206                0,  /* non standard timer */
2207                0, /* open_requests have no inode */
2208                0,
2209                req);
2210}
2211
2212static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2213{
2214        int timer_active;
2215        unsigned long timer_expires;
2216        const struct tcp_sock *tp = tcp_sk(sk);
2217        const struct inet_connection_sock *icsk = inet_csk(sk);
2218        const struct inet_sock *inet = inet_sk(sk);
2219        const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2220        __be32 dest = inet->inet_daddr;
2221        __be32 src = inet->inet_rcv_saddr;
2222        __u16 destp = ntohs(inet->inet_dport);
2223        __u16 srcp = ntohs(inet->inet_sport);
2224        int rx_queue;
2225        int state;
2226
2227        if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2228            icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2229            icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2230                timer_active    = 1;
2231                timer_expires   = icsk->icsk_timeout;
2232        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2233                timer_active    = 4;
2234                timer_expires   = icsk->icsk_timeout;
2235        } else if (timer_pending(&sk->sk_timer)) {
2236                timer_active    = 2;
2237                timer_expires   = sk->sk_timer.expires;
2238        } else {
2239                timer_active    = 0;
2240                timer_expires = jiffies;
2241        }
2242
2243        state = sk_state_load(sk);
2244        if (state == TCP_LISTEN)
2245                rx_queue = sk->sk_ack_backlog;
2246        else
2247                /* Because we don't lock the socket,
2248                 * we might find a transient negative value.
2249                 */
2250                rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2251
2252        seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2253                        "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
2254                i, src, srcp, dest, destp, state,
2255                tp->write_seq - tp->snd_una,
2256                rx_queue,
2257                timer_active,
2258                jiffies_delta_to_clock_t(timer_expires - jiffies),
2259                icsk->icsk_retransmits,
2260                from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
2261                icsk->icsk_probes_out,
2262                sock_i_ino(sk),
2263                atomic_read(&sk->sk_refcnt), sk,
2264                jiffies_to_clock_t(icsk->icsk_rto),
2265                jiffies_to_clock_t(icsk->icsk_ack.ato),
2266                (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2267                tp->snd_cwnd,
2268                state == TCP_LISTEN ?
2269                    fastopenq->max_qlen :
2270                    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
2271}
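
/* Reading the line emitted above (matching the header printed in
 * tcp4_seq_show()): addresses and ports are hex, "st" is the TCP state in
 * hex, tx_queue is bytes queued but not yet acknowledged, rx_queue is bytes
 * received but not yet read, "tr:tm->when" is the active timer kind and its
 * remaining time in clock ticks, followed by retransmits, uid, probe count,
 * inode and further kernel-internal fields such as the socket pointer, rto,
 * ato and cwnd.
 */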
2272
2273static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2274                               struct seq_file *f, int i)
2275{
2276        long delta = tw->tw_timer.expires - jiffies;
2277        __be32 dest, src;
2278        __u16 destp, srcp;
2279
2280        dest  = tw->tw_daddr;
2281        src   = tw->tw_rcv_saddr;
2282        destp = ntohs(tw->tw_dport);
2283        srcp  = ntohs(tw->tw_sport);
2284
2285        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2286                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
2287                i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2288                3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2289                atomic_read(&tw->tw_refcnt), tw);
2290}
2291
2292#define TMPSZ 150
2293
2294static int tcp4_seq_show(struct seq_file *seq, void *v)
2295{
2296        struct tcp_iter_state *st;
2297        struct sock *sk = v;
2298
2299        seq_setwidth(seq, TMPSZ - 1);
2300        if (v == SEQ_START_TOKEN) {
2301                seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
2302                           "rx_queue tr tm->when retrnsmt   uid  timeout "
2303                           "inode");
2304                goto out;
2305        }
2306        st = seq->private;
2307
2308        if (sk->sk_state == TCP_TIME_WAIT)
2309                get_timewait4_sock(v, seq, st->num);
2310        else if (sk->sk_state == TCP_NEW_SYN_RECV)
2311                get_openreq4(v, seq, st->num);
2312        else
2313                get_tcp4_sock(v, seq, st->num);
2314out:
2315        seq_pad(seq, '\n');
2316        return 0;
2317}
2318
2319static const struct file_operations tcp_afinfo_seq_fops = {
2320        .owner   = THIS_MODULE,
2321        .open    = tcp_seq_open,
2322        .read    = seq_read,
2323        .llseek  = seq_lseek,
2324        .release = seq_release_net
2325};
2326
2327static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2328        .name           = "tcp",
2329        .family         = AF_INET,
2330        .seq_fops       = &tcp_afinfo_seq_fops,
2331        .seq_ops        = {
2332                .show           = tcp4_seq_show,
2333        },
2334};
2335
2336static int __net_init tcp4_proc_init_net(struct net *net)
2337{
2338        return tcp_proc_register(net, &tcp4_seq_afinfo);
2339}
2340
2341static void __net_exit tcp4_proc_exit_net(struct net *net)
2342{
2343        tcp_proc_unregister(net, &tcp4_seq_afinfo);
2344}
2345
2346static struct pernet_operations tcp4_net_ops = {
2347        .init = tcp4_proc_init_net,
2348        .exit = tcp4_proc_exit_net,
2349};
2350
2351int __init tcp4_proc_init(void)
2352{
2353        return register_pernet_subsys(&tcp4_net_ops);
2354}
2355
2356void tcp4_proc_exit(void)
2357{
2358        unregister_pernet_subsys(&tcp4_net_ops);
2359}
2360#endif /* CONFIG_PROC_FS */
2361
2362struct proto tcp_prot = {
2363        .name                   = "TCP",
2364        .owner                  = THIS_MODULE,
2365        .close                  = tcp_close,
2366        .connect                = tcp_v4_connect,
2367        .disconnect             = tcp_disconnect,
2368        .accept                 = inet_csk_accept,
2369        .ioctl                  = tcp_ioctl,
2370        .init                   = tcp_v4_init_sock,
2371        .destroy                = tcp_v4_destroy_sock,
2372        .shutdown               = tcp_shutdown,
2373        .setsockopt             = tcp_setsockopt,
2374        .getsockopt             = tcp_getsockopt,
2375        .recvmsg                = tcp_recvmsg,
2376        .sendmsg                = tcp_sendmsg,
2377        .sendpage               = tcp_sendpage,
2378        .backlog_rcv            = tcp_v4_do_rcv,
2379        .release_cb             = tcp_release_cb,
2380        .hash                   = inet_hash,
2381        .unhash                 = inet_unhash,
2382        .get_port               = inet_csk_get_port,
2383        .enter_memory_pressure  = tcp_enter_memory_pressure,
2384        .stream_memory_free     = tcp_stream_memory_free,
2385        .sockets_allocated      = &tcp_sockets_allocated,
2386        .orphan_count           = &tcp_orphan_count,
2387        .memory_allocated       = &tcp_memory_allocated,
2388        .memory_pressure        = &tcp_memory_pressure,
2389        .sysctl_mem             = sysctl_tcp_mem,
2390        .sysctl_wmem            = sysctl_tcp_wmem,
2391        .sysctl_rmem            = sysctl_tcp_rmem,
2392        .max_header             = MAX_TCP_HEADER,
2393        .obj_size               = sizeof(struct tcp_sock),
2394        .slab_flags             = SLAB_DESTROY_BY_RCU,
2395        .twsk_prot              = &tcp_timewait_sock_ops,
2396        .rsk_prot               = &tcp_request_sock_ops,
2397        .h.hashinfo             = &tcp_hashinfo,
2398        .no_autobind            = true,
2399#ifdef CONFIG_COMPAT
2400        .compat_setsockopt      = compat_tcp_setsockopt,
2401        .compat_getsockopt      = compat_tcp_getsockopt,
2402#endif
2403        .diag_destroy           = tcp_abort,
2404};
2405EXPORT_SYMBOL(tcp_prot);
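
/* For reference: tcp_prot is registered by the core IPv4 setup code
 * (inet_init() in af_inet.c calls proto_register() and wires it into the
 * inetsw table), which is how SOCK_STREAM/IPPROTO_TCP sockets end up using
 * the callbacks above.
 */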
2406
2407static void __net_exit tcp_sk_exit(struct net *net)
2408{
2409        int cpu;
2410
2411        for_each_possible_cpu(cpu)
2412                inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2413        free_percpu(net->ipv4.tcp_sk);
2414}
2415
2416static int __net_init tcp_sk_init(struct net *net)
2417{
2418        int res, cpu;
2419
2420        net->ipv4.tcp_sk = alloc_percpu(struct sock *);
2421        if (!net->ipv4.tcp_sk)
2422                return -ENOMEM;
2423
2424        for_each_possible_cpu(cpu) {
2425                struct sock *sk;
2426
2427                res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
2428                                           IPPROTO_TCP, net);
2429                if (res)
2430                        goto fail;
2431                sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2432                *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
2433        }
2434
2435        net->ipv4.sysctl_tcp_ecn = 2;
2436        net->ipv4.sysctl_tcp_ecn_fallback = 1;
2437
2438        net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
2439        net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
2440        net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
2441
2442        net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
2443        net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
2444        net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
2445
2446        net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
2447        net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
2448        net->ipv4.sysctl_tcp_syncookies = 1;
2449        net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
2450        net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
2451        net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
2452        net->ipv4.sysctl_tcp_orphan_retries = 0;
2453        net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
2454        net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
2455
2456        return 0;
2457fail:
2458        tcp_sk_exit(net);
2459
2460        return res;
2461}
2462
2463static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2464{
2465        inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2466}
2467
2468static struct pernet_operations __net_initdata tcp_sk_ops = {
2469       .init       = tcp_sk_init,
2470       .exit       = tcp_sk_exit,
2471       .exit_batch = tcp_sk_exit_batch,
2472};
2473
2474void __init tcp_v4_init(void)
2475{
2476        inet_hashinfo_init(&tcp_hashinfo);
2477        if (register_pernet_subsys(&tcp_sk_ops))
2478                panic("Failed to create the TCP control socket.\n");
2479}
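
/* tcp_v4_init() runs once at boot (called from inet_init()); failing to set
 * up the per-netns control sockets is treated as fatal, hence the panic().
 */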
2480