linux/net/ipv4/tcp_ipv4.c
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              Implementation of the Transmission Control Protocol(TCP).
   7 *
   8 *              IPv4 specific functions
   9 *
  10 *
  11 *              code split from:
  12 *              linux/ipv4/tcp.c
  13 *              linux/ipv4/tcp_input.c
  14 *              linux/ipv4/tcp_output.c
  15 *
  16 *              See tcp.c for author information
  17 *
  18 *      This program is free software; you can redistribute it and/or
  19 *      modify it under the terms of the GNU General Public License
  20 *      as published by the Free Software Foundation; either version
  21 *      2 of the License, or (at your option) any later version.
  22 */
  23
  24/*
  25 * Changes:
  26 *              David S. Miller :       New socket lookup architecture.
  27 *                                      This code is dedicated to John Dyson.
  28 *              David S. Miller :       Change semantics of established hash,
  29 *                                      half is devoted to TIME_WAIT sockets
  30 *                                      and the rest go in the other half.
  31 *              Andi Kleen :            Add support for syncookies and fixed
  32 *                                      some bugs: ip options weren't passed to
  33 *                                      the TCP layer, missed a check for an
  34 *                                      ACK bit.
  35 *              Andi Kleen :            Implemented fast path mtu discovery.
  36 *                                      Fixed many serious bugs in the
  37 *                                      request_sock handling and moved
  38 *                                      most of it into the af independent code.
  39 *                                      Added tail drop and some other bugfixes.
  40 *                                      Added new listen semantics.
  41 *              Mike McLagan    :       Routing by source
  42 *      Juan Jose Ciarlante:            ip_dynaddr bits
  43 *              Andi Kleen:             various fixes.
  44 *      Vitaly E. Lavrov        :       Transparent proxy revived after year
  45 *                                      coma.
  46 *      Andi Kleen              :       Fix new listen.
  47 *      Andi Kleen              :       Fix accept error reporting.
  48 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
   49 *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
  50 *                                      a single port at the same time.
  51 */
  52
  53#define pr_fmt(fmt) "TCP: " fmt
  54
  55#include <linux/bottom_half.h>
  56#include <linux/types.h>
  57#include <linux/fcntl.h>
  58#include <linux/module.h>
  59#include <linux/random.h>
  60#include <linux/cache.h>
  61#include <linux/jhash.h>
  62#include <linux/init.h>
  63#include <linux/times.h>
  64#include <linux/slab.h>
  65
  66#include <net/net_namespace.h>
  67#include <net/icmp.h>
  68#include <net/inet_hashtables.h>
  69#include <net/tcp.h>
  70#include <net/transp_v6.h>
  71#include <net/ipv6.h>
  72#include <net/inet_common.h>
  73#include <net/timewait_sock.h>
  74#include <net/xfrm.h>
  75#include <net/secure_seq.h>
  76#include <net/busy_poll.h>
  77
  78#include <linux/inet.h>
  79#include <linux/ipv6.h>
  80#include <linux/stddef.h>
  81#include <linux/proc_fs.h>
  82#include <linux/seq_file.h>
  83
  84#include <crypto/hash.h>
  85#include <linux/scatterlist.h>
  86
  87int sysctl_tcp_tw_reuse __read_mostly;
  88int sysctl_tcp_low_latency __read_mostly;
  89EXPORT_SYMBOL(sysctl_tcp_low_latency);
  90
  91#ifdef CONFIG_TCP_MD5SIG
  92static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
  93                               __be32 daddr, __be32 saddr, const struct tcphdr *th);
  94#endif
  95
  96struct inet_hashinfo tcp_hashinfo;
  97EXPORT_SYMBOL(tcp_hashinfo);
  98
  99static  __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
 100{
 101        return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
 102                                          ip_hdr(skb)->saddr,
 103                                          tcp_hdr(skb)->dest,
 104                                          tcp_hdr(skb)->source);
 105}
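
/* A note on the call above: secure_tcp_sequence_number() follows the
 * RFC 6528 approach - a keyed hash of the connection 4-tuple
 * (saddr, daddr, sport, dport) mixed with a per-boot secret and offset
 * by a clock component, so ISNs are hard to predict yet still advance
 * over time for a given 4-tuple.
 */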
 106
 107int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 108{
 109        const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
 110        struct tcp_sock *tp = tcp_sk(sk);
 111
 112        /* With PAWS, it is safe from the viewpoint
 113           of data integrity. Even without PAWS it is safe provided sequence
 114           spaces do not overlap i.e. at data rates <= 80Mbit/sec.
 115
  116           Actually, the idea is close to VJ's: only the timestamp cache is
  117           held not per host but per port pair, and the TW bucket is used as the
  118           state holder.
  119
  120           If the TW bucket has already been destroyed we fall back to VJ's scheme
  121           and use the initial timestamp retrieved from the peer table.
 122         */
 123        if (tcptw->tw_ts_recent_stamp &&
 124            (!twp || (sysctl_tcp_tw_reuse &&
 125                             get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
 126                tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 127                if (tp->write_seq == 0)
 128                        tp->write_seq = 1;
 129                tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
 130                tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
 131                sock_hold(sktw);
 132                return 1;
 133        }
 134
 135        return 0;
 136}
 137EXPORT_SYMBOL_GPL(tcp_twsk_unique);
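
/* Why tcp_twsk_unique() picks write_seq = tw_snd_nxt + 65535 + 2: the new
 * incarnation's sequence numbers start beyond anything the old TIME-WAIT
 * connection could still have in flight (its snd_nxt plus a maximal
 * 64KB window), so stray duplicates from the previous connection cannot
 * be mistaken for new data even without PAWS.
 */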
 138
 139/* This will initiate an outgoing connection. */
 140int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 141{
 142        struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
 143        struct inet_sock *inet = inet_sk(sk);
 144        struct tcp_sock *tp = tcp_sk(sk);
 145        __be16 orig_sport, orig_dport;
 146        __be32 daddr, nexthop;
 147        struct flowi4 *fl4;
 148        struct rtable *rt;
 149        int err;
 150        struct ip_options_rcu *inet_opt;
 151
 152        if (addr_len < sizeof(struct sockaddr_in))
 153                return -EINVAL;
 154
 155        if (usin->sin_family != AF_INET)
 156                return -EAFNOSUPPORT;
 157
 158        nexthop = daddr = usin->sin_addr.s_addr;
 159        inet_opt = rcu_dereference_protected(inet->inet_opt,
 160                                             lockdep_sock_is_held(sk));
 161        if (inet_opt && inet_opt->opt.srr) {
 162                if (!daddr)
 163                        return -EINVAL;
 164                nexthop = inet_opt->opt.faddr;
 165        }
 166
 167        orig_sport = inet->inet_sport;
 168        orig_dport = usin->sin_port;
 169        fl4 = &inet->cork.fl.u.ip4;
 170        rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
 171                              RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 172                              IPPROTO_TCP,
 173                              orig_sport, orig_dport, sk);
 174        if (IS_ERR(rt)) {
 175                err = PTR_ERR(rt);
 176                if (err == -ENETUNREACH)
 177                        IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 178                return err;
 179        }
 180
 181        if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
 182                ip_rt_put(rt);
 183                return -ENETUNREACH;
 184        }
 185
 186        if (!inet_opt || !inet_opt->opt.srr)
 187                daddr = fl4->daddr;
 188
 189        if (!inet->inet_saddr)
 190                inet->inet_saddr = fl4->saddr;
 191        sk_rcv_saddr_set(sk, inet->inet_saddr);
 192
 193        if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
 194                /* Reset inherited state */
 195                tp->rx_opt.ts_recent       = 0;
 196                tp->rx_opt.ts_recent_stamp = 0;
 197                if (likely(!tp->repair))
 198                        tp->write_seq      = 0;
 199        }
 200
 201        if (tcp_death_row.sysctl_tw_recycle &&
 202            !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
 203                tcp_fetch_timewait_stamp(sk, &rt->dst);
 204
 205        inet->inet_dport = usin->sin_port;
 206        sk_daddr_set(sk, daddr);
 207
 208        inet_csk(sk)->icsk_ext_hdr_len = 0;
 209        if (inet_opt)
 210                inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
 211
 212        tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
 213
 214        /* Socket identity is still unknown (sport may be zero).
  215         * However we set state to SYN-SENT and, without releasing the socket
  216         * lock, select a source port, enter ourselves into the hash tables and
  217         * complete initialization after this.
 218         */
 219        tcp_set_state(sk, TCP_SYN_SENT);
 220        err = inet_hash_connect(&tcp_death_row, sk);
 221        if (err)
 222                goto failure;
 223
 224        sk_set_txhash(sk);
 225
 226        rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
 227                               inet->inet_sport, inet->inet_dport, sk);
 228        if (IS_ERR(rt)) {
 229                err = PTR_ERR(rt);
 230                rt = NULL;
 231                goto failure;
 232        }
 233        /* OK, now commit destination to socket.  */
 234        sk->sk_gso_type = SKB_GSO_TCPV4;
 235        sk_setup_caps(sk, &rt->dst);
 236
 237        if (!tp->write_seq && likely(!tp->repair))
 238                tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
 239                                                           inet->inet_daddr,
 240                                                           inet->inet_sport,
 241                                                           usin->sin_port);
 242
 243        inet->inet_id = tp->write_seq ^ jiffies;
 244
 245        err = tcp_connect(sk);
 246
 247        rt = NULL;
 248        if (err)
 249                goto failure;
 250
 251        return 0;
 252
 253failure:
 254        /*
 255         * This unhashes the socket and releases the local port,
 256         * if necessary.
 257         */
 258        tcp_set_state(sk, TCP_CLOSE);
 259        ip_rt_put(rt);
 260        sk->sk_route_caps = 0;
 261        inet->inet_dport = 0;
 262        return err;
 263}
 264EXPORT_SYMBOL(tcp_v4_connect);
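
/* For illustration only (userspace view, not part of this file): an
 * application reaches tcp_v4_connect() through the connect() system call,
 * roughly:
 *
 *      int fd = socket(AF_INET, SOCK_STREAM, 0);
 *      struct sockaddr_in dst = {
 *              .sin_family = AF_INET,
 *              .sin_port   = htons(80),
 *      };
 *
 *      inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);
 *      connect(fd, (struct sockaddr *)&dst, sizeof(dst));
 *
 * connect() goes through inet_stream_connect(), which ends up calling
 * sk->sk_prot->connect, i.e. tcp_v4_connect for IPv4 TCP sockets.
 */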
 265
 266/*
 267 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 268 * It can be called through tcp_release_cb() if socket was owned by user
 269 * at the time tcp_v4_err() was called to handle ICMP message.
 270 */
 271void tcp_v4_mtu_reduced(struct sock *sk)
 272{
 273        struct dst_entry *dst;
 274        struct inet_sock *inet = inet_sk(sk);
 275        u32 mtu = tcp_sk(sk)->mtu_info;
 276
 277        dst = inet_csk_update_pmtu(sk, mtu);
 278        if (!dst)
 279                return;
 280
  281        /* Something is about to go wrong... Remember the soft error
  282         * in case this connection is not able to recover.
 283         */
 284        if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
 285                sk->sk_err_soft = EMSGSIZE;
 286
 287        mtu = dst_mtu(dst);
 288
 289        if (inet->pmtudisc != IP_PMTUDISC_DONT &&
 290            ip_sk_accept_pmtu(sk) &&
 291            inet_csk(sk)->icsk_pmtu_cookie > mtu) {
 292                tcp_sync_mss(sk, mtu);
 293
 294                /* Resend the TCP packet because it's
 295                 * clear that the old packet has been
 296                 * dropped. This is the new "fast" path mtu
 297                 * discovery.
 298                 */
 299                tcp_simple_retransmit(sk);
 300        } /* else let the usual retransmit timer handle it */
 301}
 302EXPORT_SYMBOL(tcp_v4_mtu_reduced);
 303
 304static void do_redirect(struct sk_buff *skb, struct sock *sk)
 305{
 306        struct dst_entry *dst = __sk_dst_check(sk, 0);
 307
 308        if (dst)
 309                dst->ops->redirect(dst, sk, skb);
 310}
 311
 312
 313/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
 314void tcp_req_err(struct sock *sk, u32 seq, bool abort)
 315{
 316        struct request_sock *req = inet_reqsk(sk);
 317        struct net *net = sock_net(sk);
 318
 319        /* ICMPs are not backlogged, hence we cannot get
 320         * an established socket here.
 321         */
 322        if (seq != tcp_rsk(req)->snt_isn) {
 323                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
 324        } else if (abort) {
 325                /*
 326                 * Still in SYN_RECV, just remove it silently.
 327                 * There is no good way to pass the error to the newly
 328                 * created socket, and POSIX does not want network
 329                 * errors returned from accept().
 330                 */
 331                inet_csk_reqsk_queue_drop(req->rsk_listener, req);
 332                tcp_listendrop(req->rsk_listener);
 333        }
 334        reqsk_put(req);
 335}
 336EXPORT_SYMBOL(tcp_req_err);
 337
 338/*
 339 * This routine is called by the ICMP module when it gets some
 340 * sort of error condition.  If err < 0 then the socket should
 341 * be closed and the error returned to the user.  If err > 0
 342 * it's just the icmp type << 8 | icmp code.  After adjustment
  343 * it's just the icmp type << 8 | icmp code.  After adjustment, the
  344 * header points to the first 8 bytes of the tcp header.  We need
 345 *
 346 * The locking strategy used here is very "optimistic". When
 347 * someone else accesses the socket the ICMP is just dropped
 348 * and for some paths there is no check at all.
 349 * A more general error queue to queue errors for later handling
 350 * is probably better.
 351 *
 352 */
 353
 354void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 355{
 356        const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
 357        struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
 358        struct inet_connection_sock *icsk;
 359        struct tcp_sock *tp;
 360        struct inet_sock *inet;
 361        const int type = icmp_hdr(icmp_skb)->type;
 362        const int code = icmp_hdr(icmp_skb)->code;
 363        struct sock *sk;
 364        struct sk_buff *skb;
 365        struct request_sock *fastopen;
 366        __u32 seq, snd_una;
 367        __u32 remaining;
 368        int err;
 369        struct net *net = dev_net(icmp_skb->dev);
 370
 371        sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
 372                                       th->dest, iph->saddr, ntohs(th->source),
 373                                       inet_iif(icmp_skb));
 374        if (!sk) {
 375                __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
 376                return;
 377        }
 378        if (sk->sk_state == TCP_TIME_WAIT) {
 379                inet_twsk_put(inet_twsk(sk));
 380                return;
 381        }
 382        seq = ntohl(th->seq);
 383        if (sk->sk_state == TCP_NEW_SYN_RECV)
 384                return tcp_req_err(sk, seq,
 385                                  type == ICMP_PARAMETERPROB ||
 386                                  type == ICMP_TIME_EXCEEDED ||
 387                                  (type == ICMP_DEST_UNREACH &&
 388                                   (code == ICMP_NET_UNREACH ||
 389                                    code == ICMP_HOST_UNREACH)));
 390
 391        bh_lock_sock(sk);
 392        /* If too many ICMPs get dropped on busy
 393         * servers this needs to be solved differently.
  394         * We do take care of the PMTU discovery (RFC1191) special case:
  395         * we can receive locally generated ICMP messages while the socket is held.
 396         */
 397        if (sock_owned_by_user(sk)) {
 398                if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
 399                        __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
 400        }
 401        if (sk->sk_state == TCP_CLOSE)
 402                goto out;
 403
 404        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
 405                __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
 406                goto out;
 407        }
 408
 409        icsk = inet_csk(sk);
 410        tp = tcp_sk(sk);
  411        /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
 412        fastopen = tp->fastopen_rsk;
 413        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
 414        if (sk->sk_state != TCP_LISTEN &&
 415            !between(seq, snd_una, tp->snd_nxt)) {
 416                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
 417                goto out;
 418        }
 419
 420        switch (type) {
 421        case ICMP_REDIRECT:
 422                do_redirect(icmp_skb, sk);
 423                goto out;
 424        case ICMP_SOURCE_QUENCH:
 425                /* Just silently ignore these. */
 426                goto out;
 427        case ICMP_PARAMETERPROB:
 428                err = EPROTO;
 429                break;
 430        case ICMP_DEST_UNREACH:
 431                if (code > NR_ICMP_UNREACH)
 432                        goto out;
 433
 434                if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
 435                        /* We are not interested in TCP_LISTEN and open_requests
  436                         * (SYN-ACKs sent out by Linux are always < 576 bytes, so
  437                         * they should go through unfragmented).
 438                         */
 439                        if (sk->sk_state == TCP_LISTEN)
 440                                goto out;
 441
 442                        tp->mtu_info = info;
 443                        if (!sock_owned_by_user(sk)) {
 444                                tcp_v4_mtu_reduced(sk);
 445                        } else {
 446                                if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
 447                                        sock_hold(sk);
 448                        }
 449                        goto out;
 450                }
 451
 452                err = icmp_err_convert[code].errno;
 453                /* check if icmp_skb allows revert of backoff
 454                 * (see draft-zimmermann-tcp-lcd) */
 455                if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
 456                        break;
 457                if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
 458                    !icsk->icsk_backoff || fastopen)
 459                        break;
 460
 461                if (sock_owned_by_user(sk))
 462                        break;
 463
 464                icsk->icsk_backoff--;
 465                icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
 466                                               TCP_TIMEOUT_INIT;
 467                icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
 468
 469                skb = tcp_write_queue_head(sk);
 470                BUG_ON(!skb);
 471
 472                remaining = icsk->icsk_rto -
 473                            min(icsk->icsk_rto,
 474                                tcp_time_stamp - tcp_skb_timestamp(skb));
 475
 476                if (remaining) {
 477                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 478                                                  remaining, TCP_RTO_MAX);
 479                } else {
 480                        /* RTO revert clocked out retransmission.
 481                         * Will retransmit now */
 482                        tcp_retransmit_timer(sk);
 483                }
 484
 485                break;
 486        case ICMP_TIME_EXCEEDED:
 487                err = EHOSTUNREACH;
 488                break;
 489        default:
 490                goto out;
 491        }
 492
 493        switch (sk->sk_state) {
 494        case TCP_SYN_SENT:
 495        case TCP_SYN_RECV:
  496                /* Only in fast or simultaneous open. If a fast open socket
  497                 * is already accepted it is treated as a connected one below.
 498                 */
 499                if (fastopen && !fastopen->sk)
 500                        break;
 501
 502                if (!sock_owned_by_user(sk)) {
 503                        sk->sk_err = err;
 504
 505                        sk->sk_error_report(sk);
 506
 507                        tcp_done(sk);
 508                } else {
 509                        sk->sk_err_soft = err;
 510                }
 511                goto out;
 512        }
 513
 514        /* If we've already connected we will keep trying
 515         * until we time out, or the user gives up.
 516         *
  517         * rfc1122 4.2.3.9 allows us to treat as hard errors
  518         * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
  519         * but it is obsoleted by pmtu discovery).
  520         *
  521         * Note that in the modern internet, where routing is unreliable
  522         * and broken firewalls sit in every dark corner, sending random
  523         * errors ordered by their masters, even these two messages finally lose
  524         * their original sense (even Linux sends invalid PORT_UNREACHs).
 525         *
 526         * Now we are in compliance with RFCs.
 527         *                                                      --ANK (980905)
 528         */
 529
 530        inet = inet_sk(sk);
 531        if (!sock_owned_by_user(sk) && inet->recverr) {
 532                sk->sk_err = err;
 533                sk->sk_error_report(sk);
 534        } else  { /* Only an error on timeout */
 535                sk->sk_err_soft = err;
 536        }
 537
 538out:
 539        bh_unlock_sock(sk);
 540        sock_put(sk);
 541}
 542
 543void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 544{
 545        struct tcphdr *th = tcp_hdr(skb);
 546
 547        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 548                th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
 549                skb->csum_start = skb_transport_header(skb) - skb->head;
 550                skb->csum_offset = offsetof(struct tcphdr, check);
 551        } else {
 552                th->check = tcp_v4_check(skb->len, saddr, daddr,
 553                                         csum_partial(th,
 554                                                      th->doff << 2,
 555                                                      skb->csum));
 556        }
 557}
 558
 559/* This routine computes an IPv4 TCP checksum. */
 560void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
 561{
 562        const struct inet_sock *inet = inet_sk(sk);
 563
 564        __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
 565}
 566EXPORT_SYMBOL(tcp_v4_send_check);
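
/* What the two cases above compute: with CHECKSUM_PARTIAL the checksum
 * field is seeded with the complemented pseudo-header sum (saddr, daddr,
 * protocol, length) and csum_start/csum_offset tell the device where to
 * fold in the rest; otherwise the full checksum over the TCP header and
 * payload is computed in software via csum_partial().
 */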
 567
 568/*
 569 *      This routine will send an RST to the other tcp.
 570 *
  571 *      Someone asks: why do we NEVER use socket parameters (TOS, TTL etc.)
  572 *                    for the reset?
  573 *      Answer: if a packet caused the RST, it is not for a socket
  574 *              existing in our system; if it is matched to a socket,
  575 *              it is just a duplicate segment or a bug in the other side's TCP.
  576 *              So we build the reply based only on the parameters
  577 *              that arrived with the segment.
 578 *      Exception: precedence violation. We do not implement it in any case.
 579 */
 580
 581static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 582{
 583        const struct tcphdr *th = tcp_hdr(skb);
 584        struct {
 585                struct tcphdr th;
 586#ifdef CONFIG_TCP_MD5SIG
 587                __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
 588#endif
 589        } rep;
 590        struct ip_reply_arg arg;
 591#ifdef CONFIG_TCP_MD5SIG
 592        struct tcp_md5sig_key *key = NULL;
 593        const __u8 *hash_location = NULL;
 594        unsigned char newhash[16];
 595        int genhash;
 596        struct sock *sk1 = NULL;
 597#endif
 598        struct net *net;
 599
 600        /* Never send a reset in response to a reset. */
 601        if (th->rst)
 602                return;
 603
  604        /* If sk is not NULL, it means we did a successful lookup and the incoming
 605         * route had to be correct. prequeue might have dropped our dst.
 606         */
 607        if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
 608                return;
 609
 610        /* Swap the send and the receive. */
 611        memset(&rep, 0, sizeof(rep));
 612        rep.th.dest   = th->source;
 613        rep.th.source = th->dest;
 614        rep.th.doff   = sizeof(struct tcphdr) / 4;
 615        rep.th.rst    = 1;
 616
 617        if (th->ack) {
 618                rep.th.seq = th->ack_seq;
 619        } else {
 620                rep.th.ack = 1;
 621                rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
 622                                       skb->len - (th->doff << 2));
 623        }
 624
 625        memset(&arg, 0, sizeof(arg));
 626        arg.iov[0].iov_base = (unsigned char *)&rep;
 627        arg.iov[0].iov_len  = sizeof(rep.th);
 628
 629        net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
 630#ifdef CONFIG_TCP_MD5SIG
 631        rcu_read_lock();
 632        hash_location = tcp_parse_md5sig_option(th);
 633        if (sk && sk_fullsock(sk)) {
 634                key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
 635                                        &ip_hdr(skb)->saddr, AF_INET);
 636        } else if (hash_location) {
 637                /*
  638                 * The active side is lost. Try to find the listening socket through
  639                 * the source port, and then find the md5 key through the listening socket.
  640                 * We do not lose any security here:
  641                 * the incoming packet is checked against the md5 hash computed with the found key;
  642                 * no RST is generated if the md5 hash doesn't match.
 643                 */
 644                sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
 645                                             ip_hdr(skb)->saddr,
 646                                             th->source, ip_hdr(skb)->daddr,
 647                                             ntohs(th->source), inet_iif(skb));
  648                /* don't send an rst if we can't find the key */
 649                if (!sk1)
 650                        goto out;
 651
 652                key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
 653                                        &ip_hdr(skb)->saddr, AF_INET);
 654                if (!key)
 655                        goto out;
 656
 657
 658                genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
 659                if (genhash || memcmp(hash_location, newhash, 16) != 0)
 660                        goto out;
 661
 662        }
 663
 664        if (key) {
 665                rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 666                                   (TCPOPT_NOP << 16) |
 667                                   (TCPOPT_MD5SIG << 8) |
 668                                   TCPOLEN_MD5SIG);
 669                /* Update length and the length the header thinks exists */
 670                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 671                rep.th.doff = arg.iov[0].iov_len / 4;
 672
 673                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
 674                                     key, ip_hdr(skb)->saddr,
 675                                     ip_hdr(skb)->daddr, &rep.th);
 676        }
 677#endif
 678        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 679                                      ip_hdr(skb)->saddr, /* XXX */
 680                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 681        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 682        arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;
 683
  684        /* When the socket is gone, all binding information is lost, and
  685         * routing might fail in this case. No choice here: if we choose to force
  686         * the input interface, we will misroute in case of an asymmetric route.
 687         */
 688        if (sk)
 689                arg.bound_dev_if = sk->sk_bound_dev_if;
 690
 691        BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
 692                     offsetof(struct inet_timewait_sock, tw_bound_dev_if));
 693
 694        arg.tos = ip_hdr(skb)->tos;
 695        local_bh_disable();
 696        ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
 697                              skb, &TCP_SKB_CB(skb)->header.h4.opt,
 698                              ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
 699                              &arg, arg.iov[0].iov_len);
 700
 701        __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
 702        __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
 703        local_bh_enable();
 704
 705#ifdef CONFIG_TCP_MD5SIG
 706out:
 707        rcu_read_unlock();
 708#endif
 709}
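
/* The reset above is built per RFC 793: if the offending segment carried
 * an ACK, the RST's sequence number is taken from that ACK field;
 * otherwise the RST ACKs the segment's SEQ plus its length (counting
 * SYN/FIN), so the other end can match it against what it sent.
 */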
 710
  711/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
  712   outside socket context, is certainly ugly. What can I do?
 713 */
 714
 715static void tcp_v4_send_ack(struct net *net,
 716                            struct sk_buff *skb, u32 seq, u32 ack,
 717                            u32 win, u32 tsval, u32 tsecr, int oif,
 718                            struct tcp_md5sig_key *key,
 719                            int reply_flags, u8 tos)
 720{
 721        const struct tcphdr *th = tcp_hdr(skb);
 722        struct {
 723                struct tcphdr th;
 724                __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
 725#ifdef CONFIG_TCP_MD5SIG
 726                           + (TCPOLEN_MD5SIG_ALIGNED >> 2)
 727#endif
 728                        ];
 729        } rep;
 730        struct ip_reply_arg arg;
 731
 732        memset(&rep.th, 0, sizeof(struct tcphdr));
 733        memset(&arg, 0, sizeof(arg));
 734
 735        arg.iov[0].iov_base = (unsigned char *)&rep;
 736        arg.iov[0].iov_len  = sizeof(rep.th);
 737        if (tsecr) {
 738                rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 739                                   (TCPOPT_TIMESTAMP << 8) |
 740                                   TCPOLEN_TIMESTAMP);
 741                rep.opt[1] = htonl(tsval);
 742                rep.opt[2] = htonl(tsecr);
 743                arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
 744        }
 745
 746        /* Swap the send and the receive. */
 747        rep.th.dest    = th->source;
 748        rep.th.source  = th->dest;
 749        rep.th.doff    = arg.iov[0].iov_len / 4;
 750        rep.th.seq     = htonl(seq);
 751        rep.th.ack_seq = htonl(ack);
 752        rep.th.ack     = 1;
 753        rep.th.window  = htons(win);
 754
 755#ifdef CONFIG_TCP_MD5SIG
 756        if (key) {
 757                int offset = (tsecr) ? 3 : 0;
 758
 759                rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
 760                                          (TCPOPT_NOP << 16) |
 761                                          (TCPOPT_MD5SIG << 8) |
 762                                          TCPOLEN_MD5SIG);
 763                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 764                rep.th.doff = arg.iov[0].iov_len/4;
 765
 766                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
 767                                    key, ip_hdr(skb)->saddr,
 768                                    ip_hdr(skb)->daddr, &rep.th);
 769        }
 770#endif
 771        arg.flags = reply_flags;
 772        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 773                                      ip_hdr(skb)->saddr, /* XXX */
 774                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 775        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 776        if (oif)
 777                arg.bound_dev_if = oif;
 778        arg.tos = tos;
 779        local_bh_disable();
 780        ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
 781                              skb, &TCP_SKB_CB(skb)->header.h4.opt,
 782                              ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
 783                              &arg, arg.iov[0].iov_len);
 784
 785        __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
 786        local_bh_enable();
 787}
 788
 789static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 790{
 791        struct inet_timewait_sock *tw = inet_twsk(sk);
 792        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 793
 794        tcp_v4_send_ack(sock_net(sk), skb,
 795                        tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 796                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 797                        tcp_time_stamp + tcptw->tw_ts_offset,
 798                        tcptw->tw_ts_recent,
 799                        tw->tw_bound_dev_if,
 800                        tcp_twsk_md5_key(tcptw),
 801                        tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
 802                        tw->tw_tos
 803                        );
 804
 805        inet_twsk_put(tw);
 806}
 807
 808static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 809                                  struct request_sock *req)
 810{
 811        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
 812         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
 813         */
 814        u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
 815                                             tcp_sk(sk)->snd_nxt;
 816
 817        /* RFC 7323 2.3
 818         * The window field (SEG.WND) of every outgoing segment, with the
 819         * exception of <SYN> segments, MUST be right-shifted by
 820         * Rcv.Wind.Shift bits:
 821         */
 822        tcp_v4_send_ack(sock_net(sk), skb, seq,
 823                        tcp_rsk(req)->rcv_nxt,
 824                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
 825                        tcp_time_stamp,
 826                        req->ts_recent,
 827                        0,
 828                        tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
 829                                          AF_INET),
 830                        inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
 831                        ip_hdr(skb)->tos);
 832}
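
/* Worked example of the RFC 7323 shift above (illustrative numbers only):
 * with rcv_wscale = 7 and a current receive window of 262144 bytes, the
 * 16-bit window field carried in this ACK is 262144 >> 7 = 2048, and the
 * peer scales it back up by the same factor.
 */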
 833
 834/*
 835 *      Send a SYN-ACK after having received a SYN.
 836 *      This still operates on a request_sock only, not on a big
 837 *      socket.
 838 */
 839static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
 840                              struct flowi *fl,
 841                              struct request_sock *req,
 842                              struct tcp_fastopen_cookie *foc,
 843                              enum tcp_synack_type synack_type)
 844{
 845        const struct inet_request_sock *ireq = inet_rsk(req);
 846        struct flowi4 fl4;
 847        int err = -1;
 848        struct sk_buff *skb;
 849
 850        /* First, grab a route. */
 851        if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 852                return -1;
 853
 854        skb = tcp_make_synack(sk, dst, req, foc, synack_type);
 855
 856        if (skb) {
 857                __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
 858
 859                err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
 860                                            ireq->ir_rmt_addr,
 861                                            ireq->opt);
 862                err = net_xmit_eval(err);
 863        }
 864
 865        return err;
 866}
 867
 868/*
 869 *      IPv4 request_sock destructor.
 870 */
 871static void tcp_v4_reqsk_destructor(struct request_sock *req)
 872{
 873        kfree(inet_rsk(req)->opt);
 874}
 875
 876#ifdef CONFIG_TCP_MD5SIG
 877/*
 878 * RFC2385 MD5 checksumming requires a mapping of
 879 * IP address->MD5 Key.
 880 * We need to maintain these in the sk structure.
 881 */
 882
 883/* Find the Key structure for an address.  */
 884struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
 885                                         const union tcp_md5_addr *addr,
 886                                         int family)
 887{
 888        const struct tcp_sock *tp = tcp_sk(sk);
 889        struct tcp_md5sig_key *key;
 890        unsigned int size = sizeof(struct in_addr);
 891        const struct tcp_md5sig_info *md5sig;
 892
 893        /* caller either holds rcu_read_lock() or socket lock */
 894        md5sig = rcu_dereference_check(tp->md5sig_info,
 895                                       lockdep_sock_is_held(sk));
 896        if (!md5sig)
 897                return NULL;
 898#if IS_ENABLED(CONFIG_IPV6)
 899        if (family == AF_INET6)
 900                size = sizeof(struct in6_addr);
 901#endif
 902        hlist_for_each_entry_rcu(key, &md5sig->head, node) {
 903                if (key->family != family)
 904                        continue;
 905                if (!memcmp(&key->addr, addr, size))
 906                        return key;
 907        }
 908        return NULL;
 909}
 910EXPORT_SYMBOL(tcp_md5_do_lookup);
 911
 912struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
 913                                         const struct sock *addr_sk)
 914{
 915        const union tcp_md5_addr *addr;
 916
 917        addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
 918        return tcp_md5_do_lookup(sk, addr, AF_INET);
 919}
 920EXPORT_SYMBOL(tcp_v4_md5_lookup);
 921
 922/* This can be called on a newly created socket, from other files */
 923int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
 924                   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
 925{
 926        /* Add Key to the list */
 927        struct tcp_md5sig_key *key;
 928        struct tcp_sock *tp = tcp_sk(sk);
 929        struct tcp_md5sig_info *md5sig;
 930
 931        key = tcp_md5_do_lookup(sk, addr, family);
 932        if (key) {
 933                /* Pre-existing entry - just update that one. */
 934                memcpy(key->key, newkey, newkeylen);
 935                key->keylen = newkeylen;
 936                return 0;
 937        }
 938
 939        md5sig = rcu_dereference_protected(tp->md5sig_info,
 940                                           lockdep_sock_is_held(sk));
 941        if (!md5sig) {
 942                md5sig = kmalloc(sizeof(*md5sig), gfp);
 943                if (!md5sig)
 944                        return -ENOMEM;
 945
 946                sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 947                INIT_HLIST_HEAD(&md5sig->head);
 948                rcu_assign_pointer(tp->md5sig_info, md5sig);
 949        }
 950
 951        key = sock_kmalloc(sk, sizeof(*key), gfp);
 952        if (!key)
 953                return -ENOMEM;
 954        if (!tcp_alloc_md5sig_pool()) {
 955                sock_kfree_s(sk, key, sizeof(*key));
 956                return -ENOMEM;
 957        }
 958
 959        memcpy(key->key, newkey, newkeylen);
 960        key->keylen = newkeylen;
 961        key->family = family;
 962        memcpy(&key->addr, addr,
 963               (family == AF_INET6) ? sizeof(struct in6_addr) :
 964                                      sizeof(struct in_addr));
 965        hlist_add_head_rcu(&key->node, &md5sig->head);
 966        return 0;
 967}
 968EXPORT_SYMBOL(tcp_md5_do_add);
 969
 970int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
 971{
 972        struct tcp_md5sig_key *key;
 973
 974        key = tcp_md5_do_lookup(sk, addr, family);
 975        if (!key)
 976                return -ENOENT;
 977        hlist_del_rcu(&key->node);
 978        atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
 979        kfree_rcu(key, rcu);
 980        return 0;
 981}
 982EXPORT_SYMBOL(tcp_md5_do_del);
 983
 984static void tcp_clear_md5_list(struct sock *sk)
 985{
 986        struct tcp_sock *tp = tcp_sk(sk);
 987        struct tcp_md5sig_key *key;
 988        struct hlist_node *n;
 989        struct tcp_md5sig_info *md5sig;
 990
 991        md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
 992
 993        hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
 994                hlist_del_rcu(&key->node);
 995                atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
 996                kfree_rcu(key, rcu);
 997        }
 998}
 999
1000static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1001                                 int optlen)
1002{
1003        struct tcp_md5sig cmd;
1004        struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1005
1006        if (optlen < sizeof(cmd))
1007                return -EINVAL;
1008
1009        if (copy_from_user(&cmd, optval, sizeof(cmd)))
1010                return -EFAULT;
1011
1012        if (sin->sin_family != AF_INET)
1013                return -EINVAL;
1014
1015        if (!cmd.tcpm_keylen)
1016                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1017                                      AF_INET);
1018
1019        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1020                return -EINVAL;
1021
1022        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1023                              AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1024                              GFP_KERNEL);
1025}
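
/* For illustration only (userspace view, not part of this file): the
 * handler above is reached via the TCP_MD5SIG socket option, roughly:
 *
 *      struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *
 *      memcpy(md5.tcpm_key, "secret", 6);
 *      memcpy(&md5.tcpm_addr, &peer_sin, sizeof(struct sockaddr_in));
 *      setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * (peer_sin is a hypothetical struct sockaddr_in for the peer.)
 * A zero tcpm_keylen deletes the key for that address, handled above via
 * tcp_md5_do_del().
 */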
1026
1027static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1028                                        __be32 daddr, __be32 saddr, int nbytes)
1029{
1030        struct tcp4_pseudohdr *bp;
1031        struct scatterlist sg;
1032
1033        bp = &hp->md5_blk.ip4;
1034
1035        /*
1036         * 1. the TCP pseudo-header (in the order: source IP address,
1037         * destination IP address, zero-padded protocol number, and
1038         * segment length)
1039         */
1040        bp->saddr = saddr;
1041        bp->daddr = daddr;
1042        bp->pad = 0;
1043        bp->protocol = IPPROTO_TCP;
1044        bp->len = cpu_to_be16(nbytes);
1045
1046        sg_init_one(&sg, bp, sizeof(*bp));
1047        ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp));
1048        return crypto_ahash_update(hp->md5_req);
1049}
1050
1051static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1052                               __be32 daddr, __be32 saddr, const struct tcphdr *th)
1053{
1054        struct tcp_md5sig_pool *hp;
1055        struct ahash_request *req;
1056
1057        hp = tcp_get_md5sig_pool();
1058        if (!hp)
1059                goto clear_hash_noput;
1060        req = hp->md5_req;
1061
1062        if (crypto_ahash_init(req))
1063                goto clear_hash;
1064        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1065                goto clear_hash;
1066        if (tcp_md5_hash_header(hp, th))
1067                goto clear_hash;
1068        if (tcp_md5_hash_key(hp, key))
1069                goto clear_hash;
1070        ahash_request_set_crypt(req, NULL, md5_hash, 0);
1071        if (crypto_ahash_final(req))
1072                goto clear_hash;
1073
1074        tcp_put_md5sig_pool();
1075        return 0;
1076
1077clear_hash:
1078        tcp_put_md5sig_pool();
1079clear_hash_noput:
1080        memset(md5_hash, 0, 16);
1081        return 1;
1082}
1083
1084int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
1085                        const struct sock *sk,
1086                        const struct sk_buff *skb)
1087{
1088        struct tcp_md5sig_pool *hp;
1089        struct ahash_request *req;
1090        const struct tcphdr *th = tcp_hdr(skb);
1091        __be32 saddr, daddr;
1092
1093        if (sk) { /* valid for establish/request sockets */
1094                saddr = sk->sk_rcv_saddr;
1095                daddr = sk->sk_daddr;
1096        } else {
1097                const struct iphdr *iph = ip_hdr(skb);
1098                saddr = iph->saddr;
1099                daddr = iph->daddr;
1100        }
1101
1102        hp = tcp_get_md5sig_pool();
1103        if (!hp)
1104                goto clear_hash_noput;
1105        req = hp->md5_req;
1106
1107        if (crypto_ahash_init(req))
1108                goto clear_hash;
1109
1110        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1111                goto clear_hash;
1112        if (tcp_md5_hash_header(hp, th))
1113                goto clear_hash;
1114        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1115                goto clear_hash;
1116        if (tcp_md5_hash_key(hp, key))
1117                goto clear_hash;
1118        ahash_request_set_crypt(req, NULL, md5_hash, 0);
1119        if (crypto_ahash_final(req))
1120                goto clear_hash;
1121
1122        tcp_put_md5sig_pool();
1123        return 0;
1124
1125clear_hash:
1126        tcp_put_md5sig_pool();
1127clear_hash_noput:
1128        memset(md5_hash, 0, 16);
1129        return 1;
1130}
1131EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1132
1133#endif
1134
1135/* Called with rcu_read_lock() */
1136static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
1137                                    const struct sk_buff *skb)
1138{
1139#ifdef CONFIG_TCP_MD5SIG
1140        /*
1141         * This gets called for each TCP segment that arrives
1142         * so we want to be efficient.
1143         * We have 3 drop cases:
1144         * o No MD5 hash and one expected.
1145         * o MD5 hash and we're not expecting one.
 1146         * o MD5 hash and it's wrong.
1147         */
1148        const __u8 *hash_location = NULL;
1149        struct tcp_md5sig_key *hash_expected;
1150        const struct iphdr *iph = ip_hdr(skb);
1151        const struct tcphdr *th = tcp_hdr(skb);
1152        int genhash;
1153        unsigned char newhash[16];
1154
1155        hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1156                                          AF_INET);
1157        hash_location = tcp_parse_md5sig_option(th);
1158
1159        /* We've parsed the options - do we have a hash? */
1160        if (!hash_expected && !hash_location)
1161                return false;
1162
1163        if (hash_expected && !hash_location) {
1164                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1165                return true;
1166        }
1167
1168        if (!hash_expected && hash_location) {
1169                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1170                return true;
1171        }
1172
1173        /* Okay, so this is hash_expected and hash_location -
1174         * so we need to calculate the checksum.
1175         */
1176        genhash = tcp_v4_md5_hash_skb(newhash,
1177                                      hash_expected,
1178                                      NULL, skb);
1179
1180        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1181                net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1182                                     &iph->saddr, ntohs(th->source),
1183                                     &iph->daddr, ntohs(th->dest),
1184                                     genhash ? " tcp_v4_calc_md5_hash failed"
1185                                     : "");
1186                return true;
1187        }
1188        return false;
1189#endif
1190        return false;
1191}
1192
1193static void tcp_v4_init_req(struct request_sock *req,
1194                            const struct sock *sk_listener,
1195                            struct sk_buff *skb)
1196{
1197        struct inet_request_sock *ireq = inet_rsk(req);
1198
1199        sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
1200        sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
1201        ireq->no_srccheck = inet_sk(sk_listener)->transparent;
1202        ireq->opt = tcp_v4_save_options(skb);
1203}
1204
1205static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
1206                                          struct flowi *fl,
1207                                          const struct request_sock *req,
1208                                          bool *strict)
1209{
1210        struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
1211
1212        if (strict) {
1213                if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
1214                        *strict = true;
1215                else
1216                        *strict = false;
1217        }
1218
1219        return dst;
1220}
1221
1222struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1223        .family         =       PF_INET,
1224        .obj_size       =       sizeof(struct tcp_request_sock),
1225        .rtx_syn_ack    =       tcp_rtx_synack,
1226        .send_ack       =       tcp_v4_reqsk_send_ack,
1227        .destructor     =       tcp_v4_reqsk_destructor,
1228        .send_reset     =       tcp_v4_send_reset,
1229        .syn_ack_timeout =      tcp_syn_ack_timeout,
1230};
1231
1232static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1233        .mss_clamp      =       TCP_MSS_DEFAULT,
1234#ifdef CONFIG_TCP_MD5SIG
1235        .req_md5_lookup =       tcp_v4_md5_lookup,
1236        .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1237#endif
1238        .init_req       =       tcp_v4_init_req,
1239#ifdef CONFIG_SYN_COOKIES
1240        .cookie_init_seq =      cookie_v4_init_sequence,
1241#endif
1242        .route_req      =       tcp_v4_route_req,
1243        .init_seq       =       tcp_v4_init_sequence,
1244        .send_synack    =       tcp_v4_send_synack,
1245};
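
/* The two tables above split responsibilities: tcp_request_sock_ops holds
 * the generic request_sock callbacks (SYN-ACK retransmit, ack, reset,
 * destructor), while tcp_request_sock_ipv4_ops supplies the IPv4-specific
 * hooks (routing, ISN generation, MD5 lookup, SYN-ACK transmit) that
 * tcp_conn_request() invokes from tcp_v4_conn_request() below.
 */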
1246
1247int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1248{
 1249        /* Never answer SYNs sent to broadcast or multicast */
1250        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1251                goto drop;
1252
1253        return tcp_conn_request(&tcp_request_sock_ops,
1254                                &tcp_request_sock_ipv4_ops, sk, skb);
1255
1256drop:
1257        tcp_listendrop(sk);
1258        return 0;
1259}
1260EXPORT_SYMBOL(tcp_v4_conn_request);
1261
1262
1263/*
 1264 * The three way handshake has completed - we got a valid ACK for our
 1265 * SYN-ACK - now create the new socket.
1266 */
1267struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1268                                  struct request_sock *req,
1269                                  struct dst_entry *dst,
1270                                  struct request_sock *req_unhash,
1271                                  bool *own_req)
1272{
1273        struct inet_request_sock *ireq;
1274        struct inet_sock *newinet;
1275        struct tcp_sock *newtp;
1276        struct sock *newsk;
1277#ifdef CONFIG_TCP_MD5SIG
1278        struct tcp_md5sig_key *key;
1279#endif
1280        struct ip_options_rcu *inet_opt;
1281
1282        if (sk_acceptq_is_full(sk))
1283                goto exit_overflow;
1284
1285        newsk = tcp_create_openreq_child(sk, req, skb);
1286        if (!newsk)
1287                goto exit_nonewsk;
1288
1289        newsk->sk_gso_type = SKB_GSO_TCPV4;
1290        inet_sk_rx_dst_set(newsk, skb);
1291
1292        newtp                 = tcp_sk(newsk);
1293        newinet               = inet_sk(newsk);
1294        ireq                  = inet_rsk(req);
1295        sk_daddr_set(newsk, ireq->ir_rmt_addr);
1296        sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
1297        newsk->sk_bound_dev_if = ireq->ir_iif;
1298        newinet->inet_saddr           = ireq->ir_loc_addr;
1299        inet_opt              = ireq->opt;
1300        rcu_assign_pointer(newinet->inet_opt, inet_opt);
1301        ireq->opt             = NULL;
1302        newinet->mc_index     = inet_iif(skb);
1303        newinet->mc_ttl       = ip_hdr(skb)->ttl;
1304        newinet->rcv_tos      = ip_hdr(skb)->tos;
1305        inet_csk(newsk)->icsk_ext_hdr_len = 0;
1306        if (inet_opt)
1307                inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1308        newinet->inet_id = newtp->write_seq ^ jiffies;
1309
1310        if (!dst) {
1311                dst = inet_csk_route_child_sock(sk, newsk, req);
1312                if (!dst)
1313                        goto put_and_exit;
1314        } else {
1315                /* syncookie case : see end of cookie_v4_check() */
1316        }
1317        sk_setup_caps(newsk, dst);
1318
1319        tcp_ca_openreq_child(newsk, dst);
1320
1321        tcp_sync_mss(newsk, dst_mtu(dst));
1322        newtp->advmss = dst_metric_advmss(dst);
1323        if (tcp_sk(sk)->rx_opt.user_mss &&
1324            tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1325                newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1326
1327        tcp_initialize_rcv_mss(newsk);
1328
1329#ifdef CONFIG_TCP_MD5SIG
1330        /* Copy over the MD5 key from the original socket */
1331        key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1332                                AF_INET);
1333        if (key) {
1334                /*
1335                 * We're using one, so create a matching key
1336                 * on the newsk structure. If we fail to get
1337                 * memory, then we end up not copying the key
1338                 * across. Shucks.
1339                 */
1340                tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1341                               AF_INET, key->key, key->keylen, GFP_ATOMIC);
1342                sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1343        }
1344#endif
1345
1346        if (__inet_inherit_port(sk, newsk) < 0)
1347                goto put_and_exit;
1348        *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1349        if (*own_req)
1350                tcp_move_syn(newtp, req);
1351
1352        return newsk;
1353
1354exit_overflow:
1355        NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1356exit_nonewsk:
1357        dst_release(dst);
1358exit:
1359        tcp_listendrop(sk);
1360        return NULL;
1361put_and_exit:
1362        inet_csk_prepare_forced_close(newsk);
1363        tcp_done(newsk);
1364        goto exit;
1365}
1366EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1367
1368static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
1369{
1370#ifdef CONFIG_SYN_COOKIES
1371        const struct tcphdr *th = tcp_hdr(skb);
1372
1373        if (!th->syn)
1374                sk = cookie_v4_check(sk, skb);
1375#endif
1376        return sk;
1377}
1378
 1379/* The socket must have its spinlock held when we get
1380 * here, unless it is a TCP_LISTEN socket.
1381 *
1382 * We have a potential double-lock case here, so even when
1383 * doing backlog processing we use the BH locking scheme.
1384 * This is because we cannot sleep with the original spinlock
1385 * held.
1386 */
1387int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1388{
1389        struct sock *rsk;
1390
1391        if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1392                struct dst_entry *dst = sk->sk_rx_dst;
1393
1394                sock_rps_save_rxhash(sk, skb);
1395                sk_mark_napi_id(sk, skb);
1396                if (dst) {
1397                        if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1398                            !dst->ops->check(dst, 0)) {
1399                                dst_release(dst);
1400                                sk->sk_rx_dst = NULL;
1401                        }
1402                }
1403                tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1404                return 0;
1405        }
1406
1407        if (tcp_checksum_complete(skb))
1408                goto csum_err;
1409
1410        if (sk->sk_state == TCP_LISTEN) {
1411                struct sock *nsk = tcp_v4_cookie_check(sk, skb);
1412
1413                if (!nsk)
1414                        goto discard;
1415                if (nsk != sk) {
1416                        sock_rps_save_rxhash(nsk, skb);
1417                        sk_mark_napi_id(nsk, skb);
1418                        if (tcp_child_process(sk, nsk, skb)) {
1419                                rsk = nsk;
1420                                goto reset;
1421                        }
1422                        return 0;
1423                }
1424        } else
1425                sock_rps_save_rxhash(sk, skb);
1426
1427        if (tcp_rcv_state_process(sk, skb)) {
1428                rsk = sk;
1429                goto reset;
1430        }
1431        return 0;
1432
1433reset:
1434        tcp_v4_send_reset(rsk, skb);
1435discard:
1436        kfree_skb(skb);
1437        /* Be careful here. If this function gets more complicated and
1438         * gcc suffers from register pressure on the x86, sk (in %ebx)
1439         * might be destroyed here. This current version compiles correctly,
1440         * but you have been warned.
1441         */
1442        return 0;
1443
1444csum_err:
1445        TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1446        TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1447        goto discard;
1448}
1449EXPORT_SYMBOL(tcp_v4_do_rcv);
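
/* The "BH locking scheme" referred to in the comment above is the pattern
 * used by tcp_v4_rcv() further down.  A condensed sketch, with error handling
 * omitted and "limit" standing in for the real backlog limit:
 *
 *	bh_lock_sock_nested(sk);
 *	if (!sock_owned_by_user(sk))
 *		ret = tcp_v4_do_rcv(sk, skb);	(process in softirq context)
 *	else
 *		sk_add_backlog(sk, skb, limit);	(defer to the lock owner)
 *	bh_unlock_sock(sk);
 *
 * The socket spinlock is only ever taken with bottom halves disabled, so the
 * process-context lock owner and this softirq path cannot deadlock on it.
 */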
1450
1451void tcp_v4_early_demux(struct sk_buff *skb)
1452{
1453        const struct iphdr *iph;
1454        const struct tcphdr *th;
1455        struct sock *sk;
1456
1457        if (skb->pkt_type != PACKET_HOST)
1458                return;
1459
1460        if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1461                return;
1462
1463        iph = ip_hdr(skb);
1464        th = tcp_hdr(skb);
1465
1466        if (th->doff < sizeof(struct tcphdr) / 4)
1467                return;
1468
1469        sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1470                                       iph->saddr, th->source,
1471                                       iph->daddr, ntohs(th->dest),
1472                                       skb->skb_iif);
1473        if (sk) {
1474                skb->sk = sk;
1475                skb->destructor = sock_edemux;
1476                if (sk_fullsock(sk)) {
1477                        struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1478
1479                        if (dst)
1480                                dst = dst_check(dst, 0);
1481                        if (dst &&
1482                            inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1483                                skb_dst_set_noref(skb, dst);
1484                }
1485        }
1486}
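
/* tcp_v4_early_demux() runs from the IPv4 receive path before routing (when
 * the ip_early_demux sysctl is enabled).  Looking the established socket up
 * by its 4-tuple this early lets the stack attach the socket's cached
 * sk_rx_dst to the skb via skb_dst_set_noref() and skip a separate route
 * lookup for most packets on a busy connection.
 */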
1487
1488/* Packet is added to VJ-style prequeue for processing in process
1489 * context, if a reader task is waiting. Apparently, this exciting
1490 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1491 * failed somewhere. Latency? Burstiness? Well, at least now we will
1492 * see, why it failed. 8)8)                               --ANK
1493 *
1494 */
1495bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1496{
1497        struct tcp_sock *tp = tcp_sk(sk);
1498
1499        if (sysctl_tcp_low_latency || !tp->ucopy.task)
1500                return false;
1501
1502        if (skb->len <= tcp_hdrlen(skb) &&
1503            skb_queue_len(&tp->ucopy.prequeue) == 0)
1504                return false;
1505
1506        /* Before escaping RCU protected region, we need to take care of skb
1507         * dst. Prequeue is only enabled for established sockets.
1508         * For such sockets, we might need the skb dst only to set sk->sk_rx_dst.
1509         * Instead of doing a full sk_rx_dst validity check here, let's perform
1510         * an optimistic check.
1511         */
1512        if (likely(sk->sk_rx_dst))
1513                skb_dst_drop(skb);
1514        else
1515                skb_dst_force_safe(skb);
1516
1517        __skb_queue_tail(&tp->ucopy.prequeue, skb);
1518        tp->ucopy.memory += skb->truesize;
1519        if (skb_queue_len(&tp->ucopy.prequeue) >= 32 ||
1520            tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
1521                struct sk_buff *skb1;
1522
1523                BUG_ON(sock_owned_by_user(sk));
1524                __NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED,
1525                                skb_queue_len(&tp->ucopy.prequeue));
1526
1527                while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
1528                        sk_backlog_rcv(sk, skb1);
1529
1530                tp->ucopy.memory = 0;
1531        } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1532                wake_up_interruptible_sync_poll(sk_sleep(sk),
1533                                           POLLIN | POLLRDNORM | POLLRDBAND);
1534                if (!inet_csk_ack_scheduled(sk))
1535                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1536                                                  (3 * tcp_rto_min(sk)) / 4,
1537                                                  TCP_RTO_MAX);
1538        }
1539        return true;
1540}
1541EXPORT_SYMBOL(tcp_prequeue);
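
/* The consumer side of the prequeue lives in tcp_recvmsg()/
 * tcp_prequeue_process() in tcp.c; roughly, with the socket lock held in
 * process context, it drains the queue the same way the overflow path above
 * does:
 *
 *	while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
 *		sk_backlog_rcv(sk, skb);
 *	tp->ucopy.memory = 0;
 *
 * The softirq path only falls back to draining it here once the prequeue
 * reaches 32 skbs or would overflow the receive buffer.
 */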
1542
1543/*
1544 *      From tcp_input.c
1545 */
1546
1547int tcp_v4_rcv(struct sk_buff *skb)
1548{
1549        struct net *net = dev_net(skb->dev);
1550        const struct iphdr *iph;
1551        const struct tcphdr *th;
1552        bool refcounted;
1553        struct sock *sk;
1554        int ret;
1555
1556        if (skb->pkt_type != PACKET_HOST)
1557                goto discard_it;
1558
1559        /* Count it even if it's bad */
1560        __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1561
1562        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1563                goto discard_it;
1564
1565        th = (const struct tcphdr *)skb->data;
1566
1567        if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
1568                goto bad_packet;
1569        if (!pskb_may_pull(skb, th->doff * 4))
1570                goto discard_it;
1571
1572        /* An explanation is required here, I think.
1573         * Packet length and doff are validated by header prediction,
1574         * provided the case of th->doff==0 is eliminated.
1575         * So, we defer the checks. */
1576
1577        if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
1578                goto csum_error;
1579
1580        th = (const struct tcphdr *)skb->data;
1581        iph = ip_hdr(skb);
1582        /* This is tricky: we move IPCB to its correct location inside TCP_SKB_CB();
1583         * barrier() makes sure the compiler won't play fool^Waliasing games.
1584         */
1585        memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
1586                sizeof(struct inet_skb_parm));
1587        barrier();
1588
1589        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1590        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1591                                    skb->len - th->doff * 4);
1592        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1593        TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1594        TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1595        TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1596        TCP_SKB_CB(skb)->sacked  = 0;
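        /* end_seq counts the payload plus one sequence number each for SYN
         * and FIN, since both flags consume sequence space: e.g. a 100-byte
         * segment carrying FIN with seq == S ends up with end_seq == S + 101.
         */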
1597
1598lookup:
1599        sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
1600                               th->dest, &refcounted);
1601        if (!sk)
1602                goto no_tcp_socket;
1603
1604process:
1605        if (sk->sk_state == TCP_TIME_WAIT)
1606                goto do_time_wait;
1607
1608        if (sk->sk_state == TCP_NEW_SYN_RECV) {
1609                struct request_sock *req = inet_reqsk(sk);
1610                struct sock *nsk;
1611
1612                sk = req->rsk_listener;
1613                if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
1614                        reqsk_put(req);
1615                        goto discard_it;
1616                }
1617                if (unlikely(sk->sk_state != TCP_LISTEN)) {
1618                        inet_csk_reqsk_queue_drop_and_put(sk, req);
1619                        goto lookup;
1620                }
1621                /* We own a reference on the listener, increase it again
1622                 * as we might lose it too soon.
1623                 */
1624                sock_hold(sk);
1625                refcounted = true;
1626                nsk = tcp_check_req(sk, skb, req, false);
1627                if (!nsk) {
1628                        reqsk_put(req);
1629                        goto discard_and_relse;
1630                }
1631                if (nsk == sk) {
1632                        reqsk_put(req);
1633                } else if (tcp_child_process(sk, nsk, skb)) {
1634                        tcp_v4_send_reset(nsk, skb);
1635                        goto discard_and_relse;
1636                } else {
1637                        sock_put(sk);
1638                        return 0;
1639                }
1640        }
1641        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1642                __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1643                goto discard_and_relse;
1644        }
1645
1646        if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1647                goto discard_and_relse;
1648
1649        if (tcp_v4_inbound_md5_hash(sk, skb))
1650                goto discard_and_relse;
1651
1652        nf_reset(skb);
1653
1654        if (sk_filter(sk, skb))
1655                goto discard_and_relse;
1656
1657        skb->dev = NULL;
1658
1659        if (sk->sk_state == TCP_LISTEN) {
1660                ret = tcp_v4_do_rcv(sk, skb);
1661                goto put_and_return;
1662        }
1663
1664        sk_incoming_cpu_update(sk);
1665
1666        bh_lock_sock_nested(sk);
1667        tcp_segs_in(tcp_sk(sk), skb);
1668        ret = 0;
1669        if (!sock_owned_by_user(sk)) {
1670                if (!tcp_prequeue(sk, skb))
1671                        ret = tcp_v4_do_rcv(sk, skb);
1672        } else if (unlikely(sk_add_backlog(sk, skb,
1673                                           sk->sk_rcvbuf + sk->sk_sndbuf))) {
1674                bh_unlock_sock(sk);
1675                __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP);
1676                goto discard_and_relse;
1677        }
1678        bh_unlock_sock(sk);
1679
1680put_and_return:
1681        if (refcounted)
1682                sock_put(sk);
1683
1684        return ret;
1685
1686no_tcp_socket:
1687        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1688                goto discard_it;
1689
1690        if (tcp_checksum_complete(skb)) {
1691csum_error:
1692                __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1693bad_packet:
1694                __TCP_INC_STATS(net, TCP_MIB_INERRS);
1695        } else {
1696                tcp_v4_send_reset(NULL, skb);
1697        }
1698
1699discard_it:
1700        /* Discard frame. */
1701        kfree_skb(skb);
1702        return 0;
1703
1704discard_and_relse:
1705        sk_drops_add(sk, skb);
1706        if (refcounted)
1707                sock_put(sk);
1708        goto discard_it;
1709
1710do_time_wait:
1711        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1712                inet_twsk_put(inet_twsk(sk));
1713                goto discard_it;
1714        }
1715
1716        if (tcp_checksum_complete(skb)) {
1717                inet_twsk_put(inet_twsk(sk));
1718                goto csum_error;
1719        }
1720        switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1721        case TCP_TW_SYN: {
1722                struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1723                                                        &tcp_hashinfo, skb,
1724                                                        __tcp_hdrlen(th),
1725                                                        iph->saddr, th->source,
1726                                                        iph->daddr, th->dest,
1727                                                        inet_iif(skb));
1728                if (sk2) {
1729                        inet_twsk_deschedule_put(inet_twsk(sk));
1730                        sk = sk2;
1731                        refcounted = false;
1732                        goto process;
1733                }
1734                /* Fall through to ACK */
1735        }
1736        case TCP_TW_ACK:
1737                tcp_v4_timewait_ack(sk, skb);
1738                break;
1739        case TCP_TW_RST:
1740                tcp_v4_send_reset(sk, skb);
1741                inet_twsk_deschedule_put(inet_twsk(sk));
1742                goto discard_it;
1743        case TCP_TW_SUCCESS:;
1744        }
1745        goto discard_it;
1746}
1747
1748static struct timewait_sock_ops tcp_timewait_sock_ops = {
1749        .twsk_obj_size  = sizeof(struct tcp_timewait_sock),
1750        .twsk_unique    = tcp_twsk_unique,
1751        .twsk_destructor= tcp_twsk_destructor,
1752};
1753
1754void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
1755{
1756        struct dst_entry *dst = skb_dst(skb);
1757
1758        if (dst && dst_hold_safe(dst)) {
1759                sk->sk_rx_dst = dst;
1760                inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1761        }
1762}
1763EXPORT_SYMBOL(inet_sk_rx_dst_set);
1764
1765const struct inet_connection_sock_af_ops ipv4_specific = {
1766        .queue_xmit        = ip_queue_xmit,
1767        .send_check        = tcp_v4_send_check,
1768        .rebuild_header    = inet_sk_rebuild_header,
1769        .sk_rx_dst_set     = inet_sk_rx_dst_set,
1770        .conn_request      = tcp_v4_conn_request,
1771        .syn_recv_sock     = tcp_v4_syn_recv_sock,
1772        .net_header_len    = sizeof(struct iphdr),
1773        .setsockopt        = ip_setsockopt,
1774        .getsockopt        = ip_getsockopt,
1775        .addr2sockaddr     = inet_csk_addr2sockaddr,
1776        .sockaddr_len      = sizeof(struct sockaddr_in),
1777        .bind_conflict     = inet_csk_bind_conflict,
1778#ifdef CONFIG_COMPAT
1779        .compat_setsockopt = compat_ip_setsockopt,
1780        .compat_getsockopt = compat_ip_getsockopt,
1781#endif
1782        .mtu_reduced       = tcp_v4_mtu_reduced,
1783};
1784EXPORT_SYMBOL(ipv4_specific);
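
/* ipv4_specific is the address-family indirection used by the connection
 * oriented code: the protocol-independent tcp_*() functions reach
 * ->queue_xmit(), ->send_check(), ->conn_request() and friends through
 * icsk->icsk_af_ops, which tcp_v4_init_sock() below points at this table;
 * the IPv6 code installs its own table the same way.
 */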
1785
1786#ifdef CONFIG_TCP_MD5SIG
1787static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1788        .md5_lookup             = tcp_v4_md5_lookup,
1789        .calc_md5_hash          = tcp_v4_md5_hash_skb,
1790        .md5_parse              = tcp_v4_parse_md5_keys,
1791};
1792#endif
1793
1794/* NOTE: A lot of things are set to zero explicitly by the call to
1795 *       sk_alloc(), so they need not be done here.
1796 */
1797static int tcp_v4_init_sock(struct sock *sk)
1798{
1799        struct inet_connection_sock *icsk = inet_csk(sk);
1800
1801        tcp_init_sock(sk);
1802
1803        icsk->icsk_af_ops = &ipv4_specific;
1804
1805#ifdef CONFIG_TCP_MD5SIG
1806        tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1807#endif
1808
1809        return 0;
1810}
1811
1812void tcp_v4_destroy_sock(struct sock *sk)
1813{
1814        struct tcp_sock *tp = tcp_sk(sk);
1815
1816        tcp_clear_xmit_timers(sk);
1817
1818        tcp_cleanup_congestion_control(sk);
1819
1820        /* Clean up the write buffer. */
1821        tcp_write_queue_purge(sk);
1822
1823        /* Cleans up our, hopefully empty, out_of_order_queue. */
1824        __skb_queue_purge(&tp->out_of_order_queue);
1825
1826#ifdef CONFIG_TCP_MD5SIG
1827        /* Clean up the MD5 key list, if any */
1828        if (tp->md5sig_info) {
1829                tcp_clear_md5_list(sk);
1830                kfree_rcu(tp->md5sig_info, rcu);
1831                tp->md5sig_info = NULL;
1832        }
1833#endif
1834
1835        /* Clean up the prequeue; it really should be empty by now */
1836        __skb_queue_purge(&tp->ucopy.prequeue);
1837
1838        /* Clean up a referenced TCP bind bucket. */
1839        if (inet_csk(sk)->icsk_bind_hash)
1840                inet_put_port(sk);
1841
1842        BUG_ON(tp->fastopen_rsk);
1843
1844        /* If socket is aborted during connect operation */
1845        tcp_free_fastopen_req(tp);
1846        tcp_saved_syn_free(tp);
1847
1848        local_bh_disable();
1849        sk_sockets_allocated_dec(sk);
1850        local_bh_enable();
1851
1852        if (mem_cgroup_sockets_enabled && sk->sk_memcg)
1853                sock_release_memcg(sk);
1854}
1855EXPORT_SYMBOL(tcp_v4_destroy_sock);
1856
1857#ifdef CONFIG_PROC_FS
1858/* Proc filesystem TCP sock list dumping. */
1859
1860/*
1861 * Get the next listener socket following cur.  If cur is NULL, get the first socket
1862 * starting from the bucket given in st->bucket; when st->bucket is zero the
1863 * very first socket in the hash table is returned.
1864 */
1865static void *listening_get_next(struct seq_file *seq, void *cur)
1866{
1867        struct tcp_iter_state *st = seq->private;
1868        struct net *net = seq_file_net(seq);
1869        struct inet_listen_hashbucket *ilb;
1870        struct inet_connection_sock *icsk;
1871        struct sock *sk = cur;
1872
1873        if (!sk) {
1874get_head:
1875                ilb = &tcp_hashinfo.listening_hash[st->bucket];
1876                spin_lock_bh(&ilb->lock);
1877                sk = sk_head(&ilb->head);
1878                st->offset = 0;
1879                goto get_sk;
1880        }
1881        ilb = &tcp_hashinfo.listening_hash[st->bucket];
1882        ++st->num;
1883        ++st->offset;
1884
1885        sk = sk_next(sk);
1886get_sk:
1887        sk_for_each_from(sk) {
1888                if (!net_eq(sock_net(sk), net))
1889                        continue;
1890                if (sk->sk_family == st->family)
1891                        return sk;
1892                icsk = inet_csk(sk);
1893        }
1894        spin_unlock_bh(&ilb->lock);
1895        st->offset = 0;
1896        if (++st->bucket < INET_LHTABLE_SIZE)
1897                goto get_head;
1898        return NULL;
1899}
1900
1901static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1902{
1903        struct tcp_iter_state *st = seq->private;
1904        void *rc;
1905
1906        st->bucket = 0;
1907        st->offset = 0;
1908        rc = listening_get_next(seq, NULL);
1909
1910        while (rc && *pos) {
1911                rc = listening_get_next(seq, rc);
1912                --*pos;
1913        }
1914        return rc;
1915}
1916
1917static inline bool empty_bucket(const struct tcp_iter_state *st)
1918{
1919        return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
1920}
1921
1922/*
1923 * Get the first established socket, starting from the bucket given in st->bucket.
1924 * If st->bucket is zero, the very first socket in the hash is returned.
1925 */
1926static void *established_get_first(struct seq_file *seq)
1927{
1928        struct tcp_iter_state *st = seq->private;
1929        struct net *net = seq_file_net(seq);
1930        void *rc = NULL;
1931
1932        st->offset = 0;
1933        for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1934                struct sock *sk;
1935                struct hlist_nulls_node *node;
1936                spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1937
1938                /* Lockless fast path for the common case of empty buckets */
1939                if (empty_bucket(st))
1940                        continue;
1941
1942                spin_lock_bh(lock);
1943                sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
1944                        if (sk->sk_family != st->family ||
1945                            !net_eq(sock_net(sk), net)) {
1946                                continue;
1947                        }
1948                        rc = sk;
1949                        goto out;
1950                }
1951                spin_unlock_bh(lock);
1952        }
1953out:
1954        return rc;
1955}
1956
1957static void *established_get_next(struct seq_file *seq, void *cur)
1958{
1959        struct sock *sk = cur;
1960        struct hlist_nulls_node *node;
1961        struct tcp_iter_state *st = seq->private;
1962        struct net *net = seq_file_net(seq);
1963
1964        ++st->num;
1965        ++st->offset;
1966
1967        sk = sk_nulls_next(sk);
1968
1969        sk_nulls_for_each_from(sk, node) {
1970                if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
1971                        return sk;
1972        }
1973
1974        spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1975        ++st->bucket;
1976        return established_get_first(seq);
1977}
1978
1979static void *established_get_idx(struct seq_file *seq, loff_t pos)
1980{
1981        struct tcp_iter_state *st = seq->private;
1982        void *rc;
1983
1984        st->bucket = 0;
1985        rc = established_get_first(seq);
1986
1987        while (rc && pos) {
1988                rc = established_get_next(seq, rc);
1989                --pos;
1990        }
1991        return rc;
1992}
1993
1994static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
1995{
1996        void *rc;
1997        struct tcp_iter_state *st = seq->private;
1998
1999        st->state = TCP_SEQ_STATE_LISTENING;
2000        rc        = listening_get_idx(seq, &pos);
2001
2002        if (!rc) {
2003                st->state = TCP_SEQ_STATE_ESTABLISHED;
2004                rc        = established_get_idx(seq, pos);
2005        }
2006
2007        return rc;
2008}
2009
2010static void *tcp_seek_last_pos(struct seq_file *seq)
2011{
2012        struct tcp_iter_state *st = seq->private;
2013        int offset = st->offset;
2014        int orig_num = st->num;
2015        void *rc = NULL;
2016
2017        switch (st->state) {
2018        case TCP_SEQ_STATE_LISTENING:
2019                if (st->bucket >= INET_LHTABLE_SIZE)
2020                        break;
2021                st->state = TCP_SEQ_STATE_LISTENING;
2022                rc = listening_get_next(seq, NULL);
2023                while (offset-- && rc)
2024                        rc = listening_get_next(seq, rc);
2025                if (rc)
2026                        break;
2027                st->bucket = 0;
2028                st->state = TCP_SEQ_STATE_ESTABLISHED;
2029                /* Fallthrough */
2030        case TCP_SEQ_STATE_ESTABLISHED:
2031                if (st->bucket > tcp_hashinfo.ehash_mask)
2032                        break;
2033                rc = established_get_first(seq);
2034                while (offset-- && rc)
2035                        rc = established_get_next(seq, rc);
2036        }
2037
2038        st->num = orig_num;
2039
2040        return rc;
2041}
2042
2043static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2044{
2045        struct tcp_iter_state *st = seq->private;
2046        void *rc;
2047
2048        if (*pos && *pos == st->last_pos) {
2049                rc = tcp_seek_last_pos(seq);
2050                if (rc)
2051                        goto out;
2052        }
2053
2054        st->state = TCP_SEQ_STATE_LISTENING;
2055        st->num = 0;
2056        st->bucket = 0;
2057        st->offset = 0;
2058        rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2059
2060out:
2061        st->last_pos = *pos;
2062        return rc;
2063}
2064
2065static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2066{
2067        struct tcp_iter_state *st = seq->private;
2068        void *rc = NULL;
2069
2070        if (v == SEQ_START_TOKEN) {
2071                rc = tcp_get_idx(seq, 0);
2072                goto out;
2073        }
2074
2075        switch (st->state) {
2076        case TCP_SEQ_STATE_LISTENING:
2077                rc = listening_get_next(seq, v);
2078                if (!rc) {
2079                        st->state = TCP_SEQ_STATE_ESTABLISHED;
2080                        st->bucket = 0;
2081                        st->offset = 0;
2082                        rc        = established_get_first(seq);
2083                }
2084                break;
2085        case TCP_SEQ_STATE_ESTABLISHED:
2086                rc = established_get_next(seq, v);
2087                break;
2088        }
2089out:
2090        ++*pos;
2091        st->last_pos = *pos;
2092        return rc;
2093}
2094
2095static void tcp_seq_stop(struct seq_file *seq, void *v)
2096{
2097        struct tcp_iter_state *st = seq->private;
2098
2099        switch (st->state) {
2100        case TCP_SEQ_STATE_LISTENING:
2101                if (v != SEQ_START_TOKEN)
2102                        spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2103                break;
2104        case TCP_SEQ_STATE_ESTABLISHED:
2105                if (v)
2106                        spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2107                break;
2108        }
2109}
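
/* tcp_seq_start/next/stop/show implement the usual seq_file iterator
 * contract: start() positions the cursor (returning SEQ_START_TOKEN for the
 * header line), next() advances it, show() formats one record, and stop()
 * releases whatever bucket lock the walk still holds - the listening-hash
 * lock or the ehash bucket lock, depending on st->state.
 */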
2110
2111int tcp_seq_open(struct inode *inode, struct file *file)
2112{
2113        struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
2114        struct tcp_iter_state *s;
2115        int err;
2116
2117        err = seq_open_net(inode, file, &afinfo->seq_ops,
2118                          sizeof(struct tcp_iter_state));
2119        if (err < 0)
2120                return err;
2121
2122        s = ((struct seq_file *)file->private_data)->private;
2123        s->family               = afinfo->family;
2124        s->last_pos             = 0;
2125        return 0;
2126}
2127EXPORT_SYMBOL(tcp_seq_open);
2128
2129int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2130{
2131        int rc = 0;
2132        struct proc_dir_entry *p;
2133
2134        afinfo->seq_ops.start           = tcp_seq_start;
2135        afinfo->seq_ops.next            = tcp_seq_next;
2136        afinfo->seq_ops.stop            = tcp_seq_stop;
2137
2138        p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2139                             afinfo->seq_fops, afinfo);
2140        if (!p)
2141                rc = -ENOMEM;
2142        return rc;
2143}
2144EXPORT_SYMBOL(tcp_proc_register);
2145
2146void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2147{
2148        remove_proc_entry(afinfo->name, net->proc_net);
2149}
2150EXPORT_SYMBOL(tcp_proc_unregister);
2151
2152static void get_openreq4(const struct request_sock *req,
2153                         struct seq_file *f, int i)
2154{
2155        const struct inet_request_sock *ireq = inet_rsk(req);
2156        long delta = req->rsk_timer.expires - jiffies;
2157
2158        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2159                " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
2160                i,
2161                ireq->ir_loc_addr,
2162                ireq->ir_num,
2163                ireq->ir_rmt_addr,
2164                ntohs(ireq->ir_rmt_port),
2165                TCP_SYN_RECV,
2166                0, 0, /* could print option size, but that is af dependent. */
2167                1,    /* timers active (only the expire timer) */
2168                jiffies_delta_to_clock_t(delta),
2169                req->num_timeout,
2170                from_kuid_munged(seq_user_ns(f),
2171                                 sock_i_uid(req->rsk_listener)),
2172                0,  /* non standard timer */
2173                0, /* open_requests have no inode */
2174                0,
2175                req);
2176}
2177
2178static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2179{
2180        int timer_active;
2181        unsigned long timer_expires;
2182        const struct tcp_sock *tp = tcp_sk(sk);
2183        const struct inet_connection_sock *icsk = inet_csk(sk);
2184        const struct inet_sock *inet = inet_sk(sk);
2185        const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2186        __be32 dest = inet->inet_daddr;
2187        __be32 src = inet->inet_rcv_saddr;
2188        __u16 destp = ntohs(inet->inet_dport);
2189        __u16 srcp = ntohs(inet->inet_sport);
2190        int rx_queue;
2191        int state;
2192
2193        if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2194            icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2195            icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2196                timer_active    = 1;
2197                timer_expires   = icsk->icsk_timeout;
2198        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2199                timer_active    = 4;
2200                timer_expires   = icsk->icsk_timeout;
2201        } else if (timer_pending(&sk->sk_timer)) {
2202                timer_active    = 2;
2203                timer_expires   = sk->sk_timer.expires;
2204        } else {
2205                timer_active    = 0;
2206                timer_expires = jiffies;
2207        }
2208
2209        state = sk_state_load(sk);
2210        if (state == TCP_LISTEN)
2211                rx_queue = sk->sk_ack_backlog;
2212        else
2213                /* Because we don't lock the socket,
2214                 * we might find a transient negative value.
2215                 */
2216                rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2217
2218        seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2219                        "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
2220                i, src, srcp, dest, destp, state,
2221                tp->write_seq - tp->snd_una,
2222                rx_queue,
2223                timer_active,
2224                jiffies_delta_to_clock_t(timer_expires - jiffies),
2225                icsk->icsk_retransmits,
2226                from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
2227                icsk->icsk_probes_out,
2228                sock_i_ino(sk),
2229                atomic_read(&sk->sk_refcnt), sk,
2230                jiffies_to_clock_t(icsk->icsk_rto),
2231                jiffies_to_clock_t(icsk->icsk_ack.ato),
2232                (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2233                tp->snd_cwnd,
2234                state == TCP_LISTEN ?
2235                    fastopenq->max_qlen :
2236                    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
2237}
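
/* The hex fields above are printed with %08X/%04X straight from the socket:
 * the address keeps its network byte order (so it reads byte-swapped on a
 * little-endian host) while the port has already been through ntohs().  An
 * illustrative /proc/net/tcp fragment, assuming a local sshd listener:
 *
 *	  sl  local_address rem_address   st ...
 *	   0: 0100007F:0016 00000000:0000 0A ...
 *
 * decodes to 127.0.0.1:22 in state 0x0A (TCP_LISTEN); 0100007F is
 * 7F.00.00.01 read back to front.
 */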
2238
2239static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2240                               struct seq_file *f, int i)
2241{
2242        long delta = tw->tw_timer.expires - jiffies;
2243        __be32 dest, src;
2244        __u16 destp, srcp;
2245
2246        dest  = tw->tw_daddr;
2247        src   = tw->tw_rcv_saddr;
2248        destp = ntohs(tw->tw_dport);
2249        srcp  = ntohs(tw->tw_sport);
2250
2251        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2252                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
2253                i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2254                3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2255                atomic_read(&tw->tw_refcnt), tw);
2256}
2257
2258#define TMPSZ 150
2259
2260static int tcp4_seq_show(struct seq_file *seq, void *v)
2261{
2262        struct tcp_iter_state *st;
2263        struct sock *sk = v;
2264
2265        seq_setwidth(seq, TMPSZ - 1);
2266        if (v == SEQ_START_TOKEN) {
2267                seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
2268                           "rx_queue tr tm->when retrnsmt   uid  timeout "
2269                           "inode");
2270                goto out;
2271        }
2272        st = seq->private;
2273
2274        if (sk->sk_state == TCP_TIME_WAIT)
2275                get_timewait4_sock(v, seq, st->num);
2276        else if (sk->sk_state == TCP_NEW_SYN_RECV)
2277                get_openreq4(v, seq, st->num);
2278        else
2279                get_tcp4_sock(v, seq, st->num);
2280out:
2281        seq_pad(seq, '\n');
2282        return 0;
2283}
2284
2285static const struct file_operations tcp_afinfo_seq_fops = {
2286        .owner   = THIS_MODULE,
2287        .open    = tcp_seq_open,
2288        .read    = seq_read,
2289        .llseek  = seq_lseek,
2290        .release = seq_release_net
2291};
2292
2293static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2294        .name           = "tcp",
2295        .family         = AF_INET,
2296        .seq_fops       = &tcp_afinfo_seq_fops,
2297        .seq_ops        = {
2298                .show           = tcp4_seq_show,
2299        },
2300};
2301
2302static int __net_init tcp4_proc_init_net(struct net *net)
2303{
2304        return tcp_proc_register(net, &tcp4_seq_afinfo);
2305}
2306
2307static void __net_exit tcp4_proc_exit_net(struct net *net)
2308{
2309        tcp_proc_unregister(net, &tcp4_seq_afinfo);
2310}
2311
2312static struct pernet_operations tcp4_net_ops = {
2313        .init = tcp4_proc_init_net,
2314        .exit = tcp4_proc_exit_net,
2315};
2316
2317int __init tcp4_proc_init(void)
2318{
2319        return register_pernet_subsys(&tcp4_net_ops);
2320}
2321
2322void tcp4_proc_exit(void)
2323{
2324        unregister_pernet_subsys(&tcp4_net_ops);
2325}
2326#endif /* CONFIG_PROC_FS */
2327
2328struct proto tcp_prot = {
2329        .name                   = "TCP",
2330        .owner                  = THIS_MODULE,
2331        .close                  = tcp_close,
2332        .connect                = tcp_v4_connect,
2333        .disconnect             = tcp_disconnect,
2334        .accept                 = inet_csk_accept,
2335        .ioctl                  = tcp_ioctl,
2336        .init                   = tcp_v4_init_sock,
2337        .destroy                = tcp_v4_destroy_sock,
2338        .shutdown               = tcp_shutdown,
2339        .setsockopt             = tcp_setsockopt,
2340        .getsockopt             = tcp_getsockopt,
2341        .recvmsg                = tcp_recvmsg,
2342        .sendmsg                = tcp_sendmsg,
2343        .sendpage               = tcp_sendpage,
2344        .backlog_rcv            = tcp_v4_do_rcv,
2345        .release_cb             = tcp_release_cb,
2346        .hash                   = inet_hash,
2347        .unhash                 = inet_unhash,
2348        .get_port               = inet_csk_get_port,
2349        .enter_memory_pressure  = tcp_enter_memory_pressure,
2350        .stream_memory_free     = tcp_stream_memory_free,
2351        .sockets_allocated      = &tcp_sockets_allocated,
2352        .orphan_count           = &tcp_orphan_count,
2353        .memory_allocated       = &tcp_memory_allocated,
2354        .memory_pressure        = &tcp_memory_pressure,
2355        .sysctl_mem             = sysctl_tcp_mem,
2356        .sysctl_wmem            = sysctl_tcp_wmem,
2357        .sysctl_rmem            = sysctl_tcp_rmem,
2358        .max_header             = MAX_TCP_HEADER,
2359        .obj_size               = sizeof(struct tcp_sock),
2360        .slab_flags             = SLAB_DESTROY_BY_RCU,
2361        .twsk_prot              = &tcp_timewait_sock_ops,
2362        .rsk_prot               = &tcp_request_sock_ops,
2363        .h.hashinfo             = &tcp_hashinfo,
2364        .no_autobind            = true,
2365#ifdef CONFIG_COMPAT
2366        .compat_setsockopt      = compat_tcp_setsockopt,
2367        .compat_getsockopt      = compat_tcp_getsockopt,
2368#endif
2369        .diag_destroy           = tcp_abort,
2370};
2371EXPORT_SYMBOL(tcp_prot);
2372
2373static void __net_exit tcp_sk_exit(struct net *net)
2374{
2375        int cpu;
2376
2377        for_each_possible_cpu(cpu)
2378                inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2379        free_percpu(net->ipv4.tcp_sk);
2380}
2381
2382static int __net_init tcp_sk_init(struct net *net)
2383{
2384        int res, cpu;
2385
2386        net->ipv4.tcp_sk = alloc_percpu(struct sock *);
2387        if (!net->ipv4.tcp_sk)
2388                return -ENOMEM;
2389
2390        for_each_possible_cpu(cpu) {
2391                struct sock *sk;
2392
2393                res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
2394                                           IPPROTO_TCP, net);
2395                if (res)
2396                        goto fail;
2397                sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2398                *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
2399        }
2400
2401        net->ipv4.sysctl_tcp_ecn = 2;
2402        net->ipv4.sysctl_tcp_ecn_fallback = 1;
2403
2404        net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
2405        net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
2406        net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
2407
2408        net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
2409        net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
2410        net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
2411
2412        net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
2413        net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
2414        net->ipv4.sysctl_tcp_syncookies = 1;
2415        net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
2416        net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
2417        net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
2418        net->ipv4.sysctl_tcp_orphan_retries = 0;
2419        net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
2420        net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
2421
2422        return 0;
2423fail:
2424        tcp_sk_exit(net);
2425
2426        return res;
2427}
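
/* The values set up in tcp_sk_init() are the per-namespace defaults behind
 * the corresponding /proc/sys/net/ipv4/tcp_* sysctls and may be overridden
 * independently in each netns.  For example the ECN default above shows up
 * as:
 *
 *	# sysctl net.ipv4.tcp_ecn
 *	net.ipv4.tcp_ecn = 2
 *
 * i.e. use ECN when the peer requests it, but do not request it on outgoing
 * connections.
 */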
2428
2429static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2430{
2431        inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2432}
2433
2434static struct pernet_operations __net_initdata tcp_sk_ops = {
2435       .init       = tcp_sk_init,
2436       .exit       = tcp_sk_exit,
2437       .exit_batch = tcp_sk_exit_batch,
2438};
2439
2440void __init tcp_v4_init(void)
2441{
2442        inet_hashinfo_init(&tcp_hashinfo);
2443        if (register_pernet_subsys(&tcp_sk_ops))
2444                panic("Failed to create the TCP control socket.\n");
2445}
2446