linux/net/ipv4/tcp_ipv4.c
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              Implementation of the Transmission Control Protocol(TCP).
   7 *
   8 *              IPv4 specific functions
   9 *
  10 *
  11 *              code split from:
  12 *              linux/ipv4/tcp.c
  13 *              linux/ipv4/tcp_input.c
  14 *              linux/ipv4/tcp_output.c
  15 *
  16 *              See tcp.c for author information
  17 *
  18 *      This program is free software; you can redistribute it and/or
  19 *      modify it under the terms of the GNU General Public License
  20 *      as published by the Free Software Foundation; either version
  21 *      2 of the License, or (at your option) any later version.
  22 */
  23
  24/*
  25 * Changes:
  26 *              David S. Miller :       New socket lookup architecture.
  27 *                                      This code is dedicated to John Dyson.
  28 *              David S. Miller :       Change semantics of established hash,
  29 *                                      half is devoted to TIME_WAIT sockets
  30 *                                      and the rest go in the other half.
  31 *              Andi Kleen :            Add support for syncookies and fixed
  32 *                                      some bugs: ip options weren't passed to
  33 *                                      the TCP layer, missed a check for an
  34 *                                      ACK bit.
  35 *              Andi Kleen :            Implemented fast path mtu discovery.
  36 *                                      Fixed many serious bugs in the
  37 *                                      request_sock handling and moved
  38 *                                      most of it into the af independent code.
  39 *                                      Added tail drop and some other bugfixes.
  40 *                                      Added new listen semantics.
  41 *              Mike McLagan    :       Routing by source
  42 *      Juan Jose Ciarlante:            ip_dynaddr bits
  43 *              Andi Kleen:             various fixes.
  44 *      Vitaly E. Lavrov        :       Transparent proxy revived after year
  45 *                                      coma.
  46 *      Andi Kleen              :       Fix new listen.
  47 *      Andi Kleen              :       Fix accept error reporting.
  48 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
  49 *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
  50 *                                      a single port at the same time.
  51 */
  52
  53#define pr_fmt(fmt) "TCP: " fmt
  54
  55#include <linux/bottom_half.h>
  56#include <linux/types.h>
  57#include <linux/fcntl.h>
  58#include <linux/module.h>
  59#include <linux/random.h>
  60#include <linux/cache.h>
  61#include <linux/jhash.h>
  62#include <linux/init.h>
  63#include <linux/times.h>
  64#include <linux/slab.h>
  65
  66#include <net/net_namespace.h>
  67#include <net/icmp.h>
  68#include <net/inet_hashtables.h>
  69#include <net/tcp.h>
  70#include <net/transp_v6.h>
  71#include <net/ipv6.h>
  72#include <net/inet_common.h>
  73#include <net/timewait_sock.h>
  74#include <net/xfrm.h>
  75#include <net/netdma.h>
  76#include <net/secure_seq.h>
  77#include <net/tcp_memcontrol.h>
  78
  79#include <linux/inet.h>
  80#include <linux/ipv6.h>
  81#include <linux/stddef.h>
  82#include <linux/proc_fs.h>
  83#include <linux/seq_file.h>
  84
  85#include <linux/crypto.h>
  86#include <linux/scatterlist.h>
  87
  88int sysctl_tcp_tw_reuse __read_mostly;
  89int sysctl_tcp_low_latency __read_mostly;
  90EXPORT_SYMBOL(sysctl_tcp_low_latency);
  91
  92
  93#ifdef CONFIG_TCP_MD5SIG
  94static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
  95                               __be32 daddr, __be32 saddr, const struct tcphdr *th);
  96#endif
  97
  98struct inet_hashinfo tcp_hashinfo;
  99EXPORT_SYMBOL(tcp_hashinfo);
 100
 101static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
 102{
 103        return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
 104                                          ip_hdr(skb)->saddr,
 105                                          tcp_hdr(skb)->dest,
 106                                          tcp_hdr(skb)->source);
 107}
 108
 109int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 110{
 111        const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
 112        struct tcp_sock *tp = tcp_sk(sk);
 113
 114        /* With PAWS, reusing the port pair is safe from the viewpoint
 115           of data integrity. Even without PAWS it is safe, provided sequence
 116           spaces do not overlap, i.e. at data rates <= 80 Mbit/sec.
 117
 118           Actually, the idea is close to VJ's, only the timestamp cache is
 119           held not per host but per port pair, and the TW bucket is used as
 120           the state holder.
 121
 122           If the TW bucket has already been destroyed we fall back to VJ's
 123           scheme and use the initial timestamp retrieved from the peer table.
 124         */
 125        if (tcptw->tw_ts_recent_stamp &&
 126            (twp == NULL || (sysctl_tcp_tw_reuse &&
 127                             get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
 128                tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 129                if (tp->write_seq == 0)
 130                        tp->write_seq = 1;
 131                tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
 132                tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
 133                sock_hold(sktw);
 134                return 1;
 135        }
 136
 137        return 0;
 138}
 139EXPORT_SYMBOL_GPL(tcp_twsk_unique);
 140
 141static int tcp_repair_connect(struct sock *sk)
 142{
 143        tcp_connect_init(sk);
 144        tcp_finish_connect(sk, NULL);
 145
 146        return 0;
 147}
 148
 149/* This will initiate an outgoing connection. */
 150int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 151{
 152        struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
 153        struct inet_sock *inet = inet_sk(sk);
 154        struct tcp_sock *tp = tcp_sk(sk);
 155        __be16 orig_sport, orig_dport;
 156        __be32 daddr, nexthop;
 157        struct flowi4 *fl4;
 158        struct rtable *rt;
 159        int err;
 160        struct ip_options_rcu *inet_opt;
 161
 162        if (addr_len < sizeof(struct sockaddr_in))
 163                return -EINVAL;
 164
 165        if (usin->sin_family != AF_INET)
 166                return -EAFNOSUPPORT;
 167
 168        nexthop = daddr = usin->sin_addr.s_addr;
 169        inet_opt = rcu_dereference_protected(inet->inet_opt,
 170                                             sock_owned_by_user(sk));
 171        if (inet_opt && inet_opt->opt.srr) {
 172                if (!daddr)
 173                        return -EINVAL;
 174                nexthop = inet_opt->opt.faddr;
 175        }
 176
 177        orig_sport = inet->inet_sport;
 178        orig_dport = usin->sin_port;
 179        fl4 = &inet->cork.fl.u.ip4;
 180        rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
 181                              RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 182                              IPPROTO_TCP,
 183                              orig_sport, orig_dport, sk, true);
 184        if (IS_ERR(rt)) {
 185                err = PTR_ERR(rt);
 186                if (err == -ENETUNREACH)
 187                        IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 188                return err;
 189        }
 190
 191        if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
 192                ip_rt_put(rt);
 193                return -ENETUNREACH;
 194        }
 195
 196        if (!inet_opt || !inet_opt->opt.srr)
 197                daddr = fl4->daddr;
 198
 199        if (!inet->inet_saddr)
 200                inet->inet_saddr = fl4->saddr;
 201        inet->inet_rcv_saddr = inet->inet_saddr;
 202
 203        if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
 204                /* Reset inherited state */
 205                tp->rx_opt.ts_recent       = 0;
 206                tp->rx_opt.ts_recent_stamp = 0;
 207                if (likely(!tp->repair))
 208                        tp->write_seq      = 0;
 209        }
 210
 211        if (tcp_death_row.sysctl_tw_recycle &&
 212            !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
 213                tcp_fetch_timewait_stamp(sk, &rt->dst);
 214
 215        inet->inet_dport = usin->sin_port;
 216        inet->inet_daddr = daddr;
 217
 218        inet_csk(sk)->icsk_ext_hdr_len = 0;
 219        if (inet_opt)
 220                inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
 221
 222        tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
 223
 224        /* Socket identity is still unknown (sport may be zero).
 225         * However we set the state to SYN-SENT and, without releasing the
 226         * socket lock, select a source port, enter ourselves into the hash
 227         * tables and complete initialization after this.
 228         */
 229        tcp_set_state(sk, TCP_SYN_SENT);
 230        err = inet_hash_connect(&tcp_death_row, sk);
 231        if (err)
 232                goto failure;
 233
 234        rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
 235                               inet->inet_sport, inet->inet_dport, sk);
 236        if (IS_ERR(rt)) {
 237                err = PTR_ERR(rt);
 238                rt = NULL;
 239                goto failure;
 240        }
 241        /* OK, now commit destination to socket.  */
 242        sk->sk_gso_type = SKB_GSO_TCPV4;
 243        sk_setup_caps(sk, &rt->dst);
 244
 245        if (!tp->write_seq && likely(!tp->repair))
 246                tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
 247                                                           inet->inet_daddr,
 248                                                           inet->inet_sport,
 249                                                           usin->sin_port);
 250
 251        inet->inet_id = tp->write_seq ^ jiffies;
 252
 253        if (likely(!tp->repair))
 254                err = tcp_connect(sk);
 255        else
 256                err = tcp_repair_connect(sk);
 257
 258        rt = NULL;
 259        if (err)
 260                goto failure;
 261
 262        return 0;
 263
 264failure:
 265        /*
 266         * This unhashes the socket and releases the local port,
 267         * if necessary.
 268         */
 269        tcp_set_state(sk, TCP_CLOSE);
 270        ip_rt_put(rt);
 271        sk->sk_route_caps = 0;
 272        inet->inet_dport = 0;
 273        return err;
 274}
 275EXPORT_SYMBOL(tcp_v4_connect);
 276
 277/*
 278 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 279 * It can be called through tcp_release_cb() if socket was owned by user
 280 * at the time tcp_v4_err() was called to handle ICMP message.
 281 */
 282static void tcp_v4_mtu_reduced(struct sock *sk)
 283{
 284        struct dst_entry *dst;
 285        struct inet_sock *inet = inet_sk(sk);
 286        u32 mtu = tcp_sk(sk)->mtu_info;
 287
 288        /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
 289         * sent out by Linux are always < 576 bytes, so they should go through
 290         * unfragmented).
 291         */
 292        if (sk->sk_state == TCP_LISTEN)
 293                return;
 294
 295        dst = inet_csk_update_pmtu(sk, mtu);
 296        if (!dst)
 297                return;
 298
 299        /* Something is about to go wrong... Remember the soft error
 300         * in case this connection is not able to recover.
 301         */
 302        if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
 303                sk->sk_err_soft = EMSGSIZE;
 304
 305        mtu = dst_mtu(dst);
 306
 307        if (inet->pmtudisc != IP_PMTUDISC_DONT &&
 308            inet_csk(sk)->icsk_pmtu_cookie > mtu) {
 309                tcp_sync_mss(sk, mtu);
 310
 311                /* Resend the TCP packet because it's
 312                 * clear that the old packet has been
 313                 * dropped. This is the new "fast" path mtu
 314                 * discovery.
 315                 */
 316                tcp_simple_retransmit(sk);
 317        } /* else let the usual retransmit timer handle it */
 318}
 319
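    /* An ICMP redirect arrived for this connection: pass it to the cached
     * route's redirect handler, if the socket still has a valid route.
     */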
 320static void do_redirect(struct sk_buff *skb, struct sock *sk)
 321{
 322        struct dst_entry *dst = __sk_dst_check(sk, 0);
 323
 324        if (dst)
 325                dst->ops->redirect(dst, sk, skb);
 326}
 327
 328/*
 329 * This routine is called by the ICMP module when it gets some
 330 * sort of error condition.  If err < 0 then the socket should
 331 * be closed and the error returned to the user.  If err > 0
 332 * it's just the icmp type << 8 | icmp code.  After adjustment,
 333 * the header points to the first 8 bytes of the tcp header.  We need
 334 * to find the appropriate port.
 335 *
 336 * The locking strategy used here is very "optimistic". When
 337 * someone else accesses the socket the ICMP is just dropped
 338 * and for some paths there is no check at all.
 339 * A more general error queue to queue errors for later handling
 340 * is probably better.
 341 *
 342 */
 343
 344void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 345{
 346        const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
 347        struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
 348        struct inet_connection_sock *icsk;
 349        struct tcp_sock *tp;
 350        struct inet_sock *inet;
 351        const int type = icmp_hdr(icmp_skb)->type;
 352        const int code = icmp_hdr(icmp_skb)->code;
 353        struct sock *sk;
 354        struct sk_buff *skb;
 355        __u32 seq;
 356        __u32 remaining;
 357        int err;
 358        struct net *net = dev_net(icmp_skb->dev);
 359
 360        if (icmp_skb->len < (iph->ihl << 2) + 8) {
 361                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 362                return;
 363        }
 364
 365        sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
 366                        iph->saddr, th->source, inet_iif(icmp_skb));
 367        if (!sk) {
 368                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 369                return;
 370        }
 371        if (sk->sk_state == TCP_TIME_WAIT) {
 372                inet_twsk_put(inet_twsk(sk));
 373                return;
 374        }
 375
 376        bh_lock_sock(sk);
 377        /* If too many ICMPs get dropped on busy
 378         * servers this needs to be solved differently.
 379         * We do take care of the PMTU discovery (RFC1191) special case:
 380         * we can receive locally generated ICMP messages while the socket is held.
 381         */
 382        if (sock_owned_by_user(sk) &&
 383            type != ICMP_DEST_UNREACH &&
 384            code != ICMP_FRAG_NEEDED)
 385                NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
 386
 387        if (sk->sk_state == TCP_CLOSE)
 388                goto out;
 389
 390        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
 391                NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
 392                goto out;
 393        }
 394
 395        icsk = inet_csk(sk);
 396        tp = tcp_sk(sk);
 397        seq = ntohl(th->seq);
 398        if (sk->sk_state != TCP_LISTEN &&
 399            !between(seq, tp->snd_una, tp->snd_nxt)) {
 400                NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 401                goto out;
 402        }
 403
 404        switch (type) {
 405        case ICMP_REDIRECT:
 406                do_redirect(icmp_skb, sk);
 407                goto out;
 408        case ICMP_SOURCE_QUENCH:
 409                /* Just silently ignore these. */
 410                goto out;
 411        case ICMP_PARAMETERPROB:
 412                err = EPROTO;
 413                break;
 414        case ICMP_DEST_UNREACH:
 415                if (code > NR_ICMP_UNREACH)
 416                        goto out;
 417
 418                if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
 419                        tp->mtu_info = info;
 420                        if (!sock_owned_by_user(sk)) {
 421                                tcp_v4_mtu_reduced(sk);
 422                        } else {
 423                                if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
 424                                        sock_hold(sk);
 425                        }
 426                        goto out;
 427                }
 428
 429                err = icmp_err_convert[code].errno;
 430                /* Check whether icmp_skb allows us to revert the backoff
 431                 * (see draft-zimmermann-tcp-lcd) */
 432                if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
 433                        break;
 434                if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
 435                    !icsk->icsk_backoff)
 436                        break;
 437
 438                if (sock_owned_by_user(sk))
 439                        break;
 440
 441                icsk->icsk_backoff--;
 442                inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
 443                        TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
 444                tcp_bound_rto(sk);
 445
 446                skb = tcp_write_queue_head(sk);
 447                BUG_ON(!skb);
 448
 449                remaining = icsk->icsk_rto - min(icsk->icsk_rto,
 450                                tcp_time_stamp - TCP_SKB_CB(skb)->when);
 451
 452                if (remaining) {
 453                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 454                                                  remaining, TCP_RTO_MAX);
 455                } else {
 456                        /* RTO revert clocked out retransmission.
 457                         * Will retransmit now */
 458                        tcp_retransmit_timer(sk);
 459                }
 460
 461                break;
 462        case ICMP_TIME_EXCEEDED:
 463                err = EHOSTUNREACH;
 464                break;
 465        default:
 466                goto out;
 467        }
 468
 469        switch (sk->sk_state) {
 470                struct request_sock *req, **prev;
 471        case TCP_LISTEN:
 472                if (sock_owned_by_user(sk))
 473                        goto out;
 474
 475                req = inet_csk_search_req(sk, &prev, th->dest,
 476                                          iph->daddr, iph->saddr);
 477                if (!req)
 478                        goto out;
 479
 480                /* ICMPs are not backlogged, hence we cannot get
 481                   an established socket here.
 482                 */
 483                WARN_ON(req->sk);
 484
 485                if (seq != tcp_rsk(req)->snt_isn) {
 486                        NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 487                        goto out;
 488                }
 489
 490                /*
 491                 * Still in SYN_RECV, just remove it silently.
 492                 * There is no good way to pass the error to the newly
 493                 * created socket, and POSIX does not want network
 494                 * errors returned from accept().
 495                 */
 496                inet_csk_reqsk_queue_drop(sk, req, prev);
 497                goto out;
 498
 499        case TCP_SYN_SENT:
 500        case TCP_SYN_RECV:  /* Normally cannot happen.
 501                               It can, e.g., if SYNs crossed.
 502                             */
 503                if (!sock_owned_by_user(sk)) {
 504                        sk->sk_err = err;
 505
 506                        sk->sk_error_report(sk);
 507
 508                        tcp_done(sk);
 509                } else {
 510                        sk->sk_err_soft = err;
 511                }
 512                goto out;
 513        }
 514
 515        /* If we've already connected we will keep trying
 516         * until we time out, or the user gives up.
 517         *
 518         * rfc1122 4.2.3.9 allows us to consider as hard errors
 519         * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
 520         * but it is obsoleted by pmtu discovery).
 521         *
 522         * Note that in the modern internet, where routing is unreliable
 523         * and broken firewalls sit in every dark corner sending random
 524         * errors ordered by their masters, even these two messages finally
 525         * lose their original sense (even Linux sends invalid PORT_UNREACHs).
 526         *
 527         * Now we are in compliance with RFCs.
 528         *                                                      --ANK (980905)
 529         */
 530
 531        inet = inet_sk(sk);
 532        if (!sock_owned_by_user(sk) && inet->recverr) {
 533                sk->sk_err = err;
 534                sk->sk_error_report(sk);
 535        } else  { /* Only an error on timeout */
 536                sk->sk_err_soft = err;
 537        }
 538
 539out:
 540        bh_unlock_sock(sk);
 541        sock_put(sk);
 542}
 543
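    /* Fill in the TCP checksum of an outgoing skb: either leave a partial
     * checksum for the device to complete (CHECKSUM_PARTIAL) or compute
     * the full checksum in software.
     */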
 544static void __tcp_v4_send_check(struct sk_buff *skb,
 545                                __be32 saddr, __be32 daddr)
 546{
 547        struct tcphdr *th = tcp_hdr(skb);
 548
 549        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 550                th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
 551                skb->csum_start = skb_transport_header(skb) - skb->head;
 552                skb->csum_offset = offsetof(struct tcphdr, check);
 553        } else {
 554                th->check = tcp_v4_check(skb->len, saddr, daddr,
 555                                         csum_partial(th,
 556                                                      th->doff << 2,
 557                                                      skb->csum));
 558        }
 559}
 560
 561/* This routine computes an IPv4 TCP checksum. */
 562void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
 563{
 564        const struct inet_sock *inet = inet_sk(sk);
 565
 566        __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
 567}
 568EXPORT_SYMBOL(tcp_v4_send_check);
 569
 570int tcp_v4_gso_send_check(struct sk_buff *skb)
 571{
 572        const struct iphdr *iph;
 573        struct tcphdr *th;
 574
 575        if (!pskb_may_pull(skb, sizeof(*th)))
 576                return -EINVAL;
 577
 578        iph = ip_hdr(skb);
 579        th = tcp_hdr(skb);
 580
 581        th->check = 0;
 582        skb->ip_summed = CHECKSUM_PARTIAL;
 583        __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
 584        return 0;
 585}
 586
 587/*
 588 *      This routine will send an RST to the other tcp.
 589 *
 590 *      Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
 591 *                    for the reset?
 592 *      Answer: if a packet caused an RST, it is not for a socket
 593 *              existing in our system; if it is matched to a socket,
 594 *              it is just a duplicate segment or a bug in the other side's TCP.
 595 *              So we build the reply based only on the parameters that
 596 *              arrived with the segment.
 597 *      Exception: precedence violation. We do not implement it in any case.
 598 */
 599
 600static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 601{
 602        const struct tcphdr *th = tcp_hdr(skb);
 603        struct {
 604                struct tcphdr th;
 605#ifdef CONFIG_TCP_MD5SIG
 606                __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
 607#endif
 608        } rep;
 609        struct ip_reply_arg arg;
 610#ifdef CONFIG_TCP_MD5SIG
 611        struct tcp_md5sig_key *key;
 612        const __u8 *hash_location = NULL;
 613        unsigned char newhash[16];
 614        int genhash;
 615        struct sock *sk1 = NULL;
 616#endif
 617        struct net *net;
 618
 619        /* Never send a reset in response to a reset. */
 620        if (th->rst)
 621                return;
 622
 623        if (skb_rtable(skb)->rt_type != RTN_LOCAL)
 624                return;
 625
 626        /* Swap the send and the receive. */
 627        memset(&rep, 0, sizeof(rep));
 628        rep.th.dest   = th->source;
 629        rep.th.source = th->dest;
 630        rep.th.doff   = sizeof(struct tcphdr) / 4;
 631        rep.th.rst    = 1;
 632
 633        if (th->ack) {
 634                rep.th.seq = th->ack_seq;
 635        } else {
 636                rep.th.ack = 1;
 637                rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
 638                                       skb->len - (th->doff << 2));
 639        }
 640
 641        memset(&arg, 0, sizeof(arg));
 642        arg.iov[0].iov_base = (unsigned char *)&rep;
 643        arg.iov[0].iov_len  = sizeof(rep.th);
 644
 645#ifdef CONFIG_TCP_MD5SIG
 646        hash_location = tcp_parse_md5sig_option(th);
 647        if (!sk && hash_location) {
 648                /*
 649                 * The active side is gone. Try to find the listening socket
 650                 * via the source port, and then find the md5 key via that
 651                 * listening socket. We do not loosen security here:
 652                 * the incoming packet is checked against the md5 hash of the
 653                 * key we find; no RST is generated if the hash doesn't match.
 654                 */
 655                sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
 656                                             &tcp_hashinfo, ip_hdr(skb)->daddr,
 657                                             ntohs(th->source), inet_iif(skb));
 658                /* don't send rst if it can't find key */
 659                if (!sk1)
 660                        return;
 661                rcu_read_lock();
 662                key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
 663                                        &ip_hdr(skb)->saddr, AF_INET);
 664                if (!key)
 665                        goto release_sk1;
 666
 667                genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
 668                if (genhash || memcmp(hash_location, newhash, 16) != 0)
 669                        goto release_sk1;
 670        } else {
 671                key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
 672                                             &ip_hdr(skb)->saddr,
 673                                             AF_INET) : NULL;
 674        }
 675
 676        if (key) {
 677                rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 678                                   (TCPOPT_NOP << 16) |
 679                                   (TCPOPT_MD5SIG << 8) |
 680                                   TCPOLEN_MD5SIG);
 681                /* Update length and the length the header thinks exists */
 682                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 683                rep.th.doff = arg.iov[0].iov_len / 4;
 684
 685                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
 686                                     key, ip_hdr(skb)->saddr,
 687                                     ip_hdr(skb)->daddr, &rep.th);
 688        }
 689#endif
 690        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 691                                      ip_hdr(skb)->saddr, /* XXX */
 692                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 693        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 694        arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
 695        /* When the socket is gone, all binding information is lost and
 696         * routing might fail. No choice here: if we chose to force the
 697         * input interface, we would misroute in case of an asymmetric route.
 698         */
 699        if (sk)
 700                arg.bound_dev_if = sk->sk_bound_dev_if;
 701
 702        net = dev_net(skb_dst(skb)->dev);
 703        arg.tos = ip_hdr(skb)->tos;
 704        ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
 705                              ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
 706
 707        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 708        TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
 709
 710#ifdef CONFIG_TCP_MD5SIG
 711release_sk1:
 712        if (sk1) {
 713                rcu_read_unlock();
 714                sock_put(sk1);
 715        }
 716#endif
 717}
 718
 719/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
 720   outside of socket context, is certainly ugly. What can I do?
 721 */
 722
 723static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 724                            u32 win, u32 ts, int oif,
 725                            struct tcp_md5sig_key *key,
 726                            int reply_flags, u8 tos)
 727{
 728        const struct tcphdr *th = tcp_hdr(skb);
 729        struct {
 730                struct tcphdr th;
 731                __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
 732#ifdef CONFIG_TCP_MD5SIG
 733                           + (TCPOLEN_MD5SIG_ALIGNED >> 2)
 734#endif
 735                        ];
 736        } rep;
 737        struct ip_reply_arg arg;
 738        struct net *net = dev_net(skb_dst(skb)->dev);
 739
 740        memset(&rep.th, 0, sizeof(struct tcphdr));
 741        memset(&arg, 0, sizeof(arg));
 742
 743        arg.iov[0].iov_base = (unsigned char *)&rep;
 744        arg.iov[0].iov_len  = sizeof(rep.th);
 745        if (ts) {
 746                rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 747                                   (TCPOPT_TIMESTAMP << 8) |
 748                                   TCPOLEN_TIMESTAMP);
 749                rep.opt[1] = htonl(tcp_time_stamp);
 750                rep.opt[2] = htonl(ts);
 751                arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
 752        }
 753
 754        /* Swap the send and the receive. */
 755        rep.th.dest    = th->source;
 756        rep.th.source  = th->dest;
 757        rep.th.doff    = arg.iov[0].iov_len / 4;
 758        rep.th.seq     = htonl(seq);
 759        rep.th.ack_seq = htonl(ack);
 760        rep.th.ack     = 1;
 761        rep.th.window  = htons(win);
 762
 763#ifdef CONFIG_TCP_MD5SIG
 764        if (key) {
 765                int offset = (ts) ? 3 : 0;
 766
 767                rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
 768                                          (TCPOPT_NOP << 16) |
 769                                          (TCPOPT_MD5SIG << 8) |
 770                                          TCPOLEN_MD5SIG);
 771                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 772                rep.th.doff = arg.iov[0].iov_len/4;
 773
 774                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
 775                                    key, ip_hdr(skb)->saddr,
 776                                    ip_hdr(skb)->daddr, &rep.th);
 777        }
 778#endif
 779        arg.flags = reply_flags;
 780        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 781                                      ip_hdr(skb)->saddr, /* XXX */
 782                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 783        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 784        if (oif)
 785                arg.bound_dev_if = oif;
 786        arg.tos = tos;
 787        ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
 788                              ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
 789
 790        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 791}
 792
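    /* A segment hit a TIME-WAIT socket: answer it with an ACK built from
     * the sequence and timestamp state kept in the timewait bucket, then
     * drop our reference to the bucket.
     */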
 793static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 794{
 795        struct inet_timewait_sock *tw = inet_twsk(sk);
 796        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 797
 798        tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 799                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 800                        tcptw->tw_ts_recent,
 801                        tw->tw_bound_dev_if,
 802                        tcp_twsk_md5_key(tcptw),
 803                        tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
 804                        tw->tw_tos
 805                        );
 806
 807        inet_twsk_put(tw);
 808}
 809
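    /* ACK a segment matching a pending connection request (SYN-RECV),
     * answering on behalf of the listening socket.
     */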
 810static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 811                                  struct request_sock *req)
 812{
 813        tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
 814                        tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
 815                        req->ts_recent,
 816                        0,
 817                        tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
 818                                          AF_INET),
 819                        inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
 820                        ip_hdr(skb)->tos);
 821}
 822
 823/*
 824 *      Send a SYN-ACK after having received a SYN.
 825 *      This still operates on a request_sock only, not on a big
 826 *      socket.
 827 */
 828static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 829                              struct request_sock *req,
 830                              struct request_values *rvp,
 831                              u16 queue_mapping,
 832                              bool nocache)
 833{
 834        const struct inet_request_sock *ireq = inet_rsk(req);
 835        struct flowi4 fl4;
 836        int err = -1;
 837        struct sk_buff * skb;
 838
 839        /* First, grab a route. */
 840        if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 841                return -1;
 842
 843        skb = tcp_make_synack(sk, dst, req, rvp);
 844
 845        if (skb) {
 846                __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
 847
 848                skb_set_queue_mapping(skb, queue_mapping);
 849                err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
 850                                            ireq->rmt_addr,
 851                                            ireq->opt);
 852                err = net_xmit_eval(err);
 853        }
 854
 855        return err;
 856}
 857
 858static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
 859                              struct request_values *rvp)
 860{
 861        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
 862        return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
 863}
 864
 865/*
 866 *      IPv4 request_sock destructor.
 867 */
 868static void tcp_v4_reqsk_destructor(struct request_sock *req)
 869{
 870        kfree(inet_rsk(req)->opt);
 871}
 872
 873/*
 874 * Return true if a syncookie should be sent
 875 */
 876bool tcp_syn_flood_action(struct sock *sk,
 877                         const struct sk_buff *skb,
 878                         const char *proto)
 879{
 880        const char *msg = "Dropping request";
 881        bool want_cookie = false;
 882        struct listen_sock *lopt;
 883
 884
 885
 886#ifdef CONFIG_SYN_COOKIES
 887        if (sysctl_tcp_syncookies) {
 888                msg = "Sending cookies";
 889                want_cookie = true;
 890                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
 891        } else
 892#endif
 893                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
 894
 895        lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
 896        if (!lopt->synflood_warned) {
 897                lopt->synflood_warned = 1;
 898                pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
 899                        proto, ntohs(tcp_hdr(skb)->dest), msg);
 900        }
 901        return want_cookie;
 902}
 903EXPORT_SYMBOL(tcp_syn_flood_action);
 904
 905/*
 906 * Save and compile IPv4 options into the request_sock if needed.
 907 */
 908static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
 909                                                  struct sk_buff *skb)
 910{
 911        const struct ip_options *opt = &(IPCB(skb)->opt);
 912        struct ip_options_rcu *dopt = NULL;
 913
 914        if (opt && opt->optlen) {
 915                int opt_size = sizeof(*dopt) + opt->optlen;
 916
 917                dopt = kmalloc(opt_size, GFP_ATOMIC);
 918                if (dopt) {
 919                        if (ip_options_echo(&dopt->opt, skb)) {
 920                                kfree(dopt);
 921                                dopt = NULL;
 922                        }
 923                }
 924        }
 925        return dopt;
 926}
 927
 928#ifdef CONFIG_TCP_MD5SIG
 929/*
 930 * RFC2385 MD5 checksumming requires a mapping of
 931 * IP address->MD5 Key.
 932 * We need to maintain these in the sk structure.
 933 */
 934
 935/* Find the Key structure for an address.  */
 936struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
 937                                         const union tcp_md5_addr *addr,
 938                                         int family)
 939{
 940        struct tcp_sock *tp = tcp_sk(sk);
 941        struct tcp_md5sig_key *key;
 942        struct hlist_node *pos;
 943        unsigned int size = sizeof(struct in_addr);
 944        struct tcp_md5sig_info *md5sig;
 945
 946        /* caller either holds rcu_read_lock() or socket lock */
 947        md5sig = rcu_dereference_check(tp->md5sig_info,
 948                                       sock_owned_by_user(sk) ||
 949                                       lockdep_is_held(&sk->sk_lock.slock));
 950        if (!md5sig)
 951                return NULL;
 952#if IS_ENABLED(CONFIG_IPV6)
 953        if (family == AF_INET6)
 954                size = sizeof(struct in6_addr);
 955#endif
 956        hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
 957                if (key->family != family)
 958                        continue;
 959                if (!memcmp(&key->addr, addr, size))
 960                        return key;
 961        }
 962        return NULL;
 963}
 964EXPORT_SYMBOL(tcp_md5_do_lookup);
 965
 966struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
 967                                         struct sock *addr_sk)
 968{
 969        union tcp_md5_addr *addr;
 970
 971        addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
 972        return tcp_md5_do_lookup(sk, addr, AF_INET);
 973}
 974EXPORT_SYMBOL(tcp_v4_md5_lookup);
 975
 976static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
 977                                                      struct request_sock *req)
 978{
 979        union tcp_md5_addr *addr;
 980
 981        addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
 982        return tcp_md5_do_lookup(sk, addr, AF_INET);
 983}
 984
 985/* This can be called on a newly created socket, from other files */
 986int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
 987                   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
 988{
 989        /* Add Key to the list */
 990        struct tcp_md5sig_key *key;
 991        struct tcp_sock *tp = tcp_sk(sk);
 992        struct tcp_md5sig_info *md5sig;
 993
 994        key = tcp_md5_do_lookup(sk, addr, family);
 995        if (key) {
 996                /* Pre-existing entry - just update that one. */
 997                memcpy(key->key, newkey, newkeylen);
 998                key->keylen = newkeylen;
 999                return 0;
1000        }
1001
1002        md5sig = rcu_dereference_protected(tp->md5sig_info,
1003                                           sock_owned_by_user(sk));
1004        if (!md5sig) {
1005                md5sig = kmalloc(sizeof(*md5sig), gfp);
1006                if (!md5sig)
1007                        return -ENOMEM;
1008
1009                sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1010                INIT_HLIST_HEAD(&md5sig->head);
1011                rcu_assign_pointer(tp->md5sig_info, md5sig);
1012        }
1013
1014        key = sock_kmalloc(sk, sizeof(*key), gfp);
1015        if (!key)
1016                return -ENOMEM;
1017        if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
1018                sock_kfree_s(sk, key, sizeof(*key));
1019                return -ENOMEM;
1020        }
1021
1022        memcpy(key->key, newkey, newkeylen);
1023        key->keylen = newkeylen;
1024        key->family = family;
1025        memcpy(&key->addr, addr,
1026               (family == AF_INET6) ? sizeof(struct in6_addr) :
1027                                      sizeof(struct in_addr));
1028        hlist_add_head_rcu(&key->node, &md5sig->head);
1029        return 0;
1030}
1031EXPORT_SYMBOL(tcp_md5_do_add);
1032
1033int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
1034{
1035        struct tcp_sock *tp = tcp_sk(sk);
1036        struct tcp_md5sig_key *key;
1037        struct tcp_md5sig_info *md5sig;
1038
1039        key = tcp_md5_do_lookup(sk, addr, family);
1040        if (!key)
1041                return -ENOENT;
1042        hlist_del_rcu(&key->node);
1043        atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1044        kfree_rcu(key, rcu);
1045        md5sig = rcu_dereference_protected(tp->md5sig_info,
1046                                           sock_owned_by_user(sk));
1047        if (hlist_empty(&md5sig->head))
1048                tcp_free_md5sig_pool();
1049        return 0;
1050}
1051EXPORT_SYMBOL(tcp_md5_do_del);
1052
1053void tcp_clear_md5_list(struct sock *sk)
1054{
1055        struct tcp_sock *tp = tcp_sk(sk);
1056        struct tcp_md5sig_key *key;
1057        struct hlist_node *pos, *n;
1058        struct tcp_md5sig_info *md5sig;
1059
1060        md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1061
1062        if (!hlist_empty(&md5sig->head))
1063                tcp_free_md5sig_pool();
1064        hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
1065                hlist_del_rcu(&key->node);
1066                atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1067                kfree_rcu(key, rcu);
1068        }
1069}
1070
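    /* setsockopt(TCP_MD5SIG) handler: add, replace or delete the MD5 key
     * associated with the peer address given in the user's tcp_md5sig.
     */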
1071static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1072                                 int optlen)
1073{
1074        struct tcp_md5sig cmd;
1075        struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1076
1077        if (optlen < sizeof(cmd))
1078                return -EINVAL;
1079
1080        if (copy_from_user(&cmd, optval, sizeof(cmd)))
1081                return -EFAULT;
1082
1083        if (sin->sin_family != AF_INET)
1084                return -EINVAL;
1085
1086        if (!cmd.tcpm_key || !cmd.tcpm_keylen)
1087                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1088                                      AF_INET);
1089
1090        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1091                return -EINVAL;
1092
1093        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1094                              AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1095                              GFP_KERNEL);
1096}
1097
1098static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1099                                        __be32 daddr, __be32 saddr, int nbytes)
1100{
1101        struct tcp4_pseudohdr *bp;
1102        struct scatterlist sg;
1103
1104        bp = &hp->md5_blk.ip4;
1105
1106        /*
1107         * 1. the TCP pseudo-header (in the order: source IP address,
1108         * destination IP address, zero-padded protocol number, and
1109         * segment length)
1110         */
1111        bp->saddr = saddr;
1112        bp->daddr = daddr;
1113        bp->pad = 0;
1114        bp->protocol = IPPROTO_TCP;
1115        bp->len = cpu_to_be16(nbytes);
1116
1117        sg_init_one(&sg, bp, sizeof(*bp));
1118        return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1119}
1120
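    /* Compute the MD5 signature over the pseudo-header, TCP header and key
     * only; used for replies (RST/ACK) built from a received segment, which
     * carry no payload.
     */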
1121static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1122                               __be32 daddr, __be32 saddr, const struct tcphdr *th)
1123{
1124        struct tcp_md5sig_pool *hp;
1125        struct hash_desc *desc;
1126
1127        hp = tcp_get_md5sig_pool();
1128        if (!hp)
1129                goto clear_hash_noput;
1130        desc = &hp->md5_desc;
1131
1132        if (crypto_hash_init(desc))
1133                goto clear_hash;
1134        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1135                goto clear_hash;
1136        if (tcp_md5_hash_header(hp, th))
1137                goto clear_hash;
1138        if (tcp_md5_hash_key(hp, key))
1139                goto clear_hash;
1140        if (crypto_hash_final(desc, md5_hash))
1141                goto clear_hash;
1142
1143        tcp_put_md5sig_pool();
1144        return 0;
1145
1146clear_hash:
1147        tcp_put_md5sig_pool();
1148clear_hash_noput:
1149        memset(md5_hash, 0, 16);
1150        return 1;
1151}
1152
1153int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1154                        const struct sock *sk, const struct request_sock *req,
1155                        const struct sk_buff *skb)
1156{
1157        struct tcp_md5sig_pool *hp;
1158        struct hash_desc *desc;
1159        const struct tcphdr *th = tcp_hdr(skb);
1160        __be32 saddr, daddr;
1161
1162        if (sk) {
1163                saddr = inet_sk(sk)->inet_saddr;
1164                daddr = inet_sk(sk)->inet_daddr;
1165        } else if (req) {
1166                saddr = inet_rsk(req)->loc_addr;
1167                daddr = inet_rsk(req)->rmt_addr;
1168        } else {
1169                const struct iphdr *iph = ip_hdr(skb);
1170                saddr = iph->saddr;
1171                daddr = iph->daddr;
1172        }
1173
1174        hp = tcp_get_md5sig_pool();
1175        if (!hp)
1176                goto clear_hash_noput;
1177        desc = &hp->md5_desc;
1178
1179        if (crypto_hash_init(desc))
1180                goto clear_hash;
1181
1182        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1183                goto clear_hash;
1184        if (tcp_md5_hash_header(hp, th))
1185                goto clear_hash;
1186        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1187                goto clear_hash;
1188        if (tcp_md5_hash_key(hp, key))
1189                goto clear_hash;
1190        if (crypto_hash_final(desc, md5_hash))
1191                goto clear_hash;
1192
1193        tcp_put_md5sig_pool();
1194        return 0;
1195
1196clear_hash:
1197        tcp_put_md5sig_pool();
1198clear_hash_noput:
1199        memset(md5_hash, 0, 16);
1200        return 1;
1201}
1202EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1203
1204static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1205{
1206        /*
1207         * This gets called for each TCP segment that arrives
1208         * so we want to be efficient.
1209         * We have 3 drop cases:
1210         * o No MD5 hash and one expected.
1211         * o MD5 hash and we're not expecting one.
1212         * o MD5 hash and it's wrong.
1213         */
1214        const __u8 *hash_location = NULL;
1215        struct tcp_md5sig_key *hash_expected;
1216        const struct iphdr *iph = ip_hdr(skb);
1217        const struct tcphdr *th = tcp_hdr(skb);
1218        int genhash;
1219        unsigned char newhash[16];
1220
1221        hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1222                                          AF_INET);
1223        hash_location = tcp_parse_md5sig_option(th);
1224
1225        /* We've parsed the options - do we have a hash? */
1226        if (!hash_expected && !hash_location)
1227                return false;
1228
1229        if (hash_expected && !hash_location) {
1230                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1231                return true;
1232        }
1233
1234        if (!hash_expected && hash_location) {
1235                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1236                return true;
1237        }
1238
1239        /* Okay, so this is hash_expected and hash_location -
1240         * so we need to calculate the checksum.
1241         */
1242        genhash = tcp_v4_md5_hash_skb(newhash,
1243                                      hash_expected,
1244                                      NULL, NULL, skb);
1245
1246        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1247                net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1248                                     &iph->saddr, ntohs(th->source),
1249                                     &iph->daddr, ntohs(th->dest),
1250                                     genhash ? " tcp_v4_calc_md5_hash failed"
1251                                     : "");
1252                return true;
1253        }
1254        return false;
1255}
1256
1257#endif
1258
1259struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1260        .family         =       PF_INET,
1261        .obj_size       =       sizeof(struct tcp_request_sock),
1262        .rtx_syn_ack    =       tcp_v4_rtx_synack,
1263        .send_ack       =       tcp_v4_reqsk_send_ack,
1264        .destructor     =       tcp_v4_reqsk_destructor,
1265        .send_reset     =       tcp_v4_send_reset,
1266        .syn_ack_timeout =      tcp_syn_ack_timeout,
1267};
1268
1269#ifdef CONFIG_TCP_MD5SIG
1270static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1271        .md5_lookup     =       tcp_v4_reqsk_md5_lookup,
1272        .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1273};
1274#endif
1275
1276int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1277{
1278        struct tcp_extend_values tmp_ext;
1279        struct tcp_options_received tmp_opt;
1280        const u8 *hash_location;
1281        struct request_sock *req;
1282        struct inet_request_sock *ireq;
1283        struct tcp_sock *tp = tcp_sk(sk);
1284        struct dst_entry *dst = NULL;
1285        __be32 saddr = ip_hdr(skb)->saddr;
1286        __be32 daddr = ip_hdr(skb)->daddr;
1287        __u32 isn = TCP_SKB_CB(skb)->when;
1288        bool want_cookie = false;
1289
1290        /* Never answer SYNs sent to broadcast or multicast addresses */
1291        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1292                goto drop;
1293
1294        /* TW buckets are converted to open requests without
1295         * limitation; they conserve resources and the peer is
1296         * evidently a real one.
1297         */
1298        if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1299                want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1300                if (!want_cookie)
1301                        goto drop;
1302        }
1303
1304        /* The accept backlog is full. If we have already queued enough
1305         * warm entries in the syn queue, drop the request. This is better
1306         * than clogging the syn queue with openreqs with exponentially
1307         * increasing timeouts.
1308         */
1309        if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1310                goto drop;
1311
1312        req = inet_reqsk_alloc(&tcp_request_sock_ops);
1313        if (!req)
1314                goto drop;
1315
1316#ifdef CONFIG_TCP_MD5SIG
1317        tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1318#endif
1319
1320        tcp_clear_options(&tmp_opt);
1321        tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1322        tmp_opt.user_mss  = tp->rx_opt.user_mss;
1323        tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
1324
1325        if (tmp_opt.cookie_plus > 0 &&
1326            tmp_opt.saw_tstamp &&
1327            !tp->rx_opt.cookie_out_never &&
1328            (sysctl_tcp_cookie_size > 0 ||
1329             (tp->cookie_values != NULL &&
1330              tp->cookie_values->cookie_desired > 0))) {
1331                u8 *c;
1332                u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1333                int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1334
1335                if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1336                        goto drop_and_release;
1337
1338                /* Secret recipe starts with IP addresses */
1339                *mess++ ^= (__force u32)daddr;
1340                *mess++ ^= (__force u32)saddr;
1341
1342                /* plus variable length Initiator Cookie */
1343                c = (u8 *)mess;
1344                while (l-- > 0)
1345                        *c++ ^= *hash_location++;
1346
1347                want_cookie = false;    /* not our kind of cookie */
1348                tmp_ext.cookie_out_never = 0; /* false */
1349                tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1350        } else if (!tp->rx_opt.cookie_in_always) {
1351                /* redundant indications, but ensure initialization. */
1352                tmp_ext.cookie_out_never = 1; /* true */
1353                tmp_ext.cookie_plus = 0;
1354        } else {
1355                goto drop_and_release;
1356        }
1357        tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1358
1359        if (want_cookie && !tmp_opt.saw_tstamp)
1360                tcp_clear_options(&tmp_opt);
1361
1362        tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1363        tcp_openreq_init(req, &tmp_opt, skb);
1364
1365        ireq = inet_rsk(req);
1366        ireq->loc_addr = daddr;
1367        ireq->rmt_addr = saddr;
1368        ireq->no_srccheck = inet_sk(sk)->transparent;
1369        ireq->opt = tcp_v4_save_options(sk, skb);
1370
1371        if (security_inet_conn_request(sk, skb, req))
1372                goto drop_and_free;
1373
1374        if (!want_cookie || tmp_opt.tstamp_ok)
1375                TCP_ECN_create_request(req, skb);
1376
1377        if (want_cookie) {
1378                isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1379                req->cookie_ts = tmp_opt.tstamp_ok;
1380        } else if (!isn) {
1381                struct flowi4 fl4;
1382
1383                /* VJ's idea. We save the last timestamp seen
1384                 * from the destination in the peer table when entering
1385                 * TIME-WAIT state, and check against it before
1386                 * accepting a new connection request.
1387                 *
1388                 * If "isn" is not zero, this request hit a live
1389                 * timewait bucket, so all the necessary checks
1390                 * were made by the code processing the timewait state.
1391                 */
1392                if (tmp_opt.saw_tstamp &&
1393                    tcp_death_row.sysctl_tw_recycle &&
1394                    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1395                    fl4.daddr == saddr) {
1396                        if (!tcp_peer_is_proven(req, dst, true)) {
1397                                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1398                                goto drop_and_release;
1399                        }
1400                }
1401                /* Kill the following clause if you dislike this approach. */
1402                else if (!sysctl_tcp_syncookies &&
1403                         (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1404                          (sysctl_max_syn_backlog >> 2)) &&
1405                         !tcp_peer_is_proven(req, dst, false)) {
1406                        /* Without syncookies, the last quarter of the
1407                         * backlog is reserved for destinations that are
1408                         * proven to be alive.
1409                         * It means that we keep communicating with
1410                         * destinations already known to us at the
1411                         * moment the synflood started.
1412                         */
1413                        LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
1414                                       &saddr, ntohs(tcp_hdr(skb)->source));
1415                        goto drop_and_release;
1416                }
1417
1418                isn = tcp_v4_init_sequence(skb);
1419        }
1420        tcp_rsk(req)->snt_isn = isn;
1421        tcp_rsk(req)->snt_synack = tcp_time_stamp;
1422
1423        if (tcp_v4_send_synack(sk, dst, req,
1424                               (struct request_values *)&tmp_ext,
1425                               skb_get_queue_mapping(skb),
1426                               want_cookie) ||
1427            want_cookie)
1428                goto drop_and_free;
1429
1430        inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1431        return 0;
1432
1433drop_and_release:
1434        dst_release(dst);
1435drop_and_free:
1436        reqsk_free(req);
1437drop:
1438        return 0;
1439}
1440EXPORT_SYMBOL(tcp_v4_conn_request);
1441
1442
1443/*
1444 * The three way handshake has completed - we got a valid synack -
1445 * now create the new socket.
1446 */
1447struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1448                                  struct request_sock *req,
1449                                  struct dst_entry *dst)
1450{
1451        struct inet_request_sock *ireq;
1452        struct inet_sock *newinet;
1453        struct tcp_sock *newtp;
1454        struct sock *newsk;
1455#ifdef CONFIG_TCP_MD5SIG
1456        struct tcp_md5sig_key *key;
1457#endif
1458        struct ip_options_rcu *inet_opt;
1459
1460        if (sk_acceptq_is_full(sk))
1461                goto exit_overflow;
1462
1463        newsk = tcp_create_openreq_child(sk, req, skb);
1464        if (!newsk)
1465                goto exit_nonewsk;
1466
1467        newsk->sk_gso_type = SKB_GSO_TCPV4;
1468        inet_sk_rx_dst_set(newsk, skb);
1469
1470        newtp                 = tcp_sk(newsk);
1471        newinet               = inet_sk(newsk);
1472        ireq                  = inet_rsk(req);
1473        newinet->inet_daddr   = ireq->rmt_addr;
1474        newinet->inet_rcv_saddr = ireq->loc_addr;
1475        newinet->inet_saddr           = ireq->loc_addr;
1476        inet_opt              = ireq->opt;
1477        rcu_assign_pointer(newinet->inet_opt, inet_opt);
1478        ireq->opt             = NULL;
1479        newinet->mc_index     = inet_iif(skb);
1480        newinet->mc_ttl       = ip_hdr(skb)->ttl;
1481        newinet->rcv_tos      = ip_hdr(skb)->tos;
1482        inet_csk(newsk)->icsk_ext_hdr_len = 0;
1483        if (inet_opt)
1484                inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1485        newinet->inet_id = newtp->write_seq ^ jiffies;
1486
1487        if (!dst) {
1488                dst = inet_csk_route_child_sock(sk, newsk, req);
1489                if (!dst)
1490                        goto put_and_exit;
1491        } else {
1492                /* syncookie case : see end of cookie_v4_check() */
1493        }
1494        sk_setup_caps(newsk, dst);
1495
1496        tcp_mtup_init(newsk);
1497        tcp_sync_mss(newsk, dst_mtu(dst));
1498        newtp->advmss = dst_metric_advmss(dst);
1499        if (tcp_sk(sk)->rx_opt.user_mss &&
1500            tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1501                newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1502
1503        tcp_initialize_rcv_mss(newsk);
1504        if (tcp_rsk(req)->snt_synack)
1505                tcp_valid_rtt_meas(newsk,
1506                    tcp_time_stamp - tcp_rsk(req)->snt_synack);
1507        newtp->total_retrans = req->retrans;
1508
1509#ifdef CONFIG_TCP_MD5SIG
1510        /* Copy over the MD5 key from the original socket */
1511        key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1512                                AF_INET);
1513        if (key != NULL) {
1514                /*
1515                 * We're using one, so create a matching key
1516                 * on the newsk structure. If we fail to get
1517                 * memory, then we end up not copying the key
1518                 * across. Shucks.
1519                 */
1520                tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1521                               AF_INET, key->key, key->keylen, GFP_ATOMIC);
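                /* The MD5 signature has to be computed over each segment,
                 * so hardware segmentation offload cannot be used on this
                 * socket.
                 */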
1522                sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1523        }
1524#endif
1525
1526        if (__inet_inherit_port(sk, newsk) < 0)
1527                goto put_and_exit;
1528        __inet_hash_nolisten(newsk, NULL);
1529
1530        return newsk;
1531
1532exit_overflow:
1533        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1534exit_nonewsk:
1535        dst_release(dst);
1536exit:
1537        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1538        return NULL;
1539put_and_exit:
1540        tcp_clear_xmit_timers(newsk);
1541        tcp_cleanup_congestion_control(newsk);
1542        bh_unlock_sock(newsk);
1543        sock_put(newsk);
1544        goto exit;
1545}
1546EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1547
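/* Handle a segment arriving on a listening socket: match it against a
 * pending connection request (SYN_RECV), then against an already
 * established socket, and finally - with CONFIG_SYN_COOKIES - let a bare
 * ACK try to complete a syncookie handshake.
 */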
1548static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1549{
1550        struct tcphdr *th = tcp_hdr(skb);
1551        const struct iphdr *iph = ip_hdr(skb);
1552        struct sock *nsk;
1553        struct request_sock **prev;
1554        /* Find possible connection requests. */
1555        struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1556                                                       iph->saddr, iph->daddr);
1557        if (req)
1558                return tcp_check_req(sk, skb, req, prev);
1559
1560        nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1561                        th->source, iph->daddr, th->dest, inet_iif(skb));
1562
1563        if (nsk) {
1564                if (nsk->sk_state != TCP_TIME_WAIT) {
1565                        bh_lock_sock(nsk);
1566                        return nsk;
1567                }
1568                inet_twsk_put(inet_twsk(nsk));
1569                return NULL;
1570        }
1571
1572#ifdef CONFIG_SYN_COOKIES
1573        if (!th->syn)
1574                sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1575#endif
1576        return sk;
1577}
1578
1579static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1580{
1581        const struct iphdr *iph = ip_hdr(skb);
1582
1583        if (skb->ip_summed == CHECKSUM_COMPLETE) {
1584                if (!tcp_v4_check(skb->len, iph->saddr,
1585                                  iph->daddr, skb->csum)) {
1586                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1587                        return 0;
1588                }
1589        }
1590
1591        skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1592                                       skb->len, IPPROTO_TCP, 0);
1593
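        /* Only the pseudo-header seed is stored above; for short packets the
         * full checksum is verified right away, for longer ones the final
         * check is deferred (typically to the copy-to-user path).
         */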
1594        if (skb->len <= 76) {
1595                return __skb_checksum_complete(skb);
1596        }
1597        return 0;
1598}
1599
1600
1601/* The socket must have its spinlock held when we get
1602 * here.
1603 *
1604 * We have a potential double-lock case here, so even when
1605 * doing backlog processing we use the BH locking scheme.
1606 * This is because we cannot sleep with the original spinlock
1607 * held.
1608 */
1609int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1610{
1611        struct sock *rsk;
1612#ifdef CONFIG_TCP_MD5SIG
1613        /*
1614         * We really want to reject the packet as early as possible
1615         * if:
1616         *  o We're expecting an MD5'd packet and there is no MD5 TCP option
1617         *  o There is an MD5 option and we're not expecting one
1618         */
1619        if (tcp_v4_inbound_md5_hash(sk, skb))
1620                goto discard;
1621#endif
1622
1623        if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1624                struct dst_entry *dst = sk->sk_rx_dst;
1625
1626                sock_rps_save_rxhash(sk, skb);
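                /* Validate the cached input route: drop it if the packet
                 * arrived on a different interface or the route has become
                 * obsolete.
                 */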
1627                if (dst) {
1628                        if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1629                            dst->ops->check(dst, 0) == NULL) {
1630                                dst_release(dst);
1631                                sk->sk_rx_dst = NULL;
1632                        }
1633                }
1634                if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1635                        rsk = sk;
1636                        goto reset;
1637                }
1638                return 0;
1639        }
1640
1641        if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1642                goto csum_err;
1643
1644        if (sk->sk_state == TCP_LISTEN) {
1645                struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1646                if (!nsk)
1647                        goto discard;
1648
1649                if (nsk != sk) {
1650                        sock_rps_save_rxhash(nsk, skb);
1651                        if (tcp_child_process(sk, nsk, skb)) {
1652                                rsk = nsk;
1653                                goto reset;
1654                        }
1655                        return 0;
1656                }
1657        } else
1658                sock_rps_save_rxhash(sk, skb);
1659
1660        if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1661                rsk = sk;
1662                goto reset;
1663        }
1664        return 0;
1665
1666reset:
1667        tcp_v4_send_reset(rsk, skb);
1668discard:
1669        kfree_skb(skb);
1670        /* Be careful here. If this function gets more complicated and
1671         * gcc suffers from register pressure on the x86, sk (in %ebx)
1672         * might be destroyed here. This current version compiles correctly,
1673         * but you have been warned.
1674         */
1675        return 0;
1676
1677csum_err:
1678        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1679        goto discard;
1680}
1681EXPORT_SYMBOL(tcp_v4_do_rcv);
1682
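/* Early demultiplexing, called on ingress before the routing decision.
 * If the packet belongs to an established socket, attach that socket to the
 * skb and, when still valid, reuse the socket's cached input route instead
 * of doing a full route lookup.
 */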
1683void tcp_v4_early_demux(struct sk_buff *skb)
1684{
1685        struct net *net = dev_net(skb->dev);
1686        const struct iphdr *iph;
1687        const struct tcphdr *th;
1688        struct sock *sk;
1689
1690        if (skb->pkt_type != PACKET_HOST)
1691                return;
1692
1693        if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr)))
1694                return;
1695
1696        iph = ip_hdr(skb);
1697        th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb));
1698
1699        if (th->doff < sizeof(struct tcphdr) / 4)
1700                return;
1701
1702        sk = __inet_lookup_established(net, &tcp_hashinfo,
1703                                       iph->saddr, th->source,
1704                                       iph->daddr, ntohs(th->dest),
1705                                       skb->skb_iif);
1706        if (sk) {
1707                skb->sk = sk;
1708                skb->destructor = sock_edemux;
1709                if (sk->sk_state != TCP_TIME_WAIT) {
1710                        struct dst_entry *dst = sk->sk_rx_dst;
1711
1712                        if (dst)
1713                                dst = dst_check(dst, 0);
1714                        if (dst &&
1715                            inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1716                                skb_dst_set_noref(skb, dst);
1717                }
1718        }
1719}
1720
1721/*
1722 *      From tcp_input.c
1723 */
1724
1725int tcp_v4_rcv(struct sk_buff *skb)
1726{
1727        const struct iphdr *iph;
1728        const struct tcphdr *th;
1729        struct sock *sk;
1730        int ret;
1731        struct net *net = dev_net(skb->dev);
1732
1733        if (skb->pkt_type != PACKET_HOST)
1734                goto discard_it;
1735
1736        /* Count it even if it's bad */
1737        TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1738
1739        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1740                goto discard_it;
1741
1742        th = tcp_hdr(skb);
1743
1744        if (th->doff < sizeof(struct tcphdr) / 4)
1745                goto bad_packet;
1746        if (!pskb_may_pull(skb, th->doff * 4))
1747                goto discard_it;
1748
1749        /* An explanation is required here, I think.
1750         * Packet length and doff are validated by header prediction,
1751         * provided the case of th->doff == 0 is eliminated.
1752         * So, we defer the checks. */
1753        if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1754                goto bad_packet;
1755
1756        th = tcp_hdr(skb);
1757        iph = ip_hdr(skb);
1758        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
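        /* SYN and FIN each occupy one unit of sequence space, so they are
         * counted into end_seq along with the payload length.
         */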
1759        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1760                                    skb->len - th->doff * 4);
1761        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1762        TCP_SKB_CB(skb)->when    = 0;
1763        TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1764        TCP_SKB_CB(skb)->sacked  = 0;
1765
1766        sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1767        if (!sk)
1768                goto no_tcp_socket;
1769
1770process:
1771        if (sk->sk_state == TCP_TIME_WAIT)
1772                goto do_time_wait;
1773
1774        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1775                NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1776                goto discard_and_relse;
1777        }
1778
1779        if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1780                goto discard_and_relse;
1781        nf_reset(skb);
1782
1783        if (sk_filter(sk, skb))
1784                goto discard_and_relse;
1785
1786        skb->dev = NULL;
1787
1788        bh_lock_sock_nested(sk);
1789        ret = 0;
1790        if (!sock_owned_by_user(sk)) {
1791#ifdef CONFIG_NET_DMA
1792                struct tcp_sock *tp = tcp_sk(sk);
1793                if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1794                        tp->ucopy.dma_chan = net_dma_find_channel();
1795                if (tp->ucopy.dma_chan)
1796                        ret = tcp_v4_do_rcv(sk, skb);
1797                else
1798#endif
1799                {
1800                        if (!tcp_prequeue(sk, skb))
1801                                ret = tcp_v4_do_rcv(sk, skb);
1802                }
1803        } else if (unlikely(sk_add_backlog(sk, skb,
1804                                           sk->sk_rcvbuf + sk->sk_sndbuf))) {
1805                bh_unlock_sock(sk);
1806                NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1807                goto discard_and_relse;
1808        }
1809        bh_unlock_sock(sk);
1810
1811        sock_put(sk);
1812
1813        return ret;
1814
1815no_tcp_socket:
1816        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1817                goto discard_it;
1818
1819        if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1820bad_packet:
1821                TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1822        } else {
1823                tcp_v4_send_reset(NULL, skb);
1824        }
1825
1826discard_it:
1827        /* Discard frame. */
1828        kfree_skb(skb);
1829        return 0;
1830
1831discard_and_relse:
1832        sock_put(sk);
1833        goto discard_it;
1834
1835do_time_wait:
1836        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1837                inet_twsk_put(inet_twsk(sk));
1838                goto discard_it;
1839        }
1840
1841        if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1842                TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1843                inet_twsk_put(inet_twsk(sk));
1844                goto discard_it;
1845        }
1846        switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1847        case TCP_TW_SYN: {
1848                struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1849                                                        &tcp_hashinfo,
1850                                                        iph->daddr, th->dest,
1851                                                        inet_iif(skb));
1852                if (sk2) {
1853                        inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1854                        inet_twsk_put(inet_twsk(sk));
1855                        sk = sk2;
1856                        goto process;
1857                }
1858                /* Fall through to ACK */
1859        }
1860        case TCP_TW_ACK:
1861                tcp_v4_timewait_ack(sk, skb);
1862                break;
1863        case TCP_TW_RST:
1864                goto no_tcp_socket;
1865        case TCP_TW_SUCCESS:;
1866        }
1867        goto discard_it;
1868}
1869
1870static struct timewait_sock_ops tcp_timewait_sock_ops = {
1871        .twsk_obj_size  = sizeof(struct tcp_timewait_sock),
1872        .twsk_unique    = tcp_twsk_unique,
1873        .twsk_destructor= tcp_twsk_destructor,
1874};
1875
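/* Cache the input route and ingress ifindex on the socket so that the
 * receive fast path and early demux above can skip a route lookup for
 * subsequent packets.
 */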
1876void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
1877{
1878        struct dst_entry *dst = skb_dst(skb);
1879
1880        dst_hold(dst);
1881        sk->sk_rx_dst = dst;
1882        inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1883}
1884EXPORT_SYMBOL(inet_sk_rx_dst_set);
1885
1886const struct inet_connection_sock_af_ops ipv4_specific = {
1887        .queue_xmit        = ip_queue_xmit,
1888        .send_check        = tcp_v4_send_check,
1889        .rebuild_header    = inet_sk_rebuild_header,
1890        .sk_rx_dst_set     = inet_sk_rx_dst_set,
1891        .conn_request      = tcp_v4_conn_request,
1892        .syn_recv_sock     = tcp_v4_syn_recv_sock,
1893        .net_header_len    = sizeof(struct iphdr),
1894        .setsockopt        = ip_setsockopt,
1895        .getsockopt        = ip_getsockopt,
1896        .addr2sockaddr     = inet_csk_addr2sockaddr,
1897        .sockaddr_len      = sizeof(struct sockaddr_in),
1898        .bind_conflict     = inet_csk_bind_conflict,
1899#ifdef CONFIG_COMPAT
1900        .compat_setsockopt = compat_ip_setsockopt,
1901        .compat_getsockopt = compat_ip_getsockopt,
1902#endif
1903};
1904EXPORT_SYMBOL(ipv4_specific);
1905
1906#ifdef CONFIG_TCP_MD5SIG
1907static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1908        .md5_lookup             = tcp_v4_md5_lookup,
1909        .calc_md5_hash          = tcp_v4_md5_hash_skb,
1910        .md5_parse              = tcp_v4_parse_md5_keys,
1911};
1912#endif
1913
1914/* NOTE: A lot of things are set to zero explicitly by the call to
1915 *       sk_alloc(), so they need not be done here.
1916 */
1917static int tcp_v4_init_sock(struct sock *sk)
1918{
1919        struct inet_connection_sock *icsk = inet_csk(sk);
1920
1921        tcp_init_sock(sk);
1922
1923        icsk->icsk_af_ops = &ipv4_specific;
1924
1925#ifdef CONFIG_TCP_MD5SIG
1926        tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1927#endif
1928
1929        return 0;
1930}
1931
1932void tcp_v4_destroy_sock(struct sock *sk)
1933{
1934        struct tcp_sock *tp = tcp_sk(sk);
1935
1936        tcp_clear_xmit_timers(sk);
1937
1938        tcp_cleanup_congestion_control(sk);
1939
1940        /* Clean up the write buffer. */
1941        tcp_write_queue_purge(sk);
1942
1943        /* Cleans up our, hopefully empty, out_of_order_queue. */
1944        __skb_queue_purge(&tp->out_of_order_queue);
1945
1946#ifdef CONFIG_TCP_MD5SIG
1947        /* Clean up the MD5 key list, if any */
1948        if (tp->md5sig_info) {
1949                tcp_clear_md5_list(sk);
1950                kfree_rcu(tp->md5sig_info, rcu);
1951                tp->md5sig_info = NULL;
1952        }
1953#endif
1954
1955#ifdef CONFIG_NET_DMA
1956        /* Cleans up our sk_async_wait_queue */
1957        __skb_queue_purge(&sk->sk_async_wait_queue);
1958#endif
1959
1960        /* Clean the prequeue; it really should be empty already. */
1961        __skb_queue_purge(&tp->ucopy.prequeue);
1962
1963        /* Clean up a referenced TCP bind bucket. */
1964        if (inet_csk(sk)->icsk_bind_hash)
1965                inet_put_port(sk);
1966
1967        /*
1968         * If sendmsg cached page exists, toss it.
1969         */
1970        if (sk->sk_sndmsg_page) {
1971                __free_page(sk->sk_sndmsg_page);
1972                sk->sk_sndmsg_page = NULL;
1973        }
1974
1975        /* TCP Cookie Transactions */
1976        if (tp->cookie_values != NULL) {
1977                kref_put(&tp->cookie_values->kref,
1978                         tcp_cookie_values_release);
1979                tp->cookie_values = NULL;
1980        }
1981
1982        /* If socket is aborted during connect operation */
1983        tcp_free_fastopen_req(tp);
1984
1985        sk_sockets_allocated_dec(sk);
1986        sock_release_memcg(sk);
1987}
1988EXPORT_SYMBOL(tcp_v4_destroy_sock);
1989
1990#ifdef CONFIG_PROC_FS
1991/* Proc filesystem TCP sock list dumping. */
1992
1993static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1994{
1995        return hlist_nulls_empty(head) ? NULL :
1996                list_entry(head->first, struct inet_timewait_sock, tw_node);
1997}
1998
1999static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
2000{
2001        return !is_a_nulls(tw->tw_node.next) ?
2002                hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
2003}
2004
2005/*
2006 * Get the next listener socket following cur.  If cur is NULL, get the first
2007 * socket starting from the bucket given in st->bucket; when st->bucket is
2008 * zero, the very first socket in the hash table is returned.
2009 */
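/* Besides the listening sockets themselves, the iterator below also walks
 * each listener's queue of pending request_socks (TCP_SEQ_STATE_OPENREQ),
 * holding syn_wait_lock while it does so.
 */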
2010static void *listening_get_next(struct seq_file *seq, void *cur)
2011{
2012        struct inet_connection_sock *icsk;
2013        struct hlist_nulls_node *node;
2014        struct sock *sk = cur;
2015        struct inet_listen_hashbucket *ilb;
2016        struct tcp_iter_state *st = seq->private;
2017        struct net *net = seq_file_net(seq);
2018
2019        if (!sk) {
2020                ilb = &tcp_hashinfo.listening_hash[st->bucket];
2021                spin_lock_bh(&ilb->lock);
2022                sk = sk_nulls_head(&ilb->head);
2023                st->offset = 0;
2024                goto get_sk;
2025        }
2026        ilb = &tcp_hashinfo.listening_hash[st->bucket];
2027        ++st->num;
2028        ++st->offset;
2029
2030        if (st->state == TCP_SEQ_STATE_OPENREQ) {
2031                struct request_sock *req = cur;
2032
2033                icsk = inet_csk(st->syn_wait_sk);
2034                req = req->dl_next;
2035                while (1) {
2036                        while (req) {
2037                                if (req->rsk_ops->family == st->family) {
2038                                        cur = req;
2039                                        goto out;
2040                                }
2041                                req = req->dl_next;
2042                        }
2043                        if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2044                                break;
2045get_req:
2046                        req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2047                }
2048                sk        = sk_nulls_next(st->syn_wait_sk);
2049                st->state = TCP_SEQ_STATE_LISTENING;
2050                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2051        } else {
2052                icsk = inet_csk(sk);
2053                read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2054                if (reqsk_queue_len(&icsk->icsk_accept_queue))
2055                        goto start_req;
2056                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2057                sk = sk_nulls_next(sk);
2058        }
2059get_sk:
2060        sk_nulls_for_each_from(sk, node) {
2061                if (!net_eq(sock_net(sk), net))
2062                        continue;
2063                if (sk->sk_family == st->family) {
2064                        cur = sk;
2065                        goto out;
2066                }
2067                icsk = inet_csk(sk);
2068                read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2069                if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2070start_req:
2071                        st->uid         = sock_i_uid(sk);
2072                        st->syn_wait_sk = sk;
2073                        st->state       = TCP_SEQ_STATE_OPENREQ;
2074                        st->sbucket     = 0;
2075                        goto get_req;
2076                }
2077                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2078        }
2079        spin_unlock_bh(&ilb->lock);
2080        st->offset = 0;
2081        if (++st->bucket < INET_LHTABLE_SIZE) {
2082                ilb = &tcp_hashinfo.listening_hash[st->bucket];
2083                spin_lock_bh(&ilb->lock);
2084                sk = sk_nulls_head(&ilb->head);
2085                goto get_sk;
2086        }
2087        cur = NULL;
2088out:
2089        return cur;
2090}
2091
2092static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2093{
2094        struct tcp_iter_state *st = seq->private;
2095        void *rc;
2096
2097        st->bucket = 0;
2098        st->offset = 0;
2099        rc = listening_get_next(seq, NULL);
2100
2101        while (rc && *pos) {
2102                rc = listening_get_next(seq, rc);
2103                --*pos;
2104        }
2105        return rc;
2106}
2107
2108static inline bool empty_bucket(struct tcp_iter_state *st)
2109{
2110        return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2111                hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2112}
2113
2114/*
2115 * Get the first established socket starting from the bucket given in st->bucket.
2116 * If st->bucket is zero, the very first socket in the hash is returned.
2117 */
2118static void *established_get_first(struct seq_file *seq)
2119{
2120        struct tcp_iter_state *st = seq->private;
2121        struct net *net = seq_file_net(seq);
2122        void *rc = NULL;
2123
2124        st->offset = 0;
2125        for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2126                struct sock *sk;
2127                struct hlist_nulls_node *node;
2128                struct inet_timewait_sock *tw;
2129                spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2130
2131                /* Lockless fast path for the common case of empty buckets */
2132                if (empty_bucket(st))
2133                        continue;
2134
2135                spin_lock_bh(lock);
2136                sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2137                        if (sk->sk_family != st->family ||
2138                            !net_eq(sock_net(sk), net)) {
2139                                continue;
2140                        }
2141                        rc = sk;
2142                        goto out;
2143                }
2144                st->state = TCP_SEQ_STATE_TIME_WAIT;
2145                inet_twsk_for_each(tw, node,
2146                                   &tcp_hashinfo.ehash[st->bucket].twchain) {
2147                        if (tw->tw_family != st->family ||
2148                            !net_eq(twsk_net(tw), net)) {
2149                                continue;
2150                        }
2151                        rc = tw;
2152                        goto out;
2153                }
2154                spin_unlock_bh(lock);
2155                st->state = TCP_SEQ_STATE_ESTABLISHED;
2156        }
2157out:
2158        return rc;
2159}
2160
2161static void *established_get_next(struct seq_file *seq, void *cur)
2162{
2163        struct sock *sk = cur;
2164        struct inet_timewait_sock *tw;
2165        struct hlist_nulls_node *node;
2166        struct tcp_iter_state *st = seq->private;
2167        struct net *net = seq_file_net(seq);
2168
2169        ++st->num;
2170        ++st->offset;
2171
2172        if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2173                tw = cur;
2174                tw = tw_next(tw);
2175get_tw:
2176                while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2177                        tw = tw_next(tw);
2178                }
2179                if (tw) {
2180                        cur = tw;
2181                        goto out;
2182                }
2183                spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2184                st->state = TCP_SEQ_STATE_ESTABLISHED;
2185
2186                /* Look for the next non-empty bucket */
2187                st->offset = 0;
2188                while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2189                                empty_bucket(st))
2190                        ;
2191                if (st->bucket > tcp_hashinfo.ehash_mask)
2192                        return NULL;
2193
2194                spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2195                sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2196        } else
2197                sk = sk_nulls_next(sk);
2198
2199        sk_nulls_for_each_from(sk, node) {
2200                if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2201                        goto found;
2202        }
2203
2204        st->state = TCP_SEQ_STATE_TIME_WAIT;
2205        tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2206        goto get_tw;
2207found:
2208        cur = sk;
2209out:
2210        return cur;
2211}
2212
2213static void *established_get_idx(struct seq_file *seq, loff_t pos)
2214{
2215        struct tcp_iter_state *st = seq->private;
2216        void *rc;
2217
2218        st->bucket = 0;
2219        rc = established_get_first(seq);
2220
2221        while (rc && pos) {
2222                rc = established_get_next(seq, rc);
2223                --pos;
2224        }
2225        return rc;
2226}
2227
2228static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2229{
2230        void *rc;
2231        struct tcp_iter_state *st = seq->private;
2232
2233        st->state = TCP_SEQ_STATE_LISTENING;
2234        rc        = listening_get_idx(seq, &pos);
2235
2236        if (!rc) {
2237                st->state = TCP_SEQ_STATE_ESTABLISHED;
2238                rc        = established_get_idx(seq, pos);
2239        }
2240
2241        return rc;
2242}
2243
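/* Resume the iteration at the position recorded by the previous read:
 * re-walk the bucket saved in st->bucket and skip st->offset entries, so
 * that a large /proc/net/tcp dump does not rescan the whole table for
 * every chunk that is read.
 */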
2244static void *tcp_seek_last_pos(struct seq_file *seq)
2245{
2246        struct tcp_iter_state *st = seq->private;
2247        int offset = st->offset;
2248        int orig_num = st->num;
2249        void *rc = NULL;
2250
2251        switch (st->state) {
2252        case TCP_SEQ_STATE_OPENREQ:
2253        case TCP_SEQ_STATE_LISTENING:
2254                if (st->bucket >= INET_LHTABLE_SIZE)
2255                        break;
2256                st->state = TCP_SEQ_STATE_LISTENING;
2257                rc = listening_get_next(seq, NULL);
2258                while (offset-- && rc)
2259                        rc = listening_get_next(seq, rc);
2260                if (rc)
2261                        break;
2262                st->bucket = 0;
2263                /* Fallthrough */
2264        case TCP_SEQ_STATE_ESTABLISHED:
2265        case TCP_SEQ_STATE_TIME_WAIT:
2266                st->state = TCP_SEQ_STATE_ESTABLISHED;
2267                if (st->bucket > tcp_hashinfo.ehash_mask)
2268                        break;
2269                rc = established_get_first(seq);
2270                while (offset-- && rc)
2271                        rc = established_get_next(seq, rc);
2272        }
2273
2274        st->num = orig_num;
2275
2276        return rc;
2277}
2278
2279static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2280{
2281        struct tcp_iter_state *st = seq->private;
2282        void *rc;
2283
2284        if (*pos && *pos == st->last_pos) {
2285                rc = tcp_seek_last_pos(seq);
2286                if (rc)
2287                        goto out;
2288        }
2289
2290        st->state = TCP_SEQ_STATE_LISTENING;
2291        st->num = 0;
2292        st->bucket = 0;
2293        st->offset = 0;
2294        rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2295
2296out:
2297        st->last_pos = *pos;
2298        return rc;
2299}
2300
2301static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2302{
2303        struct tcp_iter_state *st = seq->private;
2304        void *rc = NULL;
2305
2306        if (v == SEQ_START_TOKEN) {
2307                rc = tcp_get_idx(seq, 0);
2308                goto out;
2309        }
2310
2311        switch (st->state) {
2312        case TCP_SEQ_STATE_OPENREQ:
2313        case TCP_SEQ_STATE_LISTENING:
2314                rc = listening_get_next(seq, v);
2315                if (!rc) {
2316                        st->state = TCP_SEQ_STATE_ESTABLISHED;
2317                        st->bucket = 0;
2318                        st->offset = 0;
2319                        rc        = established_get_first(seq);
2320                }
2321                break;
2322        case TCP_SEQ_STATE_ESTABLISHED:
2323        case TCP_SEQ_STATE_TIME_WAIT:
2324                rc = established_get_next(seq, v);
2325                break;
2326        }
2327out:
2328        ++*pos;
2329        st->last_pos = *pos;
2330        return rc;
2331}
2332
2333static void tcp_seq_stop(struct seq_file *seq, void *v)
2334{
2335        struct tcp_iter_state *st = seq->private;
2336
2337        switch (st->state) {
2338        case TCP_SEQ_STATE_OPENREQ:
2339                if (v) {
2340                        struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2341                        read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2342                }
2343        case TCP_SEQ_STATE_LISTENING:
2344                if (v != SEQ_START_TOKEN)
2345                        spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2346                break;
2347        case TCP_SEQ_STATE_TIME_WAIT:
2348        case TCP_SEQ_STATE_ESTABLISHED:
2349                if (v)
2350                        spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2351                break;
2352        }
2353}
2354
2355int tcp_seq_open(struct inode *inode, struct file *file)
2356{
2357        struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2358        struct tcp_iter_state *s;
2359        int err;
2360
2361        err = seq_open_net(inode, file, &afinfo->seq_ops,
2362                          sizeof(struct tcp_iter_state));
2363        if (err < 0)
2364                return err;
2365
2366        s = ((struct seq_file *)file->private_data)->private;
2367        s->family               = afinfo->family;
2368        s->last_pos             = 0;
2369        return 0;
2370}
2371EXPORT_SYMBOL(tcp_seq_open);
2372
2373int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2374{
2375        int rc = 0;
2376        struct proc_dir_entry *p;
2377
2378        afinfo->seq_ops.start           = tcp_seq_start;
2379        afinfo->seq_ops.next            = tcp_seq_next;
2380        afinfo->seq_ops.stop            = tcp_seq_stop;
2381
2382        p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2383                             afinfo->seq_fops, afinfo);
2384        if (!p)
2385                rc = -ENOMEM;
2386        return rc;
2387}
2388EXPORT_SYMBOL(tcp_proc_register);
2389
2390void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2391{
2392        proc_net_remove(net, afinfo->name);
2393}
2394EXPORT_SYMBOL(tcp_proc_unregister);
2395
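/* The three helpers below each emit one record matching the header printed
 * in tcp4_seq_show():
 *   sl  local_address rem_address  st  tx_queue rx_queue  tr tm->when
 *   retrnsmt  uid  timeout  inode
 * Addresses and ports are printed in hex, addresses in network byte order.
 */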
2396static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2397                         struct seq_file *f, int i, int uid, int *len)
2398{
2399        const struct inet_request_sock *ireq = inet_rsk(req);
2400        int ttd = req->expires - jiffies;
2401
2402        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2403                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
2404                i,
2405                ireq->loc_addr,
2406                ntohs(inet_sk(sk)->inet_sport),
2407                ireq->rmt_addr,
2408                ntohs(ireq->rmt_port),
2409                TCP_SYN_RECV,
2410                0, 0, /* could print option size, but that is af dependent. */
2411                1,    /* timers active (only the expire timer) */
2412                jiffies_to_clock_t(ttd),
2413                req->retrans,
2414                uid,
2415                0,  /* non standard timer */
2416                0, /* open_requests have no inode */
2417                atomic_read(&sk->sk_refcnt),
2418                req,
2419                len);
2420}
2421
2422static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2423{
2424        int timer_active;
2425        unsigned long timer_expires;
2426        const struct tcp_sock *tp = tcp_sk(sk);
2427        const struct inet_connection_sock *icsk = inet_csk(sk);
2428        const struct inet_sock *inet = inet_sk(sk);
2429        __be32 dest = inet->inet_daddr;
2430        __be32 src = inet->inet_rcv_saddr;
2431        __u16 destp = ntohs(inet->inet_dport);
2432        __u16 srcp = ntohs(inet->inet_sport);
2433        int rx_queue;
2434
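        /* Timer code reported in the "tr" column: 1 retransmit timer,
         * 2 keepalive (sk_timer), 4 zero window probe, 0 none.
         * TIME_WAIT sockets report 3, see get_timewait4_sock().
         */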
2435        if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2436                timer_active    = 1;
2437                timer_expires   = icsk->icsk_timeout;
2438        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2439                timer_active    = 4;
2440                timer_expires   = icsk->icsk_timeout;
2441        } else if (timer_pending(&sk->sk_timer)) {
2442                timer_active    = 2;
2443                timer_expires   = sk->sk_timer.expires;
2444        } else {
2445                timer_active    = 0;
2446                timer_expires = jiffies;
2447        }
2448
2449        if (sk->sk_state == TCP_LISTEN)
2450                rx_queue = sk->sk_ack_backlog;
2451        else
2452                /*
2453                 * because we don't lock the socket, we might find a transient negative value
2454                 */
2455                rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2456
2457        seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2458                        "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
2459                i, src, srcp, dest, destp, sk->sk_state,
2460                tp->write_seq - tp->snd_una,
2461                rx_queue,
2462                timer_active,
2463                jiffies_to_clock_t(timer_expires - jiffies),
2464                icsk->icsk_retransmits,
2465                sock_i_uid(sk),
2466                icsk->icsk_probes_out,
2467                sock_i_ino(sk),
2468                atomic_read(&sk->sk_refcnt), sk,
2469                jiffies_to_clock_t(icsk->icsk_rto),
2470                jiffies_to_clock_t(icsk->icsk_ack.ato),
2471                (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2472                tp->snd_cwnd,
2473                tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2474                len);
2475}
2476
2477static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2478                               struct seq_file *f, int i, int *len)
2479{
2480        __be32 dest, src;
2481        __u16 destp, srcp;
2482        int ttd = tw->tw_ttd - jiffies;
2483
2484        if (ttd < 0)
2485                ttd = 0;
2486
2487        dest  = tw->tw_daddr;
2488        src   = tw->tw_rcv_saddr;
2489        destp = ntohs(tw->tw_dport);
2490        srcp  = ntohs(tw->tw_sport);
2491
2492        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2493                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
2494                i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2495                3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2496                atomic_read(&tw->tw_refcnt), tw, len);
2497}
2498
2499#define TMPSZ 150       /* records below are padded to TMPSZ - 1 characters */
2500
2501static int tcp4_seq_show(struct seq_file *seq, void *v)
2502{
2503        struct tcp_iter_state *st;
2504        int len;
2505
2506        if (v == SEQ_START_TOKEN) {
2507                seq_printf(seq, "%-*s\n", TMPSZ - 1,
2508                           "  sl  local_address rem_address   st tx_queue "
2509                           "rx_queue tr tm->when retrnsmt   uid  timeout "
2510                           "inode");
2511                goto out;
2512        }
2513        st = seq->private;
2514
2515        switch (st->state) {
2516        case TCP_SEQ_STATE_LISTENING:
2517        case TCP_SEQ_STATE_ESTABLISHED:
2518                get_tcp4_sock(v, seq, st->num, &len);
2519                break;
2520        case TCP_SEQ_STATE_OPENREQ:
2521                get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2522                break;
2523        case TCP_SEQ_STATE_TIME_WAIT:
2524                get_timewait4_sock(v, seq, st->num, &len);
2525                break;
2526        }
2527        seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2528out:
2529        return 0;
2530}
2531
2532static const struct file_operations tcp_afinfo_seq_fops = {
2533        .owner   = THIS_MODULE,
2534        .open    = tcp_seq_open,
2535        .read    = seq_read,
2536        .llseek  = seq_lseek,
2537        .release = seq_release_net
2538};
2539
2540static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2541        .name           = "tcp",
2542        .family         = AF_INET,
2543        .seq_fops       = &tcp_afinfo_seq_fops,
2544        .seq_ops        = {
2545                .show           = tcp4_seq_show,
2546        },
2547};
2548
2549static int __net_init tcp4_proc_init_net(struct net *net)
2550{
2551        return tcp_proc_register(net, &tcp4_seq_afinfo);
2552}
2553
2554static void __net_exit tcp4_proc_exit_net(struct net *net)
2555{
2556        tcp_proc_unregister(net, &tcp4_seq_afinfo);
2557}
2558
2559static struct pernet_operations tcp4_net_ops = {
2560        .init = tcp4_proc_init_net,
2561        .exit = tcp4_proc_exit_net,
2562};
2563
2564int __init tcp4_proc_init(void)
2565{
2566        return register_pernet_subsys(&tcp4_net_ops);
2567}
2568
2569void tcp4_proc_exit(void)
2570{
2571        unregister_pernet_subsys(&tcp4_net_ops);
2572}
2573#endif /* CONFIG_PROC_FS */
2574
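/* GRO receive for TCP over IPv4: verify the checksum over the GRO region
 * when possible; segments whose checksum cannot be verified here are
 * flagged for an immediate flush, the rest are handed to the generic
 * tcp_gro_receive() for coalescing.
 */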
2575struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2576{
2577        const struct iphdr *iph = skb_gro_network_header(skb);
2578
2579        switch (skb->ip_summed) {
2580        case CHECKSUM_COMPLETE:
2581                if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2582                                  skb->csum)) {
2583                        skb->ip_summed = CHECKSUM_UNNECESSARY;
2584                        break;
2585                }
2586
2587                /* fall through */
2588        case CHECKSUM_NONE:
2589                NAPI_GRO_CB(skb)->flush = 1;
2590                return NULL;
2591        }
2592
2593        return tcp_gro_receive(head, skb);
2594}
2595
2596int tcp4_gro_complete(struct sk_buff *skb)
2597{
2598        const struct iphdr *iph = ip_hdr(skb);
2599        struct tcphdr *th = tcp_hdr(skb);
2600
2601        th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2602                                  iph->saddr, iph->daddr, 0);
2603        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2604
2605        return tcp_gro_complete(skb);
2606}
2607
2608struct proto tcp_prot = {
2609        .name                   = "TCP",
2610        .owner                  = THIS_MODULE,
2611        .close                  = tcp_close,
2612        .connect                = tcp_v4_connect,
2613        .disconnect             = tcp_disconnect,
2614        .accept                 = inet_csk_accept,
2615        .ioctl                  = tcp_ioctl,
2616        .init                   = tcp_v4_init_sock,
2617        .destroy                = tcp_v4_destroy_sock,
2618        .shutdown               = tcp_shutdown,
2619        .setsockopt             = tcp_setsockopt,
2620        .getsockopt             = tcp_getsockopt,
2621        .recvmsg                = tcp_recvmsg,
2622        .sendmsg                = tcp_sendmsg,
2623        .sendpage               = tcp_sendpage,
2624        .backlog_rcv            = tcp_v4_do_rcv,
2625        .release_cb             = tcp_release_cb,
2626        .mtu_reduced            = tcp_v4_mtu_reduced,
2627        .hash                   = inet_hash,
2628        .unhash                 = inet_unhash,
2629        .get_port               = inet_csk_get_port,
2630        .enter_memory_pressure  = tcp_enter_memory_pressure,
2631        .sockets_allocated      = &tcp_sockets_allocated,
2632        .orphan_count           = &tcp_orphan_count,
2633        .memory_allocated       = &tcp_memory_allocated,
2634        .memory_pressure        = &tcp_memory_pressure,
2635        .sysctl_wmem            = sysctl_tcp_wmem,
2636        .sysctl_rmem            = sysctl_tcp_rmem,
2637        .max_header             = MAX_TCP_HEADER,
2638        .obj_size               = sizeof(struct tcp_sock),
2639        .slab_flags             = SLAB_DESTROY_BY_RCU,
2640        .twsk_prot              = &tcp_timewait_sock_ops,
2641        .rsk_prot               = &tcp_request_sock_ops,
2642        .h.hashinfo             = &tcp_hashinfo,
2643        .no_autobind            = true,
2644#ifdef CONFIG_COMPAT
2645        .compat_setsockopt      = compat_tcp_setsockopt,
2646        .compat_getsockopt      = compat_tcp_getsockopt,
2647#endif
2648#ifdef CONFIG_MEMCG_KMEM
2649        .init_cgroup            = tcp_init_cgroup,
2650        .destroy_cgroup         = tcp_destroy_cgroup,
2651        .proto_cgroup           = tcp_proto_cgroup,
2652#endif
2653};
2654EXPORT_SYMBOL(tcp_prot);
2655
2656static int __net_init tcp_sk_init(struct net *net)
2657{
2658        return 0;
2659}
2660
2661static void __net_exit tcp_sk_exit(struct net *net)
2662{
2663}
2664
2665static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2666{
2667        inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2668}
2669
2670static struct pernet_operations __net_initdata tcp_sk_ops = {
2671       .init       = tcp_sk_init,
2672       .exit       = tcp_sk_exit,
2673       .exit_batch = tcp_sk_exit_batch,
2674};
2675
2676void __init tcp_v4_init(void)
2677{
2678        inet_hashinfo_init(&tcp_hashinfo);
2679        if (register_pernet_subsys(&tcp_sk_ops))
2680                panic("Failed to create the TCP control socket.\n");
2681}
2682