linux/net/ipv4/tcp_ipv4.c
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              Implementation of the Transmission Control Protocol(TCP).
   7 *
   8 *              IPv4 specific functions
   9 *
  10 *
  11 *              code split from:
  12 *              linux/ipv4/tcp.c
  13 *              linux/ipv4/tcp_input.c
  14 *              linux/ipv4/tcp_output.c
  15 *
  16 *              See tcp.c for author information
  17 *
  18 *      This program is free software; you can redistribute it and/or
  19 *      modify it under the terms of the GNU General Public License
  20 *      as published by the Free Software Foundation; either version
  21 *      2 of the License, or (at your option) any later version.
  22 */
  23
  24/*
  25 * Changes:
  26 *              David S. Miller :       New socket lookup architecture.
  27 *                                      This code is dedicated to John Dyson.
  28 *              David S. Miller :       Change semantics of established hash,
  29 *                                      half is devoted to TIME_WAIT sockets
  30 *                                      and the rest go in the other half.
  31 *              Andi Kleen :            Add support for syncookies and fixed
  32 *                                      some bugs: ip options weren't passed to
  33 *                                      the TCP layer, missed a check for an
  34 *                                      ACK bit.
  35 *              Andi Kleen :            Implemented fast path mtu discovery.
  36 *                                      Fixed many serious bugs in the
  37 *                                      request_sock handling and moved
  38 *                                      most of it into the af independent code.
  39 *                                      Added tail drop and some other bugfixes.
  40 *                                      Added new listen semantics.
  41 *              Mike McLagan    :       Routing by source
  42 *      Juan Jose Ciarlante:            ip_dynaddr bits
  43 *              Andi Kleen:             various fixes.
   44 *      Vitaly E. Lavrov        :       Transparent proxy revived after a
   45 *                                      year-long coma.
  46 *      Andi Kleen              :       Fix new listen.
  47 *      Andi Kleen              :       Fix accept error reporting.
   48 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
   49 *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
   50 *                                      to a single port at the same time.
  51 */
  52
  53#define pr_fmt(fmt) "TCP: " fmt
  54
  55#include <linux/bottom_half.h>
  56#include <linux/types.h>
  57#include <linux/fcntl.h>
  58#include <linux/module.h>
  59#include <linux/random.h>
  60#include <linux/cache.h>
  61#include <linux/jhash.h>
  62#include <linux/init.h>
  63#include <linux/times.h>
  64#include <linux/slab.h>
  65
  66#include <net/net_namespace.h>
  67#include <net/icmp.h>
  68#include <net/inet_hashtables.h>
  69#include <net/tcp.h>
  70#include <net/transp_v6.h>
  71#include <net/ipv6.h>
  72#include <net/inet_common.h>
  73#include <net/timewait_sock.h>
  74#include <net/xfrm.h>
  75#include <net/netdma.h>
  76#include <net/secure_seq.h>
  77#include <net/tcp_memcontrol.h>
  78
  79#include <linux/inet.h>
  80#include <linux/ipv6.h>
  81#include <linux/stddef.h>
  82#include <linux/proc_fs.h>
  83#include <linux/seq_file.h>
  84
  85#include <linux/crypto.h>
  86#include <linux/scatterlist.h>
  87
  88int sysctl_tcp_tw_reuse __read_mostly;
  89int sysctl_tcp_low_latency __read_mostly;
  90EXPORT_SYMBOL(sysctl_tcp_low_latency);
  91
  92
  93#ifdef CONFIG_TCP_MD5SIG
  94static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
  95                               __be32 daddr, __be32 saddr, const struct tcphdr *th);
  96#endif
  97
  98struct inet_hashinfo tcp_hashinfo;
  99EXPORT_SYMBOL(tcp_hashinfo);
 100
 101static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
 102{
 103        return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
 104                                          ip_hdr(skb)->saddr,
 105                                          tcp_hdr(skb)->dest,
 106                                          tcp_hdr(skb)->source);
 107}
 108
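/* Editor's note: for an incoming SYN the fields are deliberately swapped
 * (daddr before saddr, dest before source), so the ISN we generate is keyed
 * on our own send direction; compare the unswapped argument order in the
 * secure_tcp_sequence_number() call near the end of tcp_v4_connect() below.
 */
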
 109int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 110{
 111        const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
 112        struct tcp_sock *tp = tcp_sk(sk);
 113
  114        /* With PAWS, it is safe from the viewpoint
  115           of data integrity. Even without PAWS it is safe, provided sequence
  116           spaces do not overlap, i.e. at data rates <= 80 Mbit/sec.
  117
  118           Actually, the idea is close to VJ's, except that the timestamp
  119           cache is held not per host but per port pair, and the TW bucket
  120           is used as the state holder.
  121
  122           If the TW bucket has already been destroyed we fall back to VJ's
  123           scheme and use the initial timestamp retrieved from the peer table.
  124         */
 125        if (tcptw->tw_ts_recent_stamp &&
 126            (twp == NULL || (sysctl_tcp_tw_reuse &&
 127                             get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
 128                tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 129                if (tp->write_seq == 0)
 130                        tp->write_seq = 1;
 131                tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
 132                tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
 133                sock_hold(sktw);
 134                return 1;
 135        }
 136
 137        return 0;
 138}
 139EXPORT_SYMBOL_GPL(tcp_twsk_unique);
 140
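/* Editor's note: a minimal userspace sketch (assumed example, not part of
 * this file) of how sysctl_tcp_tw_reuse above is typically exercised:
 * enable the sysctl, then connect() again to the same destination while
 * the previous socket is still in TIME-WAIT. Timestamps must be in use
 * for the reuse path in tcp_twsk_unique() to fire.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static void enable_tw_reuse(void)
 *	{
 *		int fd = open("/proc/sys/net/ipv4/tcp_tw_reuse", O_WRONLY);
 *
 *		if (fd >= 0) {
 *			write(fd, "1", 1);	// outgoing connects may reuse TIME-WAIT ports
 *			close(fd);
 *		}
 *	}
 */
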
 141static int tcp_repair_connect(struct sock *sk)
 142{
 143        tcp_connect_init(sk);
 144        tcp_finish_connect(sk, NULL);
 145
 146        return 0;
 147}
 148
 149/* This will initiate an outgoing connection. */
 150int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 151{
 152        struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
 153        struct inet_sock *inet = inet_sk(sk);
 154        struct tcp_sock *tp = tcp_sk(sk);
 155        __be16 orig_sport, orig_dport;
 156        __be32 daddr, nexthop;
 157        struct flowi4 *fl4;
 158        struct rtable *rt;
 159        int err;
 160        struct ip_options_rcu *inet_opt;
 161
 162        if (addr_len < sizeof(struct sockaddr_in))
 163                return -EINVAL;
 164
 165        if (usin->sin_family != AF_INET)
 166                return -EAFNOSUPPORT;
 167
 168        nexthop = daddr = usin->sin_addr.s_addr;
 169        inet_opt = rcu_dereference_protected(inet->inet_opt,
 170                                             sock_owned_by_user(sk));
 171        if (inet_opt && inet_opt->opt.srr) {
 172                if (!daddr)
 173                        return -EINVAL;
 174                nexthop = inet_opt->opt.faddr;
 175        }
 176
 177        orig_sport = inet->inet_sport;
 178        orig_dport = usin->sin_port;
 179        fl4 = &inet->cork.fl.u.ip4;
 180        rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
 181                              RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 182                              IPPROTO_TCP,
 183                              orig_sport, orig_dport, sk, true);
 184        if (IS_ERR(rt)) {
 185                err = PTR_ERR(rt);
 186                if (err == -ENETUNREACH)
 187                        IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 188                return err;
 189        }
 190
 191        if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
 192                ip_rt_put(rt);
 193                return -ENETUNREACH;
 194        }
 195
 196        if (!inet_opt || !inet_opt->opt.srr)
 197                daddr = fl4->daddr;
 198
 199        if (!inet->inet_saddr)
 200                inet->inet_saddr = fl4->saddr;
 201        inet->inet_rcv_saddr = inet->inet_saddr;
 202
 203        if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
 204                /* Reset inherited state */
 205                tp->rx_opt.ts_recent       = 0;
 206                tp->rx_opt.ts_recent_stamp = 0;
 207                if (likely(!tp->repair))
 208                        tp->write_seq      = 0;
 209        }
 210
 211        if (tcp_death_row.sysctl_tw_recycle &&
 212            !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
 213                tcp_fetch_timewait_stamp(sk, &rt->dst);
 214
 215        inet->inet_dport = usin->sin_port;
 216        inet->inet_daddr = daddr;
 217
 218        inet_csk(sk)->icsk_ext_hdr_len = 0;
 219        if (inet_opt)
 220                inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
 221
 222        tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
 223
 224        /* Socket identity is still unknown (sport may be zero).
  225         * However we set the state to SYN-SENT and, without releasing the
  226         * socket lock, select a source port, enter ourselves into the hash
  227         * tables, and complete initialization after this.
 228         */
 229        tcp_set_state(sk, TCP_SYN_SENT);
 230        err = inet_hash_connect(&tcp_death_row, sk);
 231        if (err)
 232                goto failure;
 233
 234        rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
 235                               inet->inet_sport, inet->inet_dport, sk);
 236        if (IS_ERR(rt)) {
 237                err = PTR_ERR(rt);
 238                rt = NULL;
 239                goto failure;
 240        }
 241        /* OK, now commit destination to socket.  */
 242        sk->sk_gso_type = SKB_GSO_TCPV4;
 243        sk_setup_caps(sk, &rt->dst);
 244
 245        if (!tp->write_seq && likely(!tp->repair))
 246                tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
 247                                                           inet->inet_daddr,
 248                                                           inet->inet_sport,
 249                                                           usin->sin_port);
 250
 251        inet->inet_id = tp->write_seq ^ jiffies;
 252
 253        if (likely(!tp->repair))
 254                err = tcp_connect(sk);
 255        else
 256                err = tcp_repair_connect(sk);
 257
 258        rt = NULL;
 259        if (err)
 260                goto failure;
 261
 262        return 0;
 263
 264failure:
 265        /*
 266         * This unhashes the socket and releases the local port,
 267         * if necessary.
 268         */
 269        tcp_set_state(sk, TCP_CLOSE);
 270        ip_rt_put(rt);
 271        sk->sk_route_caps = 0;
 272        inet->inet_dport = 0;
 273        return err;
 274}
 275EXPORT_SYMBOL(tcp_v4_connect);
 276
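/* Editor's note: tcp_v4_connect() is the AF_INET backend of the connect(2)
 * system call. A minimal userspace sketch of the triggering path (assumed
 * example; error handling trimmed):
 *
 *	#include <arpa/inet.h>
 *	#include <sys/socket.h>
 *
 *	int fd = socket(AF_INET, SOCK_STREAM, 0);
 *	struct sockaddr_in dst = {
 *		.sin_family = AF_INET,		// anything else fails -EAFNOSUPPORT
 *		.sin_port   = htons(80),
 *	};
 *
 *	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);
 *	connect(fd, (struct sockaddr *)&dst, sizeof(dst));
 *
 * An addr_len shorter than sizeof(struct sockaddr_in) is rejected with
 * -EINVAL, matching the checks at the top of the function.
 */
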
 277/*
 278 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
  279 * It can be called through tcp_release_cb() if the socket was owned by the
  280 * user at the time tcp_v4_err() was called to handle the ICMP message.
 281 */
 282static void tcp_v4_mtu_reduced(struct sock *sk)
 283{
 284        struct dst_entry *dst;
 285        struct inet_sock *inet = inet_sk(sk);
 286        u32 mtu = tcp_sk(sk)->mtu_info;
 287
 288        /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
  289         * sent out by Linux are always < 576 bytes, so they should go through
 290         * unfragmented).
 291         */
 292        if (sk->sk_state == TCP_LISTEN)
 293                return;
 294
 295        dst = inet_csk_update_pmtu(sk, mtu);
 296        if (!dst)
 297                return;
 298
  299        /* Something is about to go wrong... Remember the soft error
  300         * in case this connection is not able to recover.
 301         */
 302        if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
 303                sk->sk_err_soft = EMSGSIZE;
 304
 305        mtu = dst_mtu(dst);
 306
 307        if (inet->pmtudisc != IP_PMTUDISC_DONT &&
 308            inet_csk(sk)->icsk_pmtu_cookie > mtu) {
 309                tcp_sync_mss(sk, mtu);
 310
 311                /* Resend the TCP packet because it's
 312                 * clear that the old packet has been
 313                 * dropped. This is the new "fast" path mtu
 314                 * discovery.
 315                 */
 316                tcp_simple_retransmit(sk);
 317        } /* else let the usual retransmit timer handle it */
 318}
 319
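/* Editor's note: the inet->pmtudisc test above is controlled by the
 * IP_MTU_DISCOVER socket option. A hedged userspace sketch (assumed
 * example): with IP_PMTUDISC_DONT the socket refuses to act on
 * ICMP_FRAG_NEEDED and keeps its MSS unchanged.
 *
 *	#include <netinet/in.h>
 *	#include <sys/socket.h>
 *
 *	int val = IP_PMTUDISC_DONT;	// don't set DF, don't track path MTU
 *	setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &val, sizeof(val));
 */
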
 320static void do_redirect(struct sk_buff *skb, struct sock *sk)
 321{
 322        struct dst_entry *dst = __sk_dst_check(sk, 0);
 323
 324        if (dst)
 325                dst->ops->redirect(dst, sk, skb);
 326}
 327
 328/*
 329 * This routine is called by the ICMP module when it gets some
 330 * sort of error condition.  If err < 0 then the socket should
 331 * be closed and the error returned to the user.  If err > 0
  332 * it's just the icmp type << 8 | icmp code.  After adjustment,
  333 * the header points to the first 8 bytes of the TCP header.  We need
 334 * to find the appropriate port.
 335 *
 336 * The locking strategy used here is very "optimistic". When
 337 * someone else accesses the socket the ICMP is just dropped
 338 * and for some paths there is no check at all.
  339 * A more general error queue for queuing errors for later handling
  340 * would probably be better.
 341 *
 342 */
 343
 344void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 345{
 346        const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
 347        struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
 348        struct inet_connection_sock *icsk;
 349        struct tcp_sock *tp;
 350        struct inet_sock *inet;
 351        const int type = icmp_hdr(icmp_skb)->type;
 352        const int code = icmp_hdr(icmp_skb)->code;
 353        struct sock *sk;
 354        struct sk_buff *skb;
 355        __u32 seq;
 356        __u32 remaining;
 357        int err;
 358        struct net *net = dev_net(icmp_skb->dev);
 359
 360        if (icmp_skb->len < (iph->ihl << 2) + 8) {
 361                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 362                return;
 363        }
 364
 365        sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
 366                        iph->saddr, th->source, inet_iif(icmp_skb));
 367        if (!sk) {
 368                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 369                return;
 370        }
 371        if (sk->sk_state == TCP_TIME_WAIT) {
 372                inet_twsk_put(inet_twsk(sk));
 373                return;
 374        }
 375
 376        bh_lock_sock(sk);
 377        /* If too many ICMPs get dropped on busy
 378         * servers this needs to be solved differently.
  379         * We do take care of the PMTU discovery (RFC1191) special case:
  380         * we can receive locally generated ICMP messages while the socket is held.
 381         */
 382        if (sock_owned_by_user(sk) &&
 383            type != ICMP_DEST_UNREACH &&
 384            code != ICMP_FRAG_NEEDED)
 385                NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
 386
 387        if (sk->sk_state == TCP_CLOSE)
 388                goto out;
 389
 390        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
 391                NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
 392                goto out;
 393        }
 394
 395        icsk = inet_csk(sk);
 396        tp = tcp_sk(sk);
 397        seq = ntohl(th->seq);
 398        if (sk->sk_state != TCP_LISTEN &&
 399            !between(seq, tp->snd_una, tp->snd_nxt)) {
 400                NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 401                goto out;
 402        }
 403
 404        switch (type) {
 405        case ICMP_REDIRECT:
 406                do_redirect(icmp_skb, sk);
 407                goto out;
 408        case ICMP_SOURCE_QUENCH:
 409                /* Just silently ignore these. */
 410                goto out;
 411        case ICMP_PARAMETERPROB:
 412                err = EPROTO;
 413                break;
 414        case ICMP_DEST_UNREACH:
 415                if (code > NR_ICMP_UNREACH)
 416                        goto out;
 417
 418                if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
 419                        tp->mtu_info = info;
 420                        if (!sock_owned_by_user(sk)) {
 421                                tcp_v4_mtu_reduced(sk);
 422                        } else {
 423                                if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
 424                                        sock_hold(sk);
 425                        }
 426                        goto out;
 427                }
 428
 429                err = icmp_err_convert[code].errno;
 430                /* check if icmp_skb allows revert of backoff
 431                 * (see draft-zimmermann-tcp-lcd) */
 432                if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
 433                        break;
 434                if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
 435                    !icsk->icsk_backoff)
 436                        break;
 437
 438                if (sock_owned_by_user(sk))
 439                        break;
 440
 441                icsk->icsk_backoff--;
 442                inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
 443                        TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
 444                tcp_bound_rto(sk);
 445
 446                skb = tcp_write_queue_head(sk);
 447                BUG_ON(!skb);
 448
 449                remaining = icsk->icsk_rto - min(icsk->icsk_rto,
 450                                tcp_time_stamp - TCP_SKB_CB(skb)->when);
 451
 452                if (remaining) {
 453                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 454                                                  remaining, TCP_RTO_MAX);
 455                } else {
 456                        /* RTO revert clocked out retransmission.
 457                         * Will retransmit now */
 458                        tcp_retransmit_timer(sk);
 459                }
 460
 461                break;
 462        case ICMP_TIME_EXCEEDED:
 463                err = EHOSTUNREACH;
 464                break;
 465        default:
 466                goto out;
 467        }
 468
 469        switch (sk->sk_state) {
 470                struct request_sock *req, **prev;
 471        case TCP_LISTEN:
 472                if (sock_owned_by_user(sk))
 473                        goto out;
 474
 475                req = inet_csk_search_req(sk, &prev, th->dest,
 476                                          iph->daddr, iph->saddr);
 477                if (!req)
 478                        goto out;
 479
 480                /* ICMPs are not backlogged, hence we cannot get
 481                   an established socket here.
 482                 */
 483                WARN_ON(req->sk);
 484
 485                if (seq != tcp_rsk(req)->snt_isn) {
 486                        NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 487                        goto out;
 488                }
 489
 490                /*
 491                 * Still in SYN_RECV, just remove it silently.
 492                 * There is no good way to pass the error to the newly
 493                 * created socket, and POSIX does not want network
 494                 * errors returned from accept().
 495                 */
 496                inet_csk_reqsk_queue_drop(sk, req, prev);
 497                goto out;
 498
 499        case TCP_SYN_SENT:
  500        case TCP_SYN_RECV:  /* Cannot happen normally.
  501                               It can, e.g., if SYNs crossed.
  502                             */
 503                if (!sock_owned_by_user(sk)) {
 504                        sk->sk_err = err;
 505
 506                        sk->sk_error_report(sk);
 507
 508                        tcp_done(sk);
 509                } else {
 510                        sk->sk_err_soft = err;
 511                }
 512                goto out;
 513        }
 514
 515        /* If we've already connected we will keep trying
 516         * until we time out, or the user gives up.
 517         *
  518 * rfc1122 4.2.3.9 allows us to consider as hard errors
 519         * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
 520         * but it is obsoleted by pmtu discovery).
 521         *
  522 * Note that in the modern internet, where routing is unreliable
  523 * and broken firewalls sit in every dark corner, sending random
  524 * errors ordered by their masters, even these two messages finally
  525 * lose their original sense (even Linux sends invalid PORT_UNREACHs).
 526         *
 527         * Now we are in compliance with RFCs.
 528         *                                                      --ANK (980905)
 529         */
 530
 531        inet = inet_sk(sk);
 532        if (!sock_owned_by_user(sk) && inet->recverr) {
 533                sk->sk_err = err;
 534                sk->sk_error_report(sk);
 535        } else  { /* Only an error on timeout */
 536                sk->sk_err_soft = err;
 537        }
 538
 539out:
 540        bh_unlock_sock(sk);
 541        sock_put(sk);
 542}
 543
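/* Editor's note: whether the ICMP-derived error above becomes a hard error
 * (sk_err) or only a soft one (sk_err_soft) depends on inet->recverr, set
 * by the IP_RECVERR socket option. A minimal userspace sketch (assumed
 * example) of opting in and then reading the pending error:
 *
 *	#include <netinet/in.h>
 *	#include <sys/socket.h>
 *
 *	int on = 1;
 *	setsockopt(fd, IPPROTO_IP, IP_RECVERR, &on, sizeof(on));
 *
 *	int err;
 *	socklen_t len = sizeof(err);
 *	getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len);	// fetches and clears sk_err
 */
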
 544static void __tcp_v4_send_check(struct sk_buff *skb,
 545                                __be32 saddr, __be32 daddr)
 546{
 547        struct tcphdr *th = tcp_hdr(skb);
 548
 549        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 550                th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
 551                skb->csum_start = skb_transport_header(skb) - skb->head;
 552                skb->csum_offset = offsetof(struct tcphdr, check);
 553        } else {
 554                th->check = tcp_v4_check(skb->len, saddr, daddr,
 555                                         csum_partial(th,
 556                                                      th->doff << 2,
 557                                                      skb->csum));
 558        }
 559}
 560
 561/* This routine computes an IPv4 TCP checksum. */
 562void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
 563{
 564        const struct inet_sock *inet = inet_sk(sk);
 565
 566        __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
 567}
 568EXPORT_SYMBOL(tcp_v4_send_check);
 569
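/* Editor's note: a self-contained sketch of what tcp_v4_check() and
 * csum_partial() compute above: the classic 16-bit one's complement sum
 * over the IPv4 pseudo-header followed by the TCP segment. Illustrative
 * helper only (assumed name); addresses are taken as big-endian words.
 *
 *	#include <netinet/in.h>	// IPPROTO_TCP
 *	#include <stdint.h>
 *	#include <stddef.h>
 *
 *	static uint16_t tcp_csum_sketch(uint32_t saddr, uint32_t daddr,
 *					const uint8_t *seg, size_t len)
 *	{
 *		uint32_t sum = 0;
 *		size_t i;
 *
 *		sum += (saddr >> 16) + (saddr & 0xffff);	// pseudo-header: source,
 *		sum += (daddr >> 16) + (daddr & 0xffff);	// destination,
 *		sum += IPPROTO_TCP + len;			// protocol and length
 *		for (i = 0; i + 1 < len; i += 2)		// segment, two bytes at a time
 *			sum += (seg[i] << 8) | seg[i + 1];
 *		if (len & 1)
 *			sum += seg[len - 1] << 8;		// odd trailing byte, zero-padded
 *		while (sum >> 16)
 *			sum = (sum & 0xffff) + (sum >> 16);	// fold the carries
 *		return ~sum & 0xffff;
 *	}
 */
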
 570int tcp_v4_gso_send_check(struct sk_buff *skb)
 571{
 572        const struct iphdr *iph;
 573        struct tcphdr *th;
 574
 575        if (!pskb_may_pull(skb, sizeof(*th)))
 576                return -EINVAL;
 577
 578        iph = ip_hdr(skb);
 579        th = tcp_hdr(skb);
 580
 581        th->check = 0;
 582        skb->ip_summed = CHECKSUM_PARTIAL;
 583        __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
 584        return 0;
 585}
 586
 587/*
 588 *      This routine will send an RST to the other tcp.
 589 *
  590 *      Someone asks: why do I NEVER use socket parameters (TOS, TTL, etc.)
  591 *                    for the reset?
  592 *      Answer: if a packet caused an RST, it is not for a socket
  593 *              existing in our system; if it is matched to a socket,
  594 *              it is just a duplicate segment or a bug in the other side's TCP.
  595 *              So we build the reply based only on the parameters that
  596 *              arrived with the segment.
 597 *      Exception: precedence violation. We do not implement it in any case.
 598 */
 599
 600static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 601{
 602        const struct tcphdr *th = tcp_hdr(skb);
 603        struct {
 604                struct tcphdr th;
 605#ifdef CONFIG_TCP_MD5SIG
 606                __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
 607#endif
 608        } rep;
 609        struct ip_reply_arg arg;
 610#ifdef CONFIG_TCP_MD5SIG
 611        struct tcp_md5sig_key *key;
 612        const __u8 *hash_location = NULL;
 613        unsigned char newhash[16];
 614        int genhash;
 615        struct sock *sk1 = NULL;
 616#endif
 617        struct net *net;
 618
 619        /* Never send a reset in response to a reset. */
 620        if (th->rst)
 621                return;
 622
 623        if (skb_rtable(skb)->rt_type != RTN_LOCAL)
 624                return;
 625
 626        /* Swap the send and the receive. */
 627        memset(&rep, 0, sizeof(rep));
 628        rep.th.dest   = th->source;
 629        rep.th.source = th->dest;
 630        rep.th.doff   = sizeof(struct tcphdr) / 4;
 631        rep.th.rst    = 1;
 632
 633        if (th->ack) {
 634                rep.th.seq = th->ack_seq;
 635        } else {
 636                rep.th.ack = 1;
 637                rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
 638                                       skb->len - (th->doff << 2));
 639        }
 640
 641        memset(&arg, 0, sizeof(arg));
 642        arg.iov[0].iov_base = (unsigned char *)&rep;
 643        arg.iov[0].iov_len  = sizeof(rep.th);
 644
 645#ifdef CONFIG_TCP_MD5SIG
 646        hash_location = tcp_parse_md5sig_option(th);
 647        if (!sk && hash_location) {
 648                /*
  649                 * The active side is lost. Try to find the listening socket
  650                 * through the source port, then find the md5 key through that
  651                 * listening socket. We do not loosen security here:
  652                 * the incoming packet is checked against the md5 hash of the
  653                 * found key; no RST is generated if the hash doesn't match.
 654                 */
 655                sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
 656                                             &tcp_hashinfo, ip_hdr(skb)->daddr,
 657                                             ntohs(th->source), inet_iif(skb));
  658                /* don't send an RST if we can't find the key */
 659                if (!sk1)
 660                        return;
 661                rcu_read_lock();
 662                key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
 663                                        &ip_hdr(skb)->saddr, AF_INET);
 664                if (!key)
 665                        goto release_sk1;
 666
 667                genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
 668                if (genhash || memcmp(hash_location, newhash, 16) != 0)
 669                        goto release_sk1;
 670        } else {
 671                key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
 672                                             &ip_hdr(skb)->saddr,
 673                                             AF_INET) : NULL;
 674        }
 675
 676        if (key) {
 677                rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 678                                   (TCPOPT_NOP << 16) |
 679                                   (TCPOPT_MD5SIG << 8) |
 680                                   TCPOLEN_MD5SIG);
 681                /* Update length and the length the header thinks exists */
 682                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 683                rep.th.doff = arg.iov[0].iov_len / 4;
 684
 685                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
 686                                     key, ip_hdr(skb)->saddr,
 687                                     ip_hdr(skb)->daddr, &rep.th);
 688        }
 689#endif
 690        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 691                                      ip_hdr(skb)->saddr, /* XXX */
 692                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 693        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 694        arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
  695        /* When the socket is gone, all binding information is lost.
  696         * Routing might fail in this case. Use iif for oif to
  697         * make sure we can deliver it.
 698         */
 699        arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);
 700
 701        net = dev_net(skb_dst(skb)->dev);
 702        arg.tos = ip_hdr(skb)->tos;
 703        ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
 704                              ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
 705
 706        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 707        TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
 708
 709#ifdef CONFIG_TCP_MD5SIG
 710release_sk1:
 711        if (sk1) {
 712                rcu_read_unlock();
 713                sock_put(sk1);
 714        }
 715#endif
 716}
 717
  718/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
  719   outside of socket context, is certainly ugly. What can I do?
 720 */
 721
 722static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 723                            u32 win, u32 ts, int oif,
 724                            struct tcp_md5sig_key *key,
 725                            int reply_flags, u8 tos)
 726{
 727        const struct tcphdr *th = tcp_hdr(skb);
 728        struct {
 729                struct tcphdr th;
 730                __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
 731#ifdef CONFIG_TCP_MD5SIG
 732                           + (TCPOLEN_MD5SIG_ALIGNED >> 2)
 733#endif
 734                        ];
 735        } rep;
 736        struct ip_reply_arg arg;
 737        struct net *net = dev_net(skb_dst(skb)->dev);
 738
 739        memset(&rep.th, 0, sizeof(struct tcphdr));
 740        memset(&arg, 0, sizeof(arg));
 741
 742        arg.iov[0].iov_base = (unsigned char *)&rep;
 743        arg.iov[0].iov_len  = sizeof(rep.th);
 744        if (ts) {
 745                rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 746                                   (TCPOPT_TIMESTAMP << 8) |
 747                                   TCPOLEN_TIMESTAMP);
 748                rep.opt[1] = htonl(tcp_time_stamp);
 749                rep.opt[2] = htonl(ts);
 750                arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
 751        }
 752
 753        /* Swap the send and the receive. */
 754        rep.th.dest    = th->source;
 755        rep.th.source  = th->dest;
 756        rep.th.doff    = arg.iov[0].iov_len / 4;
 757        rep.th.seq     = htonl(seq);
 758        rep.th.ack_seq = htonl(ack);
 759        rep.th.ack     = 1;
 760        rep.th.window  = htons(win);
 761
 762#ifdef CONFIG_TCP_MD5SIG
 763        if (key) {
 764                int offset = (ts) ? 3 : 0;
 765
 766                rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
 767                                          (TCPOPT_NOP << 16) |
 768                                          (TCPOPT_MD5SIG << 8) |
 769                                          TCPOLEN_MD5SIG);
 770                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 771                rep.th.doff = arg.iov[0].iov_len/4;
 772
 773                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
 774                                    key, ip_hdr(skb)->saddr,
 775                                    ip_hdr(skb)->daddr, &rep.th);
 776        }
 777#endif
 778        arg.flags = reply_flags;
 779        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 780                                      ip_hdr(skb)->saddr, /* XXX */
 781                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 782        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 783        if (oif)
 784                arg.bound_dev_if = oif;
 785        arg.tos = tos;
 786        ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
 787                              ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
 788
 789        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 790}
 791
 792static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 793{
 794        struct inet_timewait_sock *tw = inet_twsk(sk);
 795        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 796
 797        tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 798                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 799                        tcptw->tw_ts_recent,
 800                        tw->tw_bound_dev_if,
 801                        tcp_twsk_md5_key(tcptw),
 802                        tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
 803                        tw->tw_tos
 804                        );
 805
 806        inet_twsk_put(tw);
 807}
 808
 809static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 810                                  struct request_sock *req)
 811{
 812        tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
 813                        tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
 814                        req->ts_recent,
 815                        0,
 816                        tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
 817                                          AF_INET),
 818                        inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
 819                        ip_hdr(skb)->tos);
 820}
 821
 822/*
 823 *      Send a SYN-ACK after having received a SYN.
 824 *      This still operates on a request_sock only, not on a big
 825 *      socket.
 826 */
 827static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 828                              struct request_sock *req,
 829                              struct request_values *rvp,
 830                              u16 queue_mapping,
 831                              bool nocache)
 832{
 833        const struct inet_request_sock *ireq = inet_rsk(req);
 834        struct flowi4 fl4;
 835        int err = -1;
  836        struct sk_buff *skb;
 837
 838        /* First, grab a route. */
 839        if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 840                return -1;
 841
 842        skb = tcp_make_synack(sk, dst, req, rvp);
 843
 844        if (skb) {
 845                __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
 846
 847                skb_set_queue_mapping(skb, queue_mapping);
 848                err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
 849                                            ireq->rmt_addr,
 850                                            ireq->opt);
 851                err = net_xmit_eval(err);
 852        }
 853
 854        return err;
 855}
 856
 857static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
 858                              struct request_values *rvp)
 859{
 860        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
 861        return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
 862}
 863
 864/*
 865 *      IPv4 request_sock destructor.
 866 */
 867static void tcp_v4_reqsk_destructor(struct request_sock *req)
 868{
 869        kfree(inet_rsk(req)->opt);
 870}
 871
 872/*
 873 * Return true if a syncookie should be sent
 874 */
 875bool tcp_syn_flood_action(struct sock *sk,
 876                         const struct sk_buff *skb,
 877                         const char *proto)
 878{
 879        const char *msg = "Dropping request";
 880        bool want_cookie = false;
 881        struct listen_sock *lopt;
 882
 883
 884
 885#ifdef CONFIG_SYN_COOKIES
 886        if (sysctl_tcp_syncookies) {
 887                msg = "Sending cookies";
 888                want_cookie = true;
 889                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
 890        } else
 891#endif
 892                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
 893
 894        lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
 895        if (!lopt->synflood_warned) {
 896                lopt->synflood_warned = 1;
 897                pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
 898                        proto, ntohs(tcp_hdr(skb)->dest), msg);
 899        }
 900        return want_cookie;
 901}
 902EXPORT_SYMBOL(tcp_syn_flood_action);
 903
 904/*
 905 * Save and compile IPv4 options into the request_sock if needed.
 906 */
 907static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
 908                                                  struct sk_buff *skb)
 909{
 910        const struct ip_options *opt = &(IPCB(skb)->opt);
 911        struct ip_options_rcu *dopt = NULL;
 912
 913        if (opt && opt->optlen) {
 914                int opt_size = sizeof(*dopt) + opt->optlen;
 915
 916                dopt = kmalloc(opt_size, GFP_ATOMIC);
 917                if (dopt) {
 918                        if (ip_options_echo(&dopt->opt, skb)) {
 919                                kfree(dopt);
 920                                dopt = NULL;
 921                        }
 922                }
 923        }
 924        return dopt;
 925}
 926
 927#ifdef CONFIG_TCP_MD5SIG
 928/*
 929 * RFC2385 MD5 checksumming requires a mapping of
 930 * IP address->MD5 Key.
 931 * We need to maintain these in the sk structure.
 932 */
 933
 934/* Find the Key structure for an address.  */
 935struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
 936                                         const union tcp_md5_addr *addr,
 937                                         int family)
 938{
 939        struct tcp_sock *tp = tcp_sk(sk);
 940        struct tcp_md5sig_key *key;
 941        struct hlist_node *pos;
 942        unsigned int size = sizeof(struct in_addr);
 943        struct tcp_md5sig_info *md5sig;
 944
 945        /* caller either holds rcu_read_lock() or socket lock */
 946        md5sig = rcu_dereference_check(tp->md5sig_info,
 947                                       sock_owned_by_user(sk) ||
 948                                       lockdep_is_held(&sk->sk_lock.slock));
 949        if (!md5sig)
 950                return NULL;
 951#if IS_ENABLED(CONFIG_IPV6)
 952        if (family == AF_INET6)
 953                size = sizeof(struct in6_addr);
 954#endif
 955        hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
 956                if (key->family != family)
 957                        continue;
 958                if (!memcmp(&key->addr, addr, size))
 959                        return key;
 960        }
 961        return NULL;
 962}
 963EXPORT_SYMBOL(tcp_md5_do_lookup);
 964
 965struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
 966                                         struct sock *addr_sk)
 967{
 968        union tcp_md5_addr *addr;
 969
 970        addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
 971        return tcp_md5_do_lookup(sk, addr, AF_INET);
 972}
 973EXPORT_SYMBOL(tcp_v4_md5_lookup);
 974
 975static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
 976                                                      struct request_sock *req)
 977{
 978        union tcp_md5_addr *addr;
 979
 980        addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
 981        return tcp_md5_do_lookup(sk, addr, AF_INET);
 982}
 983
 984/* This can be called on a newly created socket, from other files */
 985int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
 986                   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
 987{
 988        /* Add Key to the list */
 989        struct tcp_md5sig_key *key;
 990        struct tcp_sock *tp = tcp_sk(sk);
 991        struct tcp_md5sig_info *md5sig;
 992
  993        key = tcp_md5_do_lookup(sk, addr, family);
 994        if (key) {
 995                /* Pre-existing entry - just update that one. */
 996                memcpy(key->key, newkey, newkeylen);
 997                key->keylen = newkeylen;
 998                return 0;
 999        }
1000
1001        md5sig = rcu_dereference_protected(tp->md5sig_info,
1002                                           sock_owned_by_user(sk));
1003        if (!md5sig) {
1004                md5sig = kmalloc(sizeof(*md5sig), gfp);
1005                if (!md5sig)
1006                        return -ENOMEM;
1007
1008                sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1009                INIT_HLIST_HEAD(&md5sig->head);
1010                rcu_assign_pointer(tp->md5sig_info, md5sig);
1011        }
1012
1013        key = sock_kmalloc(sk, sizeof(*key), gfp);
1014        if (!key)
1015                return -ENOMEM;
1016        if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
1017                sock_kfree_s(sk, key, sizeof(*key));
1018                return -ENOMEM;
1019        }
1020
1021        memcpy(key->key, newkey, newkeylen);
1022        key->keylen = newkeylen;
1023        key->family = family;
1024        memcpy(&key->addr, addr,
1025               (family == AF_INET6) ? sizeof(struct in6_addr) :
1026                                      sizeof(struct in_addr));
1027        hlist_add_head_rcu(&key->node, &md5sig->head);
1028        return 0;
1029}
1030EXPORT_SYMBOL(tcp_md5_do_add);
1031
1032int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
1033{
1034        struct tcp_sock *tp = tcp_sk(sk);
1035        struct tcp_md5sig_key *key;
1036        struct tcp_md5sig_info *md5sig;
1037
 1038        key = tcp_md5_do_lookup(sk, addr, family);
1039        if (!key)
1040                return -ENOENT;
1041        hlist_del_rcu(&key->node);
1042        atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1043        kfree_rcu(key, rcu);
1044        md5sig = rcu_dereference_protected(tp->md5sig_info,
1045                                           sock_owned_by_user(sk));
1046        if (hlist_empty(&md5sig->head))
1047                tcp_free_md5sig_pool();
1048        return 0;
1049}
1050EXPORT_SYMBOL(tcp_md5_do_del);
1051
1052void tcp_clear_md5_list(struct sock *sk)
1053{
1054        struct tcp_sock *tp = tcp_sk(sk);
1055        struct tcp_md5sig_key *key;
1056        struct hlist_node *pos, *n;
1057        struct tcp_md5sig_info *md5sig;
1058
1059        md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1060
1061        if (!hlist_empty(&md5sig->head))
1062                tcp_free_md5sig_pool();
1063        hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
1064                hlist_del_rcu(&key->node);
1065                atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1066                kfree_rcu(key, rcu);
1067        }
1068}
1069
1070static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1071                                 int optlen)
1072{
1073        struct tcp_md5sig cmd;
1074        struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1075
1076        if (optlen < sizeof(cmd))
1077                return -EINVAL;
1078
1079        if (copy_from_user(&cmd, optval, sizeof(cmd)))
1080                return -EFAULT;
1081
1082        if (sin->sin_family != AF_INET)
1083                return -EINVAL;
1084
1085        if (!cmd.tcpm_key || !cmd.tcpm_keylen)
1086                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1087                                      AF_INET);
1088
1089        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1090                return -EINVAL;
1091
1092        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1093                              AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1094                              GFP_KERNEL);
1095}
1096
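/* Editor's note: tcp_v4_parse_md5_keys() above is the kernel side of the
 * TCP_MD5SIG socket option (RFC 2385). A minimal userspace sketch
 * (assumed example) installing a key for one IPv4 peer:
 *
 *	#include <arpa/inet.h>
 *	#include <netinet/tcp.h>
 *	#include <string.h>
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in *sin = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *	sin->sin_family = AF_INET;	// anything else fails -EINVAL above
 *	inet_pton(AF_INET, "192.0.2.1", &sin->sin_addr);
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * A zero tcpm_keylen deletes the key via tcp_md5_do_del() instead.
 */
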
1097static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1098                                        __be32 daddr, __be32 saddr, int nbytes)
1099{
1100        struct tcp4_pseudohdr *bp;
1101        struct scatterlist sg;
1102
1103        bp = &hp->md5_blk.ip4;
1104
1105        /*
1106         * 1. the TCP pseudo-header (in the order: source IP address,
1107         * destination IP address, zero-padded protocol number, and
1108         * segment length)
1109         */
1110        bp->saddr = saddr;
1111        bp->daddr = daddr;
1112        bp->pad = 0;
1113        bp->protocol = IPPROTO_TCP;
1114        bp->len = cpu_to_be16(nbytes);
1115
1116        sg_init_one(&sg, bp, sizeof(*bp));
1117        return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1118}
1119
1120static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1121                               __be32 daddr, __be32 saddr, const struct tcphdr *th)
1122{
1123        struct tcp_md5sig_pool *hp;
1124        struct hash_desc *desc;
1125
1126        hp = tcp_get_md5sig_pool();
1127        if (!hp)
1128                goto clear_hash_noput;
1129        desc = &hp->md5_desc;
1130
1131        if (crypto_hash_init(desc))
1132                goto clear_hash;
1133        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1134                goto clear_hash;
1135        if (tcp_md5_hash_header(hp, th))
1136                goto clear_hash;
1137        if (tcp_md5_hash_key(hp, key))
1138                goto clear_hash;
1139        if (crypto_hash_final(desc, md5_hash))
1140                goto clear_hash;
1141
1142        tcp_put_md5sig_pool();
1143        return 0;
1144
1145clear_hash:
1146        tcp_put_md5sig_pool();
1147clear_hash_noput:
1148        memset(md5_hash, 0, 16);
1149        return 1;
1150}
1151
1152int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1153                        const struct sock *sk, const struct request_sock *req,
1154                        const struct sk_buff *skb)
1155{
1156        struct tcp_md5sig_pool *hp;
1157        struct hash_desc *desc;
1158        const struct tcphdr *th = tcp_hdr(skb);
1159        __be32 saddr, daddr;
1160
1161        if (sk) {
1162                saddr = inet_sk(sk)->inet_saddr;
1163                daddr = inet_sk(sk)->inet_daddr;
1164        } else if (req) {
1165                saddr = inet_rsk(req)->loc_addr;
1166                daddr = inet_rsk(req)->rmt_addr;
1167        } else {
1168                const struct iphdr *iph = ip_hdr(skb);
1169                saddr = iph->saddr;
1170                daddr = iph->daddr;
1171        }
1172
1173        hp = tcp_get_md5sig_pool();
1174        if (!hp)
1175                goto clear_hash_noput;
1176        desc = &hp->md5_desc;
1177
1178        if (crypto_hash_init(desc))
1179                goto clear_hash;
1180
1181        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1182                goto clear_hash;
1183        if (tcp_md5_hash_header(hp, th))
1184                goto clear_hash;
1185        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1186                goto clear_hash;
1187        if (tcp_md5_hash_key(hp, key))
1188                goto clear_hash;
1189        if (crypto_hash_final(desc, md5_hash))
1190                goto clear_hash;
1191
1192        tcp_put_md5sig_pool();
1193        return 0;
1194
1195clear_hash:
1196        tcp_put_md5sig_pool();
1197clear_hash_noput:
1198        memset(md5_hash, 0, 16);
1199        return 1;
1200}
1201EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1202
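/* Editor's note: the digest built above covers, in this order: the IPv4
 * pseudo-header, the 20-byte TCP header with its checksum zeroed (options
 * are excluded by tcp_md5_hash_header()), the payload, and the key last.
 * A hedged sketch with a hypothetical md5_update()/md5_final() API, for
 * illustration only:
 *
 *	md5_update(&ctx, &pseudo, sizeof(pseudo));	// saddr, daddr, 0, proto, len
 *	struct tcphdr hdr = *th;
 *	hdr.check = 0;
 *	md5_update(&ctx, &hdr, sizeof(hdr));		// base header, no options
 *	md5_update(&ctx, payload, payload_len);		// segment data, if any
 *	md5_update(&ctx, key->key, key->keylen);	// shared secret goes last
 *	md5_final(&ctx, md5_hash);
 */
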
1203static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1204{
1205        /*
1206         * This gets called for each TCP segment that arrives
1207         * so we want to be efficient.
1208         * We have 3 drop cases:
1209         * o No MD5 hash and one expected.
1210         * o MD5 hash and we're not expecting one.
 1211         * o MD5 hash and it's wrong.
1212         */
1213        const __u8 *hash_location = NULL;
1214        struct tcp_md5sig_key *hash_expected;
1215        const struct iphdr *iph = ip_hdr(skb);
1216        const struct tcphdr *th = tcp_hdr(skb);
1217        int genhash;
1218        unsigned char newhash[16];
1219
1220        hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1221                                          AF_INET);
1222        hash_location = tcp_parse_md5sig_option(th);
1223
1224        /* We've parsed the options - do we have a hash? */
1225        if (!hash_expected && !hash_location)
1226                return false;
1227
1228        if (hash_expected && !hash_location) {
1229                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1230                return true;
1231        }
1232
1233        if (!hash_expected && hash_location) {
1234                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1235                return true;
1236        }
1237
 1238        /* Okay, we have both hash_expected and hash_location,
 1239         * so we need to calculate the checksum.
1240         */
1241        genhash = tcp_v4_md5_hash_skb(newhash,
1242                                      hash_expected,
1243                                      NULL, NULL, skb);
1244
1245        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1246                net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1247                                     &iph->saddr, ntohs(th->source),
1248                                     &iph->daddr, ntohs(th->dest),
1249                                     genhash ? " tcp_v4_calc_md5_hash failed"
1250                                     : "");
1251                return true;
1252        }
1253        return false;
1254}
1255
1256#endif
1257
1258struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1259        .family         =       PF_INET,
1260        .obj_size       =       sizeof(struct tcp_request_sock),
1261        .rtx_syn_ack    =       tcp_v4_rtx_synack,
1262        .send_ack       =       tcp_v4_reqsk_send_ack,
1263        .destructor     =       tcp_v4_reqsk_destructor,
1264        .send_reset     =       tcp_v4_send_reset,
1265        .syn_ack_timeout =      tcp_syn_ack_timeout,
1266};
1267
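/* Editor's note: these ops are invoked indirectly through the generic
 * request_sock layer; e.g. the SYN-ACK retransmit timer in the
 * inet_connection_sock code ends up doing roughly (sketch):
 *
 *	req->rsk_ops->rtx_syn_ack(parent, req, NULL);	// -> tcp_v4_rtx_synack()
 *
 * which is why tcp_v4_rtx_synack() bumps TCP_MIB_RETRANSSEGS itself.
 */
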
1268#ifdef CONFIG_TCP_MD5SIG
1269static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1270        .md5_lookup     =       tcp_v4_reqsk_md5_lookup,
1271        .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1272};
1273#endif
1274
1275int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1276{
1277        struct tcp_extend_values tmp_ext;
1278        struct tcp_options_received tmp_opt;
1279        const u8 *hash_location;
1280        struct request_sock *req;
1281        struct inet_request_sock *ireq;
1282        struct tcp_sock *tp = tcp_sk(sk);
1283        struct dst_entry *dst = NULL;
1284        __be32 saddr = ip_hdr(skb)->saddr;
1285        __be32 daddr = ip_hdr(skb)->daddr;
1286        __u32 isn = TCP_SKB_CB(skb)->when;
1287        bool want_cookie = false;
1288
 1289        /* Never answer SYNs sent to broadcast or multicast addresses */
1290        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1291                goto drop;
1292
1293        /* TW buckets are converted to open requests without
 1294         * limitation, since they conserve resources and the peer is
 1295         * evidently a real one.
1296         */
1297        if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1298                want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1299                if (!want_cookie)
1300                        goto drop;
1301        }
1302
 1303        /* The accept backlog is full. If we have already queued enough
 1304         * warm entries in the syn queue, drop the request. That is better
 1305         * than clogging the syn queue with openreqs with exponentially
 1306         * increasing timeouts.
1307         */
1308        if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1309                goto drop;
1310
1311        req = inet_reqsk_alloc(&tcp_request_sock_ops);
1312        if (!req)
1313                goto drop;
1314
1315#ifdef CONFIG_TCP_MD5SIG
1316        tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1317#endif
1318
1319        tcp_clear_options(&tmp_opt);
1320        tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1321        tmp_opt.user_mss  = tp->rx_opt.user_mss;
1322        tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
1323
1324        if (tmp_opt.cookie_plus > 0 &&
1325            tmp_opt.saw_tstamp &&
1326            !tp->rx_opt.cookie_out_never &&
1327            (sysctl_tcp_cookie_size > 0 ||
1328             (tp->cookie_values != NULL &&
1329              tp->cookie_values->cookie_desired > 0))) {
1330                u8 *c;
1331                u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1332                int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1333
1334                if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1335                        goto drop_and_release;
1336
1337                /* Secret recipe starts with IP addresses */
1338                *mess++ ^= (__force u32)daddr;
1339                *mess++ ^= (__force u32)saddr;
1340
1341                /* plus variable length Initiator Cookie */
1342                c = (u8 *)mess;
1343                while (l-- > 0)
1344                        *c++ ^= *hash_location++;
1345
1346                want_cookie = false;    /* not our kind of cookie */
1347                tmp_ext.cookie_out_never = 0; /* false */
1348                tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1349        } else if (!tp->rx_opt.cookie_in_always) {
1350                /* redundant indications, but ensure initialization. */
1351                tmp_ext.cookie_out_never = 1; /* true */
1352                tmp_ext.cookie_plus = 0;
1353        } else {
1354                goto drop_and_release;
1355        }
1356        tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1357
1358        if (want_cookie && !tmp_opt.saw_tstamp)
1359                tcp_clear_options(&tmp_opt);
1360
1361        tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1362        tcp_openreq_init(req, &tmp_opt, skb);
1363
1364        ireq = inet_rsk(req);
1365        ireq->loc_addr = daddr;
1366        ireq->rmt_addr = saddr;
1367        ireq->no_srccheck = inet_sk(sk)->transparent;
1368        ireq->opt = tcp_v4_save_options(sk, skb);
1369
1370        if (security_inet_conn_request(sk, skb, req))
1371                goto drop_and_free;
1372
1373        if (!want_cookie || tmp_opt.tstamp_ok)
1374                TCP_ECN_create_request(req, skb);
1375
1376        if (want_cookie) {
1377                isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1378                req->cookie_ts = tmp_opt.tstamp_ok;
1379        } else if (!isn) {
1380                struct flowi4 fl4;
1381
 1382                /* VJ's idea. We save the last timestamp seen
 1383                 * from the destination in the peer table when entering
 1384                 * the TIME-WAIT state, and check against it before
 1385                 * accepting a new connection request.
 1386                 *
 1387                 * If "isn" is not zero, this request hit a live
 1388                 * timewait bucket, so all the necessary checks
 1389                 * are made in the function processing the timewait state.
1390                 */
1391                if (tmp_opt.saw_tstamp &&
1392                    tcp_death_row.sysctl_tw_recycle &&
1393                    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1394                    fl4.daddr == saddr) {
1395                        if (!tcp_peer_is_proven(req, dst, true)) {
1396                                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1397                                goto drop_and_release;
1398                        }
1399                }
 1400                /* Kill the following clause if you dislike this approach. */
1401                else if (!sysctl_tcp_syncookies &&
1402                         (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1403                          (sysctl_max_syn_backlog >> 2)) &&
1404                         !tcp_peer_is_proven(req, dst, false)) {
 1405                        /* Without syncookies, the last quarter of the
 1406                         * backlog is filled with destinations
 1407                         * proven to be alive.
 1408                         * It means that we continue to communicate
 1409                         * with destinations already remembered
 1410                         * at the moment of the synflood.
1411                         */
1412                        LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
1413                                       &saddr, ntohs(tcp_hdr(skb)->source));
1414                        goto drop_and_release;
1415                }
1416
1417                isn = tcp_v4_init_sequence(skb);
1418        }
1419        tcp_rsk(req)->snt_isn = isn;
1420        tcp_rsk(req)->snt_synack = tcp_time_stamp;
1421
1422        if (tcp_v4_send_synack(sk, dst, req,
1423                               (struct request_values *)&tmp_ext,
1424                               skb_get_queue_mapping(skb),
1425                               want_cookie) ||
1426            want_cookie)
1427                goto drop_and_free;
1428
1429        inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1430        return 0;
1431
1432drop_and_release:
1433        dst_release(dst);
1434drop_and_free:
1435        reqsk_free(req);
1436drop:
1437        return 0;
1438}
1439EXPORT_SYMBOL(tcp_v4_conn_request);
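
/* A minimal sketch of the backlog-pressure test above: with syncookies
 * disabled, a request from an unproven peer is dropped once fewer than
 * a quarter of the SYN backlog slots remain free.  The helper name and
 * plain-int types are illustrative, not kernel API.
 */
static inline bool syn_backlog_nearly_full(int max_syn_backlog, int queue_len)
{
        /* fewer than max_syn_backlog / 4 free slots left? */
        return max_syn_backlog - queue_len < (max_syn_backlog >> 2);
}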
1440
1441
1442/*
1443 * The three-way handshake has completed - we got a valid ACK from the
1444 * client - now create the new socket.
1445 */
1446struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1447                                  struct request_sock *req,
1448                                  struct dst_entry *dst)
1449{
1450        struct inet_request_sock *ireq;
1451        struct inet_sock *newinet;
1452        struct tcp_sock *newtp;
1453        struct sock *newsk;
1454#ifdef CONFIG_TCP_MD5SIG
1455        struct tcp_md5sig_key *key;
1456#endif
1457        struct ip_options_rcu *inet_opt;
1458
1459        if (sk_acceptq_is_full(sk))
1460                goto exit_overflow;
1461
1462        newsk = tcp_create_openreq_child(sk, req, skb);
1463        if (!newsk)
1464                goto exit_nonewsk;
1465
1466        newsk->sk_gso_type = SKB_GSO_TCPV4;
1467        inet_sk_rx_dst_set(newsk, skb);
1468
1469        newtp                 = tcp_sk(newsk);
1470        newinet               = inet_sk(newsk);
1471        ireq                  = inet_rsk(req);
1472        newinet->inet_daddr   = ireq->rmt_addr;
1473        newinet->inet_rcv_saddr = ireq->loc_addr;
1474        newinet->inet_saddr           = ireq->loc_addr;
1475        inet_opt              = ireq->opt;
1476        rcu_assign_pointer(newinet->inet_opt, inet_opt);
1477        ireq->opt             = NULL;
1478        newinet->mc_index     = inet_iif(skb);
1479        newinet->mc_ttl       = ip_hdr(skb)->ttl;
1480        newinet->rcv_tos      = ip_hdr(skb)->tos;
1481        inet_csk(newsk)->icsk_ext_hdr_len = 0;
1482        if (inet_opt)
1483                inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1484        newinet->inet_id = newtp->write_seq ^ jiffies;
1485
1486        if (!dst) {
1487                dst = inet_csk_route_child_sock(sk, newsk, req);
1488                if (!dst)
1489                        goto put_and_exit;
1490        } else {
1491                /* syncookie case: see end of cookie_v4_check() */
1492        }
1493        sk_setup_caps(newsk, dst);
1494
1495        tcp_mtup_init(newsk);
1496        tcp_sync_mss(newsk, dst_mtu(dst));
1497        newtp->advmss = dst_metric_advmss(dst);
1498        if (tcp_sk(sk)->rx_opt.user_mss &&
1499            tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1500                newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1501
1502        tcp_initialize_rcv_mss(newsk);
1503        if (tcp_rsk(req)->snt_synack)
1504                tcp_valid_rtt_meas(newsk,
1505                    tcp_time_stamp - tcp_rsk(req)->snt_synack);
1506        newtp->total_retrans = req->retrans;
1507
1508#ifdef CONFIG_TCP_MD5SIG
1509        /* Copy over the MD5 key from the original socket */
1510        key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1511                                AF_INET);
1512        if (key != NULL) {
1513                /*
1514                 * We're using one, so create a matching key
1515                 * on the newsk structure. If we fail to get
1516                 * memory, then we end up not copying the key
1517                 * across. Shucks.
1518                 */
1519                tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1520                               AF_INET, key->key, key->keylen, GFP_ATOMIC);
1521                sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1522        }
1523#endif
1524
1525        if (__inet_inherit_port(sk, newsk) < 0)
1526                goto put_and_exit;
1527        __inet_hash_nolisten(newsk, NULL);
1528
1529        return newsk;
1530
1531exit_overflow:
1532        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1533exit_nonewsk:
1534        dst_release(dst);
1535exit:
1536        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1537        return NULL;
1538put_and_exit:
1539        tcp_clear_xmit_timers(newsk);
1540        tcp_cleanup_congestion_control(newsk);
1541        bh_unlock_sock(newsk);
1542        sock_put(newsk);
1543        goto exit;
1544}
1545EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
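
/* The advertised MSS chosen above is the route metric, capped by any
 * MSS the user configured on the listener via TCP_MAXSEG.  A standalone
 * restatement of that clamp; the helper name is illustrative:
 */
static inline unsigned int clamp_advmss(unsigned int route_advmss,
                                        unsigned int user_mss)
{
        /* user_mss == 0 means "never set", i.e. no cap */
        if (user_mss && user_mss < route_advmss)
                return user_mss;
        return route_advmss;
}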
1546
1547static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1548{
1549        struct tcphdr *th = tcp_hdr(skb);
1550        const struct iphdr *iph = ip_hdr(skb);
1551        struct sock *nsk;
1552        struct request_sock **prev;
1553        /* Find possible connection requests. */
1554        struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1555                                                       iph->saddr, iph->daddr);
1556        if (req)
1557                return tcp_check_req(sk, skb, req, prev);
1558
1559        nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1560                        th->source, iph->daddr, th->dest, inet_iif(skb));
1561
1562        if (nsk) {
1563                if (nsk->sk_state != TCP_TIME_WAIT) {
1564                        bh_lock_sock(nsk);
1565                        return nsk;
1566                }
1567                inet_twsk_put(inet_twsk(nsk));
1568                return NULL;
1569        }
1570
1571#ifdef CONFIG_SYN_COOKIES
1572        if (!th->syn)
1573                sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1574#endif
1575        return sk;
1576}
1577
1578static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1579{
1580        const struct iphdr *iph = ip_hdr(skb);
1581
1582        if (skb->ip_summed == CHECKSUM_COMPLETE) {
1583                if (!tcp_v4_check(skb->len, iph->saddr,
1584                                  iph->daddr, skb->csum)) {
1585                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1586                        return 0;
1587                }
1588        }
1589
1590        skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1591                                       skb->len, IPPROTO_TCP, 0);
1592
1593        if (skb->len <= 76) {
1594                return __skb_checksum_complete(skb);
1595        }
1596        return 0;
1597}
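
/* A user-space sketch of what tcp_v4_check()/csum_tcpudp_nofold()
 * compute above: the IPv4 pseudo-header plus TCP segment checksum,
 * folded per RFC 1071.  Assumes "seg" holds the TCP header (with its
 * checksum field zeroed) plus payload and that len fits in 16 bits;
 * store the result in the header in network byte order.  Names here
 * are illustrative.
 */
#include <stdint.h>
#include <stddef.h>

static uint32_t csum_add_bytes(uint32_t sum, const uint8_t *p, size_t len)
{
        while (len > 1) {
                sum += ((uint32_t)p[0] << 8) | p[1];    /* 16-bit BE words */
                p += 2;
                len -= 2;
        }
        if (len)
                sum += (uint32_t)p[0] << 8;             /* pad odd byte */
        return sum;
}

static uint16_t tcp4_checksum(const uint8_t saddr[4], const uint8_t daddr[4],
                              const uint8_t *seg, size_t len)
{
        uint32_t sum = 0;

        /* pseudo-header: source, destination, zero+protocol, TCP length */
        sum = csum_add_bytes(sum, saddr, 4);
        sum = csum_add_bytes(sum, daddr, 4);
        sum += 6;                       /* IPPROTO_TCP */
        sum += (uint32_t)len;           /* TCP header + payload length */

        sum = csum_add_bytes(sum, seg, len);

        while (sum >> 16)               /* fold carries (RFC 1071) */
                sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
}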
1598
1599
1600/* The socket must have its spinlock held when we get
1601 * here.
1602 *
1603 * We have a potential double-lock case here, so even when
1604 * doing backlog processing we use the BH locking scheme.
1605 * This is because we cannot sleep with the original spinlock
1606 * held.
1607 */
1608int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1609{
1610        struct sock *rsk;
1611#ifdef CONFIG_TCP_MD5SIG
1612        /*
1613         * We really want to reject the packet as early as possible
1614         * if:
1615         *  o We're expecting an MD5'd packet and there is no MD5 TCP option
1616         *  o There is an MD5 option and we're not expecting one
1617         */
1618        if (tcp_v4_inbound_md5_hash(sk, skb))
1619                goto discard;
1620#endif
1621
1622        if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1623                struct dst_entry *dst = sk->sk_rx_dst;
1624
1625                sock_rps_save_rxhash(sk, skb);
1626                if (dst) {
1627                        if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1628                            dst->ops->check(dst, 0) == NULL) {
1629                                dst_release(dst);
1630                                sk->sk_rx_dst = NULL;
1631                        }
1632                }
1633                if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1634                        rsk = sk;
1635                        goto reset;
1636                }
1637                return 0;
1638        }
1639
1640        if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1641                goto csum_err;
1642
1643        if (sk->sk_state == TCP_LISTEN) {
1644                struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1645                if (!nsk)
1646                        goto discard;
1647
1648                if (nsk != sk) {
1649                        sock_rps_save_rxhash(nsk, skb);
1650                        if (tcp_child_process(sk, nsk, skb)) {
1651                                rsk = nsk;
1652                                goto reset;
1653                        }
1654                        return 0;
1655                }
1656        } else
1657                sock_rps_save_rxhash(sk, skb);
1658
1659        if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1660                rsk = sk;
1661                goto reset;
1662        }
1663        return 0;
1664
1665reset:
1666        tcp_v4_send_reset(rsk, skb);
1667discard:
1668        kfree_skb(skb);
1669        /* Be careful here. If this function gets more complicated and
1670         * gcc suffers from register pressure on the x86, sk (in %ebx)
1671         * might be destroyed here. This current version compiles correctly,
1672         * but you have been warned.
1673         */
1674        return 0;
1675
1676csum_err:
1677        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1678        goto discard;
1679}
1680EXPORT_SYMBOL(tcp_v4_do_rcv);
1681
1682void tcp_v4_early_demux(struct sk_buff *skb)
1683{
1684        struct net *net = dev_net(skb->dev);
1685        const struct iphdr *iph;
1686        const struct tcphdr *th;
1687        struct sock *sk;
1688
1689        if (skb->pkt_type != PACKET_HOST)
1690                return;
1691
1692        if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr)))
1693                return;
1694
1695        iph = ip_hdr(skb);
1696        th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb));
1697
1698        if (th->doff < sizeof(struct tcphdr) / 4)
1699                return;
1700
1701        sk = __inet_lookup_established(net, &tcp_hashinfo,
1702                                       iph->saddr, th->source,
1703                                       iph->daddr, ntohs(th->dest),
1704                                       skb->skb_iif);
1705        if (sk) {
1706                skb->sk = sk;
1707                skb->destructor = sock_edemux;
1708                if (sk->sk_state != TCP_TIME_WAIT) {
1709                        struct dst_entry *dst = sk->sk_rx_dst;
1710
1711                        if (dst)
1712                                dst = dst_check(dst, 0);
1713                        if (dst &&
1714                            inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1715                                skb_dst_set_noref(skb, dst);
1716                }
1717        }
1718}
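
/* Both receive paths validate th->doff, the TCP header length in
 * 32-bit words: anything below sizeof(struct tcphdr) / 4 == 5 words
 * (a 20-byte header) is malformed.  A standalone sketch of the same
 * test; the helper name is illustrative:
 */
static inline bool tcp_doff_valid(unsigned int doff, size_t bytes_available)
{
        size_t hdrlen = doff * 4;       /* words -> bytes */

        return doff >= 5 && bytes_available >= hdrlen;
}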
1719
1720/*
1721 *      From tcp_input.c
1722 */
1723
1724int tcp_v4_rcv(struct sk_buff *skb)
1725{
1726        const struct iphdr *iph;
1727        const struct tcphdr *th;
1728        struct sock *sk;
1729        int ret;
1730        struct net *net = dev_net(skb->dev);
1731
1732        if (skb->pkt_type != PACKET_HOST)
1733                goto discard_it;
1734
1735        /* Count it even if it's bad */
1736        TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1737
1738        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1739                goto discard_it;
1740
1741        th = tcp_hdr(skb);
1742
1743        if (th->doff < sizeof(struct tcphdr) / 4)
1744                goto bad_packet;
1745        if (!pskb_may_pull(skb, th->doff * 4))
1746                goto discard_it;
1747
1748        /* An explanation is required here, I think.
1749         * Packet length and doff are validated by header prediction,
1750         * provided the case of th->doff==0 is eliminated.
1751         * So, we defer the checks. */
1752        if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1753                goto bad_packet;
1754
1755        th = tcp_hdr(skb);
1756        iph = ip_hdr(skb);
1757        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1758        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1759                                    skb->len - th->doff * 4);
1760        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1761        TCP_SKB_CB(skb)->when    = 0;
1762        TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1763        TCP_SKB_CB(skb)->sacked  = 0;
1764
1765        sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1766        if (!sk)
1767                goto no_tcp_socket;
1768
1769process:
1770        if (sk->sk_state == TCP_TIME_WAIT)
1771                goto do_time_wait;
1772
1773        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1774                NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1775                goto discard_and_relse;
1776        }
1777
1778        if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1779                goto discard_and_relse;
1780        nf_reset(skb);
1781
1782        if (sk_filter(sk, skb))
1783                goto discard_and_relse;
1784
1785        skb->dev = NULL;
1786
1787        bh_lock_sock_nested(sk);
1788        ret = 0;
1789        if (!sock_owned_by_user(sk)) {
1790#ifdef CONFIG_NET_DMA
1791                struct tcp_sock *tp = tcp_sk(sk);
1792                if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1793                        tp->ucopy.dma_chan = net_dma_find_channel();
1794                if (tp->ucopy.dma_chan)
1795                        ret = tcp_v4_do_rcv(sk, skb);
1796                else
1797#endif
1798                {
1799                        if (!tcp_prequeue(sk, skb))
1800                                ret = tcp_v4_do_rcv(sk, skb);
1801                }
1802        } else if (unlikely(sk_add_backlog(sk, skb,
1803                                           sk->sk_rcvbuf + sk->sk_sndbuf))) {
1804                bh_unlock_sock(sk);
1805                NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1806                goto discard_and_relse;
1807        }
1808        bh_unlock_sock(sk);
1809
1810        sock_put(sk);
1811
1812        return ret;
1813
1814no_tcp_socket:
1815        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1816                goto discard_it;
1817
1818        if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1819bad_packet:
1820                TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1821        } else {
1822                tcp_v4_send_reset(NULL, skb);
1823        }
1824
1825discard_it:
1826        /* Discard frame. */
1827        kfree_skb(skb);
1828        return 0;
1829
1830discard_and_relse:
1831        sock_put(sk);
1832        goto discard_it;
1833
1834do_time_wait:
1835        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1836                inet_twsk_put(inet_twsk(sk));
1837                goto discard_it;
1838        }
1839
1840        if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1841                TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1842                inet_twsk_put(inet_twsk(sk));
1843                goto discard_it;
1844        }
1845        switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1846        case TCP_TW_SYN: {
1847                struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1848                                                        &tcp_hashinfo,
1849                                                        iph->daddr, th->dest,
1850                                                        inet_iif(skb));
1851                if (sk2) {
1852                        inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1853                        inet_twsk_put(inet_twsk(sk));
1854                        sk = sk2;
1855                        goto process;
1856                }
1857                /* Fall through to ACK */
1858        }
1859        case TCP_TW_ACK:
1860                tcp_v4_timewait_ack(sk, skb);
1861                break;
1862        case TCP_TW_RST:
1863                goto no_tcp_socket;
1864        case TCP_TW_SUCCESS:;
1865        }
1866        goto discard_it;
1867}
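
/* tcp_v4_rcv() above fills TCP_SKB_CB(skb)->end_seq straight from the
 * header: SYN and FIN each consume one sequence number, and data
 * consumes its own length.  A standalone restatement with an
 * illustrative helper name:
 */
static inline uint32_t tcp_end_seq(uint32_t seq, unsigned int syn,
                                   unsigned int fin, uint32_t payload_len)
{
        /* end_seq = seq + SYN flag + FIN flag + payload bytes */
        return seq + !!syn + !!fin + payload_len;
}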
1868
1869static struct timewait_sock_ops tcp_timewait_sock_ops = {
1870        .twsk_obj_size  = sizeof(struct tcp_timewait_sock),
1871        .twsk_unique    = tcp_twsk_unique,
1872        .twsk_destructor = tcp_twsk_destructor,
1873};
1874
1875void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
1876{
1877        struct dst_entry *dst = skb_dst(skb);
1878
1879        dst_hold(dst);
1880        sk->sk_rx_dst = dst;
1881        inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1882}
1883EXPORT_SYMBOL(inet_sk_rx_dst_set);
1884
1885const struct inet_connection_sock_af_ops ipv4_specific = {
1886        .queue_xmit        = ip_queue_xmit,
1887        .send_check        = tcp_v4_send_check,
1888        .rebuild_header    = inet_sk_rebuild_header,
1889        .sk_rx_dst_set     = inet_sk_rx_dst_set,
1890        .conn_request      = tcp_v4_conn_request,
1891        .syn_recv_sock     = tcp_v4_syn_recv_sock,
1892        .net_header_len    = sizeof(struct iphdr),
1893        .setsockopt        = ip_setsockopt,
1894        .getsockopt        = ip_getsockopt,
1895        .addr2sockaddr     = inet_csk_addr2sockaddr,
1896        .sockaddr_len      = sizeof(struct sockaddr_in),
1897        .bind_conflict     = inet_csk_bind_conflict,
1898#ifdef CONFIG_COMPAT
1899        .compat_setsockopt = compat_ip_setsockopt,
1900        .compat_getsockopt = compat_ip_getsockopt,
1901#endif
1902};
1903EXPORT_SYMBOL(ipv4_specific);
1904
1905#ifdef CONFIG_TCP_MD5SIG
1906static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1907        .md5_lookup             = tcp_v4_md5_lookup,
1908        .calc_md5_hash          = tcp_v4_md5_hash_skb,
1909        .md5_parse              = tcp_v4_parse_md5_keys,
1910};
1911#endif
1912
1913/* NOTE: A lot of things are set to zero explicitly by the call to
1914 *       sk_alloc(), so they need not be done here.
1915 */
1916static int tcp_v4_init_sock(struct sock *sk)
1917{
1918        struct inet_connection_sock *icsk = inet_csk(sk);
1919
1920        tcp_init_sock(sk);
1921
1922        icsk->icsk_af_ops = &ipv4_specific;
1923
1924#ifdef CONFIG_TCP_MD5SIG
1925        tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1926#endif
1927
1928        return 0;
1929}
1930
1931void tcp_v4_destroy_sock(struct sock *sk)
1932{
1933        struct tcp_sock *tp = tcp_sk(sk);
1934
1935        tcp_clear_xmit_timers(sk);
1936
1937        tcp_cleanup_congestion_control(sk);
1938
1939        /* Clean up the write buffer. */
1940        tcp_write_queue_purge(sk);
1941
1942        /* Cleans up our, hopefully empty, out_of_order_queue. */
1943        __skb_queue_purge(&tp->out_of_order_queue);
1944
1945#ifdef CONFIG_TCP_MD5SIG
1946        /* Clean up the MD5 key list, if any */
1947        if (tp->md5sig_info) {
1948                tcp_clear_md5_list(sk);
1949                kfree_rcu(tp->md5sig_info, rcu);
1950                tp->md5sig_info = NULL;
1951        }
1952#endif
1953
1954#ifdef CONFIG_NET_DMA
1955        /* Cleans up our sk_async_wait_queue */
1956        __skb_queue_purge(&sk->sk_async_wait_queue);
1957#endif
1958
1959        /* Clean the prequeue; it should already be empty. */
1960        __skb_queue_purge(&tp->ucopy.prequeue);
1961
1962        /* Clean up a referenced TCP bind bucket. */
1963        if (inet_csk(sk)->icsk_bind_hash)
1964                inet_put_port(sk);
1965
1966        /*
1967         * If a cached sendmsg page exists, toss it.
1968         */
1969        if (sk->sk_sndmsg_page) {
1970                __free_page(sk->sk_sndmsg_page);
1971                sk->sk_sndmsg_page = NULL;
1972        }
1973
1974        /* TCP Cookie Transactions */
1975        if (tp->cookie_values != NULL) {
1976                kref_put(&tp->cookie_values->kref,
1977                         tcp_cookie_values_release);
1978                tp->cookie_values = NULL;
1979        }
1980
1981        /* If socket is aborted during connect operation */
1982        tcp_free_fastopen_req(tp);
1983
1984        sk_sockets_allocated_dec(sk);
1985        sock_release_memcg(sk);
1986}
1987EXPORT_SYMBOL(tcp_v4_destroy_sock);
1988
1989#ifdef CONFIG_PROC_FS
1990/* Proc filesystem TCP sock list dumping. */
1991
1992static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1993{
1994        return hlist_nulls_empty(head) ? NULL :
1995                list_entry(head->first, struct inet_timewait_sock, tw_node);
1996}
1997
1998static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1999{
2000        return !is_a_nulls(tw->tw_node.next) ?
2001                hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
2002}
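
/* The nulls lists walked above end in a "nulls" marker rather than
 * NULL: an odd pointer value carrying the bucket id in its upper bits,
 * so a lockless reader can detect that an entry moved to another chain
 * mid-walk.  A sketch of the encoding (cf. include/linux/list_nulls.h);
 * helper names are illustrative:
 */
static inline bool ptr_is_a_nulls(const void *next)
{
        /* real nodes are at least 2-byte aligned, so bit 0 is free */
        return ((unsigned long)next & 1UL) != 0;
}

static inline unsigned long ptr_nulls_value(const void *next)
{
        return (unsigned long)next >> 1;        /* the stored bucket id */
}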
2003
2004/*
2005 * Get the next listener socket following cur.  If cur is NULL, get the first
2006 * socket starting from the bucket given in st->bucket; when st->bucket is zero the
2007 * very first socket in the hash table is returned.
2008 */
2009static void *listening_get_next(struct seq_file *seq, void *cur)
2010{
2011        struct inet_connection_sock *icsk;
2012        struct hlist_nulls_node *node;
2013        struct sock *sk = cur;
2014        struct inet_listen_hashbucket *ilb;
2015        struct tcp_iter_state *st = seq->private;
2016        struct net *net = seq_file_net(seq);
2017
2018        if (!sk) {
2019                ilb = &tcp_hashinfo.listening_hash[st->bucket];
2020                spin_lock_bh(&ilb->lock);
2021                sk = sk_nulls_head(&ilb->head);
2022                st->offset = 0;
2023                goto get_sk;
2024        }
2025        ilb = &tcp_hashinfo.listening_hash[st->bucket];
2026        ++st->num;
2027        ++st->offset;
2028
2029        if (st->state == TCP_SEQ_STATE_OPENREQ) {
2030                struct request_sock *req = cur;
2031
2032                icsk = inet_csk(st->syn_wait_sk);
2033                req = req->dl_next;
2034                while (1) {
2035                        while (req) {
2036                                if (req->rsk_ops->family == st->family) {
2037                                        cur = req;
2038                                        goto out;
2039                                }
2040                                req = req->dl_next;
2041                        }
2042                        if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2043                                break;
2044get_req:
2045                        req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2046                }
2047                sk        = sk_nulls_next(st->syn_wait_sk);
2048                st->state = TCP_SEQ_STATE_LISTENING;
2049                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2050        } else {
2051                icsk = inet_csk(sk);
2052                read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2053                if (reqsk_queue_len(&icsk->icsk_accept_queue))
2054                        goto start_req;
2055                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2056                sk = sk_nulls_next(sk);
2057        }
2058get_sk:
2059        sk_nulls_for_each_from(sk, node) {
2060                if (!net_eq(sock_net(sk), net))
2061                        continue;
2062                if (sk->sk_family == st->family) {
2063                        cur = sk;
2064                        goto out;
2065                }
2066                icsk = inet_csk(sk);
2067                read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2068                if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2069start_req:
2070                        st->uid         = sock_i_uid(sk);
2071                        st->syn_wait_sk = sk;
2072                        st->state       = TCP_SEQ_STATE_OPENREQ;
2073                        st->sbucket     = 0;
2074                        goto get_req;
2075                }
2076                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2077        }
2078        spin_unlock_bh(&ilb->lock);
2079        st->offset = 0;
2080        if (++st->bucket < INET_LHTABLE_SIZE) {
2081                ilb = &tcp_hashinfo.listening_hash[st->bucket];
2082                spin_lock_bh(&ilb->lock);
2083                sk = sk_nulls_head(&ilb->head);
2084                goto get_sk;
2085        }
2086        cur = NULL;
2087out:
2088        return cur;
2089}
2090
2091static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2092{
2093        struct tcp_iter_state *st = seq->private;
2094        void *rc;
2095
2096        st->bucket = 0;
2097        st->offset = 0;
2098        rc = listening_get_next(seq, NULL);
2099
2100        while (rc && *pos) {
2101                rc = listening_get_next(seq, rc);
2102                --*pos;
2103        }
2104        return rc;
2105}
2106
2107static inline bool empty_bucket(struct tcp_iter_state *st)
2108{
2109        return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2110                hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2111}
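
/* ehash_mask, used as the bucket bound here, is (table size - 1) for a
 * power-of-two established hash, so mapping a flow hash to its bucket
 * is a single AND.  A sketch with an illustrative helper name:
 */
static inline unsigned int ehash_bucket_of(uint32_t hash,
                                           unsigned int ehash_mask)
{
        return hash & ehash_mask;       /* table holds ehash_mask + 1 buckets */
}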
2112
2113/*
2114 * Get the first established socket, starting from the bucket given in st->bucket.
2115 * If st->bucket is zero, the very first socket in the hash is returned.
2116 */
2117static void *established_get_first(struct seq_file *seq)
2118{
2119        struct tcp_iter_state *st = seq->private;
2120        struct net *net = seq_file_net(seq);
2121        void *rc = NULL;
2122
2123        st->offset = 0;
2124        for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2125                struct sock *sk;
2126                struct hlist_nulls_node *node;
2127                struct inet_timewait_sock *tw;
2128                spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2129
2130                /* Lockless fast path for the common case of empty buckets */
2131                if (empty_bucket(st))
2132                        continue;
2133
2134                spin_lock_bh(lock);
2135                sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2136                        if (sk->sk_family != st->family ||
2137                            !net_eq(sock_net(sk), net)) {
2138                                continue;
2139                        }
2140                        rc = sk;
2141                        goto out;
2142                }
2143                st->state = TCP_SEQ_STATE_TIME_WAIT;
2144                inet_twsk_for_each(tw, node,
2145                                   &tcp_hashinfo.ehash[st->bucket].twchain) {
2146                        if (tw->tw_family != st->family ||
2147                            !net_eq(twsk_net(tw), net)) {
2148                                continue;
2149                        }
2150                        rc = tw;
2151                        goto out;
2152                }
2153                spin_unlock_bh(lock);
2154                st->state = TCP_SEQ_STATE_ESTABLISHED;
2155        }
2156out:
2157        return rc;
2158}
2159
2160static void *established_get_next(struct seq_file *seq, void *cur)
2161{
2162        struct sock *sk = cur;
2163        struct inet_timewait_sock *tw;
2164        struct hlist_nulls_node *node;
2165        struct tcp_iter_state *st = seq->private;
2166        struct net *net = seq_file_net(seq);
2167
2168        ++st->num;
2169        ++st->offset;
2170
2171        if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2172                tw = cur;
2173                tw = tw_next(tw);
2174get_tw:
2175                while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2176                        tw = tw_next(tw);
2177                }
2178                if (tw) {
2179                        cur = tw;
2180                        goto out;
2181                }
2182                spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2183                st->state = TCP_SEQ_STATE_ESTABLISHED;
2184
2185                /* Look for the next non-empty bucket */
2186                st->offset = 0;
2187                while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2188                                empty_bucket(st))
2189                        ;
2190                if (st->bucket > tcp_hashinfo.ehash_mask)
2191                        return NULL;
2192
2193                spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2194                sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2195        } else
2196                sk = sk_nulls_next(sk);
2197
2198        sk_nulls_for_each_from(sk, node) {
2199                if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2200                        goto found;
2201        }
2202
2203        st->state = TCP_SEQ_STATE_TIME_WAIT;
2204        tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2205        goto get_tw;
2206found:
2207        cur = sk;
2208out:
2209        return cur;
2210}
2211
2212static void *established_get_idx(struct seq_file *seq, loff_t pos)
2213{
2214        struct tcp_iter_state *st = seq->private;
2215        void *rc;
2216
2217        st->bucket = 0;
2218        rc = established_get_first(seq);
2219
2220        while (rc && pos) {
2221                rc = established_get_next(seq, rc);
2222                --pos;
2223        }
2224        return rc;
2225}
2226
2227static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2228{
2229        void *rc;
2230        struct tcp_iter_state *st = seq->private;
2231
2232        st->state = TCP_SEQ_STATE_LISTENING;
2233        rc        = listening_get_idx(seq, &pos);
2234
2235        if (!rc) {
2236                st->state = TCP_SEQ_STATE_ESTABLISHED;
2237                rc        = established_get_idx(seq, pos);
2238        }
2239
2240        return rc;
2241}
2242
2243static void *tcp_seek_last_pos(struct seq_file *seq)
2244{
2245        struct tcp_iter_state *st = seq->private;
2246        int offset = st->offset;
2247        int orig_num = st->num;
2248        void *rc = NULL;
2249
2250        switch (st->state) {
2251        case TCP_SEQ_STATE_OPENREQ:
2252        case TCP_SEQ_STATE_LISTENING:
2253                if (st->bucket >= INET_LHTABLE_SIZE)
2254                        break;
2255                st->state = TCP_SEQ_STATE_LISTENING;
2256                rc = listening_get_next(seq, NULL);
2257                while (offset-- && rc)
2258                        rc = listening_get_next(seq, rc);
2259                if (rc)
2260                        break;
2261                st->bucket = 0;
2262                /* Fallthrough */
2263        case TCP_SEQ_STATE_ESTABLISHED:
2264        case TCP_SEQ_STATE_TIME_WAIT:
2265                st->state = TCP_SEQ_STATE_ESTABLISHED;
2266                if (st->bucket > tcp_hashinfo.ehash_mask)
2267                        break;
2268                rc = established_get_first(seq);
2269                while (offset-- && rc)
2270                        rc = established_get_next(seq, rc);
2271        }
2272
2273        st->num = orig_num;
2274
2275        return rc;
2276}
2277
2278static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2279{
2280        struct tcp_iter_state *st = seq->private;
2281        void *rc;
2282
2283        if (*pos && *pos == st->last_pos) {
2284                rc = tcp_seek_last_pos(seq);
2285                if (rc)
2286                        goto out;
2287        }
2288
2289        st->state = TCP_SEQ_STATE_LISTENING;
2290        st->num = 0;
2291        st->bucket = 0;
2292        st->offset = 0;
2293        rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2294
2295out:
2296        st->last_pos = *pos;
2297        return rc;
2298}
2299
2300static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2301{
2302        struct tcp_iter_state *st = seq->private;
2303        void *rc = NULL;
2304
2305        if (v == SEQ_START_TOKEN) {
2306                rc = tcp_get_idx(seq, 0);
2307                goto out;
2308        }
2309
2310        switch (st->state) {
2311        case TCP_SEQ_STATE_OPENREQ:
2312        case TCP_SEQ_STATE_LISTENING:
2313                rc = listening_get_next(seq, v);
2314                if (!rc) {
2315                        st->state = TCP_SEQ_STATE_ESTABLISHED;
2316                        st->bucket = 0;
2317                        st->offset = 0;
2318                        rc        = established_get_first(seq);
2319                }
2320                break;
2321        case TCP_SEQ_STATE_ESTABLISHED:
2322        case TCP_SEQ_STATE_TIME_WAIT:
2323                rc = established_get_next(seq, v);
2324                break;
2325        }
2326out:
2327        ++*pos;
2328        st->last_pos = *pos;
2329        return rc;
2330}
2331
2332static void tcp_seq_stop(struct seq_file *seq, void *v)
2333{
2334        struct tcp_iter_state *st = seq->private;
2335
2336        switch (st->state) {
2337        case TCP_SEQ_STATE_OPENREQ:
2338                if (v) {
2339                        struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2340                        read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2341                }
                /* Fallthrough */
2342        case TCP_SEQ_STATE_LISTENING:
2343                if (v != SEQ_START_TOKEN)
2344                        spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2345                break;
2346        case TCP_SEQ_STATE_TIME_WAIT:
2347        case TCP_SEQ_STATE_ESTABLISHED:
2348                if (v)
2349                        spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2350                break;
2351        }
2352}
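
/* The three handlers above follow the standard seq_file contract:
 * start() positions the cursor at *pos (returning SEQ_START_TOKEN for
 * the header row), next() advances it and bumps *pos, and stop() drops
 * whatever lock start()/next() left held.  A minimal sketch of that
 * contract over a hypothetical fixed demo_items[] array, with nothing
 * to lock:
 */
static int demo_items[] = { 1, 2, 3 };

static void *demo_seq_start(struct seq_file *seq, loff_t *pos)
{
        if (*pos >= ARRAY_SIZE(demo_items))
                return NULL;
        return &demo_items[*pos];
}

static void *demo_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        ++*pos;
        if (*pos >= ARRAY_SIZE(demo_items))
                return NULL;
        return &demo_items[*pos];
}

static void demo_seq_stop(struct seq_file *seq, void *v)
{
        /* the TCP iterator unlocks its hash bucket here; nothing to do */
}

static int demo_seq_show(struct seq_file *seq, void *v)
{
        seq_printf(seq, "%d\n", *(int *)v);
        return 0;
}

static const struct seq_operations demo_seq_ops = {
        .start = demo_seq_start,
        .next  = demo_seq_next,
        .stop  = demo_seq_stop,
        .show  = demo_seq_show,
};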
2353
2354int tcp_seq_open(struct inode *inode, struct file *file)
2355{
2356        struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2357        struct tcp_iter_state *s;
2358        int err;
2359
2360        err = seq_open_net(inode, file, &afinfo->seq_ops,
2361                          sizeof(struct tcp_iter_state));
2362        if (err < 0)
2363                return err;
2364
2365        s = ((struct seq_file *)file->private_data)->private;
2366        s->family               = afinfo->family;
2367        s->last_pos             = 0;
2368        return 0;
2369}
2370EXPORT_SYMBOL(tcp_seq_open);
2371
2372int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2373{
2374        int rc = 0;
2375        struct proc_dir_entry *p;
2376
2377        afinfo->seq_ops.start           = tcp_seq_start;
2378        afinfo->seq_ops.next            = tcp_seq_next;
2379        afinfo->seq_ops.stop            = tcp_seq_stop;
2380
2381        p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2382                             afinfo->seq_fops, afinfo);
2383        if (!p)
2384                rc = -ENOMEM;
2385        return rc;
2386}
2387EXPORT_SYMBOL(tcp_proc_register);
2388
2389void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2390{
2391        proc_net_remove(net, afinfo->name);
2392}
2393EXPORT_SYMBOL(tcp_proc_unregister);
2394
2395static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2396                         struct seq_file *f, int i, int uid, int *len)
2397{
2398        const struct inet_request_sock *ireq = inet_rsk(req);
2399        int ttd = req->expires - jiffies;
2400
2401        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2402                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
2403                i,
2404                ireq->loc_addr,
2405                ntohs(inet_sk(sk)->inet_sport),
2406                ireq->rmt_addr,
2407                ntohs(ireq->rmt_port),
2408                TCP_SYN_RECV,
2409                0, 0, /* could print option size, but that is af dependent. */
2410                1,    /* timers active (only the expire timer) */
2411                jiffies_to_clock_t(ttd),
2412                req->retrans,
2413                uid,
2414                0,  /* non-standard timer */
2415                0, /* open_requests have no inode */
2416                atomic_read(&sk->sk_refcnt),
2417                req,
2418                len);
2419}
2420
2421static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2422{
2423        int timer_active;
2424        unsigned long timer_expires;
2425        const struct tcp_sock *tp = tcp_sk(sk);
2426        const struct inet_connection_sock *icsk = inet_csk(sk);
2427        const struct inet_sock *inet = inet_sk(sk);
2428        __be32 dest = inet->inet_daddr;
2429        __be32 src = inet->inet_rcv_saddr;
2430        __u16 destp = ntohs(inet->inet_dport);
2431        __u16 srcp = ntohs(inet->inet_sport);
2432        int rx_queue;
2433
2434        if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2435                timer_active    = 1;
2436                timer_expires   = icsk->icsk_timeout;
2437        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2438                timer_active    = 4;
2439                timer_expires   = icsk->icsk_timeout;
2440        } else if (timer_pending(&sk->sk_timer)) {
2441                timer_active    = 2;
2442                timer_expires   = sk->sk_timer.expires;
2443        } else {
2444                timer_active    = 0;
2445                timer_expires = jiffies;
2446        }
2447
2448        if (sk->sk_state == TCP_LISTEN)
2449                rx_queue = sk->sk_ack_backlog;
2450        else
2451                /*
2452                 * Because we don't lock the socket, we might find a transient negative value.
2453                 */
2454                rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2455
2456        seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2457                        "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
2458                i, src, srcp, dest, destp, sk->sk_state,
2459                tp->write_seq - tp->snd_una,
2460                rx_queue,
2461                timer_active,
2462                jiffies_to_clock_t(timer_expires - jiffies),
2463                icsk->icsk_retransmits,
2464                sock_i_uid(sk),
2465                icsk->icsk_probes_out,
2466                sock_i_ino(sk),
2467                atomic_read(&sk->sk_refcnt), sk,
2468                jiffies_to_clock_t(icsk->icsk_rto),
2469                jiffies_to_clock_t(icsk->icsk_ack.ato),
2470                (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2471                tp->snd_cwnd,
2472                tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2473                len);
2474}
2475
2476static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2477                               struct seq_file *f, int i, int *len)
2478{
2479        __be32 dest, src;
2480        __u16 destp, srcp;
2481        int ttd = tw->tw_ttd - jiffies;
2482
2483        if (ttd < 0)
2484                ttd = 0;
2485
2486        dest  = tw->tw_daddr;
2487        src   = tw->tw_rcv_saddr;
2488        destp = ntohs(tw->tw_dport);
2489        srcp  = ntohs(tw->tw_sport);
2490
2491        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2492                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
2493                i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2494                3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2495                atomic_read(&tw->tw_refcnt), tw, len);
2496}
2497
2498#define TMPSZ 150
2499
2500static int tcp4_seq_show(struct seq_file *seq, void *v)
2501{
2502        struct tcp_iter_state *st;
2503        int len;
2504
2505        if (v == SEQ_START_TOKEN) {
2506                seq_printf(seq, "%-*s\n", TMPSZ - 1,
2507                           "  sl  local_address rem_address   st tx_queue "
2508                           "rx_queue tr tm->when retrnsmt   uid  timeout "
2509                           "inode");
2510                goto out;
2511        }
2512        st = seq->private;
2513
2514        switch (st->state) {
2515        case TCP_SEQ_STATE_LISTENING:
2516        case TCP_SEQ_STATE_ESTABLISHED:
2517                get_tcp4_sock(v, seq, st->num, &len);
2518                break;
2519        case TCP_SEQ_STATE_OPENREQ:
2520                get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2521                break;
2522        case TCP_SEQ_STATE_TIME_WAIT:
2523                get_timewait4_sock(v, seq, st->num, &len);
2524                break;
2525        }
2526        seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2527out:
2528        return 0;
2529}
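
/* The printers above emit each endpoint as the raw __be32/__be16 bit
 * pattern in "%08X:%04X" form, so 127.0.0.1:22 reads as 0100007F:0016
 * on a little-endian box.  A user-space sketch of parsing one data row
 * of /proc/net/tcp back (skip the header line first); this works
 * because /proc is read on the machine that printed it, so the scanned
 * value already has the right in-memory byte order.  Names are
 * illustrative, and unsigned int is assumed to be 32 bits.
 */
#include <stdio.h>
#include <arpa/inet.h>

static int parse_proc_tcp_line(const char *line)
{
        unsigned int sl, local, lport, remote, rport, state;
        struct in_addr a;
        char buf[INET_ADDRSTRLEN];

        if (sscanf(line, "%u: %X:%X %X:%X %X",
                   &sl, &local, &lport, &remote, &rport, &state) != 6)
                return -1;

        a.s_addr = local;       /* already the __be32 bit pattern */
        printf("local %s:%u state %02X\n",
               inet_ntop(AF_INET, &a, buf, sizeof(buf)), lport, state);
        return 0;
}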
2530
2531static const struct file_operations tcp_afinfo_seq_fops = {
2532        .owner   = THIS_MODULE,
2533        .open    = tcp_seq_open,
2534        .read    = seq_read,
2535        .llseek  = seq_lseek,
2536        .release = seq_release_net
2537};
2538
2539static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2540        .name           = "tcp",
2541        .family         = AF_INET,
2542        .seq_fops       = &tcp_afinfo_seq_fops,
2543        .seq_ops        = {
2544                .show           = tcp4_seq_show,
2545        },
2546};
2547
2548static int __net_init tcp4_proc_init_net(struct net *net)
2549{
2550        return tcp_proc_register(net, &tcp4_seq_afinfo);
2551}
2552
2553static void __net_exit tcp4_proc_exit_net(struct net *net)
2554{
2555        tcp_proc_unregister(net, &tcp4_seq_afinfo);
2556}
2557
2558static struct pernet_operations tcp4_net_ops = {
2559        .init = tcp4_proc_init_net,
2560        .exit = tcp4_proc_exit_net,
2561};
2562
2563int __init tcp4_proc_init(void)
2564{
2565        return register_pernet_subsys(&tcp4_net_ops);
2566}
2567
2568void tcp4_proc_exit(void)
2569{
2570        unregister_pernet_subsys(&tcp4_net_ops);
2571}
2572#endif /* CONFIG_PROC_FS */
2573
2574struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2575{
2576        const struct iphdr *iph = skb_gro_network_header(skb);
2577
2578        switch (skb->ip_summed) {
2579        case CHECKSUM_COMPLETE:
2580                if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2581                                  skb->csum)) {
2582                        skb->ip_summed = CHECKSUM_UNNECESSARY;
2583                        break;
2584                }
2585
2586                /* fall through */
2587        case CHECKSUM_NONE:
2588                NAPI_GRO_CB(skb)->flush = 1;
2589                return NULL;
2590        }
2591
2592        return tcp_gro_receive(head, skb);
2593}
2594
2595int tcp4_gro_complete(struct sk_buff *skb)
2596{
2597        const struct iphdr *iph = ip_hdr(skb);
2598        struct tcphdr *th = tcp_hdr(skb);
2599
2600        th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2601                                  iph->saddr, iph->daddr, 0);
2602        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2603
2604        return tcp_gro_complete(skb);
2605}
2606
2607struct proto tcp_prot = {
2608        .name                   = "TCP",
2609        .owner                  = THIS_MODULE,
2610        .close                  = tcp_close,
2611        .connect                = tcp_v4_connect,
2612        .disconnect             = tcp_disconnect,
2613        .accept                 = inet_csk_accept,
2614        .ioctl                  = tcp_ioctl,
2615        .init                   = tcp_v4_init_sock,
2616        .destroy                = tcp_v4_destroy_sock,
2617        .shutdown               = tcp_shutdown,
2618        .setsockopt             = tcp_setsockopt,
2619        .getsockopt             = tcp_getsockopt,
2620        .recvmsg                = tcp_recvmsg,
2621        .sendmsg                = tcp_sendmsg,
2622        .sendpage               = tcp_sendpage,
2623        .backlog_rcv            = tcp_v4_do_rcv,
2624        .release_cb             = tcp_release_cb,
2625        .mtu_reduced            = tcp_v4_mtu_reduced,
2626        .hash                   = inet_hash,
2627        .unhash                 = inet_unhash,
2628        .get_port               = inet_csk_get_port,
2629        .enter_memory_pressure  = tcp_enter_memory_pressure,
2630        .sockets_allocated      = &tcp_sockets_allocated,
2631        .orphan_count           = &tcp_orphan_count,
2632        .memory_allocated       = &tcp_memory_allocated,
2633        .memory_pressure        = &tcp_memory_pressure,
2634        .sysctl_wmem            = sysctl_tcp_wmem,
2635        .sysctl_rmem            = sysctl_tcp_rmem,
2636        .max_header             = MAX_TCP_HEADER,
2637        .obj_size               = sizeof(struct tcp_sock),
2638        .slab_flags             = SLAB_DESTROY_BY_RCU,
2639        .twsk_prot              = &tcp_timewait_sock_ops,
2640        .rsk_prot               = &tcp_request_sock_ops,
2641        .h.hashinfo             = &tcp_hashinfo,
2642        .no_autobind            = true,
2643#ifdef CONFIG_COMPAT
2644        .compat_setsockopt      = compat_tcp_setsockopt,
2645        .compat_getsockopt      = compat_tcp_getsockopt,
2646#endif
2647#ifdef CONFIG_MEMCG_KMEM
2648        .init_cgroup            = tcp_init_cgroup,
2649        .destroy_cgroup         = tcp_destroy_cgroup,
2650        .proto_cgroup           = tcp_proto_cgroup,
2651#endif
2652};
2653EXPORT_SYMBOL(tcp_prot);
2654
2655static int __net_init tcp_sk_init(struct net *net)
2656{
2657        return 0;
2658}
2659
2660static void __net_exit tcp_sk_exit(struct net *net)
2661{
2662}
2663
2664static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2665{
2666        inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2667}
2668
2669static struct pernet_operations __net_initdata tcp_sk_ops = {
2670       .init       = tcp_sk_init,
2671       .exit       = tcp_sk_exit,
2672       .exit_batch = tcp_sk_exit_batch,
2673};
2674
2675void __init tcp_v4_init(void)
2676{
2677        inet_hashinfo_init(&tcp_hashinfo);
2678        if (register_pernet_subsys(&tcp_sk_ops))
2679                panic("Failed to create the TCP control socket.\n");
2680}
2681