linux/include/net/tcp.h
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              Definitions for the TCP module.
 *
 * Version:     @(#)tcp.h       1.0.5   05/23/93
 *
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 */
#ifndef _TCP_H
#define _TCP_H

#define FASTRETRANS_DEBUG 1

#include <linux/list.h>
#include <linux/tcp.h>
#include <linux/bug.h>
#include <linux/slab.h>
#include <linux/cache.h>
#include <linux/percpu.h>
#include <linux/skbuff.h>
#include <linux/dmaengine.h>
#include <linux/crypto.h>
#include <linux/cryptohash.h>
#include <linux/kref.h>

#include <net/inet_connection_sock.h>
#include <net/inet_timewait_sock.h>
#include <net/inet_hashtables.h>
#include <net/checksum.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/snmp.h>
#include <net/ip.h>
#include <net/tcp_states.h>
#include <net/inet_ecn.h>
#include <net/dst.h>

#include <linux/seq_file.h>
#include <linux/memcontrol.h>

extern struct inet_hashinfo tcp_hashinfo;

extern struct percpu_counter tcp_orphan_count;
extern void tcp_time_wait(struct sock *sk, int state, int timeo);

#define MAX_TCP_HEADER  (128 + MAX_HEADER)
#define MAX_TCP_OPTION_SPACE 40

/*
 * Never offer a window over 32767 without using window scaling. Some
 * poor stacks do signed 16bit maths!
 */
#define MAX_TCP_WINDOW          32767U

/* Offer an initial receive window of 10 mss. */
#define TCP_DEFAULT_INIT_RCVWND 10

/* Minimal accepted MSS. It is (60+60+8) - (20+20). */
#define TCP_MIN_MSS             88U

/* The least MTU to use for probing */
#define TCP_BASE_MSS            512

/* After receiving this amount of duplicate ACKs fast retransmit starts. */
#define TCP_FASTRETRANS_THRESH 3

/* Maximal reordering. */
#define TCP_MAX_REORDERING      127

/* Maximal number of ACKs sent quickly to accelerate slow-start. */
#define TCP_MAX_QUICKACKS       16U

/* urg_data states */
#define TCP_URG_VALID   0x0100
#define TCP_URG_NOTYET  0x0200
#define TCP_URG_READ    0x0400

#define TCP_RETR1       3       /*
                                 * This is how many retries it does before it
                                 * tries to figure out if the gateway is
                                 * down. Minimal RFC value is 3; it corresponds
                                 * to ~3sec-8min depending on RTO.
                                 */

#define TCP_RETR2       15      /*
                                 * This should take at least
                                 * 90 minutes to time out.
                                 * RFC1122 says that the limit is 100 sec.
                                 * 15 is ~13-30min depending on RTO.
                                 */

#define TCP_SYN_RETRIES  5      /* number of times to retry active opening a
                                 * connection: ~180sec is RFC minimum   */

#define TCP_SYNACK_RETRIES 5    /* number of times to retry passive opening a
                                 * connection: ~180sec is RFC minimum   */

#define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT
                                  * state, about 60 seconds     */
#define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN
                                 /* BSD style FIN_WAIT2 deadlock breaker.
                                  * It used to be 3min, new value is 60sec,
                                  * to combine FIN-WAIT-2 timeout with
                                  * TIME-WAIT timer.
                                  */

#define TCP_DELACK_MAX  ((unsigned)(HZ/5))      /* maximal time to delay before sending an ACK */
#if HZ >= 100
#define TCP_DELACK_MIN  ((unsigned)(HZ/25))     /* minimal time to delay before sending an ACK */
#define TCP_ATO_MIN     ((unsigned)(HZ/25))
#else
#define TCP_DELACK_MIN  4U
#define TCP_ATO_MIN     4U
#endif
#define TCP_RTO_MAX     ((unsigned)(120*HZ))
#define TCP_RTO_MIN     ((unsigned)(HZ/5))
#define TCP_TIMEOUT_INIT ((unsigned)(1*HZ))     /* RFC6298 2.1 initial RTO value        */
#define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now
                                                 * used as a fallback RTO for the
                                                 * initial data transmission if no
                                                 * valid RTT sample has been acquired,
                                                 * most likely due to retrans in 3WHS.
                                                 */

#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
                                                         * for local resources.
                                                         */

#define TCP_KEEPALIVE_TIME      (120*60*HZ)     /* two hours */
#define TCP_KEEPALIVE_PROBES    9               /* Max of 9 keepalive probes    */
#define TCP_KEEPALIVE_INTVL     (75*HZ)

#define MAX_TCP_KEEPIDLE        32767
#define MAX_TCP_KEEPINTVL       32767
#define MAX_TCP_KEEPCNT         127
#define MAX_TCP_SYNCNT          127

#define TCP_SYNQ_INTERVAL       (HZ/5)  /* Period of SYNACK timer */

#define TCP_PAWS_24DAYS (60 * 60 * 24 * 24)
#define TCP_PAWS_MSL    60              /* Per-host timestamps are invalidated
                                         * after this time. It should be equal
                                         * (or greater than) TCP_TIMEWAIT_LEN
                                         * to provide reliability equal to one
                                         * provided by timewait state.
                                         */
#define TCP_PAWS_WINDOW 1               /* Replay window for per-host
                                         * timestamps. It must be less than
                                         * minimal timewait lifetime.
                                         */
/*
 *      TCP option
 */

#define TCPOPT_NOP              1       /* Padding */
#define TCPOPT_EOL              0       /* End of options */
#define TCPOPT_MSS              2       /* Segment size negotiating */
#define TCPOPT_WINDOW           3       /* Window scaling */
#define TCPOPT_SACK_PERM        4       /* SACK Permitted */
#define TCPOPT_SACK             5       /* SACK Block */
#define TCPOPT_TIMESTAMP        8       /* Better RTT estimations/PAWS */
#define TCPOPT_MD5SIG           19      /* MD5 Signature (RFC2385) */
#define TCPOPT_COOKIE           253     /* Cookie extension (experimental) */
#define TCPOPT_EXP              254     /* Experimental */
/* Magic number to be after the option value for sharing TCP
 * experimental options. See draft-ietf-tcpm-experimental-options-00.txt
 */
#define TCPOPT_FASTOPEN_MAGIC   0xF989

/*
 *     TCP option lengths
 */

#define TCPOLEN_MSS            4
#define TCPOLEN_WINDOW         3
#define TCPOLEN_SACK_PERM      2
#define TCPOLEN_TIMESTAMP      10
#define TCPOLEN_MD5SIG         18
#define TCPOLEN_EXP_FASTOPEN_BASE  4
#define TCPOLEN_COOKIE_BASE    2        /* Cookie-less header extension */
#define TCPOLEN_COOKIE_PAIR    3        /* Cookie pair header extension */
#define TCPOLEN_COOKIE_MIN     (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN)
#define TCPOLEN_COOKIE_MAX     (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MAX)

/* But this is what stacks really send out. */
#define TCPOLEN_TSTAMP_ALIGNED          12
#define TCPOLEN_WSCALE_ALIGNED          4
#define TCPOLEN_SACKPERM_ALIGNED        4
#define TCPOLEN_SACK_BASE               2
#define TCPOLEN_SACK_BASE_ALIGNED       4
#define TCPOLEN_SACK_PERBLOCK           8
#define TCPOLEN_MD5SIG_ALIGNED          20
#define TCPOLEN_MSS_ALIGNED             4
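
/* The *_ALIGNED lengths include NOP padding so that options stay 32-bit
 * aligned on the wire: e.g. a timestamp option is emitted as NOP, NOP,
 * TIMESTAMP (1 + 1 + 10 = 12 bytes).
 */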

/* Flags in tp->nonagle */
#define TCP_NAGLE_OFF           1       /* Nagle's algo is disabled */
#define TCP_NAGLE_CORK          2       /* Socket is corked         */
#define TCP_NAGLE_PUSH          4       /* Cork is overridden for already queued data */

/* TCP thin-stream limits */
#define TCP_THIN_LINEAR_RETRIES 6       /* After 6 linear retries, do exp. backoff */

/* TCP initial congestion window as per draft-hkchu-tcpm-initcwnd-01 */
#define TCP_INIT_CWND           10

/* Bit Flags for sysctl_tcp_fastopen */
#define TFO_CLIENT_ENABLE       1
#define TFO_CLIENT_NO_COOKIE    4       /* Data in SYN w/o cookie option */

extern struct inet_timewait_death_row tcp_death_row;

/* sysctl variables for tcp */
extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;
extern int sysctl_tcp_sack;
extern int sysctl_tcp_fin_timeout;
extern int sysctl_tcp_keepalive_time;
extern int sysctl_tcp_keepalive_probes;
extern int sysctl_tcp_keepalive_intvl;
extern int sysctl_tcp_syn_retries;
extern int sysctl_tcp_synack_retries;
extern int sysctl_tcp_retries1;
extern int sysctl_tcp_retries2;
extern int sysctl_tcp_orphan_retries;
extern int sysctl_tcp_syncookies;
extern int sysctl_tcp_fastopen;
extern int sysctl_tcp_retrans_collapse;
extern int sysctl_tcp_stdurg;
extern int sysctl_tcp_rfc1337;
extern int sysctl_tcp_abort_on_overflow;
extern int sysctl_tcp_max_orphans;
extern int sysctl_tcp_fack;
extern int sysctl_tcp_reordering;
extern int sysctl_tcp_ecn;
extern int sysctl_tcp_dsack;
extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_app_win;
extern int sysctl_tcp_adv_win_scale;
extern int sysctl_tcp_tw_reuse;
extern int sysctl_tcp_frto;
extern int sysctl_tcp_frto_response;
extern int sysctl_tcp_low_latency;
extern int sysctl_tcp_dma_copybreak;
extern int sysctl_tcp_nometrics_save;
extern int sysctl_tcp_moderate_rcvbuf;
extern int sysctl_tcp_tso_win_divisor;
extern int sysctl_tcp_abc;
extern int sysctl_tcp_mtu_probing;
extern int sysctl_tcp_base_mss;
extern int sysctl_tcp_workaround_signed_windows;
extern int sysctl_tcp_slow_start_after_idle;
extern int sysctl_tcp_max_ssthresh;
extern int sysctl_tcp_cookie_size;
extern int sysctl_tcp_thin_linear_timeouts;
extern int sysctl_tcp_thin_dupack;
extern int sysctl_tcp_early_retrans;
extern int sysctl_tcp_limit_output_bytes;
extern int sysctl_tcp_challenge_ack_limit;

extern atomic_long_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
extern int tcp_memory_pressure;

/*
 * The next routines deal with comparing 32 bit unsigned ints
 * and worry about wraparound (automatic with unsigned arithmetic).
 */

static inline bool before(__u32 seq1, __u32 seq2)
{
        return (__s32)(seq1-seq2) < 0;
}
#define after(seq2, seq1)       before(seq1, seq2)

/* is s2<=s1<=s3 ? */
static inline bool between(__u32 seq1, __u32 seq2, __u32 seq3)
{
        return seq3 - seq2 >= seq1 - seq2;
}
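
/* Usage sketch (illustrative only, relying just on the helpers above):
 * the unsigned subtraction makes these tests wraparound-safe. With
 * seq1 = 0xfffffff0 and seq2 = 0x10, seq1 - seq2 = 0xffffffe0, which is
 * negative as an __s32, so before(seq1, seq2) is true across the 2^32
 * boundary. A hypothetical receive-window test could be written as:
 *
 *      if (!before(seq, rcv_nxt) && before(seq, rcv_nxt + rcv_wnd))
 *              ... segment starts inside the advertised window ...
 */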

static inline bool tcp_out_of_memory(struct sock *sk)
{
        if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
            sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2))
                return true;
        return false;
}

static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
{
        struct percpu_counter *ocp = sk->sk_prot->orphan_count;
        int orphans = percpu_counter_read_positive(ocp);

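        /* percpu_counter_read_positive() is a cheap but possibly stale
         * read; only fall back to the exact (and expensive) sum when the
         * fast path suggests the limit has been crossed.
         */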
        if (orphans << shift > sysctl_tcp_max_orphans) {
                orphans = percpu_counter_sum_positive(ocp);
                if (orphans << shift > sysctl_tcp_max_orphans)
                        return true;
        }
        return false;
}

extern bool tcp_check_oom(struct sock *sk, int shift);

/* syncookies: remember time of last synqueue overflow */
static inline void tcp_synq_overflow(struct sock *sk)
{
        tcp_sk(sk)->rx_opt.ts_recent_stamp = jiffies;
}

/* syncookies: no recent synqueue overflow on this listening socket? */
static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
{
        unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
        return time_after(jiffies, last_overflow + TCP_TIMEOUT_FALLBACK);
}

extern struct proto tcp_prot;

#define TCP_INC_STATS(net, field)       SNMP_INC_STATS((net)->mib.tcp_statistics, field)
#define TCP_INC_STATS_BH(net, field)    SNMP_INC_STATS_BH((net)->mib.tcp_statistics, field)
#define TCP_DEC_STATS(net, field)       SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
#define TCP_ADD_STATS_USER(net, field, val) SNMP_ADD_STATS_USER((net)->mib.tcp_statistics, field, val)
#define TCP_ADD_STATS(net, field, val)  SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)

extern void tcp_init_mem(struct net *net);

extern void tcp_tasklet_init(void);

extern void tcp_v4_err(struct sk_buff *skb, u32);

extern void tcp_shutdown(struct sock *sk, int how);

extern void tcp_v4_early_demux(struct sk_buff *skb);
extern int tcp_v4_rcv(struct sk_buff *skb);

extern struct inet_peer *tcp_v4_get_peer(struct sock *sk);
extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                       size_t size);
extern int tcp_sendpage(struct sock *sk, struct page *page, int offset,
                        size_t size, int flags);
extern void tcp_release_cb(struct sock *sk);
extern void tcp_write_timer_handler(struct sock *sk);
extern void tcp_delack_timer_handler(struct sock *sk);
extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                                 const struct tcphdr *th, unsigned int len);
extern int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                               const struct tcphdr *th, unsigned int len);
extern void tcp_rcv_space_adjust(struct sock *sk);
extern void tcp_cleanup_rbuf(struct sock *sk, int copied);
extern int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
extern void tcp_twsk_destructor(struct sock *sk);
extern ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
                               struct pipe_inode_info *pipe, size_t len,
                               unsigned int flags);

static inline void tcp_dec_quickack_mode(struct sock *sk,
                                         const unsigned int pkts)
{
        struct inet_connection_sock *icsk = inet_csk(sk);

        if (icsk->icsk_ack.quick) {
                if (pkts >= icsk->icsk_ack.quick) {
                        icsk->icsk_ack.quick = 0;
                        /* Leaving quickack mode we deflate ATO. */
                        icsk->icsk_ack.ato   = TCP_ATO_MIN;
                } else
                        icsk->icsk_ack.quick -= pkts;
        }
}

#define TCP_ECN_OK              1
#define TCP_ECN_QUEUE_CWR       2
#define TCP_ECN_DEMAND_CWR      4
#define TCP_ECN_SEEN            8

enum tcp_tw_status {
        TCP_TW_SUCCESS = 0,
        TCP_TW_RST = 1,
        TCP_TW_ACK = 2,
        TCP_TW_SYN = 3
};


extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
                                                     struct sk_buff *skb,
                                                     const struct tcphdr *th);
extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
                                   struct request_sock *req,
                                   struct request_sock **prev);
extern int tcp_child_process(struct sock *parent, struct sock *child,
                             struct sk_buff *skb);
extern bool tcp_use_frto(struct sock *sk);
extern void tcp_enter_frto(struct sock *sk);
extern void tcp_enter_loss(struct sock *sk, int how);
extern void tcp_clear_retrans(struct tcp_sock *tp);
extern void tcp_update_metrics(struct sock *sk);
extern void tcp_init_metrics(struct sock *sk);
extern void tcp_metrics_init(void);
extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check);
extern bool tcp_remember_stamp(struct sock *sk);
extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw);
extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
                                   struct tcp_fastopen_cookie *cookie,
                                   int *syn_loss, unsigned long *last_syn_loss);
extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
                                   struct tcp_fastopen_cookie *cookie,
                                   bool syn_lost);
extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst);
extern void tcp_disable_fack(struct tcp_sock *tp);
extern void tcp_close(struct sock *sk, long timeout);
extern void tcp_init_sock(struct sock *sk);
extern unsigned int tcp_poll(struct file * file, struct socket *sock,
                             struct poll_table_struct *wait);
extern int tcp_getsockopt(struct sock *sk, int level, int optname,
                          char __user *optval, int __user *optlen);
extern int tcp_setsockopt(struct sock *sk, int level, int optname,
                          char __user *optval, unsigned int optlen);
extern int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
                                 char __user *optval, int __user *optlen);
extern int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
                                 char __user *optval, unsigned int optlen);
extern void tcp_set_keepalive(struct sock *sk, int val);
extern void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req);
extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                       size_t len, int nonblock, int flags, int *addr_len);
extern void tcp_parse_options(const struct sk_buff *skb,
                              struct tcp_options_received *opt_rx, const u8 **hvpp,
                              int estab, struct tcp_fastopen_cookie *foc);
extern const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);

/*
 *      TCP v4 functions exported for the inet6 API
 */

extern void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
extern int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
extern struct sock * tcp_create_openreq_child(struct sock *sk,
                                              struct request_sock *req,
                                              struct sk_buff *skb);
extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
                                          struct request_sock *req,
                                          struct dst_entry *dst);
extern int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
extern int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len);
extern int tcp_connect(struct sock *sk);
extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
                                        struct request_sock *req,
                                        struct request_values *rvp);
extern int tcp_disconnect(struct sock *sk, int flags);

void tcp_connect_init(struct sock *sk);
void tcp_finish_connect(struct sock *sk, struct sk_buff *skb);
int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size);
void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb);

/* From syncookies.c */
extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
                                    struct ip_options *opt);
#ifdef CONFIG_SYN_COOKIES
extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
                                     __u16 *mss);
#else
static inline __u32 cookie_v4_init_sequence(struct sock *sk,
                                            struct sk_buff *skb,
                                            __u16 *mss)
{
        return 0;
}
#endif

extern __u32 cookie_init_timestamp(struct request_sock *req);
extern bool cookie_check_timestamp(struct tcp_options_received *opt, bool *);

/* From net/ipv6/syncookies.c */
extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
#ifdef CONFIG_SYN_COOKIES
extern __u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb,
                                     __u16 *mss);
#else
static inline __u32 cookie_v6_init_sequence(struct sock *sk,
                                            struct sk_buff *skb,
                                            __u16 *mss)
{
        return 0;
}
#endif
/* tcp_output.c */

extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
                                      int nonagle);
extern bool tcp_may_send_now(struct sock *sk);
extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
extern void tcp_retransmit_timer(struct sock *sk);
extern void tcp_xmit_retransmit_queue(struct sock *);
extern void tcp_simple_retransmit(struct sock *);
extern int tcp_trim_head(struct sock *, struct sk_buff *, u32);
extern int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int);

extern void tcp_send_probe0(struct sock *);
extern void tcp_send_partial(struct sock *);
extern int tcp_write_wakeup(struct sock *);
extern void tcp_send_fin(struct sock *sk);
extern void tcp_send_active_reset(struct sock *sk, gfp_t priority);
extern int tcp_send_synack(struct sock *);
extern bool tcp_syn_flood_action(struct sock *sk,
                                 const struct sk_buff *skb,
                                 const char *proto);
extern void tcp_push_one(struct sock *, unsigned int mss_now);
extern void tcp_send_ack(struct sock *sk);
extern void tcp_send_delayed_ack(struct sock *sk);

/* tcp_input.c */
extern void tcp_cwnd_application_limited(struct sock *sk);
extern void tcp_resume_early_retransmit(struct sock *sk);
extern void tcp_rearm_rto(struct sock *sk);

/* tcp_timer.c */
extern void tcp_init_xmit_timers(struct sock *);
static inline void tcp_clear_xmit_timers(struct sock *sk)
{
        inet_csk_clear_xmit_timers(sk);
}

extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
extern unsigned int tcp_current_mss(struct sock *sk);

/* Bound MSS / TSO packet size to half of the window */
static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
{
        int cutoff;

        /* When peer uses tiny windows, there is no use in packetizing
         * to sub-MSS pieces for the sake of SWS or making sure there
         * are enough packets in the pipe for fast recovery.
         *
         * On the other hand, for extremely large MSS devices, handling
         * smaller than MSS windows in this way does make sense.
         */
        if (tp->max_window >= 512)
                cutoff = (tp->max_window >> 1);
        else
                cutoff = tp->max_window;

        if (cutoff && pktsize > cutoff)
                return max_t(int, cutoff, 68U - tp->tcp_header_len);
        else
                return pktsize;
}
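
/* Illustrative only: a peer advertising max_window = 65535 yields a
 * cutoff of 32767, so a 48k pktsize is clamped to 32767; with a tiny
 * max_window of 300 the cutoff is 300 and anything larger is clamped
 * to max(300, 68 - tcp_header_len).
 */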

/* tcp.c */
extern void tcp_get_info(const struct sock *, struct tcp_info *);

/* Read 'sendfile()'-style from a TCP socket */
typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
                                unsigned int, size_t);
extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
                         sk_read_actor_t recv_actor);

extern void tcp_initialize_rcv_mss(struct sock *sk);

extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
extern int tcp_mss_to_mtu(struct sock *sk, int mss);
extern void tcp_mtup_init(struct sock *sk);
extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt);

static inline void tcp_bound_rto(const struct sock *sk)
{
        if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
                inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
}

static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
{
        return (tp->srtt >> 3) + tp->rttvar;
}

extern void tcp_set_rto(struct sock *sk);

static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
{
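        /* Precompute the expected 4th 32-bit word of the TCP header
         * (doff | flags | window) for header prediction. tcp_header_len
         * is in bytes, so "<< 26" equals "(len / 4) << 28", i.e. the
         * doff field.
         */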
        tp->pred_flags = htonl((tp->tcp_header_len << 26) |
                               ntohl(TCP_FLAG_ACK) |
                               snd_wnd);
}

static inline void tcp_fast_path_on(struct tcp_sock *tp)
{
        __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
}

static inline void tcp_fast_path_check(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);

        if (skb_queue_empty(&tp->out_of_order_queue) &&
            tp->rcv_wnd &&
            atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
            !tp->urg_data)
                tcp_fast_path_on(tp);
}

/* Compute the actual rto_min value */
static inline u32 tcp_rto_min(struct sock *sk)
{
        const struct dst_entry *dst = __sk_dst_get(sk);
        u32 rto_min = TCP_RTO_MIN;

        if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
                rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
        return rto_min;
}

/* Compute the actual receive window we are currently advertising.
 * Rcv_nxt can be after the window if our peer pushes more data
 * than the offered window.
 */
static inline u32 tcp_receive_window(const struct tcp_sock *tp)
{
        s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;

        if (win < 0)
                win = 0;
        return (u32) win;
}
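
/* Illustrative only: with rcv_wup = 1000, rcv_wnd = 500 and
 * rcv_nxt = 1300, the remaining advertised window is 200; if the peer
 * overruns the offer (rcv_nxt = 1600) the result is clamped to 0
 * rather than going negative.
 */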

/* Choose a new window, without checks for shrinking, and without
 * scaling applied to the result.  The caller does these things
 * if necessary.  This is a "raw" window selection.
 */
extern u32 __tcp_select_window(struct sock *sk);

void tcp_send_window_probe(struct sock *sk);

/* TCP timestamps are only 32 bits, which causes a slight
 * complication on 64-bit systems since we store a snapshot
 * of jiffies in the buffer control blocks below.  We decided
 * to use only the low 32-bits of jiffies and hide the ugly
 * casts with the following macro.
 */
#define tcp_time_stamp          ((__u32)(jiffies))

#define tcp_flag_byte(th) (((u_int8_t *)th)[13])

#define TCPHDR_FIN 0x01
#define TCPHDR_SYN 0x02
#define TCPHDR_RST 0x04
#define TCPHDR_PSH 0x08
#define TCPHDR_ACK 0x10
#define TCPHDR_URG 0x20
#define TCPHDR_ECE 0x40
#define TCPHDR_CWR 0x80

/* This is what the send packet queuing engine uses to pass
 * TCP per-packet control information to the transmission code.
 * We also store the host-order sequence numbers in here.
 * This is 44 bytes if IPV6 is enabled.
 * If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately.
 */
struct tcp_skb_cb {
        union {
                struct inet_skb_parm    h4;
#if IS_ENABLED(CONFIG_IPV6)
                struct inet6_skb_parm   h6;
#endif
        } header;       /* For incoming frames          */
        __u32           seq;            /* Starting sequence number     */
        __u32           end_seq;        /* SEQ + FIN + SYN + datalen    */
        __u32           when;           /* used to compute rtt's        */
        __u8            tcp_flags;      /* TCP header flags. (tcp[13])  */

        __u8            sacked;         /* State flags for SACK/FACK.   */
#define TCPCB_SACKED_ACKED      0x01    /* SKB ACK'd by a SACK block    */
#define TCPCB_SACKED_RETRANS    0x02    /* SKB retransmitted            */
#define TCPCB_LOST              0x04    /* SKB is lost                  */
#define TCPCB_TAGBITS           0x07    /* All tag bits                 */
#define TCPCB_EVER_RETRANS      0x80    /* Ever retransmitted frame     */
#define TCPCB_RETRANS           (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)

        __u8            ip_dsfield;     /* IPv4 tos or IPv6 dsfield     */
        /* 1 byte hole */
        __u32           ack_seq;        /* Sequence number ACK'd        */
};

#define TCP_SKB_CB(__skb)       ((struct tcp_skb_cb *)&((__skb)->cb[0]))

/* RFC3168 : 6.1.1 SYN packets must not have ECT/ECN bits set
 *
 * If we receive a SYN packet with these bits set, it means a network is
 * playing bad games with TOS bits. In order to avoid possible false congestion
 * notifications, we disable TCP ECN negotiation.
 */
static inline void
TCP_ECN_create_request(struct request_sock *req, const struct sk_buff *skb)
{
        const struct tcphdr *th = tcp_hdr(skb);

        if (sysctl_tcp_ecn && th->ece && th->cwr &&
            INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield))
                inet_rsk(req)->ecn_ok = 1;
}

/* Due to TSO, an SKB can be composed of multiple actual
 * packets.  To keep these tracked properly, we use this.
 */
static inline int tcp_skb_pcount(const struct sk_buff *skb)
{
        return skb_shinfo(skb)->gso_segs;
}

/* This is valid iff tcp_skb_pcount() > 1. */
static inline int tcp_skb_mss(const struct sk_buff *skb)
{
        return skb_shinfo(skb)->gso_size;
}

/* Events passed to congestion control interface */
enum tcp_ca_event {
        CA_EVENT_TX_START,      /* first transmit when no packets in flight */
        CA_EVENT_CWND_RESTART,  /* congestion window restart */
        CA_EVENT_COMPLETE_CWR,  /* end of congestion recovery */
        CA_EVENT_FRTO,          /* fast recovery timeout */
        CA_EVENT_LOSS,          /* loss timeout */
        CA_EVENT_FAST_ACK,      /* in sequence ack */
        CA_EVENT_SLOW_ACK,      /* other ack */
};

/*
 * Interface for adding new TCP congestion control handlers
 */
#define TCP_CA_NAME_MAX 16
#define TCP_CA_MAX      128
#define TCP_CA_BUF_MAX  (TCP_CA_NAME_MAX*TCP_CA_MAX)

#define TCP_CONG_NON_RESTRICTED 0x1
#define TCP_CONG_RTT_STAMP      0x2

struct tcp_congestion_ops {
        struct list_head        list;
        unsigned long flags;

        /* initialize private data (optional) */
        void (*init)(struct sock *sk);
        /* cleanup private data  (optional) */
        void (*release)(struct sock *sk);

        /* return slow start threshold (required) */
        u32 (*ssthresh)(struct sock *sk);
        /* lower bound for congestion window (optional) */
        u32 (*min_cwnd)(const struct sock *sk);
        /* do new cwnd calculation (required) */
        void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight);
        /* call before changing ca_state (optional) */
        void (*set_state)(struct sock *sk, u8 new_state);
        /* call when cwnd event occurs (optional) */
        void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
        /* new value of cwnd after loss (optional) */
        u32  (*undo_cwnd)(struct sock *sk);
        /* hook for packet ack accounting (optional) */
        void (*pkts_acked)(struct sock *sk, u32 num_acked, s32 rtt_us);
        /* get info for inet_diag (optional) */
        void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);

        char            name[TCP_CA_NAME_MAX];
        struct module   *owner;
};

extern int tcp_register_congestion_control(struct tcp_congestion_ops *type);
extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);

extern void tcp_init_congestion_control(struct sock *sk);
extern void tcp_cleanup_congestion_control(struct sock *sk);
extern int tcp_set_default_congestion_control(const char *name);
extern void tcp_get_default_congestion_control(char *name);
extern void tcp_get_available_congestion_control(char *buf, size_t len);
extern void tcp_get_allowed_congestion_control(char *buf, size_t len);
extern int tcp_set_allowed_congestion_control(char *allowed);
extern int tcp_set_congestion_control(struct sock *sk, const char *name);
extern void tcp_slow_start(struct tcp_sock *tp);
extern void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w);

extern struct tcp_congestion_ops tcp_init_congestion_ops;
extern u32 tcp_reno_ssthresh(struct sock *sk);
extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight);
extern u32 tcp_reno_min_cwnd(const struct sock *sk);
extern struct tcp_congestion_ops tcp_reno;
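
/* Usage sketch (illustrative only): a minimal congestion control module
 * assembled from the Reno helpers exported above; the names tcp_example
 * and tcp_example_register() are hypothetical.
 *
 *      static struct tcp_congestion_ops tcp_example __read_mostly = {
 *              .ssthresh       = tcp_reno_ssthresh,
 *              .cong_avoid     = tcp_reno_cong_avoid,
 *              .min_cwnd       = tcp_reno_min_cwnd,
 *              .name           = "example",
 *              .owner          = THIS_MODULE,
 *      };
 *
 *      static int __init tcp_example_register(void)
 *      {
 *              return tcp_register_congestion_control(&tcp_example);
 *      }
 */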

static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
{
        struct inet_connection_sock *icsk = inet_csk(sk);

        if (icsk->icsk_ca_ops->set_state)
                icsk->icsk_ca_ops->set_state(sk, ca_state);
        icsk->icsk_ca_state = ca_state;
}

static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
{
        const struct inet_connection_sock *icsk = inet_csk(sk);

        if (icsk->icsk_ca_ops->cwnd_event)
                icsk->icsk_ca_ops->cwnd_event(sk, event);
}

/* These functions determine how the current flow behaves with respect to SACK
 * handling. SACK is negotiated with the peer, and therefore it can vary
 * between different flows.
 *
 * tcp_is_sack - SACK enabled
 * tcp_is_reno - No SACK
 * tcp_is_fack - FACK enabled, implies SACK enabled
 */
static inline int tcp_is_sack(const struct tcp_sock *tp)
{
        return tp->rx_opt.sack_ok;
}

static inline bool tcp_is_reno(const struct tcp_sock *tp)
{
        return !tcp_is_sack(tp);
}

static inline bool tcp_is_fack(const struct tcp_sock *tp)
{
        return tp->rx_opt.sack_ok & TCP_FACK_ENABLED;
}

static inline void tcp_enable_fack(struct tcp_sock *tp)
{
        tp->rx_opt.sack_ok |= TCP_FACK_ENABLED;
}

/* TCP early-retransmit (ER) is similar to but more conservative than
 * the thin-dupack feature.  Enable ER only if thin-dupack is disabled.
 */
static inline void tcp_enable_early_retrans(struct tcp_sock *tp)
{
        tp->do_early_retrans = sysctl_tcp_early_retrans &&
                !sysctl_tcp_thin_dupack && sysctl_tcp_reordering == 3;
        tp->early_retrans_delayed = 0;
}

static inline void tcp_disable_early_retrans(struct tcp_sock *tp)
{
        tp->do_early_retrans = 0;
}

static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
{
        return tp->sacked_out + tp->lost_out;
}

/* This determines how many packets are "in the network" to the best
 * of our knowledge.  In many cases it is conservative, but where
 * detailed information is available from the receiver (via SACK
 * blocks etc.) we can make more aggressive calculations.
 *
 * Use this for decisions involving congestion control; use just
 * tp->packets_out to determine if the send queue is empty or not.
 *
 * Read this equation as:
 *
 *      "Packets sent once on transmission queue" MINUS
 *      "Packets left network, but not honestly ACKed yet" PLUS
 *      "Packets fast retransmitted"
 */
static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
{
        return tp->packets_out - tcp_left_out(tp) + tp->retrans_out;
}
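
/* Illustrative only: with packets_out = 10, sacked_out = 2, lost_out = 1
 * and retrans_out = 1, tcp_packets_in_flight() is 10 - (2 + 1) + 1 = 8.
 */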

#define TCP_INFINITE_SSTHRESH   0x7fffffff

static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
{
        return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
}

/* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
 * The exception is rate halving phase, when cwnd is decreasing towards
 * ssthresh.
 */
static inline __u32 tcp_current_ssthresh(const struct sock *sk)
{
        const struct tcp_sock *tp = tcp_sk(sk);

        if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery))
                return tp->snd_ssthresh;
        else
                return max(tp->snd_ssthresh,
                           ((tp->snd_cwnd >> 1) +
                            (tp->snd_cwnd >> 2)));
}
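
/* Note: (snd_cwnd >> 1) + (snd_cwnd >> 2) above is 3/4 * snd_cwnd,
 * computed with shifts instead of a multiply and divide.
 */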

/* Use define here intentionally to get WARN_ON location shown at the caller */
#define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out)

extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
extern __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst);

/* The maximum number of MSS of available cwnd for which TSO defers
 * sending if not using sysctl_tcp_tso_win_divisor.
 */
static inline __u32 tcp_max_tso_deferred_mss(const struct tcp_sock *tp)
{
        return 3;
}

/* Slow start with delack produces 3 packets of burst, so that
 * it is safe "de facto".  This will be the default - same as
 * the default reordering threshold - but if reordering increases,
 * we must be able to allow cwnd to burst at least this much in order
 * to not pull it back when holes are filled.
 */
static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp)
{
        return tp->reordering;
}

/* Returns end sequence number of the receiver's advertised window */
static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
{
        return tp->snd_una + tp->snd_wnd;
}
extern bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight);

static inline void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss,
                                       const struct sk_buff *skb)
{
        if (skb->len < mss)
                tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
}

static inline void tcp_check_probe_timer(struct sock *sk)
{
        const struct tcp_sock *tp = tcp_sk(sk);
        const struct inet_connection_sock *icsk = inet_csk(sk);

        if (!tp->packets_out && !icsk->icsk_pending)
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
                                          icsk->icsk_rto, TCP_RTO_MAX);
}

static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
{
        tp->snd_wl1 = seq;
}

static inline void tcp_update_wl(struct tcp_sock *tp, u32 seq)
{
        tp->snd_wl1 = seq;
}

/*
 * Calculate(/check) TCP checksum
 */
static inline __sum16 tcp_v4_check(int len, __be32 saddr,
                                   __be32 daddr, __wsum base)
{
        return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

static inline __sum16 __tcp_checksum_complete(struct sk_buff *skb)
{
        return __skb_checksum_complete(skb);
}

static inline bool tcp_checksum_complete(struct sk_buff *skb)
{
        return !skb_csum_unnecessary(skb) &&
                __tcp_checksum_complete(skb);
}

/* Prequeue for VJ style copy to user, combined with checksumming. */

static inline void tcp_prequeue_init(struct tcp_sock *tp)
{
        tp->ucopy.task = NULL;
        tp->ucopy.len = 0;
        tp->ucopy.memory = 0;
        skb_queue_head_init(&tp->ucopy.prequeue);
#ifdef CONFIG_NET_DMA
        tp->ucopy.dma_chan = NULL;
        tp->ucopy.wakeup = 0;
        tp->ucopy.pinned_list = NULL;
        tp->ucopy.dma_cookie = 0;
#endif
}

/* Packet is added to VJ-style prequeue for processing in process
 * context, if a reader task is waiting. Apparently, this exciting
 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
 * failed somewhere. Latency? Burstiness? Well, at least now we will
 * see why it failed. 8)8)                               --ANK
 *
 * NOTE: is this not too big to inline?
 */
static inline bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_sock *tp = tcp_sk(sk);

        if (sysctl_tcp_low_latency || !tp->ucopy.task)
                return false;

        __skb_queue_tail(&tp->ucopy.prequeue, skb);
        tp->ucopy.memory += skb->truesize;
        if (tp->ucopy.memory > sk->sk_rcvbuf) {
                struct sk_buff *skb1;

                BUG_ON(sock_owned_by_user(sk));

                while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
                        sk_backlog_rcv(sk, skb1);
                        NET_INC_STATS_BH(sock_net(sk),
                                         LINUX_MIB_TCPPREQUEUEDROPPED);
                }

                tp->ucopy.memory = 0;
        } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
                wake_up_interruptible_sync_poll(sk_sleep(sk),
                                           POLLIN | POLLRDNORM | POLLRDBAND);
                if (!inet_csk_ack_scheduled(sk))
                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
                                                  (3 * tcp_rto_min(sk)) / 4,
                                                  TCP_RTO_MAX);
        }
        return true;
}


#undef STATE_TRACE

#ifdef STATE_TRACE
static const char *statename[]={
        "Unused","Established","Syn Sent","Syn Recv",
        "Fin Wait 1","Fin Wait 2","Time Wait", "Close",
        "Close Wait","Last ACK","Listen","Closing"
};
#endif
extern void tcp_set_state(struct sock *sk, int state);

extern void tcp_done(struct sock *sk);

static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
{
        rx_opt->dsack = 0;
        rx_opt->num_sacks = 0;
}

/* Determine a window scaling and initial window to offer. */
extern void tcp_select_initial_window(int __space, __u32 mss,
                                      __u32 *rcv_wnd, __u32 *window_clamp,
                                      int wscale_ok, __u8 *rcv_wscale,
                                      __u32 init_rcv_wnd);

static inline int tcp_win_from_space(int space)
{
        return sysctl_tcp_adv_win_scale <= 0 ?
                (space >> (-sysctl_tcp_adv_win_scale)) :
                space - (space >> sysctl_tcp_adv_win_scale);
}
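
/* Illustrative only: with the default sysctl_tcp_adv_win_scale of 2,
 * tcp_win_from_space(s) is s - s/4, i.e. 3/4 of the buffer space is
 * offered as window; a negative scale would instead offer s >> -scale.
 */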

/* Note: caller must be prepared to deal with negative returns */
static inline int tcp_space(const struct sock *sk)
{
        return tcp_win_from_space(sk->sk_rcvbuf -
                                  atomic_read(&sk->sk_rmem_alloc));
}

static inline int tcp_full_space(const struct sock *sk)
{
        return tcp_win_from_space(sk->sk_rcvbuf);
}

static inline void tcp_openreq_init(struct request_sock *req,
                                    struct tcp_options_received *rx_opt,
                                    struct sk_buff *skb)
{
        struct inet_request_sock *ireq = inet_rsk(req);

        req->rcv_wnd = 0;               /* So that tcp_send_synack() knows! */
        req->cookie_ts = 0;
        tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
        req->mss = rx_opt->mss_clamp;
        req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
        ireq->tstamp_ok = rx_opt->tstamp_ok;
        ireq->sack_ok = rx_opt->sack_ok;
        ireq->snd_wscale = rx_opt->snd_wscale;
        ireq->wscale_ok = rx_opt->wscale_ok;
        ireq->acked = 0;
        ireq->ecn_ok = 0;
        ireq->rmt_port = tcp_hdr(skb)->source;
        ireq->loc_port = tcp_hdr(skb)->dest;
}

extern void tcp_enter_memory_pressure(struct sock *sk);

static inline int keepalive_intvl_when(const struct tcp_sock *tp)
{
        return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
}

static inline int keepalive_time_when(const struct tcp_sock *tp)
{
        return tp->keepalive_time ? : sysctl_tcp_keepalive_time;
}

static inline int keepalive_probes(const struct tcp_sock *tp)
{
        return tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
}

static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
{
        const struct inet_connection_sock *icsk = &tp->inet_conn;

        return min_t(u32, tcp_time_stamp - icsk->icsk_ack.lrcvtime,
                          tcp_time_stamp - tp->rcv_tstamp);
}

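/* Lower-bound the FIN-WAIT-2 lifetime at 3.5 * RTO:
 * (rto << 2) - (rto >> 1) = 4 * rto - rto / 2.
 */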
static inline int tcp_fin_time(const struct sock *sk)
{
        int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout;
        const int rto = inet_csk(sk)->icsk_rto;

        if (fin_timeout < (rto << 2) - (rto >> 1))
                fin_timeout = (rto << 2) - (rto >> 1);

        return fin_timeout;
}

static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt,
                                  int paws_win)
{
        if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
                return true;
        if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
                return true;
        /*
         * Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0,
         * while the following tcp messages carry valid values. Ignore a
         * 0 value, or else a 'negative' tsval could cause us to reject
         * their packets.
         */
        if (!rx_opt->ts_recent)
                return true;
        return false;
}
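
/* Illustrative only: with ts_recent = 1000, an arriving rcv_tsval of 999
 * gives a difference of 1, so a caller passing paws_win = TCP_PAWS_WINDOW
 * (1) still accepts the segment; rcv_tsval = 998 fails that test and the
 * segment is accepted only via the staleness or zero-timestamp escapes.
 */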

static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
                                   int rst)
{
        if (tcp_paws_check(rx_opt, 0))
                return false;

        /* RST segments are not recommended to carry timestamps,
           and, if they do, it is recommended to ignore PAWS because
           "their cleanup function should take precedence over timestamps."
           Certainly, this is a mistake. It is necessary to understand the
           reasons for this constraint before relaxing it: if a peer reboots,
           its clock may go out-of-sync and half-open connections will not
           be reset. Actually, the problem would not exist if all the
           implementations followed the draft about maintaining clocks
           across reboots. Linux-2.2 DOES NOT!

           However, we can relax time bounds for RST segments to MSL.
         */
        if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
                return false;
        return true;
}
1184
1185static inline void tcp_mib_init(struct net *net)
1186{
1187        /* See RFC 2012 */
1188        TCP_ADD_STATS_USER(net, TCP_MIB_RTOALGORITHM, 1);
1189        TCP_ADD_STATS_USER(net, TCP_MIB_RTOMIN, TCP_RTO_MIN*1000/HZ);
1190        TCP_ADD_STATS_USER(net, TCP_MIB_RTOMAX, TCP_RTO_MAX*1000/HZ);
1191        TCP_ADD_STATS_USER(net, TCP_MIB_MAXCONN, -1);
1192}
1193
1194/* from STCP */
1195static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp)
1196{
1197        tp->lost_skb_hint = NULL;
1198        tp->scoreboard_skb_hint = NULL;
1199}
1200
1201static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp)
1202{
1203        tcp_clear_retrans_hints_partial(tp);
1204        tp->retransmit_skb_hint = NULL;
1205}
1206
1207/* MD5 Signature */
1208struct crypto_hash;
1209
1210union tcp_md5_addr {
1211        struct in_addr  a4;
1212#if IS_ENABLED(CONFIG_IPV6)
1213        struct in6_addr a6;
1214#endif
1215};
1216
1217/* - key database */
1218struct tcp_md5sig_key {
1219        struct hlist_node       node;
1220        u8                      keylen;
1221        u8                      family; /* AF_INET or AF_INET6 */
1222        union tcp_md5_addr      addr;
1223        u8                      key[TCP_MD5SIG_MAXKEYLEN];
1224        struct rcu_head         rcu;
1225};
1226
1227/* - sock block */
1228struct tcp_md5sig_info {
1229        struct hlist_head       head;
1230        struct rcu_head         rcu;
1231};
1232
1233/* - pseudo header */
1234struct tcp4_pseudohdr {
1235        __be32          saddr;
1236        __be32          daddr;
1237        __u8            pad;
1238        __u8            protocol;
1239        __be16          len;
1240};
1241
1242struct tcp6_pseudohdr {
1243        struct in6_addr saddr;
1244        struct in6_addr daddr;
1245        __be32          len;
1246        __be32          protocol;       /* including padding */
1247};
1248
1249union tcp_md5sum_block {
1250        struct tcp4_pseudohdr ip4;
1251#if IS_ENABLED(CONFIG_IPV6)
1252        struct tcp6_pseudohdr ip6;
1253#endif
1254};
1255
1256/* - pool: digest algorithm, hash description and scratch buffer */
1257struct tcp_md5sig_pool {
1258        struct hash_desc        md5_desc;
1259        union tcp_md5sum_block  md5_blk;
1260};
1261
1262/* - functions */
1263extern int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1264                               const struct sock *sk,
1265                               const struct request_sock *req,
1266                               const struct sk_buff *skb);
1267extern int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
1268                          int family, const u8 *newkey,
1269                          u8 newkeylen, gfp_t gfp);
1270extern int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
1271                          int family);
1272extern struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
1273                                         struct sock *addr_sk);
1274
1275#ifdef CONFIG_TCP_MD5SIG
1276extern struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
1277                        const union tcp_md5_addr *addr, int family);
1278#define tcp_twsk_md5_key(twsk)  ((twsk)->tw_md5_key)
1279#else
1280static inline struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
1281                                         const union tcp_md5_addr *addr,
1282                                         int family)
1283{
1284        return NULL;
1285}
1286#define tcp_twsk_md5_key(twsk)  NULL
1287#endif
1288
1289extern struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *);
1290extern void tcp_free_md5sig_pool(void);
1291
1292extern struct tcp_md5sig_pool   *tcp_get_md5sig_pool(void);
1293extern void tcp_put_md5sig_pool(void);
1294
1295extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, const struct tcphdr *);
1296extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *,
1297                                 unsigned int header_len);
1298extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
1299                            const struct tcp_md5sig_key *key);
1300
1301struct tcp_fastopen_request {
1302        /* Fast Open cookie. Size 0 means a cookie request */
1303        struct tcp_fastopen_cookie      cookie;
1304        struct msghdr                   *data;  /* data in MSG_FASTOPEN */
1305        u16                             copied; /* queued in tcp_connect() */
1306};
1307
1308void tcp_free_fastopen_req(struct tcp_sock *tp);
1309
1310/* write queue abstraction */
1311static inline void tcp_write_queue_purge(struct sock *sk)
1312{
1313        struct sk_buff *skb;
1314
1315        while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
1316                sk_wmem_free_skb(sk, skb);
1317        sk_mem_reclaim(sk);
1318        tcp_clear_all_retrans_hints(tcp_sk(sk));
1319}
1320
1321static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk)
1322{
1323        return skb_peek(&sk->sk_write_queue);
1324}
1325
1326static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk)
1327{
1328        return skb_peek_tail(&sk->sk_write_queue);
1329}
1330
1331static inline struct sk_buff *tcp_write_queue_next(const struct sock *sk,
1332                                                   const struct sk_buff *skb)
1333{
1334        return skb_queue_next(&sk->sk_write_queue, skb);
1335}
1336
1337static inline struct sk_buff *tcp_write_queue_prev(const struct sock *sk,
1338                                                   const struct sk_buff *skb)
1339{
1340        return skb_queue_prev(&sk->sk_write_queue, skb);
1341}
1342
1343#define tcp_for_write_queue(skb, sk)                                    \
1344        skb_queue_walk(&(sk)->sk_write_queue, skb)
1345
1346#define tcp_for_write_queue_from(skb, sk)                               \
1347        skb_queue_walk_from(&(sk)->sk_write_queue, skb)
1348
1349#define tcp_for_write_queue_from_safe(skb, tmp, sk)                     \
1350        skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp)

static inline struct sk_buff *tcp_send_head(const struct sock *sk)
{
        return sk->sk_send_head;
}

static inline bool tcp_skb_is_last(const struct sock *sk,
                                   const struct sk_buff *skb)
{
        return skb_queue_is_last(&sk->sk_write_queue, skb);
}

/* Advance sk_send_head to the skb after @skb, or NULL if @skb was last. */
static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff *skb)
{
        if (tcp_skb_is_last(sk, skb))
                sk->sk_send_head = NULL;
        else
                sk->sk_send_head = tcp_write_queue_next(sk, skb);
}

/* @skb_unlinked has left the queue; make sure sk_send_head does not dangle. */
static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
{
        if (sk->sk_send_head == skb_unlinked)
                sk->sk_send_head = NULL;
}

static inline void tcp_init_send_head(struct sock *sk)
{
        sk->sk_send_head = NULL;
}

static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
{
        __skb_queue_tail(&sk->sk_write_queue, skb);
}

static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
{
        __tcp_add_write_queue_tail(sk, skb);

        /* Queue it, remembering where we must start sending. */
        if (sk->sk_send_head == NULL) {
                sk->sk_send_head = skb;

                if (tcp_sk(sk)->highest_sack == NULL)
                        tcp_sk(sk)->highest_sack = skb;
        }
}
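
/* Illustrative enqueue sketch, modelled on skb_entail() in tcp.c: a fresh
 * skb is queued at the tail, charged to the socket, and pending data is
 * then pushed; tcp_add_write_queue_tail() re-arms sk_send_head when the
 * queue had been fully sent:
 *
 *	tcp_add_write_queue_tail(sk, skb);
 *	sk->sk_wmem_queued += skb->truesize;
 *	sk_mem_charge(sk, skb->truesize);
 *	...
 *	tcp_push_pending_frames(sk);
 */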

static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb)
{
        __skb_queue_head(&sk->sk_write_queue, skb);
}

/* Insert buff after skb on the write queue of sk.  */
static inline void tcp_insert_write_queue_after(struct sk_buff *skb,
                                                struct sk_buff *buff,
                                                struct sock *sk)
{
        __skb_queue_after(&sk->sk_write_queue, skb, buff);
}

/* Insert new before skb on the write queue of sk.  */
static inline void tcp_insert_write_queue_before(struct sk_buff *new,
                                                  struct sk_buff *skb,
                                                  struct sock *sk)
{
        __skb_queue_before(&sk->sk_write_queue, skb, new);

        if (sk->sk_send_head == skb)
                sk->sk_send_head = new;
}

static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk)
{
        __skb_unlink(skb, &sk->sk_write_queue);
}
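
/* Illustrative unlink discipline (compare the tcp_sendmsg() error path in
 * tcp.c): an skb removed from the queue must also be dropped from the
 * send-head bookkeeping before being freed:
 *
 *	tcp_unlink_write_queue(skb, sk);
 *	tcp_check_send_head(sk, skb);
 *	sk_wmem_free_skb(sk, skb);
 */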

static inline bool tcp_write_queue_empty(struct sock *sk)
{
        return skb_queue_empty(&sk->sk_write_queue);
}

static inline void tcp_push_pending_frames(struct sock *sk)
{
        if (tcp_send_head(sk)) {
                struct tcp_sock *tp = tcp_sk(sk);

                __tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle);
        }
}

/* Start sequence of the skb just after the highest skb with SACKed
 * bit, valid only when sacked_out > 0 or when the caller has otherwise
 * ensured validity.
 */
static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp)
{
        if (!tp->sacked_out)
                return tp->snd_una;

        if (tp->highest_sack == NULL)
                return tp->snd_nxt;

        return TCP_SKB_CB(tp->highest_sack)->seq;
}

static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff *skb)
{
        tcp_sk(sk)->highest_sack = tcp_skb_is_last(sk, skb) ? NULL :
                                                tcp_write_queue_next(sk, skb);
}

static inline struct sk_buff *tcp_highest_sack(struct sock *sk)
{
        return tcp_sk(sk)->highest_sack;
}

static inline void tcp_highest_sack_reset(struct sock *sk)
{
        tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk);
}

/* Called when old skb is about to be deleted (to be combined with new skb) */
static inline void tcp_highest_sack_combine(struct sock *sk,
                                            struct sk_buff *old,
                                            struct sk_buff *new)
{
        if (tcp_sk(sk)->sacked_out && (old == tcp_sk(sk)->highest_sack))
                tcp_sk(sk)->highest_sack = new;
}
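
/* Illustrative sketch, after the retransmit-collapse logic in tcp_output.c:
 * when next_skb is merged into skb and freed, the highest_sack hint must
 * first be moved off the dying skb:
 *
 *	tcp_highest_sack_combine(sk, next_skb, skb);
 *	tcp_unlink_write_queue(next_skb, sk);
 *	sk_wmem_free_skb(sk, next_skb);
 */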

/* Determines whether this is a thin stream (which may suffer from
 * increased latency). Used to trigger latency-reducing mechanisms.
 */
static inline bool tcp_stream_is_thin(struct tcp_sock *tp)
{
        return tp->packets_out < 4 && !tcp_in_initial_slowstart(tp);
}
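
/* Illustrative caller, loosely after tcp_retransmit_timer() in tcp_timer.c:
 * established thin streams may use linear timeouts instead of exponential
 * backoff when sysctl_tcp_thin_linear_timeouts is set:
 *
 *	if (sk->sk_state == TCP_ESTABLISHED &&
 *	    sysctl_tcp_thin_linear_timeouts &&
 *	    tcp_stream_is_thin(tp) &&
 *	    icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
 *		icsk->icsk_backoff = 0;
 *		icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
 *	}
 */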

/* /proc */
enum tcp_seq_states {
        TCP_SEQ_STATE_LISTENING,
        TCP_SEQ_STATE_OPENREQ,
        TCP_SEQ_STATE_ESTABLISHED,
        TCP_SEQ_STATE_TIME_WAIT,
};

int tcp_seq_open(struct inode *inode, struct file *file);

struct tcp_seq_afinfo {
        char                            *name;
        sa_family_t                     family;
        const struct file_operations    *seq_fops;
        struct seq_operations           seq_ops;
};

struct tcp_iter_state {
        struct seq_net_private  p;
        sa_family_t             family;
        enum tcp_seq_states     state;
        struct sock             *syn_wait_sk;
        int                     bucket, offset, sbucket, num, uid;
        loff_t                  last_pos;
};

extern int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo);
extern void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo);

extern struct request_sock_ops tcp_request_sock_ops;
extern struct request_sock_ops tcp6_request_sock_ops;

extern void tcp_v4_destroy_sock(struct sock *sk);

extern int tcp_v4_gso_send_check(struct sk_buff *skb);
extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
                                       netdev_features_t features);
extern struct sk_buff **tcp_gro_receive(struct sk_buff **head,
                                        struct sk_buff *skb);
extern struct sk_buff **tcp4_gro_receive(struct sk_buff **head,
                                         struct sk_buff *skb);
extern int tcp_gro_complete(struct sk_buff *skb);
extern int tcp4_gro_complete(struct sk_buff *skb);

#ifdef CONFIG_PROC_FS
extern int tcp4_proc_init(void);
extern void tcp4_proc_exit(void);
#endif

/* TCP af-specific functions */
struct tcp_sock_af_ops {
#ifdef CONFIG_TCP_MD5SIG
        struct tcp_md5sig_key   *(*md5_lookup) (struct sock *sk,
                                                struct sock *addr_sk);
        int                     (*calc_md5_hash) (char *location,
                                                  struct tcp_md5sig_key *md5,
                                                  const struct sock *sk,
                                                  const struct request_sock *req,
                                                  const struct sk_buff *skb);
        int                     (*md5_parse) (struct sock *sk,
                                              char __user *optval,
                                              int optlen);
#endif
};
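
/* Illustrative instance, after the IPv4 side in tcp_ipv4.c (the
 * tcp_request_sock_ops variant below is filled in analogously):
 *
 *	static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
 *		.md5_lookup	= tcp_v4_md5_lookup,
 *		.calc_md5_hash	= tcp_v4_md5_hash_skb,
 *		.md5_parse	= tcp_v4_parse_md5_keys,
 *	};
 */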

struct tcp_request_sock_ops {
#ifdef CONFIG_TCP_MD5SIG
        struct tcp_md5sig_key   *(*md5_lookup) (struct sock *sk,
                                                struct request_sock *req);
        int                     (*calc_md5_hash) (char *location,
                                                  struct tcp_md5sig_key *md5,
                                                  const struct sock *sk,
                                                  const struct request_sock *req,
                                                  const struct sk_buff *skb);
#endif
};

/* Using SHA1 for now; define some constants. */
#define COOKIE_DIGEST_WORDS (SHA_DIGEST_WORDS)
#define COOKIE_MESSAGE_WORDS (SHA_MESSAGE_BYTES / 4)
#define COOKIE_WORKSPACE_WORDS (COOKIE_DIGEST_WORDS + COOKIE_MESSAGE_WORDS)

extern int tcp_cookie_generator(u32 *bakery);
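
/* Illustrative call shape: the bakery is caller-provided workspace of
 * COOKIE_WORKSPACE_WORDS u32s (digest words followed by message words);
 * handling of the return value is omitted here:
 *
 *	u32 bakery[COOKIE_WORKSPACE_WORDS];
 *
 *	tcp_cookie_generator(bakery);
 */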

/**
 *      struct tcp_cookie_values - each socket needs extra space for the
 *      cookies, together with (optional) space for any SYN data.
 *
 *      A tcp_sock contains a pointer to the current value, and this is
 *      cloned to the tcp_timewait_sock.
 *
 * @cookie_pair:        variable data from the option exchange.
 *
 * @cookie_desired:     user specified tcpct_cookie_desired.  Zero
 *                      indicates default (sysctl_tcp_cookie_size).
 *                      After a cookie has been sent, remembers its size.
 *                      Range: 0, or TCP_COOKIE_MIN to TCP_COOKIE_MAX.
 *
 * @s_data_desired:     user specified tcpct_s_data_desired.  When the
 *                      constant payload is specified (@s_data_constant),
 *                      holds its length instead.
 *                      Range: 0 to TCP_MSS_DESIRED.
 *
 * @s_data_payload:     constant data that is to be included in the
 *                      payload of SYN or SYNACK segments when the
 *                      cookie option is present.
 */
struct tcp_cookie_values {
        struct kref     kref;
        u8              cookie_pair[TCP_COOKIE_PAIR_SIZE];
        u8              cookie_pair_size;
        u8              cookie_desired;
        u16             s_data_desired:11,
                        s_data_constant:1,
                        s_data_in:1,
                        s_data_out:1,
                        s_data_unused:2;
        u8              s_data_payload[0];      /* flexible payload */
};

static inline void tcp_cookie_values_release(struct kref *kref)
{
        kfree(container_of(kref, struct tcp_cookie_values, kref));
}
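
/* Illustrative lifetime sketch (compare TCP_COOKIE_TRANSACTIONS handling
 * in tcp.c): allocation sizes the trailing payload, sharing takes a kref,
 * and the final put frees through tcp_cookie_values_release():
 *
 *	cvp = kzalloc(sizeof(*cvp) + s_data_desired, GFP_KERNEL);
 *	if (cvp != NULL)
 *		kref_init(&cvp->kref);
 *	...
 *	kref_get(&tp->cookie_values->kref);
 *	...
 *	kref_put(&tp->cookie_values->kref, tcp_cookie_values_release);
 */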

/* The length of constant payload data.  Note that s_data_desired is
 * overloaded, depending on s_data_constant: either the length of constant
 * data (returned here) or the limit on variable data.
 */
static inline int tcp_s_data_size(const struct tcp_sock *tp)
{
        return (tp->cookie_values != NULL && tp->cookie_values->s_data_constant)
                ? tp->cookie_values->s_data_desired
                : 0;
}

/**
 *      struct tcp_extend_values - tcp_ipv?.c to tcp_output.c workspace.
 *
 *      As tcp_request_sock has already been extended in other places, the
 *      only remaining method is to pass stack values along as function
 *      parameters.  These parameters are not needed after sending SYNACK.
 *
 * @cookie_bakery:      cryptographic secret and message workspace.
 *
 * @cookie_plus:        bytes in authenticator/cookie option, copied from
 *                      struct tcp_options_received (above).
 */
struct tcp_extend_values {
        struct request_values           rv;
        u32                             cookie_bakery[COOKIE_WORKSPACE_WORDS];
        u8                              cookie_plus:6,
                                        cookie_out_never:1,
                                        cookie_in_always:1;
};

static inline struct tcp_extend_values *tcp_xv(struct request_values *rvp)
{
        return (struct tcp_extend_values *)rvp;
}
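
/* Illustrative downcast, after tcp_make_synack() in tcp_output.c: the
 * request_values pointer threaded through the SYNACK path is mapped back
 * to its containing workspace:
 *
 *	struct tcp_extend_values *xvp = tcp_xv(rvp);
 *
 *	if (xvp->cookie_plus > 0)
 *		...emit the cookie option from xvp->cookie_bakery...
 */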

extern void tcp_v4_init(void);
extern void tcp_init(void);

#endif  /* _TCP_H */