1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18#ifndef _TCP_H
19#define _TCP_H
20
21#define TCP_DEBUG 1
22#define FASTRETRANS_DEBUG 1
23
24
25#undef TCP_CLEAR_TIMERS
26
27#include <linux/config.h>
28#include <linux/list.h>
29#include <linux/tcp.h>
30#include <linux/slab.h>
31#include <linux/cache.h>
32#include <linux/percpu.h>
33#include <net/checksum.h>
34#include <net/sock.h>
35#include <net/snmp.h>
36#include <net/ip.h>
37#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
38#include <linux/ipv6.h>
39#endif
40#include <linux/seq_file.h>
41
42
43
44
45
/*
 * One slot of the table reached through tcp_hashinfo.__tcp_ehash:
 * a chain head together with the rwlock declared next to it.
 * The type is forced to 8-byte alignment.
 */
46struct tcp_ehash_bucket {
47 rwlock_t lock;
48 struct hlist_head chain;
49} __attribute__((__aligned__(8)));
50
51
52#define TCP_LHTABLE_SIZE 32
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
/*
 * Per-port bind record.  Instances are linked through 'node' onto a
 * tcp_bind_hashbucket chain (__tb_head() recovers the bucket from that
 * node) and are created/destroyed with tcp_bucket_create() and
 * tcp_bucket_destroy().
 */
85struct tcp_bind_bucket {
86 unsigned short port;
/* NOTE(review): signed on purpose; the meaning of its values is not visible in this header. */
87 signed short fastreuse;
88 struct hlist_node node;
/* presumably the list that tw_add_bind_node() and tcp_bind_hash() add to -- confirm in the .c files */
89 struct hlist_head owners;
90};
91
92#define tb_for_each(tb, node, head) hlist_for_each_entry(tb, node, head, node)
93
/*
 * One slot of the table reached through tcp_hashinfo.__tcp_bhash:
 * a chain of tcp_bind_bucket entries (linked by their 'node' member)
 * and the spinlock declared with it.
 */
94struct tcp_bind_hashbucket {
95 spinlock_t lock;
96 struct hlist_head chain;
97};
98
99static inline struct tcp_bind_bucket *__tb_head(struct tcp_bind_hashbucket *head)
100{
101 return hlist_entry(head->chain.first, struct tcp_bind_bucket, node);
102}
103
104static inline struct tcp_bind_bucket *tb_head(struct tcp_bind_hashbucket *head)
105{
106 return hlist_empty(&head->chain) ? NULL : __tb_head(head);
107}
108
/*
 * Container for the TCP lookup tables and the locks that go with them.
 * A single instance, tcp_hashinfo, is declared here; the tcp_* macros
 * that follow this struct are shorthands for its members.
 */
109extern struct tcp_hashinfo {
110
111
112
113
114
115
116
117
/* Table of tcp_ehash_bucket slots; __tcp_ehash_size is declared below (units not visible here). */
118 struct tcp_ehash_bucket *__tcp_ehash;
119
120
121
122
/* Table of tcp_bind_hashbucket slots, indexed by tcp_bhashfn(). */
123 struct tcp_bind_hashbucket *__tcp_bhash;
124
/* tcp_bhashfn() masks with (__tcp_bhash_size - 1), so this is presumably a power of two -- confirm at allocation site. */
125 int __tcp_bhash_size;
126 int __tcp_ehash_size;
127
128
129
130
131
/* Fixed-size table of TCP_LHTABLE_SIZE chains; tcp_lhashfn() yields an index in range. */
132 struct hlist_head __tcp_listening_hash[TCP_LHTABLE_SIZE];
133
134
135
136
137
138
139
/* The lock below starts on its own cache line. */
140 rwlock_t __tcp_lhash_lock ____cacheline_aligned;
141 atomic_t __tcp_lhash_users;
142 wait_queue_head_t __tcp_lhash_wait;
143 spinlock_t __tcp_portalloc_lock;
144} tcp_hashinfo;
145
146#define tcp_ehash (tcp_hashinfo.__tcp_ehash)
147#define tcp_bhash (tcp_hashinfo.__tcp_bhash)
148#define tcp_ehash_size (tcp_hashinfo.__tcp_ehash_size)
149#define tcp_bhash_size (tcp_hashinfo.__tcp_bhash_size)
150#define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash)
151#define tcp_lhash_lock (tcp_hashinfo.__tcp_lhash_lock)
152#define tcp_lhash_users (tcp_hashinfo.__tcp_lhash_users)
153#define tcp_lhash_wait (tcp_hashinfo.__tcp_lhash_wait)
154#define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock)
155
156extern kmem_cache_t *tcp_bucket_cachep;
157extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
158 unsigned short snum);
159extern void tcp_bucket_destroy(struct tcp_bind_bucket *tb);
160extern void tcp_bucket_unlock(struct sock *sk);
161extern int tcp_port_rover;
162
163
164static __inline__ int tcp_bhashfn(__u16 lport)
165{
166 return (lport & (tcp_bhash_size - 1));
167}
168
169extern void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb,
170 unsigned short snum);
171
172#if (BITS_PER_LONG == 64)
173#define TCP_ADDRCMP_ALIGN_BYTES 8
174#else
175#define TCP_ADDRCMP_ALIGN_BYTES 4
176#endif
177
178
179
180
181
/*
 * Stand-in used for a connection in TIME_WAIT.  tcptw_sk() casts a
 * struct sock pointer straight to this type, so the leading
 * sock_common must stay first; the tw_* macros below alias its fields.
 *
 * Layout constraint: on 64-bit builds TCP_IPV4_TW_MATCH reads
 * tw_daddr+tw_rcv_saddr as one __u64 and tw_dport+tw_num as one __u32.
 * Those pairs must therefore stay adjacent, in this order, and tw_daddr
 * must keep its TCP_ADDRCMP_ALIGN_BYTES alignment.
 */
182struct tcp_tw_bucket {
183
184
185
186
187 struct sock_common __tw_common;
188#define tw_family __tw_common.skc_family
189#define tw_state __tw_common.skc_state
190#define tw_reuse __tw_common.skc_reuse
191#define tw_bound_dev_if __tw_common.skc_bound_dev_if
192#define tw_node __tw_common.skc_node
193#define tw_bind_node __tw_common.skc_bind_node
194#define tw_refcnt __tw_common.skc_refcnt
195 volatile unsigned char tw_substate;
196 unsigned char tw_rcv_wscale;
197 __u16 tw_sport;
198
199
/* Start of the address/port group compared by TCP_IPV4_TW_MATCH. */
200 __u32 tw_daddr
201 __attribute__((aligned(TCP_ADDRCMP_ALIGN_BYTES)));
202 __u32 tw_rcv_saddr;
203 __u16 tw_dport;
204 __u16 tw_num;
205
206 int tw_hashent;
207 int tw_timeout;
208 __u32 tw_rcv_nxt;
209 __u32 tw_snd_nxt;
210 __u32 tw_rcv_wnd;
211 __u32 tw_ts_recent;
212 long tw_ts_recent_stamp;
213 unsigned long tw_ttd;
214 struct tcp_bind_bucket *tw_tb;
/* Linkage used by the tw_*dead_node helpers; pprev == NULL means "not on a death list". */
215 struct hlist_node tw_death_node;
216#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
217 struct in6_addr tw_v6_daddr;
218 struct in6_addr tw_v6_rcv_saddr;
219 int tw_v6_ipv6only;
220#endif
221};
222
223static __inline__ void tw_add_node(struct tcp_tw_bucket *tw,
224 struct hlist_head *list)
225{
226 hlist_add_head(&tw->tw_node, list);
227}
228
229static __inline__ void tw_add_bind_node(struct tcp_tw_bucket *tw,
230 struct hlist_head *list)
231{
232 hlist_add_head(&tw->tw_bind_node, list);
233}
234
235static inline int tw_dead_hashed(struct tcp_tw_bucket *tw)
236{
237 return tw->tw_death_node.pprev != NULL;
238}
239
240static __inline__ void tw_dead_node_init(struct tcp_tw_bucket *tw)
241{
242 tw->tw_death_node.pprev = NULL;
243}
244
245static __inline__ void __tw_del_dead_node(struct tcp_tw_bucket *tw)
246{
247 __hlist_del(&tw->tw_death_node);
248 tw_dead_node_init(tw);
249}
250
/*
 * Remove tw from its death list if it is on one.
 * Returns 1 when a removal happened, 0 when tw was not hashed.
 */
static __inline__ int tw_del_dead_node(struct tcp_tw_bucket *tw)
{
	if (!tw_dead_hashed(tw))
		return 0;

	__tw_del_dead_node(tw);
	return 1;
}
259
260#define tw_for_each(tw, node, head) \
261 hlist_for_each_entry(tw, node, head, tw_node)
262
263#define tw_for_each_inmate(tw, node, jail) \
264 hlist_for_each_entry(tw, node, jail, tw_death_node)
265
266#define tw_for_each_inmate_safe(tw, node, safe, jail) \
267 hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node)
268
269#define tcptw_sk(__sk) ((struct tcp_tw_bucket *)(__sk))
270
271static inline u32 tcp_v4_rcv_saddr(const struct sock *sk)
272{
273 return likely(sk->sk_state != TCP_TIME_WAIT) ?
274 inet_sk(sk)->rcv_saddr : tcptw_sk(sk)->tw_rcv_saddr;
275}
276
277#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
278static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk)
279{
280 return likely(sk->sk_state != TCP_TIME_WAIT) ?
281 &inet6_sk(sk)->rcv_saddr : &tcptw_sk(sk)->tw_v6_rcv_saddr;
282}
283
284static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk)
285{
286 return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL;
287}
288
289#define tcptw_sk_ipv6only(__sk) (tcptw_sk(__sk)->tw_v6_ipv6only)
290
291static inline int tcp_v6_ipv6only(const struct sock *sk)
292{
293 return likely(sk->sk_state != TCP_TIME_WAIT) ?
294 ipv6_only_sock(sk) : tcptw_sk_ipv6only(sk);
295}
296#else
297# define __tcp_v6_rcv_saddr(__sk) NULL
298# define tcp_v6_rcv_saddr(__sk) NULL
299# define tcptw_sk_ipv6only(__sk) 0
300# define tcp_v6_ipv6only(__sk) 0
301#endif
302
303extern kmem_cache_t *tcp_timewait_cachep;
304
305static inline void tcp_tw_put(struct tcp_tw_bucket *tw)
306{
307 if (atomic_dec_and_test(&tw->tw_refcnt)) {
308#ifdef INET_REFCNT_DEBUG
309 printk(KERN_DEBUG "tw_bucket %p released\n", tw);
310#endif
311 kmem_cache_free(tcp_timewait_cachep, tw);
312 }
313}
314
315extern atomic_t tcp_orphan_count;
316extern int tcp_tw_count;
317extern void tcp_time_wait(struct sock *sk, int state, int timeo);
318extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);
319
320
321
/*
 * TCP_COMBINED_PORTS packs two 16-bit ports into one __u32.  Which port
 * lands in the upper half depends on byte order; the match macros below
 * compare the result against a 32-bit load that starts at a socket's
 * dport field, so the packing presumably mirrors the in-memory order of
 * dport followed by the local port -- confirm against struct inet_sock.
 */
322#ifdef __BIG_ENDIAN
323#define TCP_COMBINED_PORTS(__sport, __dport) \
324 (((__u32)(__sport)<<16) | (__u32)(__dport))
325#else
326#define TCP_COMBINED_PORTS(__sport, __dport) \
327 (((__u32)(__dport)<<16) | (__u32)(__sport))
328#endif
329
/*
 * IPv4 socket matching.
 *
 * 64-bit builds: TCP_V4_ADDR_COOKIE declares a __u64 holding both
 * addresses (note the macro body ends in ';'), and the MATCH macros
 * compare it with a single 64-bit load starting at daddr / tw_daddr,
 * plus one 32-bit load starting at dport / tw_dport.  For the TIME_WAIT
 * variant the required adjacency is visible in struct tcp_tw_bucket;
 * for the inet_sk() variant it relies on rcv_saddr following daddr in
 * a struct not shown here.  __saddr/__daddr are unused in this branch.
 *
 * 32-bit builds: the cookie macro expands to nothing and the addresses
 * are compared one by one; __cookie is unused.
 *
 * In both branches the socket's daddr is compared with __saddr and its
 * rcv_saddr with __daddr, and a socket bound to a device only matches
 * when sk_bound_dev_if equals __dif.
 */
330#if (BITS_PER_LONG == 64)
331#ifdef __BIG_ENDIAN
332#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
333 __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr));
334#else
335#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
336 __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr));
337#endif
338#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
339 (((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \
340 ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \
341 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
342#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
343 (((*((__u64 *)&(tcptw_sk(__sk)->tw_daddr))) == (__cookie)) && \
344 ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \
345 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
346#else
347#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr)
348#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
349 ((inet_sk(__sk)->daddr == (__saddr)) && \
350 (inet_sk(__sk)->rcv_saddr == (__daddr)) && \
351 ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \
352 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
353#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
354 ((tcptw_sk(__sk)->tw_daddr == (__saddr)) && \
355 (tcptw_sk(__sk)->tw_rcv_saddr == (__daddr)) && \
356 ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \
357 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
358#endif
359
/*
 * IPv6 counterpart: the combined ports are checked first, then the
 * family, then both addresses with ipv6_addr_equal(), then the bound
 * device as above.
 */
360#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \
361 (((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \
362 ((__sk)->sk_family == AF_INET6) && \
363 ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \
364 ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \
365 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
366
367
368static __inline__ int tcp_lhashfn(unsigned short num)
369{
370 return num & (TCP_LHTABLE_SIZE - 1);
371}
372
373static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
374{
375 return tcp_lhashfn(inet_sk(sk)->num);
376}
377
378#define MAX_TCP_HEADER (128 + MAX_HEADER)
379
380
381
382
383
384#define MAX_TCP_WINDOW 32767U
385
386
387#define TCP_MIN_MSS 88U
388
389
390#define TCP_MIN_RCVMSS 536U
391
392
393#define TCP_FASTRETRANS_THRESH 3
394
395
396#define TCP_MAX_REORDERING 127
397
398
399#define TCP_MAX_QUICKACKS 16U
400
401
402#define TCP_URG_VALID 0x0100
403#define TCP_URG_NOTYET 0x0200
404#define TCP_URG_READ 0x0400
405
406#define TCP_RETR1 3
407
408
409
410
411
412
413#define TCP_RETR2 15
414
415
416
417
418
419
420#define TCP_SYN_RETRIES 5
421
422
423#define TCP_SYNACK_RETRIES 5
424
425
426
427#define TCP_ORPHAN_RETRIES 7
428
429
430
431
432#define TCP_TIMEWAIT_LEN (60*HZ)
433
434#define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN
435
436
437
438
439
440
441#define TCP_DELACK_MAX ((unsigned)(HZ/5))
442#if HZ >= 100
443#define TCP_DELACK_MIN ((unsigned)(HZ/25))
444#define TCP_ATO_MIN ((unsigned)(HZ/25))
445#else
446#define TCP_DELACK_MIN 4U
447#define TCP_ATO_MIN 4U
448#endif
449#define TCP_RTO_MAX ((unsigned)(120*HZ))
450#define TCP_RTO_MIN ((unsigned)(HZ/5))
451#define TCP_TIMEOUT_INIT ((unsigned)(3*HZ))
452
453#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U))
454
455
456
457#define TCP_KEEPALIVE_TIME (120*60*HZ)
458#define TCP_KEEPALIVE_PROBES 9
459#define TCP_KEEPALIVE_INTVL (75*HZ)
460
461#define MAX_TCP_KEEPIDLE 32767
462#define MAX_TCP_KEEPINTVL 32767
463#define MAX_TCP_KEEPCNT 127
464#define MAX_TCP_SYNCNT 127
465
466#define TCP_SYNQ_INTERVAL (HZ/5)
467#define TCP_SYNQ_HSIZE 512
468
469#define TCP_PAWS_24DAYS (60 * 60 * 24 * 24)
470#define TCP_PAWS_MSL 60
471
472
473
474
475
476#define TCP_PAWS_WINDOW 1
477
478
479
480
481#define TCP_TW_RECYCLE_SLOTS_LOG 5
482#define TCP_TW_RECYCLE_SLOTS (1<<TCP_TW_RECYCLE_SLOTS_LOG)
483
484
485
486
487
488#if HZ <= 16 || HZ > 4096
489# error Unsupported: HZ <= 16 or HZ > 4096
490#elif HZ <= 32
491# define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG)
492#elif HZ <= 64
493# define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG)
494#elif HZ <= 128
495# define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG)
496#elif HZ <= 256
497# define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG)
498#elif HZ <= 512
499# define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG)
500#elif HZ <= 1024
501# define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG)
502#elif HZ <= 2048
503# define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG)
504#else
505# define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG)
506#endif
507
508#define BICTCP_BETA_SCALE 1024
509
510
511#define BICTCP_MAX_INCREMENT 32
512
513
514
515
516#define BICTCP_FUNC_OF_MIN_INCR 11
517
518
519
520
521#define BICTCP_B 4
522
523
524
525
526
527
528
529
530#define TCPOPT_NOP 1
531#define TCPOPT_EOL 0
532#define TCPOPT_MSS 2
533#define TCPOPT_WINDOW 3
534#define TCPOPT_SACK_PERM 4
535#define TCPOPT_SACK 5
536#define TCPOPT_TIMESTAMP 8
537
538
539
540
541
542#define TCPOLEN_MSS 4
543#define TCPOLEN_WINDOW 3
544#define TCPOLEN_SACK_PERM 2
545#define TCPOLEN_TIMESTAMP 10
546
547
548#define TCPOLEN_TSTAMP_ALIGNED 12
549#define TCPOLEN_WSCALE_ALIGNED 4
550#define TCPOLEN_SACKPERM_ALIGNED 4
551#define TCPOLEN_SACK_BASE 2
552#define TCPOLEN_SACK_BASE_ALIGNED 4
553#define TCPOLEN_SACK_PERBLOCK 8
554
555#define TCP_TIME_RETRANS 1
556#define TCP_TIME_DACK 2
557#define TCP_TIME_PROBE0 3
558#define TCP_TIME_KEEPOPEN 4
559
560
561#define TCP_NAGLE_OFF 1
562#define TCP_NAGLE_CORK 2
563#define TCP_NAGLE_PUSH 4
564
565
566extern int sysctl_max_syn_backlog;
567extern int sysctl_tcp_timestamps;
568extern int sysctl_tcp_window_scaling;
569extern int sysctl_tcp_sack;
570extern int sysctl_tcp_fin_timeout;
571extern int sysctl_tcp_tw_recycle;
572extern int sysctl_tcp_keepalive_time;
573extern int sysctl_tcp_keepalive_probes;
574extern int sysctl_tcp_keepalive_intvl;
575extern int sysctl_tcp_syn_retries;
576extern int sysctl_tcp_synack_retries;
577extern int sysctl_tcp_retries1;
578extern int sysctl_tcp_retries2;
579extern int sysctl_tcp_orphan_retries;
580extern int sysctl_tcp_syncookies;
581extern int sysctl_tcp_retrans_collapse;
582extern int sysctl_tcp_stdurg;
583extern int sysctl_tcp_rfc1337;
584extern int sysctl_tcp_abort_on_overflow;
585extern int sysctl_tcp_max_orphans;
586extern int sysctl_tcp_max_tw_buckets;
587extern int sysctl_tcp_fack;
588extern int sysctl_tcp_reordering;
589extern int sysctl_tcp_ecn;
590extern int sysctl_tcp_dsack;
591extern int sysctl_tcp_mem[3];
592extern int sysctl_tcp_wmem[3];
593extern int sysctl_tcp_rmem[3];
594extern int sysctl_tcp_app_win;
595extern int sysctl_tcp_adv_win_scale;
596extern int sysctl_tcp_tw_reuse;
597extern int sysctl_tcp_frto;
598extern int sysctl_tcp_low_latency;
599extern int sysctl_tcp_westwood;
600extern int sysctl_tcp_vegas_cong_avoid;
601extern int sysctl_tcp_vegas_alpha;
602extern int sysctl_tcp_vegas_beta;
603extern int sysctl_tcp_vegas_gamma;
604extern int sysctl_tcp_nometrics_save;
605extern int sysctl_tcp_bic;
606extern int sysctl_tcp_bic_fast_convergence;
607extern int sysctl_tcp_bic_low_window;
608extern int sysctl_tcp_bic_beta;
609extern int sysctl_tcp_moderate_rcvbuf;
610extern int sysctl_tcp_tso_win_divisor;
611
612extern atomic_t tcp_memory_allocated;
613extern atomic_t tcp_sockets_allocated;
614extern int tcp_memory_pressure;
615
616struct open_request;
617
/*
 * Table of callbacks attached to an open_request through its 'class'
 * pointer.  Within this header only 'destructor' is invoked (by
 * tcp_openreq_free()); the other hooks are called from code not shown.
 */
618struct or_calltable {
619 int family;
620 int (*rtx_syn_ack) (struct sock *sk, struct open_request *req, struct dst_entry*);
621 void (*send_ack) (struct sk_buff *skb, struct open_request *req);
622 void (*destructor) (struct open_request *req);
623 void (*send_reset) (struct sk_buff *skb);
624};
625
/* IPv4 member of the open_request 'af' union: two addresses and an IP options pointer. */
626struct tcp_v4_open_req {
627 __u32 loc_addr;
628 __u32 rmt_addr;
629 struct ip_options *opt;
630};
631
632#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
/* IPv6 member of the open_request 'af' union; only built when IPv6 is configured. */
633struct tcp_v6_open_req {
634 struct in6_addr loc_addr;
635 struct in6_addr rmt_addr;
636 struct sk_buff *pktopts;
637 int iif;
638};
639#endif
640
641
/*
 * Record for a pending connection request.  Objects come from
 * tcp_openreq_cachep (tcp_openreq_alloc()) and are released with
 * tcp_openreq_free(), which first calls class->destructor.
 */
642struct open_request {
/* Singly linked through dl_next. */
643 struct open_request *dl_next;
644 __u32 rcv_isn;
645 __u32 snt_isn;
646 __u16 rmt_port;
647 __u16 mss;
648 __u8 retrans;
649 __u8 __pad;
/* 13 of the 16 bits below are used: two 4-bit scale values and five 1-bit flags. */
650 __u16 snd_wscale : 4,
651 rcv_wscale : 4,
652 tstamp_ok : 1,
653 sack_ok : 1,
654 wscale_ok : 1,
655 ecn_ok : 1,
656 acked : 1;
657
658 __u32 window_clamp;
659 __u32 rcv_wnd;
660 __u32 ts_recent;
661 unsigned long expires;
/* Callback table; must be valid before tcp_openreq_free() is called. */
662 struct or_calltable *class;
663 struct sock *sk;
/* Address-family specific part. */
664 union {
665 struct tcp_v4_open_req v4_req;
666#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
667 struct tcp_v6_open_req v6_req;
668#endif
669 } af;
670};
671
672
673extern kmem_cache_t *tcp_openreq_cachep;
674
675#define tcp_openreq_alloc() kmem_cache_alloc(tcp_openreq_cachep, SLAB_ATOMIC)
676#define tcp_openreq_fastfree(req) kmem_cache_free(tcp_openreq_cachep, req)
677
678static inline void tcp_openreq_free(struct open_request *req)
679{
680 req->class->destructor(req);
681 tcp_openreq_fastfree(req);
682}
683
684#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
685#define TCP_INET_FAMILY(fam) ((fam) == AF_INET)
686#else
687#define TCP_INET_FAMILY(fam) 1
688#endif
689
690
691
692
693
694
/*
 * Table of operations plus two size fields.  The tcp_v4_* prototypes
 * declared later in this header (tcp_v4_send_check, tcp_v4_rebuild_header,
 * tcp_v4_conn_request, tcp_v4_syn_recv_sock, tcp_v4_remember_stamp)
 * have signatures matching the corresponding slots, so this is
 * presumably the per-address-family dispatch table -- confirm where it
 * is instantiated.
 */
695struct tcp_func {
696 int (*queue_xmit) (struct sk_buff *skb,
697 int ipfragok);
698
699 void (*send_check) (struct sock *sk,
700 struct tcphdr *th,
701 int len,
702 struct sk_buff *skb);
703
704 int (*rebuild_header) (struct sock *sk);
705
706 int (*conn_request) (struct sock *sk,
707 struct sk_buff *skb);
708
709 struct sock * (*syn_recv_sock) (struct sock *sk,
710 struct sk_buff *skb,
711 struct open_request *req,
712 struct dst_entry *dst);
713
714 int (*remember_stamp) (struct sock *sk);
715
/* Data member, not a callback. */
716 __u16 net_header_len;
717
718 int (*setsockopt) (struct sock *sk,
719 int level,
720 int optname,
721 char __user *optval,
722 int optlen);
723
724 int (*getsockopt) (struct sock *sk,
725 int level,
726 int optname,
727 char __user *optval,
728 int __user *optlen);
729
730
731 void (*addr2sockaddr) (struct sock *sk,
732 struct sockaddr *);
733
/* Data member, not a callback. */
734 int sockaddr_len;
735};
736
737
738
739
740
741
/*
 * Wrap-safe sequence comparison: non-zero when seq1 precedes seq2.
 * The 32-bit difference is reinterpreted as signed, so the answer stays
 * correct across wraparound as long as the two values are less than
 * 2^31 apart.
 */
static inline int before(__u32 seq1, __u32 seq2)
{
	__s32 delta = (__s32)(seq1 - seq2);

	return delta < 0;
}
746
/*
 * Wrap-safe sequence comparison: non-zero when seq1 follows seq2.
 * Mirror image of before(): the signed 32-bit difference seq2 - seq1
 * must be negative.
 */
static inline int after(__u32 seq1, __u32 seq2)
{
	__s32 delta = (__s32)(seq2 - seq1);

	return delta < 0;
}
751
752
753
/*
 * Non-zero when seq2 <= seq1 <= seq3 in modulo-2^32 sequence space.
 * Both distances are measured forward from seq2 with unsigned
 * arithmetic, so the test also works when the window wraps.
 */
static inline int between(__u32 seq1, __u32 seq2, __u32 seq3)
{
	__u32 window = seq3 - seq2;
	__u32 offset = seq1 - seq2;

	return offset <= window;
}
758
759
760extern struct proto tcp_prot;
761
762DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
763#define TCP_INC_STATS(field) SNMP_INC_STATS(tcp_statistics, field)
764#define TCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(tcp_statistics, field)
765#define TCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(tcp_statistics, field)
766#define TCP_DEC_STATS(field) SNMP_DEC_STATS(tcp_statistics, field)
767#define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val)
768#define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val)
769
770extern void tcp_put_port(struct sock *sk);
771extern void tcp_inherit_port(struct sock *sk, struct sock *child);
772
773extern void tcp_v4_err(struct sk_buff *skb, u32);
774
775extern void tcp_shutdown (struct sock *sk, int how);
776
777extern int tcp_v4_rcv(struct sk_buff *skb);
778
779extern int tcp_v4_remember_stamp(struct sock *sk);
780
781extern int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw);
782
783extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
784 struct msghdr *msg, size_t size);
785extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
786
787extern int tcp_ioctl(struct sock *sk,
788 int cmd,
789 unsigned long arg);
790
791extern int tcp_rcv_state_process(struct sock *sk,
792 struct sk_buff *skb,
793 struct tcphdr *th,
794 unsigned len);
795
796extern int tcp_rcv_established(struct sock *sk,
797 struct sk_buff *skb,
798 struct tcphdr *th,
799 unsigned len);
800
801extern void tcp_rcv_space_adjust(struct sock *sk);
802
/*
 * Bit flags kept in tp->ack.pending: tcp_schedule_ack() sets
 * TCP_ACK_SCHED and tcp_reset_xmit_timer() sets TCP_ACK_TIMER for
 * TCP_TIME_DACK.  TCP_ACK_PUSHED is not used within this part of the
 * header.
 */
803enum tcp_ack_state_t
804{
805 TCP_ACK_SCHED = 1,
806 TCP_ACK_TIMER = 2,
807 TCP_ACK_PUSHED= 4
808};
809
810static inline void tcp_schedule_ack(struct tcp_sock *tp)
811{
812 tp->ack.pending |= TCP_ACK_SCHED;
813}
814
815static inline int tcp_ack_scheduled(struct tcp_sock *tp)
816{
817 return tp->ack.pending&TCP_ACK_SCHED;
818}
819
820static __inline__ void tcp_dec_quickack_mode(struct tcp_sock *tp)
821{
822 if (tp->ack.quick && --tp->ack.quick == 0) {
823
824 tp->ack.ato = TCP_ATO_MIN;
825 }
826}
827
828extern void tcp_enter_quickack_mode(struct tcp_sock *tp);
829
830static __inline__ void tcp_delack_init(struct tcp_sock *tp)
831{
832 memset(&tp->ack, 0, sizeof(tp->ack));
833}
834
835static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
836{
837 rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
838}
839
/*
 * Result codes returned by tcp_timewait_state_process().
 * NOTE(review): what the caller does for each value is decided outside
 * this header.
 */
840enum tcp_tw_status
841{
842 TCP_TW_SUCCESS = 0,
843 TCP_TW_RST = 1,
844 TCP_TW_ACK = 2,
845 TCP_TW_SYN = 3
846};
847
848
849extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw,
850 struct sk_buff *skb,
851 struct tcphdr *th,
852 unsigned len);
853
854extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
855 struct open_request *req,
856 struct open_request **prev);
857extern int tcp_child_process(struct sock *parent,
858 struct sock *child,
859 struct sk_buff *skb);
860extern void tcp_enter_frto(struct sock *sk);
861extern void tcp_enter_loss(struct sock *sk, int how);
862extern void tcp_clear_retrans(struct tcp_sock *tp);
863extern void tcp_update_metrics(struct sock *sk);
864
865extern void tcp_close(struct sock *sk,
866 long timeout);
867extern struct sock * tcp_accept(struct sock *sk, int flags, int *err);
868extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait);
869
870extern int tcp_getsockopt(struct sock *sk, int level,
871 int optname,
872 char __user *optval,
873 int __user *optlen);
874extern int tcp_setsockopt(struct sock *sk, int level,
875 int optname, char __user *optval,
876 int optlen);
877extern void tcp_set_keepalive(struct sock *sk, int val);
878extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
879 struct msghdr *msg,
880 size_t len, int nonblock,
881 int flags, int *addr_len);
882
883extern int tcp_listen_start(struct sock *sk);
884
885extern void tcp_parse_options(struct sk_buff *skb,
886 struct tcp_options_received *opt_rx,
887 int estab);
888
889
890
891
892
893extern int tcp_v4_rebuild_header(struct sock *sk);
894
895extern int tcp_v4_build_header(struct sock *sk,
896 struct sk_buff *skb);
897
898extern void tcp_v4_send_check(struct sock *sk,
899 struct tcphdr *th, int len,
900 struct sk_buff *skb);
901
902extern int tcp_v4_conn_request(struct sock *sk,
903 struct sk_buff *skb);
904
905extern struct sock * tcp_create_openreq_child(struct sock *sk,
906 struct open_request *req,
907 struct sk_buff *skb);
908
909extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk,
910 struct sk_buff *skb,
911 struct open_request *req,
912 struct dst_entry *dst);
913
914extern int tcp_v4_do_rcv(struct sock *sk,
915 struct sk_buff *skb);
916
917extern int tcp_v4_connect(struct sock *sk,
918 struct sockaddr *uaddr,
919 int addr_len);
920
921extern int tcp_connect(struct sock *sk);
922
923extern struct sk_buff * tcp_make_synack(struct sock *sk,
924 struct dst_entry *dst,
925 struct open_request *req);
926
927extern int tcp_disconnect(struct sock *sk, int flags);
928
929extern void tcp_unhash(struct sock *sk);
930
931extern int tcp_v4_hash_connecting(struct sock *sk);
932
933
934
935extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
936 struct ip_options *opt);
937extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
938 __u16 *mss);
939
940
941
942extern int tcp_write_xmit(struct sock *, int nonagle);
943extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
944extern void tcp_xmit_retransmit_queue(struct sock *);
945extern void tcp_simple_retransmit(struct sock *);
946extern int tcp_trim_head(struct sock *, struct sk_buff *, u32);
947
948extern void tcp_send_probe0(struct sock *);
949extern void tcp_send_partial(struct sock *);
950extern int tcp_write_wakeup(struct sock *);
951extern void tcp_send_fin(struct sock *sk);
952extern void tcp_send_active_reset(struct sock *sk, int priority);
953extern int tcp_send_synack(struct sock *);
954extern void tcp_push_one(struct sock *, unsigned mss_now);
955extern void tcp_send_ack(struct sock *sk);
956extern void tcp_send_delayed_ack(struct sock *sk);
957
958
959extern void tcp_init_xmit_timers(struct sock *);
960extern void tcp_clear_xmit_timers(struct sock *);
961
962extern void tcp_delete_keepalive_timer(struct sock *);
963extern void tcp_reset_keepalive_timer(struct sock *, unsigned long);
964extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
965extern unsigned int tcp_current_mss(struct sock *sk, int large);
966
967#ifdef TCP_DEBUG
968extern const char tcp_timer_bug_msg[];
969#endif
970
971
972extern void tcp_get_info(struct sock *, struct tcp_info *);
973
974
975typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
976 unsigned int, size_t);
977extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
978 sk_read_actor_t recv_actor);
979
980static inline void tcp_clear_xmit_timer(struct sock *sk, int what)
981{
982 struct tcp_sock *tp = tcp_sk(sk);
983
984 switch (what) {
985 case TCP_TIME_RETRANS:
986 case TCP_TIME_PROBE0:
987 tp->pending = 0;
988
989#ifdef TCP_CLEAR_TIMERS
990 sk_stop_timer(sk, &tp->retransmit_timer);
991#endif
992 break;
993 case TCP_TIME_DACK:
994 tp->ack.blocked = 0;
995 tp->ack.pending = 0;
996
997#ifdef TCP_CLEAR_TIMERS
998 sk_stop_timer(sk, &tp->delack_timer);
999#endif
1000 break;
1001 default:
1002#ifdef TCP_DEBUG
1003 printk(tcp_timer_bug_msg);
1004#endif
1005 return;
1006 };
1007
1008}
1009
1010
1011
1012
1013static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when)
1014{
1015 struct tcp_sock *tp = tcp_sk(sk);
1016
1017 if (when > TCP_RTO_MAX) {
1018#ifdef TCP_DEBUG
1019 printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr());
1020#endif
1021 when = TCP_RTO_MAX;
1022 }
1023
1024 switch (what) {
1025 case TCP_TIME_RETRANS:
1026 case TCP_TIME_PROBE0:
1027 tp->pending = what;
1028 tp->timeout = jiffies+when;
1029 sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout);
1030 break;
1031
1032 case TCP_TIME_DACK:
1033 tp->ack.pending |= TCP_ACK_TIMER;
1034 tp->ack.timeout = jiffies+when;
1035 sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout);
1036 break;
1037
1038 default:
1039#ifdef TCP_DEBUG
1040 printk(tcp_timer_bug_msg);
1041#endif
1042 };
1043}
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053static inline void tcp_initialize_rcv_mss(struct sock *sk)
1054{
1055 struct tcp_sock *tp = tcp_sk(sk);
1056 unsigned int hint = min(tp->advmss, tp->mss_cache_std);
1057
1058 hint = min(hint, tp->rcv_wnd/2);
1059 hint = min(hint, TCP_MIN_RCVMSS);
1060 hint = max(hint, TCP_MIN_MSS);
1061
1062 tp->ack.rcv_mss = hint;
1063}
1064
1065static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
1066{
1067 tp->pred_flags = htonl((tp->tcp_header_len << 26) |
1068 ntohl(TCP_FLAG_ACK) |
1069 snd_wnd);
1070}
1071
1072static __inline__ void tcp_fast_path_on(struct tcp_sock *tp)
1073{
1074 __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
1075}
1076
1077static inline void tcp_fast_path_check(struct sock *sk, struct tcp_sock *tp)
1078{
1079 if (skb_queue_len(&tp->out_of_order_queue) == 0 &&
1080 tp->rcv_wnd &&
1081 atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
1082 !tp->urg_data)
1083 tcp_fast_path_on(tp);
1084}
1085
1086
1087
1088
1089
1090static __inline__ u32 tcp_receive_window(const struct tcp_sock *tp)
1091{
1092 s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;
1093
1094 if (win < 0)
1095 win = 0;
1096 return (u32) win;
1097}
1098
1099
1100
1101
1102
1103extern u32 __tcp_select_window(struct sock *sk);
1104
1105
1106
1107
1108
1109
1110
1111#define tcp_time_stamp ((__u32)(jiffies))
1112
1113
1114
1115
1116
1117
1118
1119
/*
 * TCP's per-packet control data.  TCP_SKB_CB() overlays this struct on
 * skb->cb[], so its size must fit that array (size not visible here).
 * The #defines interleaved below name the bit values for the 'flags'
 * and 'sacked' bytes.
 */
1120struct tcp_skb_cb {
/* IP-layer parameter block(s) come first. */
1121 union {
1122 struct inet_skb_parm h4;
1123#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
1124 struct inet6_skb_parm h6;
1125#endif
1126 } header;
1127 __u32 seq;
/* tcp_minshall_update() copies this into tp->snd_sml for short segments. */
1128 __u32 end_seq;
1129 __u32 when;
1130 __u8 flags;
1131
1132
1133
1134
/* Bit values for 'flags'. */
1135#define TCPCB_FLAG_FIN 0x01
1136#define TCPCB_FLAG_SYN 0x02
1137#define TCPCB_FLAG_RST 0x04
1138#define TCPCB_FLAG_PSH 0x08
1139#define TCPCB_FLAG_ACK 0x10
1140#define TCPCB_FLAG_URG 0x20
1141#define TCPCB_FLAG_ECE 0x40
1142#define TCPCB_FLAG_CWR 0x80
1143
1144 __u8 sacked;
/* Bit values for 'sacked'; TCPCB_TAGBITS is the OR of the three low bits. */
1145#define TCPCB_SACKED_ACKED 0x01
1146#define TCPCB_SACKED_RETRANS 0x02
1147#define TCPCB_LOST 0x04
1148#define TCPCB_TAGBITS 0x07
1149
1150#define TCPCB_EVER_RETRANS 0x80
1151#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)
1152
1153#define TCPCB_URG 0x20
1154
1155#define TCPCB_AT_TAIL (TCPCB_URG)
1156
1157 __u16 urg_ptr;
1158 __u32 ack_seq;
1159};
1160
1161#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
1162
1163#include <net/tcp_ecn.h>
1164
1165
1166
1167
1168static inline int tcp_skb_pcount(const struct sk_buff *skb)
1169{
1170 return skb_shinfo(skb)->tso_segs;
1171}
1172
1173
1174static inline int tcp_skb_mss(const struct sk_buff *skb)
1175{
1176 return skb_shinfo(skb)->tso_size;
1177}
1178
1179static inline void tcp_dec_pcount_approx(__u32 *count,
1180 const struct sk_buff *skb)
1181{
1182 if (*count) {
1183 *count -= tcp_skb_pcount(skb);
1184 if ((int)*count < 0)
1185 *count = 0;
1186 }
1187}
1188
1189static inline void tcp_packets_out_inc(struct sock *sk,
1190 struct tcp_sock *tp,
1191 const struct sk_buff *skb)
1192{
1193 int orig = tp->packets_out;
1194
1195 tp->packets_out += tcp_skb_pcount(skb);
1196 if (!orig)
1197 tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
1198}
1199
1200static inline void tcp_packets_out_dec(struct tcp_sock *tp,
1201 const struct sk_buff *skb)
1202{
1203 tp->packets_out -= tcp_skb_pcount(skb);
1204}
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
1221{
1222 return (tp->packets_out - tp->left_out + tp->retrans_out);
1223}
1224
1225
1226
1227
1228#define tcp_is_vegas(__tp) ((__tp)->adv_cong == TCP_VEGAS)
1229#define tcp_is_westwood(__tp) ((__tp)->adv_cong == TCP_WESTWOOD)
1230#define tcp_is_bic(__tp) ((__tp)->adv_cong == TCP_BIC)
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242static inline __u32 tcp_recalc_ssthresh(struct tcp_sock *tp)
1243{
1244 if (tcp_is_bic(tp)) {
1245 if (sysctl_tcp_bic_fast_convergence &&
1246 tp->snd_cwnd < tp->bictcp.last_max_cwnd)
1247 tp->bictcp.last_max_cwnd = (tp->snd_cwnd *
1248 (BICTCP_BETA_SCALE
1249 + sysctl_tcp_bic_beta))
1250 / (2 * BICTCP_BETA_SCALE);
1251 else
1252 tp->bictcp.last_max_cwnd = tp->snd_cwnd;
1253
1254 if (tp->snd_cwnd > sysctl_tcp_bic_low_window)
1255 return max((tp->snd_cwnd * sysctl_tcp_bic_beta)
1256 / BICTCP_BETA_SCALE, 2U);
1257 }
1258
1259 return max(tp->snd_cwnd >> 1U, 2U);
1260}
1261
1262
1263#define tcp_vegas_disable(__tp) ((__tp)->vegas.doing_vegas_now = 0)
1264
1265static inline void tcp_vegas_enable(struct tcp_sock *tp)
1266{
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285 tp->vegas.doing_vegas_now = 1;
1286
1287
1288 tp->vegas.beg_snd_nxt = tp->snd_nxt;
1289
1290 tp->vegas.cntRTT = 0;
1291 tp->vegas.minRTT = 0x7fffffff;
1292}
1293
1294
1295#define tcp_vegas_enabled(__tp) ((__tp)->vegas.doing_vegas_now)
1296
1297extern void tcp_ca_init(struct tcp_sock *tp);
1298
1299static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state)
1300{
1301 if (tcp_is_vegas(tp)) {
1302 if (ca_state == TCP_CA_Open)
1303 tcp_vegas_enable(tp);
1304 else
1305 tcp_vegas_disable(tp);
1306 }
1307 tp->ca_state = ca_state;
1308}
1309
1310
1311
1312
1313
1314static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp)
1315{
1316 if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery))
1317 return tp->snd_ssthresh;
1318 else
1319 return max(tp->snd_ssthresh,
1320 ((tp->snd_cwnd >> 1) +
1321 (tp->snd_cwnd >> 2)));
1322}
1323
1324static inline void tcp_sync_left_out(struct tcp_sock *tp)
1325{
1326 if (tp->rx_opt.sack_ok &&
1327 (tp->sacked_out >= tp->packets_out - tp->lost_out))
1328 tp->sacked_out = tp->packets_out - tp->lost_out;
1329 tp->left_out = tp->sacked_out + tp->lost_out;
1330}
1331
1332extern void tcp_cwnd_application_limited(struct sock *sk);
1333
1334
1335
1336static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
1337{
1338 __u32 packets_out = tp->packets_out;
1339
1340 if (packets_out >= tp->snd_cwnd) {
1341
1342 tp->snd_cwnd_used = 0;
1343 tp->snd_cwnd_stamp = tcp_time_stamp;
1344 } else {
1345
1346 if (tp->packets_out > tp->snd_cwnd_used)
1347 tp->snd_cwnd_used = tp->packets_out;
1348
1349 if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
1350 tcp_cwnd_application_limited(sk);
1351 }
1352}
1353
1354
1355static inline void __tcp_enter_cwr(struct tcp_sock *tp)
1356{
1357 tp->undo_marker = 0;
1358 tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
1359 tp->snd_cwnd = min(tp->snd_cwnd,
1360 tcp_packets_in_flight(tp) + 1U);
1361 tp->snd_cwnd_cnt = 0;
1362 tp->high_seq = tp->snd_nxt;
1363 tp->snd_cwnd_stamp = tcp_time_stamp;
1364 TCP_ECN_queue_cwr(tp);
1365}
1366
1367static inline void tcp_enter_cwr(struct tcp_sock *tp)
1368{
1369 tp->prior_ssthresh = 0;
1370 if (tp->ca_state < TCP_CA_CWR) {
1371 __tcp_enter_cwr(tp);
1372 tcp_set_ca_state(tp, TCP_CA_CWR);
1373 }
1374}
1375
/*
 * Defined out of line.  NOTE(review): presumably returns the initial
 * congestion window for a connection using route 'dst' -- confirm
 * against the definition.
 */
extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
1377
1378
1379
1380
1381static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp)
1382{
1383 return 3;
1384}
1385
1386static __inline__ int tcp_minshall_check(const struct tcp_sock *tp)
1387{
1388 return after(tp->snd_sml,tp->snd_una) &&
1389 !after(tp->snd_sml, tp->snd_nxt);
1390}
1391
1392static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss,
1393 const struct sk_buff *skb)
1394{
1395 if (skb->len < mss)
1396 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
1397}
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407static __inline__ int
1408tcp_nagle_check(const struct tcp_sock *tp, const struct sk_buff *skb,
1409 unsigned mss_now, int nonagle)
1410{
1411 return (skb->len < mss_now &&
1412 !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
1413 ((nonagle&TCP_NAGLE_CORK) ||
1414 (!nonagle &&
1415 tp->packets_out &&
1416 tcp_minshall_check(tp))));
1417}
1418
/*
 * Defined out of line.  tcp_snd_test() calls it with the standard MSS
 * when an skb's packet count is still zero and then re-reads the count.
 */
extern void tcp_set_skb_tso_segs(struct sk_buff *, unsigned int);
1420
1421
1422
1423
1424static __inline__ int tcp_snd_test(const struct tcp_sock *tp,
1425 struct sk_buff *skb,
1426 unsigned cur_mss, int nonagle)
1427{
1428 int pkts = tcp_skb_pcount(skb);
1429
1430 if (!pkts) {
1431 tcp_set_skb_tso_segs(skb, tp->mss_cache_std);
1432 pkts = tcp_skb_pcount(skb);
1433 }
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459 return (((nonagle&TCP_NAGLE_PUSH) || tp->urg_mode
1460 || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) &&
1461 (((tcp_packets_in_flight(tp) + (pkts-1)) < tp->snd_cwnd) ||
1462 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) &&
1463 !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd));
1464}
1465
1466static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp)
1467{
1468 if (!tp->packets_out && !tp->pending)
1469 tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto);
1470}
1471
1472static __inline__ int tcp_skb_is_last(const struct sock *sk,
1473 const struct sk_buff *skb)
1474{
1475 return skb->next == (struct sk_buff *)&sk->sk_write_queue;
1476}
1477
1478
1479
1480
1481
1482static __inline__ void __tcp_push_pending_frames(struct sock *sk,
1483 struct tcp_sock *tp,
1484 unsigned cur_mss,
1485 int nonagle)
1486{
1487 struct sk_buff *skb = sk->sk_send_head;
1488
1489 if (skb) {
1490 if (!tcp_skb_is_last(sk, skb))
1491 nonagle = TCP_NAGLE_PUSH;
1492 if (!tcp_snd_test(tp, skb, cur_mss, nonagle) ||
1493 tcp_write_xmit(sk, nonagle))
1494 tcp_check_probe_timer(sk, tp);
1495 }
1496 tcp_cwnd_validate(sk, tp);
1497}
1498
1499static __inline__ void tcp_push_pending_frames(struct sock *sk,
1500 struct tcp_sock *tp)
1501{
1502 __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle);
1503}
1504
1505static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
1506{
1507 struct sk_buff *skb = sk->sk_send_head;
1508
1509 return (skb &&
1510 tcp_snd_test(tp, skb, tcp_current_mss(sk, 1),
1511 tcp_skb_is_last(sk, skb) ? TCP_NAGLE_PUSH : tp->nonagle));
1512}
1513
1514static __inline__ void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq)
1515{
1516 tp->snd_wl1 = seq;
1517}
1518
1519static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
1520{
1521 tp->snd_wl1 = seq;
1522}
1523
/*
 * Defined out of line.  tcp_done() calls it for sockets that already
 * carry the SOCK_DEAD flag.
 */
extern void tcp_destroy_sock(struct sock *sk);
1525
1526
1527
1528
1529
1530static __inline__ u16 tcp_v4_check(struct tcphdr *th, int len,
1531 unsigned long saddr, unsigned long daddr,
1532 unsigned long base)
1533{
1534 return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
1535}
1536
1537static __inline__ int __tcp_checksum_complete(struct sk_buff *skb)
1538{
1539 return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
1540}
1541
1542static __inline__ int tcp_checksum_complete(struct sk_buff *skb)
1543{
1544 return skb->ip_summed != CHECKSUM_UNNECESSARY &&
1545 __tcp_checksum_complete(skb);
1546}
1547
1548
1549
1550static __inline__ void tcp_prequeue_init(struct tcp_sock *tp)
1551{
1552 tp->ucopy.task = NULL;
1553 tp->ucopy.len = 0;
1554 tp->ucopy.memory = 0;
1555 skb_queue_head_init(&tp->ucopy.prequeue);
1556}
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1567{
1568 struct tcp_sock *tp = tcp_sk(sk);
1569
1570 if (!sysctl_tcp_low_latency && tp->ucopy.task) {
1571 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1572 tp->ucopy.memory += skb->truesize;
1573 if (tp->ucopy.memory > sk->sk_rcvbuf) {
1574 struct sk_buff *skb1;
1575
1576 BUG_ON(sock_owned_by_user(sk));
1577
1578 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1579 sk->sk_backlog_rcv(sk, skb1);
1580 NET_INC_STATS_BH(LINUX_MIB_TCPPREQUEUEDROPPED);
1581 }
1582
1583 tp->ucopy.memory = 0;
1584 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1585 wake_up_interruptible(sk->sk_sleep);
1586 if (!tcp_ack_scheduled(tp))
1587 tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4);
1588 }
1589 return 1;
1590 }
1591 return 0;
1592}
1593
1594
/*
 * State tracing is compiled out: STATE_TRACE is explicitly undefined
 * here, so neither the name table below nor the SOCK_DEBUG() call in
 * tcp_set_state() is built.
 */
#undef STATE_TRACE

#ifdef STATE_TRACE
/* Printable names, indexed by the numeric socket state. */
static const char *statename[]={
 "Unused","Established","Syn Sent","Syn Recv",
 "Fin Wait 1","Fin Wait 2","Time Wait", "Close",
 "Close Wait","Last ACK","Listen","Closing"
};
#endif
1604
1605static __inline__ void tcp_set_state(struct sock *sk, int state)
1606{
1607 int oldstate = sk->sk_state;
1608
1609 switch (state) {
1610 case TCP_ESTABLISHED:
1611 if (oldstate != TCP_ESTABLISHED)
1612 TCP_INC_STATS(TCP_MIB_CURRESTAB);
1613 break;
1614
1615 case TCP_CLOSE:
1616 if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
1617 TCP_INC_STATS(TCP_MIB_ESTABRESETS);
1618
1619 sk->sk_prot->unhash(sk);
1620 if (tcp_sk(sk)->bind_hash &&
1621 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
1622 tcp_put_port(sk);
1623
1624 default:
1625 if (oldstate==TCP_ESTABLISHED)
1626 TCP_DEC_STATS(TCP_MIB_CURRESTAB);
1627 }
1628
1629
1630
1631
1632 sk->sk_state = state;
1633
1634#ifdef STATE_TRACE
1635 SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n",sk, statename[oldstate],statename[state]);
1636#endif
1637}
1638
1639static __inline__ void tcp_done(struct sock *sk)
1640{
1641 tcp_set_state(sk, TCP_CLOSE);
1642 tcp_clear_xmit_timers(sk);
1643
1644 sk->sk_shutdown = SHUTDOWN_MASK;
1645
1646 if (!sock_flag(sk, SOCK_DEAD))
1647 sk->sk_state_change(sk);
1648 else
1649 tcp_destroy_sock(sk);
1650}
1651
1652static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt)
1653{
1654 rx_opt->dsack = 0;
1655 rx_opt->eff_sacks = 0;
1656 rx_opt->num_sacks = 0;
1657}
1658
1659static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp, __u32 tstamp)
1660{
1661 if (tp->rx_opt.tstamp_ok) {
1662 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
1663 (TCPOPT_NOP << 16) |
1664 (TCPOPT_TIMESTAMP << 8) |
1665 TCPOLEN_TIMESTAMP);
1666 *ptr++ = htonl(tstamp);
1667 *ptr++ = htonl(tp->rx_opt.ts_recent);
1668 }
1669 if (tp->rx_opt.eff_sacks) {
1670 struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks;
1671 int this_sack;
1672
1673 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
1674 (TCPOPT_NOP << 16) |
1675 (TCPOPT_SACK << 8) |
1676 (TCPOLEN_SACK_BASE +
1677 (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)));
1678 for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
1679 *ptr++ = htonl(sp[this_sack].start_seq);
1680 *ptr++ = htonl(sp[this_sack].end_seq);
1681 }
1682 if (tp->rx_opt.dsack) {
1683 tp->rx_opt.dsack = 0;
1684 tp->rx_opt.eff_sacks--;
1685 }
1686 }
1687}
1688
1689
1690
1691
1692
1693
1694static inline void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
1695 int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent)
1696{
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710 *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
1711 if (ts) {
1712 if(sack)
1713 *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) |
1714 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1715 else
1716 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1717 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1718 *ptr++ = htonl(tstamp);
1719 *ptr++ = htonl(ts_recent);
1720 } else if(sack)
1721 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1722 (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
1723 if (offer_wscale)
1724 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
1725}
1726
1727
/*
 * Defined out of line.  NOTE(review): the pointer parameters (rcv_wnd,
 * window_clamp, rcv_wscale) look like outputs filled in from the
 * available space and MSS -- confirm against the definition.
 */
extern void tcp_select_initial_window(int __space, __u32 mss,
 __u32 *rcv_wnd, __u32 *window_clamp,
 int wscale_ok, __u8 *rcv_wscale);
1731
1732static inline int tcp_win_from_space(int space)
1733{
1734 return sysctl_tcp_adv_win_scale<=0 ?
1735 (space>>(-sysctl_tcp_adv_win_scale)) :
1736 space - (space>>sysctl_tcp_adv_win_scale);
1737}
1738
1739
1740static inline int tcp_space(const struct sock *sk)
1741{
1742 return tcp_win_from_space(sk->sk_rcvbuf -
1743 atomic_read(&sk->sk_rmem_alloc));
1744}
1745
1746static inline int tcp_full_space(const struct sock *sk)
1747{
1748 return tcp_win_from_space(sk->sk_rcvbuf);
1749}
1750
1751static inline void tcp_acceptq_queue(struct sock *sk, struct open_request *req,
1752 struct sock *child)
1753{
1754 struct tcp_sock *tp = tcp_sk(sk);
1755
1756 req->sk = child;
1757 sk_acceptq_added(sk);
1758
1759 if (!tp->accept_queue_tail) {
1760 tp->accept_queue = req;
1761 } else {
1762 tp->accept_queue_tail->dl_next = req;
1763 }
1764 tp->accept_queue_tail = req;
1765 req->dl_next = NULL;
1766}
1767
/*
 * Per-listener bookkeeping for pending open requests (the SYN queue).
 * Reached through tcp_sk(sk)->listen_opt by the tcp_synq_*() helpers.
 */
struct tcp_listen_opt
{
 u8 max_qlen_log; /* shift used by tcp_synq_is_full(): full once qlen >> max_qlen_log is non-zero */
 int qlen; /* number of queued requests; maintained by tcp_synq_added()/tcp_synq_removed() */
 int qlen_young; /* incremented on add; decremented on removal only when req->retrans == 0 */
 int clock_hand; /* NOTE(review): not used in this header -- presumably a scan position in syn_table; confirm */
 u32 hash_rnd; /* NOTE(review): not used in this header -- presumably a random seed for hashing into syn_table; confirm */
 struct open_request *syn_table[TCP_SYNQ_HSIZE]; /* table of open_request pointers, TCP_SYNQ_HSIZE slots */
};
1777
1778static inline void
1779tcp_synq_removed(struct sock *sk, struct open_request *req)
1780{
1781 struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt;
1782
1783 if (--lopt->qlen == 0)
1784 tcp_delete_keepalive_timer(sk);
1785 if (req->retrans == 0)
1786 lopt->qlen_young--;
1787}
1788
1789static inline void tcp_synq_added(struct sock *sk)
1790{
1791 struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt;
1792
1793 if (lopt->qlen++ == 0)
1794 tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT);
1795 lopt->qlen_young++;
1796}
1797
1798static inline int tcp_synq_len(struct sock *sk)
1799{
1800 return tcp_sk(sk)->listen_opt->qlen;
1801}
1802
1803static inline int tcp_synq_young(struct sock *sk)
1804{
1805 return tcp_sk(sk)->listen_opt->qlen_young;
1806}
1807
1808static inline int tcp_synq_is_full(struct sock *sk)
1809{
1810 return tcp_synq_len(sk) >> tcp_sk(sk)->listen_opt->max_qlen_log;
1811}
1812
/*
 * Unlink 'req' from a singly linked request chain by making the
 * predecessor's link (*prev) point at req's successor.  The update is
 * done with tp->syn_wait_lock write-held.  The request itself is not
 * freed and no queue counters are touched here.
 */
static inline void tcp_synq_unlink(struct tcp_sock *tp, struct open_request *req,
 struct open_request **prev)
{
 write_lock(&tp->syn_wait_lock);
 *prev = req->dl_next;
 write_unlock(&tp->syn_wait_lock);
}
1820
/*
 * Remove a request from the SYN queue for good: unlink it from its
 * chain, update the queue counters, then free it.  'req' must not be
 * used after this returns.
 */
static inline void tcp_synq_drop(struct sock *sk, struct open_request *req,
 struct open_request **prev)
{
 tcp_synq_unlink(tcp_sk(sk), req, prev);
 tcp_synq_removed(sk, req);
 tcp_openreq_free(req);
}
1828
1829static __inline__ void tcp_openreq_init(struct open_request *req,
1830 struct tcp_options_received *rx_opt,
1831 struct sk_buff *skb)
1832{
1833 req->rcv_wnd = 0;
1834 req->rcv_isn = TCP_SKB_CB(skb)->seq;
1835 req->mss = rx_opt->mss_clamp;
1836 req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
1837 req->tstamp_ok = rx_opt->tstamp_ok;
1838 req->sack_ok = rx_opt->sack_ok;
1839 req->snd_wscale = rx_opt->snd_wscale;
1840 req->wscale_ok = rx_opt->wscale_ok;
1841 req->acked = 0;
1842 req->ecn_ok = 0;
1843 req->rmt_port = skb->h.th->source;
1844}
1845
/* Defined out of line; takes no arguments and returns nothing. */
extern void tcp_enter_memory_pressure(void);

/*
 * Defined out of line.  NOTE(review): presumably the writer-side
 * counterpart of tcp_listen_lock()/tcp_listen_unlock() below -- confirm
 * against the definition.
 */
extern void tcp_listen_wlock(void);
1849
1850
1851
1852
1853
1854
/*
 * Register the caller as a user of the listening hash.  The user count
 * is incremented while tcp_lhash_lock is read-held and the lock is
 * released again before returning, so the caller leaves holding only
 * the count.  Undone by tcp_listen_unlock().
 */
static inline void tcp_listen_lock(void)
{
 /* The lock is held only for the duration of the increment. */
 read_lock(&tcp_lhash_lock);
 atomic_inc(&tcp_lhash_users);
 read_unlock(&tcp_lhash_lock);
}
1862
1863static inline void tcp_listen_unlock(void)
1864{
1865 if (atomic_dec_and_test(&tcp_lhash_users))
1866 wake_up(&tcp_lhash_wait);
1867}
1868
1869static inline int keepalive_intvl_when(const struct tcp_sock *tp)
1870{
1871 return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
1872}
1873
1874static inline int keepalive_time_when(const struct tcp_sock *tp)
1875{
1876 return tp->keepalive_time ? : sysctl_tcp_keepalive_time;
1877}
1878
1879static inline int tcp_fin_time(const struct tcp_sock *tp)
1880{
1881 int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout;
1882
1883 if (fin_timeout < (tp->rto<<2) - (tp->rto>>1))
1884 fin_timeout = (tp->rto<<2) - (tp->rto>>1);
1885
1886 return fin_timeout;
1887}
1888
1889static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int rst)
1890{
1891 if ((s32)(rx_opt->rcv_tsval - rx_opt->ts_recent) >= 0)
1892 return 0;
1893 if (xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)
1894 return 0;
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908 if (rst && xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
1909 return 0;
1910 return 1;
1911}
1912
1913static inline void tcp_v4_setup_caps(struct sock *sk, struct dst_entry *dst)
1914{
1915 sk->sk_route_caps = dst->dev->features;
1916 if (sk->sk_route_caps & NETIF_F_TSO) {
1917 if (sk->sk_no_largesend || dst->header_len)
1918 sk->sk_route_caps &= ~NETIF_F_TSO;
1919 }
1920}
1921
/* Expands to an empty statement; the argument is not evaluated. */
#define TCP_CHECK_TIMER(sk) do { } while (0)
1923
1924static inline int tcp_use_frto(const struct sock *sk)
1925{
1926 const struct tcp_sock *tp = tcp_sk(sk);
1927
1928
1929
1930
1931
1932 return (sysctl_tcp_frto && sk->sk_send_head &&
1933 !after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
1934 tp->snd_una + tp->snd_wnd));
1935}
1936
/*
 * Seed the fixed TCP MIB entries: RTO algorithm (1), RTO minimum and
 * maximum (TCP_RTO_MIN/TCP_RTO_MAX scaled by 1000/HZ) and the maximum
 * connection count (-1).
 */
static inline void tcp_mib_init(void)
{
 /* NOTE(review): the values are added, not assigned, so this presumably expects zeroed counters and a single call -- confirm. */
 TCP_ADD_STATS_USER(TCP_MIB_RTOALGORITHM, 1);
 TCP_ADD_STATS_USER(TCP_MIB_RTOMIN, TCP_RTO_MIN*1000/HZ);
 TCP_ADD_STATS_USER(TCP_MIB_RTOMAX, TCP_RTO_MAX*1000/HZ);
 TCP_ADD_STATS_USER(TCP_MIB_MAXCONN, -1);
}
1945
1946
/*
 * Values stored in tcp_iter_state.state.  NOTE(review): presumably the
 * kind of entry a seq_file walk is currently positioned on -- confirm
 * against the iterator implementation.
 */
enum tcp_seq_states {
 TCP_SEQ_STATE_LISTENING,
 TCP_SEQ_STATE_OPENREQ,
 TCP_SEQ_STATE_ESTABLISHED,
 TCP_SEQ_STATE_TIME_WAIT,
};
1953
/*
 * Description of one seq_file interface, passed to tcp_proc_register()
 * and tcp_proc_unregister().
 */
struct tcp_seq_afinfo {
 struct module *owner; /* module providing this interface */
 char *name; /* NOTE(review): presumably the /proc entry name -- confirm */
 sa_family_t family; /* address family this interface covers */
 int (*seq_show) (struct seq_file *m, void *v); /* callback that prints one entry */
 struct file_operations *seq_fops; /* file operations associated with the entry */
};
1961
/*
 * Iteration state for a TCP seq_file walk.  NOTE(review): the field
 * notes below are inferred from names and types only; confirm against
 * the iterator implementation.
 */
struct tcp_iter_state {
 sa_family_t family; /* address family being listed */
 enum tcp_seq_states state; /* one of enum tcp_seq_states */
 struct sock *syn_wait_sk; /* presumably the listener whose request queue is being walked -- confirm */
 int bucket, sbucket, num, uid; /* position/bookkeeping values -- meaning not visible here */
 struct seq_operations seq_ops; /* embedded seq_file operations */
};
1969
/*
 * Defined out of line.  Register / unregister the seq_file interface
 * described by 'afinfo'.  NOTE(review): the int result of
 * tcp_proc_register() is presumably 0 on success -- confirm.
 */
extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo);
extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo);
1972
1973
1974
/*
 * Westwood RTT constants, expressed as multiples of HZ.
 * NOTE(review): if HZ is ticks per second these are 20 s and 50 ms;
 * the names suggest an initial RTT and a lower bound -- confirm against
 * the Westwood code that uses them.
 */
#define TCP_WESTWOOD_INIT_RTT (20*HZ)
#define TCP_WESTWOOD_RTT_MIN (HZ/20)
1977
1978static inline void tcp_westwood_update_rtt(struct tcp_sock *tp, __u32 rtt_seq)
1979{
1980 if (tcp_is_westwood(tp))
1981 tp->westwood.rtt = rtt_seq;
1982}
1983
1984static inline __u32 __tcp_westwood_bw_rttmin(const struct tcp_sock *tp)
1985{
1986 return max((tp->westwood.bw_est) * (tp->westwood.rtt_min) /
1987 (__u32) (tp->mss_cache_std),
1988 2U);
1989}
1990
1991static inline __u32 tcp_westwood_bw_rttmin(const struct tcp_sock *tp)
1992{
1993 return tcp_is_westwood(tp) ? __tcp_westwood_bw_rttmin(tp) : 0;
1994}
1995
1996static inline int tcp_westwood_ssthresh(struct tcp_sock *tp)
1997{
1998 __u32 ssthresh = 0;
1999
2000 if (tcp_is_westwood(tp)) {
2001 ssthresh = __tcp_westwood_bw_rttmin(tp);
2002 if (ssthresh)
2003 tp->snd_ssthresh = ssthresh;
2004 }
2005
2006 return (ssthresh != 0);
2007}
2008
2009static inline int tcp_westwood_cwnd(struct tcp_sock *tp)
2010{
2011 __u32 cwnd = 0;
2012
2013 if (tcp_is_westwood(tp)) {
2014 cwnd = __tcp_westwood_bw_rttmin(tp);
2015 if (cwnd)
2016 tp->snd_cwnd = cwnd;
2017 }
2018
2019 return (cwnd != 0);
2020}
2021#endif
2022