1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67#include <linux/config.h>
68#include <linux/mm.h>
69#include <linux/sysctl.h>
70#include <net/tcp.h>
71#include <net/inet_common.h>
72#include <linux/ipsec.h>
73
/*
 * Global tunables consulted by the TCP input path, with their
 * compiled-in defaults.  NOTE(review): the sysctl_ prefix suggests these
 * are wired to a sysctl table elsewhere -- confirm.
 */
int sysctl_tcp_timestamps = 1;
int sysctl_tcp_window_scaling = 1;
int sysctl_tcp_sack = 1;
int sysctl_tcp_fack = 1;
int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
/* ECN defaults to enabled only when built with CONFIG_INET_ECN. */
#ifdef CONFIG_INET_ECN
int sysctl_tcp_ecn = 1;
#else
int sysctl_tcp_ecn = 0;
#endif
int sysctl_tcp_dsack = 1;
/* Used as a shift count in tcp_init_buffer_space(); 0 disables that reservation. */
int sysctl_tcp_app_win = 31;
int sysctl_tcp_adv_win_scale = 2;

int sysctl_tcp_stdurg = 0;
int sysctl_tcp_rfc1337 = 0;
int sysctl_tcp_max_orphans = NR_FILE;
int sysctl_tcp_frto = 0;

/* When non-zero, tcp_update_metrics() returns without touching the dst entry. */
int sysctl_tcp_nometrics_save = 0;

/* Congestion-control selectors read by tcp_ca_init(): westwood, then bic, then vegas. */
int sysctl_tcp_westwood = 0;
int sysctl_tcp_vegas_cong_avoid = 0;

/* When non-zero, tcp_rcv_space_adjust() may grow sk->rcvbuf and window_clamp. */
int sysctl_tcp_moderate_rcvbuf = 0;




/* Vegas parameters are stored left-shifted by V_PARAM_SHIFT. */
#define V_PARAM_SHIFT 1
int sysctl_tcp_vegas_alpha = 1<<V_PARAM_SHIFT;
int sysctl_tcp_vegas_beta = 3<<V_PARAM_SHIFT;
int sysctl_tcp_vegas_gamma = 1<<V_PARAM_SHIFT;
/* BIC tunables; sysctl_tcp_bic has no initializer and so starts at zero. */
int sysctl_tcp_bic;
int sysctl_tcp_bic_fast_convergence = 1;
int sysctl_tcp_bic_low_window = 14;
int sysctl_tcp_bic_beta = 819;
111
/*
 * Bit flags OR'ed together into an int "flag" word while processing an
 * incoming ACK (tcp_sacktag_write_queue() below returns such a word).
 * NOTE(review): per-bit meanings are inferred from the names -- confirm
 * against the ACK-processing code that sets them.
 */
#define FLAG_DATA 0x01
#define FLAG_WIN_UPDATE 0x02
#define FLAG_DATA_ACKED 0x04
#define FLAG_RETRANS_DATA_ACKED 0x08
#define FLAG_SYN_ACKED 0x10
#define FLAG_DATA_SACKED 0x20
#define FLAG_ECE 0x40
#define FLAG_DATA_LOST 0x80
#define FLAG_SLOWPATH 0x100

/* Convenience masks combining the individual bits above. */
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)
#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)

/*
 * Predicates on tp->sack_ok: zero means no SACK ("Reno"), bit 2 is the
 * FACK bit (cleared by tcp_update_reordering()), bit 4 is set once a
 * D-SACK block has been seen (see tcp_sacktag_write_queue()).
 */
#define IsReno(tp) ((tp)->sack_ok == 0)
#define IsFack(tp) ((tp)->sack_ok & 2)
#define IsDSack(tp) ((tp)->sack_ok & 4)

/* Header flag bits tested in tcp_measure_rcv_mss() on undersized segments. */
#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
132
133
134
135
136static __inline__ void tcp_measure_rcv_mss(struct tcp_opt *tp, struct sk_buff *skb)
137{
138 unsigned int len, lss;
139
140 lss = tp->ack.last_seg_size;
141 tp->ack.last_seg_size = 0;
142
143
144
145
146 len = skb->len;
147 if (len >= tp->ack.rcv_mss) {
148 tp->ack.rcv_mss = len;
149 } else {
150
151
152
153
154
155 len += skb->data - skb->h.raw;
156 if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) ||
157
158
159
160
161
162 (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
163 !(tcp_flag_word(skb->h.th)&TCP_REMNANT))) {
164
165
166
167
168 len -= tp->tcp_header_len;
169 tp->ack.last_seg_size = len;
170 if (len == lss) {
171 tp->ack.rcv_mss = len;
172 return;
173 }
174 }
175 tp->ack.pending |= TCP_ACK_PUSHED;
176 }
177}
178
179static void tcp_incr_quickack(struct tcp_opt *tp)
180{
181 unsigned quickacks = tp->rcv_wnd/(2*tp->ack.rcv_mss);
182
183 if (quickacks==0)
184 quickacks=2;
185 if (quickacks > tp->ack.quick)
186 tp->ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
187}
188
189void tcp_enter_quickack_mode(struct tcp_opt *tp)
190{
191 tcp_incr_quickack(tp);
192 tp->ack.pingpong = 0;
193 tp->ack.ato = TCP_ATO_MIN;
194}
195
196
197
198
199
200static __inline__ int tcp_in_quickack_mode(struct tcp_opt *tp)
201{
202 return (tp->ack.quick && !tp->ack.pingpong);
203}
204
205
206
207
208
209
210static void tcp_fixup_sndbuf(struct sock *sk)
211{
212 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
213 int sndmem = tp->mss_clamp+MAX_TCP_HEADER+16+sizeof(struct sk_buff);
214
215 if (sk->sndbuf < 3*sndmem)
216 sk->sndbuf = min(3*sndmem, sysctl_tcp_wmem[2]);
217}
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245static int
246__tcp_grow_window(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
247{
248
249 int truesize = tcp_win_from_space(skb->truesize)/2;
250 int window = tcp_full_space(sk)/2;
251
252 while (tp->rcv_ssthresh <= window) {
253 if (truesize <= skb->len)
254 return 2*tp->ack.rcv_mss;
255
256 truesize >>= 1;
257 window >>= 1;
258 }
259 return 0;
260}
261
262static __inline__ void
263tcp_grow_window(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
264{
265
266 if (tp->rcv_ssthresh < tp->window_clamp &&
267 (int)tp->rcv_ssthresh < tcp_space(sk) &&
268 !tcp_memory_pressure) {
269 int incr;
270
271
272
273
274 if (tcp_win_from_space(skb->truesize) <= skb->len)
275 incr = 2*tp->advmss;
276 else
277 incr = __tcp_grow_window(sk, tp, skb);
278
279 if (incr) {
280 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp);
281 tp->ack.quick |= 1;
282 }
283 }
284}
285
286
287
288static void tcp_fixup_rcvbuf(struct sock *sk)
289{
290 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
291 int rcvmem = tp->advmss+MAX_TCP_HEADER+16+sizeof(struct sk_buff);
292
293
294
295
296
297 while (tcp_win_from_space(rcvmem) < tp->advmss)
298 rcvmem += 128;
299 if (sk->rcvbuf < 4*rcvmem)
300 sk->rcvbuf = min(4*rcvmem, sysctl_tcp_rmem[2]);
301}
302
303
304
305
306static void tcp_init_buffer_space(struct sock *sk)
307{
308 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
309 int maxwin;
310
311 if (!(sk->userlocks&SOCK_RCVBUF_LOCK))
312 tcp_fixup_rcvbuf(sk);
313 if (!(sk->userlocks&SOCK_SNDBUF_LOCK))
314 tcp_fixup_sndbuf(sk);
315
316 tp->rcvq_space.space = tp->rcv_wnd;
317
318 maxwin = tcp_full_space(sk);
319
320 if (tp->window_clamp >= maxwin) {
321 tp->window_clamp = maxwin;
322
323 if (sysctl_tcp_app_win && maxwin>4*tp->advmss)
324 tp->window_clamp = max(maxwin-(maxwin>>sysctl_tcp_app_win), 4*tp->advmss);
325 }
326
327
328 if (sysctl_tcp_app_win &&
329 tp->window_clamp > 2*tp->advmss &&
330 tp->window_clamp + tp->advmss > maxwin)
331 tp->window_clamp = max(2*tp->advmss, maxwin-tp->advmss);
332
333 tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
334 tp->snd_cwnd_stamp = tcp_time_stamp;
335}
336
337static void init_bictcp(struct tcp_opt *tp)
338{
339 tp->bictcp.cnt = 0;
340
341 tp->bictcp.last_max_cwnd = 0;
342 tp->bictcp.last_cwnd = 0;
343 tp->bictcp.last_stamp = 0;
344}
345
346
347static void tcp_clamp_window(struct sock *sk, struct tcp_opt *tp)
348{
349 struct sk_buff *skb;
350 unsigned int app_win = tp->rcv_nxt - tp->copied_seq;
351 int ofo_win = 0;
352
353 tp->ack.quick = 0;
354
355 skb_queue_walk(&tp->out_of_order_queue, skb) {
356 ofo_win += skb->len;
357 }
358
359
360
361
362 if (ofo_win) {
363 if (sk->rcvbuf < sysctl_tcp_rmem[2] &&
364 !(sk->userlocks&SOCK_RCVBUF_LOCK) &&
365 !tcp_memory_pressure &&
366 atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
367 sk->rcvbuf = min(atomic_read(&sk->rmem_alloc), sysctl_tcp_rmem[2]);
368 }
369 if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf) {
370 app_win += ofo_win;
371 if (atomic_read(&sk->rmem_alloc) >= 2*sk->rcvbuf)
372 app_win >>= 1;
373 if (app_win > tp->ack.rcv_mss)
374 app_win -= tp->ack.rcv_mss;
375 app_win = max(app_win, 2U*tp->advmss);
376
377 if (!ofo_win)
378 tp->window_clamp = min(tp->window_clamp, app_win);
379 tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
380 }
381}
382
383
384
385
386
387
388
389
390
391
392
393
394static void tcp_rcv_rtt_update(struct tcp_opt *tp, u32 sample, int win_dep)
395{
396 u32 new_sample = tp->rcv_rtt_est.rtt;
397 long m = sample;
398
399 if (m == 0)
400 m = 1;
401
402 if (new_sample != 0) {
403
404
405
406
407
408
409
410
411
412
413 if (!win_dep) {
414 m -= (new_sample >> 3);
415 new_sample += m;
416 } else if (m < new_sample)
417 new_sample = m << 3;
418 } else {
419
420 new_sample = m << 3;
421 }
422
423 if (tp->rcv_rtt_est.rtt != new_sample)
424 tp->rcv_rtt_est.rtt = new_sample;
425}
426
427static inline void tcp_rcv_rtt_measure(struct tcp_opt *tp)
428{
429 if (tp->rcv_rtt_est.time == 0)
430 goto new_measure;
431 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
432 return;
433 tcp_rcv_rtt_update(tp,
434 jiffies - tp->rcv_rtt_est.time,
435 1);
436
437new_measure:
438 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
439 tp->rcv_rtt_est.time = tcp_time_stamp;
440}
441
442static inline void tcp_rcv_rtt_measure_ts(struct tcp_opt *tp, struct sk_buff *skb)
443{
444 if (tp->rcv_tsecr &&
445 (TCP_SKB_CB(skb)->end_seq -
446 TCP_SKB_CB(skb)->seq >= tp->ack.rcv_mss))
447 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_tsecr, 0);
448}
449
450
451
452
453
454void tcp_rcv_space_adjust(struct sock *sk)
455{
456 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
457 int time;
458 int space;
459
460 if (tp->rcvq_space.time == 0)
461 goto new_measure;
462
463 time = tcp_time_stamp - tp->rcvq_space.time;
464 if (time < (tp->rcv_rtt_est.rtt >> 3) ||
465 tp->rcv_rtt_est.rtt == 0)
466 return;
467
468 space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
469
470 space = max(tp->rcvq_space.space, space);
471
472 if (tp->rcvq_space.space != space) {
473 int rcvmem;
474
475 tp->rcvq_space.space = space;
476
477 if (sysctl_tcp_moderate_rcvbuf) {
478 int new_clamp = space;
479
480
481
482
483
484 space /= tp->advmss;
485 if (!space)
486 space = 1;
487 rcvmem = (tp->advmss + MAX_TCP_HEADER +
488 16 + sizeof(struct sk_buff));
489 while (tcp_win_from_space(rcvmem) < tp->advmss)
490 rcvmem += 128;
491 space *= rcvmem;
492 space = min(space, sysctl_tcp_rmem[2]);
493 if (space > sk->rcvbuf) {
494 sk->rcvbuf = space;
495
496
497 tp->window_clamp = new_clamp;
498 }
499 }
500 }
501
502new_measure:
503 tp->rcvq_space.seq = tp->copied_seq;
504 tp->rcvq_space.time = tcp_time_stamp;
505}
506
507
508
509
510
511
512
513
514
515
516
517static void tcp_event_data_recv(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
518{
519 u32 now;
520
521 tcp_schedule_ack(tp);
522
523 tcp_measure_rcv_mss(tp, skb);
524
525 tcp_rcv_rtt_measure(tp);
526
527 now = tcp_time_stamp;
528
529 if (!tp->ack.ato) {
530
531
532
533 tcp_incr_quickack(tp);
534 tp->ack.ato = TCP_ATO_MIN;
535 } else {
536 int m = now - tp->ack.lrcvtime;
537
538 if (m <= TCP_ATO_MIN/2) {
539
540 tp->ack.ato = (tp->ack.ato>>1) + TCP_ATO_MIN/2;
541 } else if (m < tp->ack.ato) {
542 tp->ack.ato = (tp->ack.ato>>1) + m;
543 if (tp->ack.ato > tp->rto)
544 tp->ack.ato = tp->rto;
545 } else if (m > tp->rto) {
546
547
548
549 tcp_incr_quickack(tp);
550 tcp_mem_reclaim(sk);
551 }
552 }
553 tp->ack.lrcvtime = now;
554
555 TCP_ECN_check_ce(tp, skb);
556
557 if (skb->len >= 128)
558 tcp_grow_window(sk, tp, skb);
559}
560
561
562
563
564void tcp_ca_init(struct tcp_opt *tp)
565{
566 if (sysctl_tcp_westwood)
567 tp->adv_cong = TCP_WESTWOOD;
568 else if (sysctl_tcp_bic)
569 tp->adv_cong = TCP_BIC;
570 else if (sysctl_tcp_vegas_cong_avoid) {
571 tp->adv_cong = TCP_VEGAS;
572 tp->vegas.baseRTT = 0x7fffffff;
573 tcp_vegas_enable(tp);
574 }
575}
576
577
578
579
580
581
582
583
584
585static inline void vegas_rtt_calc(struct tcp_opt *tp, __u32 rtt)
586{
587 __u32 vrtt = rtt + 1;
588
589
590 if (vrtt < tp->vegas.baseRTT)
591 tp->vegas.baseRTT = vrtt;
592
593
594
595
596 tp->vegas.minRTT = min(tp->vegas.minRTT, vrtt);
597 tp->vegas.cntRTT++;
598}
599
600
601
602
603
604
605
606
607
608
609static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt)
610{
611 long m = mrtt;
612
613 if (tcp_vegas_enabled(tp))
614 vegas_rtt_calc(tp, mrtt);
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632 if(m == 0)
633 m = 1;
634 if (tp->srtt != 0) {
635 m -= (tp->srtt >> 3);
636 tp->srtt += m;
637 if (m < 0) {
638 m = -m;
639 m -= (tp->mdev >> 2);
640
641
642
643
644
645
646
647
648 if (m > 0)
649 m >>= 3;
650 } else {
651 m -= (tp->mdev >> 2);
652 }
653 tp->mdev += m;
654 if (tp->mdev > tp->mdev_max) {
655 tp->mdev_max = tp->mdev;
656 if (tp->mdev_max > tp->rttvar)
657 tp->rttvar = tp->mdev_max;
658 }
659 if (after(tp->snd_una, tp->rtt_seq)) {
660 if (tp->mdev_max < tp->rttvar)
661 tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2;
662 tp->rtt_seq = tp->snd_nxt;
663 tp->mdev_max = TCP_RTO_MIN;
664 }
665 } else {
666
667 tp->srtt = m<<3;
668 tp->mdev = m<<1;
669 tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
670 tp->rtt_seq = tp->snd_nxt;
671 }
672
673 tcp_westwood_update_rtt(tp, tp->srtt >> 3);
674}
675
676
677
678
679static __inline__ void tcp_set_rto(struct tcp_opt *tp)
680{
681
682
683
684
685
686
687
688
689
690
691 tp->rto = (tp->srtt >> 3) + tp->rttvar;
692
693
694
695
696
697
698}
699
700
701
702
703static __inline__ void tcp_bound_rto(struct tcp_opt *tp)
704{
705 if (tp->rto > TCP_RTO_MAX)
706 tp->rto = TCP_RTO_MAX;
707}
708
709
710
711
712
713void tcp_update_metrics(struct sock *sk)
714{
715 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
716 struct dst_entry *dst = __sk_dst_get(sk);
717
718 if (sysctl_tcp_nometrics_save)
719 return;
720
721 dst_confirm(dst);
722
723 if (dst && (dst->flags&DST_HOST)) {
724 int m;
725
726 if (tp->backoff || !tp->srtt) {
727
728
729
730
731 if (!(dst->mxlock&(1<<RTAX_RTT)))
732 dst->rtt = 0;
733 return;
734 }
735
736 m = dst->rtt - tp->srtt;
737
738
739
740
741
742 if (!(dst->mxlock&(1<<RTAX_RTT))) {
743 if (m <= 0)
744 dst->rtt = tp->srtt;
745 else
746 dst->rtt -= (m>>3);
747 }
748
749 if (!(dst->mxlock&(1<<RTAX_RTTVAR))) {
750 if (m < 0)
751 m = -m;
752
753
754 m >>= 1;
755 if (m < tp->mdev)
756 m = tp->mdev;
757
758 if (m >= dst->rttvar)
759 dst->rttvar = m;
760 else
761 dst->rttvar -= (dst->rttvar - m)>>2;
762 }
763
764 if (tp->snd_ssthresh >= 0xFFFF) {
765
766 if (dst->ssthresh &&
767 !(dst->mxlock&(1<<RTAX_SSTHRESH)) &&
768 (tp->snd_cwnd>>1) > dst->ssthresh)
769 dst->ssthresh = (tp->snd_cwnd>>1);
770 if (!(dst->mxlock&(1<<RTAX_CWND)) &&
771 tp->snd_cwnd > dst->cwnd)
772 dst->cwnd = tp->snd_cwnd;
773 } else if (tp->snd_cwnd > tp->snd_ssthresh &&
774 tp->ca_state == TCP_CA_Open) {
775
776 if (!(dst->mxlock&(1<<RTAX_SSTHRESH)))
777 dst->ssthresh = max(tp->snd_cwnd>>1, tp->snd_ssthresh);
778 if (!(dst->mxlock&(1<<RTAX_CWND)))
779 dst->cwnd = (dst->cwnd + tp->snd_cwnd)>>1;
780 } else {
781
782
783
784 if (!(dst->mxlock&(1<<RTAX_CWND)))
785 dst->cwnd = (dst->cwnd + tp->snd_ssthresh)>>1;
786 if (dst->ssthresh &&
787 !(dst->mxlock&(1<<RTAX_SSTHRESH)) &&
788 tp->snd_ssthresh > dst->ssthresh)
789 dst->ssthresh = tp->snd_ssthresh;
790 }
791
792 if (!(dst->mxlock&(1<<RTAX_REORDERING))) {
793 if (dst->reordering < tp->reordering &&
794 tp->reordering != sysctl_tcp_reordering)
795 dst->reordering = tp->reordering;
796 }
797 }
798}
799
800
801
802
803
804
805__u32 tcp_init_cwnd(struct tcp_opt *tp)
806{
807 __u32 cwnd;
808
809 if (tp->mss_cache > 1460)
810 return 2;
811
812 cwnd = (tp->mss_cache > 1095) ? 3 : 4;
813
814 if (!tp->srtt || (tp->snd_ssthresh >= 0xFFFF && tp->srtt > ((HZ/50)<<3)))
815 cwnd = 2;
816 else if (cwnd > tp->snd_ssthresh)
817 cwnd = tp->snd_ssthresh;
818
819 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
820}
821
822
823
824static void tcp_init_metrics(struct sock *sk)
825{
826 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
827 struct dst_entry *dst = __sk_dst_get(sk);
828
829 if (dst == NULL)
830 goto reset;
831
832 dst_confirm(dst);
833
834 if (dst->mxlock&(1<<RTAX_CWND))
835 tp->snd_cwnd_clamp = dst->cwnd;
836 if (dst->ssthresh) {
837 tp->snd_ssthresh = dst->ssthresh;
838 if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
839 tp->snd_ssthresh = tp->snd_cwnd_clamp;
840 }
841 if (dst->reordering && tp->reordering != dst->reordering) {
842 tp->sack_ok &= ~2;
843 tp->reordering = dst->reordering;
844 }
845
846 if (dst->rtt == 0)
847 goto reset;
848
849 if (!tp->srtt && dst->rtt < (TCP_TIMEOUT_INIT<<3))
850 goto reset;
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866 if (dst->rtt > tp->srtt) {
867 tp->srtt = dst->rtt;
868 tp->rtt_seq = tp->snd_nxt;
869 }
870 if (dst->rttvar > tp->mdev) {
871 tp->mdev = dst->rttvar;
872 tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
873 }
874 tcp_set_rto(tp);
875 tcp_bound_rto(tp);
876 if (tp->rto < TCP_TIMEOUT_INIT && !tp->saw_tstamp)
877 goto reset;
878 tp->snd_cwnd = tcp_init_cwnd(tp);
879 tp->snd_cwnd_stamp = tcp_time_stamp;
880 return;
881
882reset:
883
884
885
886
887 if (!tp->saw_tstamp && tp->srtt) {
888 tp->srtt = 0;
889 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
890 tp->rto = TCP_TIMEOUT_INIT;
891 }
892}
893
894static void tcp_update_reordering(struct tcp_opt *tp, int metric, int ts)
895{
896 if (metric > tp->reordering) {
897 tp->reordering = min(TCP_MAX_REORDERING, metric);
898
899
900 if (ts)
901 NET_INC_STATS_BH(TCPTSReorder);
902 else if (IsReno(tp))
903 NET_INC_STATS_BH(TCPRenoReorder);
904 else if (IsFack(tp))
905 NET_INC_STATS_BH(TCPFACKReorder);
906 else
907 NET_INC_STATS_BH(TCPSACKReorder);
908#if FASTRETRANS_DEBUG > 1
909 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
910 tp->sack_ok, tp->ca_state,
911 tp->reordering, tp->fackets_out, tp->sacked_out,
912 tp->undo_marker ? tp->undo_retrans : 0);
913#endif
914
915 tp->sack_ok &= ~2;
916 }
917}
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
/*
 * Process the SACK option carried by ack_skb and tag the segments on
 * the retransmit queue accordingly.
 *
 * @sk:            socket whose retransmit queue is walked.
 * @ack_skb:       incoming ACK; TCP_SKB_CB(ack_skb)->sacked is used as
 *                 the offset of the SACK option from ack_skb->h.raw.
 * @prior_snd_una: compared against D-SACK block ends and used for the
 *                 "too old ACK" test below.
 *                 NOTE(review): presumably snd_una before this ACK --
 *                 confirm against the caller.
 *
 * Updates sacked_out, lost_out, retrans_out, fackets_out, left_out,
 * undo_retrans and possibly the reordering metric.
 *
 * Returns a mask of FLAG_DATA_SACKED / FLAG_DATA_LOST, or 0 when the
 * ACK is rejected as too old.
 */
static int
tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
	struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
	/* ptr[1] is the option length; each block accounts for 8 bytes. */
	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
	/* Lowest queue position found to be reordered; starts beyond any position. */
	int reord = tp->packets_out;
	int prior_fackets;
	u32 lost_retrans = 0;
	int flag = 0;
	int i;

	/* With nothing SACKed there is no forward-most SACKed segment either. */
	if (!tp->sacked_out)
		tp->fackets_out = 0;
	prior_fackets = tp->fackets_out;

	for (i=0; i<num_sacks; i++, sp++) {
		struct sk_buff *skb;
		__u32 start_seq = ntohl(sp->start_seq);
		__u32 end_seq = ntohl(sp->end_seq);
		/* 1-based position of the current skb in the queue walk. */
		int fack_count = 0;
		int dup_sack = 0;

		/* Only the first block can be a D-SACK. */
		if (i == 0) {
			u32 ack = TCP_SKB_CB(ack_skb)->ack_seq;

			if (before(start_seq, ack)) {
				/* Block starts below the cumulative ACK. */
				dup_sack = 1;
				tp->sack_ok |= 4;
				NET_INC_STATS_BH(TCPDSACKRecv);
			} else if (num_sacks > 1 &&
				   !after(end_seq, ntohl(sp[1].end_seq)) &&
				   !before(start_seq, ntohl(sp[1].start_seq))) {
				/* Block is contained in the second block. */
				dup_sack = 1;
				tp->sack_ok |= 4;
				NET_INC_STATS_BH(TCPDSACKOfoRecv);
			}

			/*
			 * D-SACK lying entirely at or below prior_snd_una but
			 * above undo_marker: count one retransmission as
			 * having been unnecessary.
			 */
			if (dup_sack &&
			    !after(end_seq, prior_snd_una) &&
			    after(end_seq, tp->undo_marker))
				tp->undo_retrans--;

			/* Reject ACKs more than max_window below prior_snd_una. */
			if (before(ack, prior_snd_una-tp->max_window))
				return 0;
		}

		/* A block reaching beyond high_seq is reported via FLAG_DATA_LOST. */
		if (after(end_seq, tp->high_seq))
			flag |= FLAG_DATA_LOST;

		for_retrans_queue(skb, sk, tp) {
			u8 sacked = TCP_SKB_CB(skb)->sacked;
			int in_sack;

			/* Stop at the first skb starting at or past the block end. */
			if(!before(TCP_SKB_CB(skb)->seq, end_seq))
				break;

			fack_count++;

			/* True when the skb lies completely inside the block. */
			in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
				!before(end_seq, TCP_SKB_CB(skb)->end_seq);

			/* D-SACK covering a retransmitted skb above undo_marker. */
			if ((dup_sack && in_sack) &&
			    (sacked & TCPCB_RETRANS) &&
			    after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
				tp->undo_retrans--;

			/* skb already covered by snd_una: only reordering detection applies. */
			if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) {
				if (sacked&TCPCB_RETRANS) {
					if ((dup_sack && in_sack) &&
					    (sacked&TCPCB_SACKED_ACKED))
						reord = min(fack_count, reord);
				} else {
					/* Never retransmitted, never SACKed, yet below the old forward-most SACK. */
					if (fack_count < prior_fackets &&
					    !(sacked&TCPCB_SACKED_ACKED))
						reord = min(fack_count, reord);
				}

				/* Nothing else to do for such segments. */
				continue;
			}

			/*
			 * Track the highest block end lying beyond the ack_seq
			 * recorded in a retransmitted skb's control block; used
			 * after the loops to detect lost retransmissions.
			 */
			if ((sacked&TCPCB_SACKED_RETRANS) &&
			    after(end_seq, TCP_SKB_CB(skb)->ack_seq) &&
			    (!lost_retrans || after(end_seq, lost_retrans)))
				lost_retrans = end_seq;

			if (!in_sack)
				continue;

			if (!(sacked&TCPCB_SACKED_ACKED)) {
				if (sacked & TCPCB_SACKED_RETRANS) {
					/*
					 * A retransmission is outstanding; if the skb was
					 * also marked lost, clear both marks and fix the
					 * counters.
					 */
					if (sacked & TCPCB_LOST) {
						TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
						tp->lost_out--;
						tp->retrans_out--;
					}
				} else {
					/*
					 * Newly SACKed, never retransmitted, and below the
					 * old forward-most SACK: reordering.
					 */
					if (!(sacked & TCPCB_RETRANS) &&
					    fack_count < prior_fackets)
						reord = min(fack_count, reord);

					if (sacked & TCPCB_LOST) {
						TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
						tp->lost_out--;
					}
				}

				TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
				flag |= FLAG_DATA_SACKED;
				tp->sacked_out++;

				if (fack_count > tp->fackets_out)
					tp->fackets_out = fack_count;
			} else {
				/* Already SACKed; a D-SACK on a retransmitted skb is reordering. */
				if (dup_sack && (sacked&TCPCB_RETRANS))
					reord = min(fack_count, reord);
			}

			/*
			 * A D-SACK for an skb still flagged as having a
			 * retransmission outstanding: clear that flag.
			 */
			if (dup_sack &&
			    (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
				tp->retrans_out--;
			}
		}
	}

	/*
	 * Lost-retransmission check, done only in the Recovery state: a
	 * retransmitted skb whose recorded ack_seq lies below lost_retrans
	 * (by at least reordering * mss_cache unless FACK is in use) loses
	 * its RETRANS mark and is marked lost again.
	 */
	if (lost_retrans && tp->ca_state == TCP_CA_Recovery) {
		struct sk_buff *skb;

		for_retrans_queue(skb, sk, tp) {
			if (after(TCP_SKB_CB(skb)->seq, lost_retrans))
				break;
			if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
				continue;
			if ((TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) &&
			    after(lost_retrans, TCP_SKB_CB(skb)->ack_seq) &&
			    (IsFack(tp) ||
			     !before(lost_retrans, TCP_SKB_CB(skb)->ack_seq+tp->reordering*tp->mss_cache))) {
				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
				tp->retrans_out--;

				if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
					tp->lost_out++;
					TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
					flag |= FLAG_DATA_SACKED;
					NET_INC_STATS_BH(TCPLostRetransmit);
				}
			}
		}
	}

	tp->left_out = tp->sacked_out + tp->lost_out;

	/* Report reordering, except while in the Loss state. */
	if (reord < tp->fackets_out && tp->ca_state != TCP_CA_Loss)
		tcp_update_reordering(tp, (tp->fackets_out+1)-reord, 0);

#if FASTRETRANS_DEBUG > 0
	/* Sanity checks: none of the counters may have gone negative. */
	BUG_TRAP((int)tp->sacked_out >= 0);
	BUG_TRAP((int)tp->lost_out >= 0);
	BUG_TRAP((int)tp->retrans_out >= 0);
	BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0);
#endif
	return flag;
}
1165
1166
1167
1168
1169
1170void tcp_enter_frto(struct sock *sk)
1171{
1172 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
1173 struct sk_buff *skb;
1174
1175 tp->frto_counter = 1;
1176
1177 if (tp->ca_state <= TCP_CA_Disorder ||
1178 tp->snd_una == tp->high_seq ||
1179 (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) {
1180 tp->prior_ssthresh = tcp_current_ssthresh(tp);
1181 tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
1182 }
1183
1184
1185
1186
1187
1188
1189 tp->retrans_out = 0;
1190 tp->undo_marker = tp->snd_una;
1191 tp->undo_retrans = 0;
1192
1193 for_retrans_queue(skb, sk, tp) {
1194 TCP_SKB_CB(skb)->sacked &= ~TCPCB_RETRANS;
1195 }
1196 tcp_sync_left_out(tp);
1197
1198 tcp_set_ca_state(tp, TCP_CA_Open);
1199 tp->frto_highmark = tp->snd_nxt;
1200}
1201
1202
1203
1204
1205
1206void tcp_enter_frto_loss(struct sock *sk)
1207{
1208 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
1209 struct sk_buff *skb;
1210 int cnt = 0;
1211
1212 tp->sacked_out = 0;
1213 tp->lost_out = 0;
1214 tp->fackets_out = 0;
1215
1216 for_retrans_queue(skb, sk, tp) {
1217 cnt++;
1218 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1219 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
1220
1221
1222
1223
1224 if(!after(TCP_SKB_CB(skb)->end_seq,
1225 tp->frto_highmark)) {
1226 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1227 tp->lost_out++;
1228 }
1229 } else {
1230 tp->sacked_out++;
1231 tp->fackets_out = cnt;
1232 }
1233 }
1234 tcp_sync_left_out(tp);
1235
1236 tp->snd_cwnd = tp->frto_counter + tcp_packets_in_flight(tp)+1;
1237 tp->snd_cwnd_cnt = 0;
1238 tp->snd_cwnd_stamp = tcp_time_stamp;
1239 tp->undo_marker = 0;
1240 tp->frto_counter = 0;
1241
1242 tp->reordering = min_t(unsigned int, tp->reordering,
1243 sysctl_tcp_reordering);
1244 tcp_set_ca_state(tp, TCP_CA_Loss);
1245 tp->high_seq = tp->frto_highmark;
1246 TCP_ECN_queue_cwr(tp);
1247
1248 init_bictcp(tp);
1249}
1250
1251void tcp_clear_retrans(struct tcp_opt *tp)
1252{
1253 tp->left_out = 0;
1254 tp->retrans_out = 0;
1255
1256 tp->fackets_out = 0;
1257 tp->sacked_out = 0;
1258 tp->lost_out = 0;
1259
1260 tp->undo_marker = 0;
1261 tp->undo_retrans = 0;
1262}
1263
1264
1265
1266
1267
1268void tcp_enter_loss(struct sock *sk, int how)
1269{
1270 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
1271 struct sk_buff *skb;
1272 int cnt = 0;
1273
1274
1275 if (tp->ca_state <= TCP_CA_Disorder ||
1276 tp->snd_una == tp->high_seq ||
1277 (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) {
1278 tp->prior_ssthresh = tcp_current_ssthresh(tp);
1279
1280 if (!(tcp_westwood_ssthresh(tp)))
1281 tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
1282 }
1283 tp->snd_cwnd = 1;
1284 tp->snd_cwnd_cnt = 0;
1285 tp->snd_cwnd_stamp = tcp_time_stamp;
1286
1287 tcp_clear_retrans(tp);
1288
1289
1290
1291 if (!how)
1292 tp->undo_marker = tp->snd_una;
1293
1294 for_retrans_queue(skb, sk, tp) {
1295 cnt++;
1296 if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
1297 tp->undo_marker = 0;
1298 TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
1299 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
1300 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
1301 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1302 tp->lost_out++;
1303 } else {
1304 tp->sacked_out++;
1305 tp->fackets_out = cnt;
1306 }
1307 }
1308 tcp_sync_left_out(tp);
1309
1310 tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering);
1311 tcp_set_ca_state(tp, TCP_CA_Loss);
1312 tp->high_seq = tp->snd_nxt;
1313 TCP_ECN_queue_cwr(tp);
1314}
1315
1316static int tcp_check_sack_reneging(struct sock *sk, struct tcp_opt *tp)
1317{
1318 struct sk_buff *skb;
1319
1320
1321
1322
1323
1324
1325
1326 if ((skb = skb_peek(&sk->write_queue)) != NULL &&
1327 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
1328 NET_INC_STATS_BH(TCPSACKReneging);
1329
1330 tcp_enter_loss(sk, 1);
1331 tp->retransmits++;
1332 tcp_retransmit_skb(sk, skb_peek(&sk->write_queue));
1333 tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
1334 return 1;
1335 }
1336 return 0;
1337}
1338
1339static inline int tcp_fackets_out(struct tcp_opt *tp)
1340{
1341 return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out;
1342}
1343
1344static inline int tcp_skb_timedout(struct tcp_opt *tp, struct sk_buff *skb)
1345{
1346 return (tcp_time_stamp - TCP_SKB_CB(skb)->when > tp->rto);
1347}
1348
1349static inline int tcp_head_timedout(struct sock *sk, struct tcp_opt *tp)
1350{
1351 return tp->packets_out && tcp_skb_timedout(tp, skb_peek(&sk->write_queue));
1352}
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447static int
1448tcp_time_to_recover(struct sock *sk, struct tcp_opt *tp)
1449{
1450
1451 if (tp->lost_out)
1452 return 1;
1453
1454
1455 if (tcp_fackets_out(tp) > tp->reordering)
1456 return 1;
1457
1458
1459
1460
1461 if (tcp_head_timedout(sk, tp))
1462 return 1;
1463
1464
1465
1466
1467 if (tp->packets_out <= tp->reordering &&
1468 tp->sacked_out >= max_t(__u32, tp->packets_out/2, sysctl_tcp_reordering) &&
1469 !tcp_may_send_now(sk, tp)) {
1470
1471
1472
1473 return 1;
1474 }
1475
1476 return 0;
1477}
1478
1479
1480
1481
1482
1483static void tcp_check_reno_reordering(struct tcp_opt *tp, int addend)
1484{
1485 u32 holes;
1486
1487 holes = max(tp->lost_out, 1U);
1488 holes = min(holes, tp->packets_out);
1489
1490 if (tp->sacked_out + holes > tp->packets_out) {
1491 tp->sacked_out = tp->packets_out - holes;
1492 tcp_update_reordering(tp, tp->packets_out+addend, 0);
1493 }
1494}
1495
1496
1497
1498static void tcp_add_reno_sack(struct tcp_opt *tp)
1499{
1500 ++tp->sacked_out;
1501 tcp_check_reno_reordering(tp, 0);
1502 tcp_sync_left_out(tp);
1503}
1504
1505
1506
1507static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_opt *tp, int acked)
1508{
1509 if (acked > 0) {
1510
1511 if (acked-1 >= tp->sacked_out)
1512 tp->sacked_out = 0;
1513 else
1514 tp->sacked_out -= acked-1;
1515 }
1516 tcp_check_reno_reordering(tp, acked);
1517 tcp_sync_left_out(tp);
1518}
1519
1520static inline void tcp_reset_reno_sack(struct tcp_opt *tp)
1521{
1522 tp->sacked_out = 0;
1523 tp->left_out = tp->lost_out;
1524}
1525
1526
1527static void
1528tcp_mark_head_lost(struct sock *sk, struct tcp_opt *tp, int packets, u32 high_seq)
1529{
1530 struct sk_buff *skb;
1531 int cnt = packets;
1532
1533 BUG_TRAP(cnt <= tp->packets_out);
1534
1535 for_retrans_queue(skb, sk, tp) {
1536 if (--cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq))
1537 break;
1538 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
1539 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1540 tp->lost_out++;
1541 }
1542 }
1543 tcp_sync_left_out(tp);
1544}
1545
1546
1547
1548static void tcp_update_scoreboard(struct sock *sk, struct tcp_opt *tp)
1549{
1550 if (IsFack(tp)) {
1551 int lost = tp->fackets_out - tp->reordering;
1552 if (lost <= 0)
1553 lost = 1;
1554 tcp_mark_head_lost(sk, tp, lost, tp->high_seq);
1555 } else {
1556 tcp_mark_head_lost(sk, tp, 1, tp->high_seq);
1557 }
1558
1559
1560
1561
1562
1563
1564 if (tcp_head_timedout(sk, tp)) {
1565 struct sk_buff *skb;
1566
1567 for_retrans_queue(skb, sk, tp) {
1568 if (tcp_skb_timedout(tp, skb) &&
1569 !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
1570 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1571 tp->lost_out++;
1572 }
1573 }
1574 tcp_sync_left_out(tp);
1575 }
1576}
1577
1578
1579
1580
1581static __inline__ void tcp_moderate_cwnd(struct tcp_opt *tp)
1582{
1583 tp->snd_cwnd = min(tp->snd_cwnd,
1584 tcp_packets_in_flight(tp)+tcp_max_burst(tp));
1585 tp->snd_cwnd_stamp = tcp_time_stamp;
1586}
1587
1588
1589
1590static void tcp_cwnd_down(struct tcp_opt *tp)
1591{
1592 int decr = tp->snd_cwnd_cnt + 1;
1593 __u32 limit;
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604 if (!(limit = tcp_westwood_bw_rttmin(tp)))
1605 limit = tp->snd_ssthresh/2;
1606
1607 tp->snd_cwnd_cnt = decr&1;
1608 decr >>= 1;
1609
1610 if (decr && tp->snd_cwnd > limit)
1611 tp->snd_cwnd -= decr;
1612
1613 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1);
1614 tp->snd_cwnd_stamp = tcp_time_stamp;
1615}
1616
1617
1618
1619
1620static __inline__ int tcp_packet_delayed(struct tcp_opt *tp)
1621{
1622 return !tp->retrans_stamp ||
1623 (tp->saw_tstamp && tp->rcv_tsecr &&
1624 (__s32)(tp->rcv_tsecr - tp->retrans_stamp) < 0);
1625}
1626
1627
1628
/*
 * Debug trace for undo events.  With FASTRETRANS_DEBUG > 1 this prints
 * the reason @msg together with the peer address/port and the current
 * cwnd, left_out, ssthresh, prior_ssthresh and packets_out; otherwise
 * it compiles away to nothing.
 */
#if FASTRETRANS_DEBUG > 1
static void DBGUNDO(struct sock *sk, struct tcp_opt *tp, const char *msg)
{
	printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n",
	       msg,
	       NIPQUAD(sk->daddr), ntohs(sk->dport),
	       tp->snd_cwnd, tp->left_out,
	       tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out);
}
#else
/* No-op variant; arguments are not evaluated. */
#define DBGUNDO(x...) do { } while (0)
#endif
1641
1642static void tcp_undo_cwr(struct tcp_opt *tp, int undo)
1643{
1644 if (tp->prior_ssthresh) {
1645 if (tcp_is_bic(tp))
1646 tp->snd_cwnd = max(tp->snd_cwnd, tp->bictcp.last_max_cwnd);
1647 else
1648 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1);
1649
1650 if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
1651 tp->snd_ssthresh = tp->prior_ssthresh;
1652 TCP_ECN_withdraw_cwr(tp);
1653 }
1654 } else {
1655 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
1656 }
1657 tcp_moderate_cwnd(tp);
1658 tp->snd_cwnd_stamp = tcp_time_stamp;
1659}
1660
1661static inline int tcp_may_undo(struct tcp_opt *tp)
1662{
1663 return tp->undo_marker &&
1664 (!tp->undo_retrans || tcp_packet_delayed(tp));
1665}
1666
1667
1668static int tcp_try_undo_recovery(struct sock *sk, struct tcp_opt *tp)
1669{
1670 if (tcp_may_undo(tp)) {
1671
1672
1673
1674 DBGUNDO(sk, tp, tp->ca_state == TCP_CA_Loss ? "loss" : "retrans");
1675 tcp_undo_cwr(tp, 1);
1676 if (tp->ca_state == TCP_CA_Loss)
1677 NET_INC_STATS_BH(TCPLossUndo);
1678 else
1679 NET_INC_STATS_BH(TCPFullUndo);
1680 tp->undo_marker = 0;
1681 }
1682 if (tp->snd_una == tp->high_seq && IsReno(tp)) {
1683
1684
1685
1686 tcp_moderate_cwnd(tp);
1687 return 1;
1688 }
1689 tcp_set_ca_state(tp, TCP_CA_Open);
1690 return 0;
1691}
1692
1693
1694static void tcp_try_undo_dsack(struct sock *sk, struct tcp_opt *tp)
1695{
1696 if (tp->undo_marker && !tp->undo_retrans) {
1697 DBGUNDO(sk, tp, "D-SACK");
1698 tcp_undo_cwr(tp, 1);
1699 tp->undo_marker = 0;
1700 NET_INC_STATS_BH(TCPDSACKUndo);
1701 }
1702}
1703
1704
1705
1706static int tcp_try_undo_partial(struct sock *sk, struct tcp_opt *tp, int acked)
1707{
1708
1709 int failed = IsReno(tp) || tp->fackets_out>tp->reordering;
1710
1711 if (tcp_may_undo(tp)) {
1712
1713
1714
1715 if (tp->retrans_out == 0)
1716 tp->retrans_stamp = 0;
1717
1718 tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1);
1719
1720 DBGUNDO(sk, tp, "Hoe");
1721 tcp_undo_cwr(tp, 0);
1722 NET_INC_STATS_BH(TCPPartialUndo);
1723
1724
1725
1726
1727
1728 failed = 0;
1729 }
1730 return failed;
1731}
1732
1733
1734static int tcp_try_undo_loss(struct sock *sk, struct tcp_opt *tp)
1735{
1736 if (tcp_may_undo(tp)) {
1737 struct sk_buff *skb;
1738 for_retrans_queue(skb, sk, tp) {
1739 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1740 }
1741 DBGUNDO(sk, tp, "partial loss");
1742 tp->lost_out = 0;
1743 tp->left_out = tp->sacked_out;
1744 tcp_undo_cwr(tp, 1);
1745 NET_INC_STATS_BH(TCPLossUndo);
1746 tp->retransmits = 0;
1747 tp->undo_marker = 0;
1748 if (!IsReno(tp))
1749 tcp_set_ca_state(tp, TCP_CA_Open);
1750 return 1;
1751 }
1752 return 0;
1753}
1754
1755static __inline__ void tcp_complete_cwr(struct tcp_opt *tp)
1756{
1757 if (!(tcp_westwood_complete_cwr(tp)))
1758 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
1759 tp->snd_cwnd_stamp = tcp_time_stamp;
1760}
1761
1762static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag)
1763{
1764 tp->left_out = tp->sacked_out;
1765
1766 if (tp->retrans_out == 0)
1767 tp->retrans_stamp = 0;
1768
1769 if (flag&FLAG_ECE)
1770 tcp_enter_cwr(tp);
1771
1772 if (tp->ca_state != TCP_CA_CWR) {
1773 int state = TCP_CA_Open;
1774
1775 if (tp->left_out ||
1776 tp->retrans_out ||
1777 tp->undo_marker)
1778 state = TCP_CA_Disorder;
1779
1780 if (tp->ca_state != state) {
1781 tcp_set_ca_state(tp, state);
1782 tp->high_seq = tp->snd_nxt;
1783 }
1784 tcp_moderate_cwnd(tp);
1785 } else {
1786 tcp_cwnd_down(tp);
1787 }
1788}
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
/* Handle an ACK that tcp_ack() classified as dubious: refresh the
 * SACK/loss counters, possibly leave or undo the current congestion
 * state, decide whether to enter TCP_CA_Recovery and trigger
 * retransmission.
 *
 * @sk:            socket the ACK belongs to
 * @prior_snd_una: value of tp->snd_una before this ACK was applied
 * @prior_packets: value of tp->packets_out before the retransmit queue
 *                 was cleaned for this ACK
 * @flag:          FLAG_* bits collected while processing the ACK
 */
static void
tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
		      int prior_packets, int flag)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	/* Duplicate ACK: snd_una did not advance and none of the
	 * FLAG_NOT_DUP bits (data, window update, newly acked) is set.
	 */
	int is_dupack = (tp->snd_una == prior_snd_una && !(flag&FLAG_NOT_DUP));

	/* With nothing outstanding there can be nothing SACKed, and with
	 * nothing SACKed the forward-ACK count is reset as well.
	 */
	if (!tp->packets_out)
		tp->sacked_out = 0;

	if (tp->sacked_out == 0)
		tp->fackets_out = 0;

	/* An ECN echo forbids a later undo: drop the saved ssthresh. */
	if (flag&FLAG_ECE)
		tp->prior_ssthresh = 0;

	/* Let the reneging check run first; if it reports reneging we are done. */
	if (tp->sacked_out && tcp_check_sack_reneging(sk, tp))
		return;

	/* Data was reported lost while below high_seq and outside Open:
	 * mark the head of the queue lost beyond the reordering allowance.
	 */
	if ((flag&FLAG_DATA_LOST) &&
	    before(tp->snd_una, tp->high_seq) &&
	    tp->ca_state != TCP_CA_Open &&
	    tp->fackets_out > tp->reordering) {
		tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq);
		NET_INC_STATS_BH(TCPLoss);
	}

	/* Bring left_out in sync with the counters adjusted above. */
	tcp_sync_left_out(tp);

	/* First stage: see whether the current state can be left because
	 * snd_una has reached high_seq.
	 */
	if (tp->ca_state == TCP_CA_Open) {
		/* No retransmissions are expected in Open unless F-RTO is on. */
		if (!sysctl_tcp_frto)
			BUG_TRAP(tp->retrans_out == 0);
		tp->retrans_stamp = 0;
	} else if (!before(tp->snd_una, tp->high_seq)) {
		switch (tp->ca_state) {
		case TCP_CA_Loss:
			tp->retransmits = 0;
			if (tcp_try_undo_recovery(sk, tp))
				return;
			break;

		case TCP_CA_CWR:
			/* Complete the reduction only once snd_una has moved
			 * strictly past high_seq.
			 */
			if (tp->snd_una != tp->high_seq) {
				tcp_complete_cwr(tp);
				tcp_set_ca_state(tp, TCP_CA_Open);
			}
			break;

		case TCP_CA_Disorder:
			tcp_try_undo_dsack(sk, tp);
			/* Return to Open unless a SACK connection still has
			 * an armed undo marker with snd_una exactly at high_seq.
			 */
			if (!tp->undo_marker ||
			    IsReno(tp) || tp->snd_una != tp->high_seq) {
				tp->undo_marker = 0;
				tcp_set_ca_state(tp, TCP_CA_Open);
			}
			break;

		case TCP_CA_Recovery:
			if (IsReno(tp))
				tcp_reset_reno_sack(tp);
			if (tcp_try_undo_recovery(sk, tp))
				return;
			tcp_complete_cwr(tp);
			break;
		}
	}

	/* Second stage: act on whatever state we are in now. */
	switch (tp->ca_state) {
	case TCP_CA_Recovery:
		if (prior_snd_una == tp->snd_una) {
			/* No progress: Reno emulates a SACK per dupack. */
			if (IsReno(tp) && is_dupack)
				tcp_add_reno_sack(tp);
		} else {
			/* Partial ACK: "acked" is how many packets it removed. */
			int acked = prior_packets - tp->packets_out;
			if (IsReno(tp))
				tcp_remove_reno_sacks(sk, tp, acked);
			is_dupack = tcp_try_undo_partial(sk, tp, acked);
		}
		break;
	case TCP_CA_Loss:
		if (flag&FLAG_DATA_ACKED)
			tp->retransmits = 0;
		if (!tcp_try_undo_loss(sk, tp)) {
			/* Still in Loss: keep retransmitting. */
			tcp_moderate_cwnd(tp);
			tcp_xmit_retransmit_queue(sk);
			return;
		}
		if (tp->ca_state != TCP_CA_Open)
			return;
		/* Loss was undone and we are in Open: intentionally fall
		 * through to the default handling below.
		 */

	default:
		if (IsReno(tp)) {
			if (tp->snd_una != prior_snd_una)
				tcp_reset_reno_sack(tp);
			if (is_dupack)
				tcp_add_reno_sack(tp);
		}

		if (tp->ca_state == TCP_CA_Disorder)
			tcp_try_undo_dsack(sk, tp);

		if (!tcp_time_to_recover(sk, tp)) {
			tcp_try_to_open(sk, tp, flag);
			return;
		}

		/* From here on we are entering Recovery. */
		if (IsReno(tp))
			NET_INC_STATS_BH(TCPRenoRecovery);
		else
			NET_INC_STATS_BH(TCPSackRecovery);

		/* Record the recovery point and arm the undo bookkeeping. */
		tp->high_seq = tp->snd_nxt;
		tp->prior_ssthresh = 0;
		tp->undo_marker = tp->snd_una;
		tp->undo_retrans = tp->retrans_out;

		/* Only reduce ssthresh when coming from a state below CWR;
		 * the old value is saved for undo unless ECE was seen.
		 */
		if (tp->ca_state < TCP_CA_CWR) {
			if (!(flag&FLAG_ECE))
				tp->prior_ssthresh = tcp_current_ssthresh(tp);
			tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
			TCP_ECN_queue_cwr(tp);
		}

		tp->snd_cwnd_cnt = 0;
		tcp_set_ca_state(tp, TCP_CA_Recovery);
	}

	/* Reached in Recovery (both the existing and the freshly entered
	 * case): update loss marks, shrink the window, retransmit.
	 */
	if (is_dupack || tcp_head_timedout(sk, tp))
		tcp_update_scoreboard(sk, tp);
	tcp_cwnd_down(tp);
	tcp_xmit_retransmit_queue(sk);
}
1949
1950
1951
1952
1953static void tcp_ack_saw_tstamp(struct tcp_opt *tp, int flag)
1954{
1955 __u32 seq_rtt;
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972 seq_rtt = tcp_time_stamp - tp->rcv_tsecr;
1973 tcp_rtt_estimator(tp, seq_rtt);
1974 tcp_set_rto(tp);
1975 tp->backoff = 0;
1976 tcp_bound_rto(tp);
1977}
1978
1979static void tcp_ack_no_tstamp(struct tcp_opt *tp, u32 seq_rtt, int flag)
1980{
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990 if (flag & FLAG_RETRANS_DATA_ACKED)
1991 return;
1992
1993 tcp_rtt_estimator(tp, seq_rtt);
1994 tcp_set_rto(tp);
1995 tp->backoff = 0;
1996 tcp_bound_rto(tp);
1997}
1998
1999static __inline__ void
2000tcp_ack_update_rtt(struct tcp_opt *tp, int flag, s32 seq_rtt)
2001{
2002
2003 if (tp->saw_tstamp && tp->rcv_tsecr)
2004 tcp_ack_saw_tstamp(tp, flag);
2005 else if (seq_rtt >= 0)
2006 tcp_ack_no_tstamp(tp, seq_rtt, flag);
2007}
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022static inline __u32 bictcp_cwnd(struct tcp_opt *tp)
2023{
2024
2025 if (!tcp_is_bic(tp))
2026 return tp->snd_cwnd;
2027
2028 if (tp->bictcp.last_cwnd == tp->snd_cwnd &&
2029 (s32)(tcp_time_stamp - tp->bictcp.last_stamp) <= (HZ>>5))
2030 return tp->bictcp.cnt;
2031
2032 tp->bictcp.last_cwnd = tp->snd_cwnd;
2033 tp->bictcp.last_stamp = tcp_time_stamp;
2034
2035
2036 if (tp->snd_cwnd <= sysctl_tcp_bic_low_window)
2037 tp->bictcp.cnt = tp->snd_cwnd;
2038
2039
2040 else if (tp->snd_cwnd < tp->bictcp.last_max_cwnd) {
2041 __u32 dist = (tp->bictcp.last_max_cwnd - tp->snd_cwnd)
2042 / BICTCP_B;
2043
2044 if (dist > BICTCP_MAX_INCREMENT)
2045
2046 tp->bictcp.cnt = tp->snd_cwnd / BICTCP_MAX_INCREMENT;
2047 else if (dist <= 1U)
2048
2049 tp->bictcp.cnt = tp->snd_cwnd * BICTCP_FUNC_OF_MIN_INCR
2050 / BICTCP_B;
2051 else
2052
2053 tp->bictcp.cnt = tp->snd_cwnd / dist;
2054 } else {
2055
2056 if (tp->snd_cwnd < tp->bictcp.last_max_cwnd + BICTCP_B)
2057
2058 tp->bictcp.cnt = tp->snd_cwnd * BICTCP_FUNC_OF_MIN_INCR
2059 / BICTCP_B;
2060 else if (tp->snd_cwnd < tp->bictcp.last_max_cwnd
2061 + BICTCP_MAX_INCREMENT*(BICTCP_B-1))
2062
2063 tp->bictcp.cnt = tp->snd_cwnd * (BICTCP_B-1)
2064 / (tp->snd_cwnd-tp->bictcp.last_max_cwnd);
2065 else
2066
2067 tp->bictcp.cnt = tp->snd_cwnd / BICTCP_MAX_INCREMENT;
2068 }
2069 return tp->bictcp.cnt;
2070}
2071
2072
2073
2074
2075static __inline__ void reno_cong_avoid(struct tcp_opt *tp)
2076{
2077 if (tp->snd_cwnd <= tp->snd_ssthresh) {
2078
2079 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
2080 tp->snd_cwnd++;
2081 } else {
2082
2083
2084
2085 if (tp->snd_cwnd_cnt >= bictcp_cwnd(tp)) {
2086 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
2087 tp->snd_cwnd++;
2088 tp->snd_cwnd_cnt=0;
2089 } else
2090 tp->snd_cwnd_cnt++;
2091 }
2092 tp->snd_cwnd_stamp = tcp_time_stamp;
2093}
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
/* Vegas congestion avoidance.
 *
 * @tp:      connection state
 * @ack:     acknowledgement number of the ACK being processed
 * @seq_rtt: RTT sample for this ACK, handed on to vegas_rtt_calc()
 *
 * The main calculation runs only when @ack has passed the snd_nxt
 * recorded on the previous pass (tp->vegas.beg_snd_nxt).  Window-related
 * quantities are kept in fixed point with V_PARAM_SHIFT fractional bits.
 */
static void vegas_cong_avoid(struct tcp_opt *tp, u32 ack, u32 seq_rtt)
{
	if (after(ack, tp->vegas.beg_snd_nxt)) {
		/* The recorded interval has been fully acknowledged. */
		u32 old_wnd, old_snd_cwnd;

		/* Size, in segments, of the interval that was recorded. */
		old_wnd = (tp->vegas.beg_snd_nxt - tp->vegas.beg_snd_una) /
			tp->mss_cache;
		old_snd_cwnd = tp->vegas.beg_snd_cwnd;

		/* Record the next interval: it starts where the old one ended. */
		tp->vegas.beg_snd_una = tp->vegas.beg_snd_nxt;
		tp->vegas.beg_snd_nxt = tp->snd_nxt;
		tp->vegas.beg_snd_cwnd = tp->snd_cwnd;

		/* Account for the RTT sample carried by this ACK. */
		vegas_rtt_calc(tp, seq_rtt);

		if (tp->vegas.cntRTT <= 2) {
			/* Too few RTT samples for the Vegas estimate: just add
			 * one segment when already above ssthresh.
			 */
			if (tp->snd_cwnd > tp->snd_ssthresh)
				tp->snd_cwnd++;
		} else {
			u32 rtt, target_cwnd, diff;

			/* Use the smallest RTT seen during the interval. */
			rtt = tp->vegas.minRTT;

			/* Window that would have been sustained at the base
			 * RTT: old_wnd * baseRTT / rtt, in fixed point.
			 */
			target_cwnd = ((old_wnd * tp->vegas.baseRTT)
				       << V_PARAM_SHIFT) / rtt;

			/* Difference between actual and target window (fixed
			 * point).  NOTE(review): unsigned subtraction; relies
			 * on target_cwnd not exceeding old_wnd << V_PARAM_SHIFT.
			 */
			diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd;

			if (tp->snd_cwnd < tp->snd_ssthresh) {
				/* Below ssthresh. */
				if (diff > sysctl_tcp_vegas_gamma) {
					/* Too far ahead of the target: force
					 * ssthresh down to 2 ...
					 */
					tp->snd_ssthresh = 2;

					/* ... and cap cwnd at the target
					 * (converted from fixed point) plus one.
					 */
					tp->snd_cwnd = min(tp->snd_cwnd,
							   (target_cwnd >>
							    V_PARAM_SHIFT)+1);

				}
			} else {
				/* At or above ssthresh. */
				u32 next_snd_cwnd;

				/* Pick the desired window relative to the one
				 * in effect when the interval started.
				 */
				if (diff > sysctl_tcp_vegas_beta) {
					/* Above beta: aim one lower. */
					next_snd_cwnd = old_snd_cwnd - 1;
				} else if (diff < sysctl_tcp_vegas_alpha) {
					/* Below alpha: aim one higher. */
					next_snd_cwnd = old_snd_cwnd + 1;
				} else {
					/* Between alpha and beta: hold. */
					next_snd_cwnd = old_snd_cwnd;
				}

				/* Move the current window one step toward it. */
				if (next_snd_cwnd > tp->snd_cwnd)
					tp->snd_cwnd++;
				else if (next_snd_cwnd < tp->snd_cwnd)
					tp->snd_cwnd--;
			}
		}

		/* Start a fresh set of RTT samples for the next interval. */
		tp->vegas.cntRTT = 0;
		tp->vegas.minRTT = 0x7fffffff;
	}

	/* Runs on every call, independent of the block above: grow by one
	 * while at or below ssthresh.
	 */
	if (tp->snd_cwnd <= tp->snd_ssthresh)
		tp->snd_cwnd++;

	/* Never exceed the clamp ... */
	tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);

	/* ... and never drop below two segments. */
	tp->snd_cwnd = max(tp->snd_cwnd, 2U);

	tp->snd_cwnd_stamp = tcp_time_stamp;
}
2316
2317static inline void tcp_cong_avoid(struct tcp_opt *tp, u32 ack, u32 seq_rtt)
2318{
2319 if (tcp_vegas_enabled(tp))
2320 vegas_cong_avoid(tp, ack, seq_rtt);
2321 else
2322 reno_cong_avoid(tp);
2323}
2324
2325
2326
2327
2328
2329static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
2330{
2331 if (tp->packets_out==0) {
2332 tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS);
2333 } else {
2334 tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
2335 }
2336}
2337
2338
/* Drop fully acknowledged segments from the head of the write queue.
 *
 * @sk:        socket whose write queue is cleaned
 * @seq_rtt_p: out parameter; receives the RTT sample taken from the
 *             queue, or -1 if no usable sample was found
 *
 * Returns the FLAG_* bits describing what was acknowledged
 * (FLAG_DATA_ACKED, FLAG_SYN_ACKED, FLAG_RETRANS_DATA_ACKED).
 */
static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct sk_buff *skb;
	__u32 now = tcp_time_stamp;
	int acked = 0;
	__s32 seq_rtt = -1;

	/* Walk from the queue head up to, but not including, send_head. */
	while((skb=skb_peek(&sk->write_queue)) && (skb != tp->send_head)) {
		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
		__u8 sacked = scb->sacked;

		/* Stop at the first segment not entirely covered by snd_una. */
		if (after(scb->end_seq, tp->snd_una))
			break;

		/* Classify what got acknowledged.  An acked SYN also clears
		 * the retransmission stamp.
		 */
		if(!(scb->flags & TCPCB_FLAG_SYN)) {
			acked |= FLAG_DATA_ACKED;
		} else {
			acked |= FLAG_SYN_ACKED;
			tp->retrans_stamp = 0;
		}

		if (sacked) {
			if(sacked & TCPCB_RETRANS) {
				if(sacked & TCPCB_SACKED_RETRANS)
					tp->retrans_out--;
				acked |= FLAG_RETRANS_DATA_ACKED;
				/* A retransmitted segment invalidates any
				 * sample taken so far.
				 */
				seq_rtt = -1;
			} else if (seq_rtt < 0)
				seq_rtt = now - scb->when;
			/* Undo this segment's contribution to the counters. */
			if(sacked & TCPCB_SACKED_ACKED)
				tp->sacked_out--;
			if(sacked & TCPCB_LOST)
				tp->lost_out--;
			if(sacked & TCPCB_URG) {
				/* Leave urgent mode once the segment reaching
				 * snd_up has been acknowledged.
				 */
				if (tp->urg_mode &&
				    !before(scb->end_seq, tp->snd_up))
					tp->urg_mode = 0;
			}
		} else if (seq_rtt < 0)
			seq_rtt = now - scb->when;
		if(tp->fackets_out)
			tp->fackets_out--;
		tp->packets_out--;
		__skb_unlink(skb, skb->list);
		tcp_free_skb(sk, skb);
	}

	/* Something was acknowledged: update RTT/RTO and the retransmit timer. */
	if (acked&FLAG_ACKED) {
		tcp_ack_update_rtt(tp, acked, seq_rtt);
		tcp_ack_packets_out(sk, tp);
	}

#if FASTRETRANS_DEBUG > 0
	/* Consistency checks: counters must not go negative, and must all
	 * be zero once nothing is outstanding on a SACK connection.  Leaks
	 * are logged and repaired.
	 */
	BUG_TRAP((int)tp->sacked_out >= 0);
	BUG_TRAP((int)tp->lost_out >= 0);
	BUG_TRAP((int)tp->retrans_out >= 0);
	if (tp->packets_out==0 && tp->sack_ok) {
		if (tp->lost_out) {
			printk(KERN_DEBUG "Leak l=%u %d\n", tp->lost_out, tp->ca_state);
			tp->lost_out = 0;
		}
		if (tp->sacked_out) {
			printk(KERN_DEBUG "Leak s=%u %d\n", tp->sacked_out, tp->ca_state);
			tp->sacked_out = 0;
		}
		if (tp->retrans_out) {
			printk(KERN_DEBUG "Leak r=%u %d\n", tp->retrans_out, tp->ca_state);
			tp->retrans_out = 0;
		}
	}
#endif
	*seq_rtt_p = seq_rtt;
	return acked;
}
2425
2426static void tcp_ack_probe(struct sock *sk)
2427{
2428 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
2429
2430
2431
2432 if (!after(TCP_SKB_CB(tp->send_head)->end_seq, tp->snd_una + tp->snd_wnd)) {
2433 tp->backoff = 0;
2434 tcp_clear_xmit_timer(sk, TCP_TIME_PROBE0);
2435
2436
2437
2438 } else {
2439 tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0,
2440 min(tp->rto << tp->backoff, TCP_RTO_MAX));
2441 }
2442}
2443
2444static __inline__ int tcp_ack_is_dubious(struct tcp_opt *tp, int flag)
2445{
2446 return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
2447 tp->ca_state != TCP_CA_Open);
2448}
2449
2450static __inline__ int tcp_may_raise_cwnd(struct tcp_opt *tp, int flag)
2451{
2452 return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
2453 !((1<<tp->ca_state)&(TCPF_CA_Recovery|TCPF_CA_CWR));
2454}
2455
2456
2457
2458
2459static __inline__ int
2460tcp_may_update_window(struct tcp_opt *tp, u32 ack, u32 ack_seq, u32 nwin)
2461{
2462 return (after(ack, tp->snd_una) ||
2463 after(ack_seq, tp->snd_wl1) ||
2464 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd));
2465}
2466
2467
2468
2469
2470
2471
2472static int tcp_ack_update_window(struct sock *sk, struct tcp_opt *tp,
2473 struct sk_buff *skb, u32 ack, u32 ack_seq)
2474{
2475 int flag = 0;
2476 u32 nwin = ntohs(skb->h.th->window);
2477
2478 if (likely(!skb->h.th->syn))
2479 nwin <<= tp->snd_wscale;
2480
2481 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
2482 flag |= FLAG_WIN_UPDATE;
2483 tcp_update_wl(tp, ack, ack_seq);
2484
2485 if (tp->snd_wnd != nwin) {
2486 tp->snd_wnd = nwin;
2487
2488
2489
2490
2491 tcp_fast_path_check(sk, tp);
2492
2493 if (nwin > tp->max_window) {
2494 tp->max_window = nwin;
2495 tcp_sync_mss(sk, tp->pmtu_cookie);
2496 }
2497 }
2498 }
2499
2500 tp->snd_una = ack;
2501
2502 return flag;
2503}
2504
2505static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
2506{
2507 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
2508
2509 tcp_sync_left_out(tp);
2510
2511 if (tp->snd_una == prior_snd_una ||
2512 !before(tp->snd_una, tp->frto_highmark)) {
2513
2514
2515
2516 tcp_enter_frto_loss(sk);
2517 return;
2518 }
2519
2520 if (tp->frto_counter == 1) {
2521
2522
2523
2524 tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
2525 } else {
2526
2527
2528
2529
2530 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
2531 tcp_moderate_cwnd(tp);
2532 }
2533
2534
2535
2536
2537 tp->frto_counter = (tp->frto_counter + 1) % 3;
2538}
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
/* Low-pass filter step: returns (7*a + b) / 8, i.e. the previous
 * estimate @a weighted 7/8 and the new sample @b weighted 1/8.
 */
static inline __u32 westwood_do_filter(__u32 a, __u32 b)
{
	__u32 weighted = 7 * a;

	return (weighted + b) >> 3;
}
2553
2554static void westwood_filter(struct sock *sk, __u32 delta)
2555{
2556 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
2557
2558 tp->westwood.bw_ns_est =
2559 westwood_do_filter(tp->westwood.bw_ns_est,
2560 tp->westwood.bk / delta);
2561 tp->westwood.bw_est =
2562 westwood_do_filter(tp->westwood.bw_est,
2563 tp->westwood.bw_ns_est);
2564}
2565
2566
2567
2568
2569
2570
2571static inline __u32 westwood_update_rttmin(const struct sock *sk)
2572{
2573 const struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
2574 __u32 rttmin = tp->westwood.rtt_min;
2575
2576 if (tp->westwood.rtt != 0 &&
2577 (tp->westwood.rtt < tp->westwood.rtt_min || !rttmin))
2578 rttmin = tp->westwood.rtt;
2579
2580 return rttmin;
2581}
2582
2583
2584
2585
2586
2587
2588static __u32 westwood_acked(const struct sock *sk)
2589{
2590 const struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
2591
2592 return ((tp->snd_una) - (tp->westwood.snd_una));
2593}
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604static int westwood_new_window(const struct sock *sk)
2605{
2606 const struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
2607 __u32 left_bound;
2608 __u32 rtt;
2609 int ret = 0;
2610
2611 left_bound = tp->westwood.rtt_win_sx;
2612 rtt = max(tp->westwood.rtt, (__u32)TCP_WESTWOOD_RTT_MIN);
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623 if ((left_bound + rtt) < tcp_time_stamp)
2624 ret = 1;
2625
2626 return ret;
2627}
2628
2629
2630
2631
2632
2633
2634
2635static void __westwood_update_window(struct sock *sk, __u32 now)
2636{
2637 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
2638 __u32 delta = now - tp->westwood.rtt_win_sx;
2639
2640 if (delta) {
2641 if (tp->westwood.rtt)
2642 westwood_filter(sk, delta);
2643
2644 tp->westwood.bk = 0;
2645 tp->westwood.rtt_win_sx = tcp_time_stamp;
2646 }
2647}
2648
2649static void westwood_update_window(struct sock *sk, __u32 now)
2650{
2651 if (westwood_new_window(sk))
2652 __westwood_update_window(sk, now);
2653}
2654
2655
2656
2657
2658
2659
2660
2661
2662void __tcp_westwood_fast_bw(struct sock *sk, struct sk_buff *skb)
2663{
2664 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
2665
2666 westwood_update_window(sk, tcp_time_stamp);
2667
2668 tp->westwood.bk += westwood_acked(sk);
2669 tp->westwood.snd_una = tp->snd_una;
2670 tp->westwood.rtt_min = westwood_update_rttmin(sk);
2671}
2672
2673
2674
2675
2676
2677
2678
2679
2680static inline __u32 westwood_mss(struct tcp_opt *tp)
2681{
2682 return ((__u32)(tp->mss_cache));
2683}
2684
2685
2686
2687
2688
2689
2690static void westwood_dupack_update(struct sock *sk)
2691{
2692 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
2693
2694 tp->westwood.accounted += westwood_mss(tp);
2695 tp->westwood.cumul_ack = westwood_mss(tp);
2696}
2697
2698static inline int westwood_may_change_cumul(struct tcp_opt *tp)
2699{
2700 return (tp->westwood.cumul_ack > westwood_mss(tp));
2701}
2702
2703static inline void westwood_partial_update(struct tcp_opt *tp)
2704{
2705 tp->westwood.accounted -= tp->westwood.cumul_ack;
2706 tp->westwood.cumul_ack = westwood_mss(tp);
2707}
2708
2709static inline void westwood_complete_update(struct tcp_opt *tp)
2710{
2711 tp->westwood.cumul_ack -= tp->westwood.accounted;
2712 tp->westwood.accounted = 0;
2713}
2714
2715
2716
2717
2718
2719
2720
2721static inline __u32 westwood_acked_count(struct sock *sk)
2722{
2723 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
2724
2725 tp->westwood.cumul_ack = westwood_acked(sk);
2726
2727
2728
2729
2730 if (!(tp->westwood.cumul_ack))
2731 westwood_dupack_update(sk);
2732
2733 if (westwood_may_change_cumul(tp)) {
2734
2735 if (tp->westwood.accounted >= tp->westwood.cumul_ack)
2736 westwood_partial_update(tp);
2737 else
2738 westwood_complete_update(tp);
2739 }
2740
2741 tp->westwood.snd_una = tp->snd_una;
2742
2743 return tp->westwood.cumul_ack;
2744}
2745
2746
2747
2748
2749
2750
2751
2752
2753void __tcp_westwood_slow_bw(struct sock *sk, struct sk_buff *skb)
2754{
2755 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
2756
2757 westwood_update_window(sk, tcp_time_stamp);
2758
2759 tp->westwood.bk += westwood_acked_count(sk);
2760 tp->westwood.rtt_min = westwood_update_rttmin(sk);
2761}
2762
2763
2764
2765
/* Process the acknowledgement carried by an incoming segment.
 *
 * @sk:   receiving socket
 * @skb:  the incoming segment
 * @flag: initial FLAG_* bits from the caller; FLAG_SLOWPATH forces the
 *        full window/SACK/ECN processing
 *
 * Returns 1 if the ACK was accepted and processed, 0 if it was older
 * than snd_una or acknowledged data beyond snd_nxt.
 */
static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	u32 prior_snd_una = tp->snd_una;
	u32 ack_seq = TCP_SKB_CB(skb)->seq;
	u32 ack = TCP_SKB_CB(skb)->ack_seq;
	u32 prior_in_flight;
	s32 seq_rtt;
	int prior_packets;

	/* Acknowledges something we have not sent yet: ignore. */
	if (after(ack, tp->snd_nxt))
		goto uninteresting_ack;

	/* Older than what is already acknowledged. */
	if (before(ack, prior_snd_una))
		goto old_ack;

	if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
		/* Fast path: the ACK advances snd_una and the caller did not
		 * request slow-path handling.  The window is not re-read from
		 * the header here.
		 */
		tcp_update_wl(tp, ack, ack_seq);
		tp->snd_una = ack;
		tcp_westwood_fast_bw(sk, skb);
		flag |= FLAG_WIN_UPDATE;

		NET_INC_STATS_BH(TCPHPAcks);
	} else {
		/* Slow path.  A segment whose seq differs from end_seq
		 * carries payload (or SYN/FIN); otherwise it is a pure ACK.
		 */
		if (ack_seq != TCP_SKB_CB(skb)->end_seq)
			flag |= FLAG_DATA;
		else
			NET_INC_STATS_BH(TCPPureAcks);

		flag |= tcp_ack_update_window(sk, tp, skb, ack, ack_seq);

		/* A non-zero ->sacked means the segment carries a SACK option. */
		if (TCP_SKB_CB(skb)->sacked)
			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);

		if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th))
			flag |= FLAG_ECE;

		tcp_westwood_slow_bw(sk, skb);
	}

	/* The peer answered: clear the soft error and note when we last
	 * heard from it.
	 */
	sk->err_soft = 0;
	tp->rcv_tstamp = tcp_time_stamp;
	if ((prior_packets = tp->packets_out) == 0)
		goto no_queue;

	/* Sample in-flight count before the queue is cleaned. */
	prior_in_flight = tcp_packets_in_flight(tp);

	/* Free acknowledged segments; this also yields the RTT sample. */
	flag |= tcp_clean_rtx_queue(sk, &seq_rtt);

	if (tp->frto_counter)
		tcp_process_frto(sk, prior_snd_una);

	if (tcp_ack_is_dubious(tp, flag)) {
		/* Grow cwnd only if data was acked, the window was actually
		 * in use (or Vegas is active), and the state permits it.
		 */
		if ((flag&FLAG_DATA_ACKED) &&
		    (tcp_vegas_enabled(tp) || prior_in_flight >= tp->snd_cwnd) &&
		    tcp_may_raise_cwnd(tp, flag))
			tcp_cong_avoid(tp, ack, seq_rtt);
		tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
	} else {
		if ((flag & FLAG_DATA_ACKED) &&
		    (tcp_vegas_enabled(tp) || prior_in_flight >= tp->snd_cwnd))
			tcp_cong_avoid(tp, ack, seq_rtt);
	}

	/* Forward progress or a duplicate ACK confirms the route is alive. */
	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP))
		dst_confirm(sk->dst_cache);

	return 1;

no_queue:
	tp->probes_out = 0;

	/* Nothing was in flight.  If data is still waiting to be sent,
	 * re-evaluate the window probe timer.
	 */
	if (tp->send_head)
		tcp_ack_probe(sk);
	return 1;

old_ack:
	/* Even an old ACK may carry useful SACK information. */
	if (TCP_SKB_CB(skb)->sacked)
		tcp_sacktag_write_queue(sk, skb, prior_snd_una);

uninteresting_ack:
	SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
	return 0;
}
2866
2867
2868
2869
2870
2871
2872void tcp_parse_options(struct sk_buff *skb, struct tcp_opt *tp, int estab)
2873{
2874 unsigned char *ptr;
2875 struct tcphdr *th = skb->h.th;
2876 int length=(th->doff*4)-sizeof(struct tcphdr);
2877
2878 ptr = (unsigned char *)(th + 1);
2879 tp->saw_tstamp = 0;
2880
2881 while(length>0) {
2882 int opcode=*ptr++;
2883 int opsize;
2884
2885 switch (opcode) {
2886 case TCPOPT_EOL:
2887 return;
2888 case TCPOPT_NOP:
2889 length--;
2890 continue;
2891 default:
2892 opsize=*ptr++;
2893 if (opsize < 2)
2894 return;
2895 if (opsize > length)
2896 return;
2897 switch(opcode) {
2898 case TCPOPT_MSS:
2899 if(opsize==TCPOLEN_MSS && th->syn && !estab) {
2900 u16 in_mss = ntohs(*(__u16 *)ptr);
2901 if (in_mss) {
2902 if (tp->user_mss && tp->user_mss < in_mss)
2903 in_mss = tp->user_mss;
2904 tp->mss_clamp = in_mss;
2905 }
2906 }
2907 break;
2908 case TCPOPT_WINDOW:
2909 if(opsize==TCPOLEN_WINDOW && th->syn && !estab)
2910 if (sysctl_tcp_window_scaling) {
2911 tp->wscale_ok = 1;
2912 tp->snd_wscale = *(__u8 *)ptr;
2913 if(tp->snd_wscale > 14) {
2914 if(net_ratelimit())
2915 printk(KERN_INFO "tcp_parse_options: Illegal window "
2916 "scaling value %d >14 received.\n",
2917 tp->snd_wscale);
2918 tp->snd_wscale = 14;
2919 }
2920 }
2921 break;
2922 case TCPOPT_TIMESTAMP:
2923 if(opsize==TCPOLEN_TIMESTAMP) {
2924 if ((estab && tp->tstamp_ok) ||
2925 (!estab && sysctl_tcp_timestamps)) {
2926 tp->saw_tstamp = 1;
2927 tp->rcv_tsval = ntohl(*(__u32 *)ptr);
2928 tp->rcv_tsecr = ntohl(*(__u32 *)(ptr+4));
2929 }
2930 }
2931 break;
2932 case TCPOPT_SACK_PERM:
2933 if(opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
2934 if (sysctl_tcp_sack) {
2935 tp->sack_ok = 1;
2936 tcp_sack_reset(tp);
2937 }
2938 }
2939 break;
2940
2941 case TCPOPT_SACK:
2942 if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
2943 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
2944 tp->sack_ok) {
2945 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
2946 }
2947 };
2948 ptr+=opsize-2;
2949 length-=opsize;
2950 };
2951 }
2952}
2953
2954
2955
2956
2957static __inline__ int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, struct tcp_opt *tp)
2958{
2959 if (th->doff == sizeof(struct tcphdr)>>2) {
2960 tp->saw_tstamp = 0;
2961 return 0;
2962 } else if (tp->tstamp_ok &&
2963 th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
2964 __u32 *ptr = (__u32 *)(th + 1);
2965 if (*ptr == ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
2966 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
2967 tp->saw_tstamp = 1;
2968 ++ptr;
2969 tp->rcv_tsval = ntohl(*ptr);
2970 ++ptr;
2971 tp->rcv_tsecr = ntohl(*ptr);
2972 return 1;
2973 }
2974 }
2975 tcp_parse_options(skb, tp, 1);
2976 return 1;
2977}
2978
2979extern __inline__ void
2980tcp_store_ts_recent(struct tcp_opt *tp)
2981{
2982 tp->ts_recent = tp->rcv_tsval;
2983 tp->ts_recent_stamp = xtime.tv_sec;
2984}
2985
2986extern __inline__ void
2987tcp_replace_ts_recent(struct tcp_opt *tp, u32 seq)
2988{
2989 if (tp->saw_tstamp && !after(seq, tp->rcv_wup)) {
2990
2991
2992
2993
2994
2995
2996
2997 if((s32)(tp->rcv_tsval - tp->ts_recent) >= 0 ||
2998 xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS)
2999 tcp_store_ts_recent(tp);
3000 }
3001}
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026static int tcp_disordered_ack(struct tcp_opt *tp, struct sk_buff *skb)
3027{
3028 struct tcphdr *th = skb->h.th;
3029 u32 seq = TCP_SKB_CB(skb)->seq;
3030 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3031
3032 return (
3033 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
3034
3035
3036 ack == tp->snd_una &&
3037
3038
3039 !tcp_may_update_window(tp, ack, seq, ntohs(th->window)<<tp->snd_wscale) &&
3040
3041
3042 (s32)(tp->ts_recent - tp->rcv_tsval) <= (tp->rto*1024)/HZ);
3043}
3044
3045extern __inline__ int tcp_paws_discard(struct tcp_opt *tp, struct sk_buff *skb)
3046{
3047 return ((s32)(tp->ts_recent - tp->rcv_tsval) > TCP_PAWS_WINDOW &&
3048 xtime.tv_sec < tp->ts_recent_stamp + TCP_PAWS_24DAYS &&
3049 !tcp_disordered_ack(tp, skb));
3050}
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065static inline int tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq)
3066{
3067 return !before(end_seq, tp->rcv_wup) &&
3068 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
3069}
3070
3071
/* Handle a received reset: choose the error code from the current
 * state, report it to the owner unless the socket is dead, and finish
 * the connection.  Nothing is done for a socket already in TCP_CLOSE.
 */
static void tcp_reset(struct sock *sk)
{
	switch (sk->state) {
		case TCP_CLOSE:
			return;
		case TCP_SYN_SENT:
			sk->err = ECONNREFUSED;
			break;
		case TCP_CLOSE_WAIT:
			sk->err = EPIPE;
			break;
		default:
			sk->err = ECONNRESET;
			break;
	}

	if (!sk->dead)
		sk->error_report(sk);

	tcp_done(sk);
}
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
/* Process a received FIN.
 *
 * @skb: the segment carrying the FIN (not referenced in the body)
 * @sk:  receiving socket
 * @th:  TCP header of the segment (not referenced in the body)
 *
 * Schedules an ACK, marks the receive side shut down, performs the state
 * transition for the current state, discards out-of-order data and wakes
 * up the owner.
 */
static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	tcp_schedule_ack(tp);

	sk->shutdown |= RCV_SHUTDOWN;
	sk->done = 1;

	switch(sk->state) {
		case TCP_SYN_RECV:
		case TCP_ESTABLISHED:
			/* Peer closed first: move to CLOSE_WAIT. */
			tcp_set_state(sk, TCP_CLOSE_WAIT);
			tp->ack.pingpong = 1;
			break;

		case TCP_CLOSE_WAIT:
		case TCP_CLOSING:
			/* A FIN was already processed in these states;
			 * nothing more to do.
			 */
			break;
		case TCP_LAST_ACK:
			/* Likewise; remain in LAST_ACK. */
			break;

		case TCP_FIN_WAIT1:
			/* Our FIN has not been acknowledged yet: ACK the
			 * peer's FIN and enter CLOSING.
			 */
			tcp_send_ack(sk);
			tcp_set_state(sk, TCP_CLOSING);
			break;
		case TCP_FIN_WAIT2:
			/* Our FIN was acknowledged: ACK and go to TIME_WAIT. */
			tcp_send_ack(sk);
			tcp_time_wait(sk, TCP_TIME_WAIT, 0);
			break;
		default:
			/* Remaining states are not expected to receive a FIN
			 * here; log it.
			 */
			printk(KERN_ERR "tcp_fin: Impossible, sk->state=%d\n", sk->state);
			break;
	};

	/* Nothing queued out of order can become valid after the FIN:
	 * drop it and reset the SACK state.
	 */
	__skb_queue_purge(&tp->out_of_order_queue);
	if (tp->sack_ok)
		tcp_sack_reset(tp);
	tcp_mem_reclaim(sk);

	if (!sk->dead) {
		sk->state_change(sk);

		/* Signal hang-up when both directions are shut down or the
		 * socket is closed; otherwise signal readable input.
		 */
		if (sk->shutdown == SHUTDOWN_MASK || sk->state == TCP_CLOSE)
			sk_wake_async(sk, 1, POLL_HUP);
		else
			sk_wake_async(sk, 1, POLL_IN);
	}
}
3174
3175static __inline__ int
3176tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
3177{
3178 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
3179 if (before(seq, sp->start_seq))
3180 sp->start_seq = seq;
3181 if (after(end_seq, sp->end_seq))
3182 sp->end_seq = end_seq;
3183 return 1;
3184 }
3185 return 0;
3186}
3187
3188static __inline__ void tcp_dsack_set(struct tcp_opt *tp, u32 seq, u32 end_seq)
3189{
3190 if (tp->sack_ok && sysctl_tcp_dsack) {
3191 if (before(seq, tp->rcv_nxt))
3192 NET_INC_STATS_BH(TCPDSACKOldSent);
3193 else
3194 NET_INC_STATS_BH(TCPDSACKOfoSent);
3195
3196 tp->dsack = 1;
3197 tp->duplicate_sack[0].start_seq = seq;
3198 tp->duplicate_sack[0].end_seq = end_seq;
3199 tp->eff_sacks = min(tp->num_sacks+1, 4-tp->tstamp_ok);
3200 }
3201}
3202
3203static __inline__ void tcp_dsack_extend(struct tcp_opt *tp, u32 seq, u32 end_seq)
3204{
3205 if (!tp->dsack)
3206 tcp_dsack_set(tp, seq, end_seq);
3207 else
3208 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
3209}
3210
3211static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
3212{
3213 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
3214
3215 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
3216 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
3217 NET_INC_STATS_BH(DelayedACKLost);
3218 tcp_enter_quickack_mode(tp);
3219
3220 if (tp->sack_ok && sysctl_tcp_dsack) {
3221 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
3222
3223 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
3224 end_seq = tp->rcv_nxt;
3225 tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, end_seq);
3226 }
3227 }
3228
3229 tcp_send_ack(sk);
3230}
3231
3232
3233
3234
3235static void tcp_sack_maybe_coalesce(struct tcp_opt *tp)
3236{
3237 int this_sack;
3238 struct tcp_sack_block *sp = &tp->selective_acks[0];
3239 struct tcp_sack_block *swalk = sp+1;
3240
3241
3242
3243
3244 for (this_sack = 1; this_sack < tp->num_sacks; ) {
3245 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
3246 int i;
3247
3248
3249
3250
3251 tp->num_sacks--;
3252 tp->eff_sacks = min(tp->num_sacks+tp->dsack, 4-tp->tstamp_ok);
3253 for(i=this_sack; i < tp->num_sacks; i++)
3254 sp[i] = sp[i+1];
3255 continue;
3256 }
3257 this_sack++, swalk++;
3258 }
3259}
3260
3261static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
3262{
3263 __u32 tmp;
3264
3265 tmp = sack1->start_seq;
3266 sack1->start_seq = sack2->start_seq;
3267 sack2->start_seq = tmp;
3268
3269 tmp = sack1->end_seq;
3270 sack1->end_seq = sack2->end_seq;
3271 sack2->end_seq = tmp;
3272}
3273
/* Account for newly queued out-of-order data [seq, end_seq) in the SACK
 * block list.  The block covering the new data always ends up first in
 * tp->selective_acks[].
 */
static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct tcp_sack_block *sp = &tp->selective_acks[0];
	int cur_sacks = tp->num_sacks;
	int this_sack;

	if (!cur_sacks)
		goto new_sack;

	for (this_sack=0; this_sack<cur_sacks; this_sack++, sp++) {
		if (tcp_sack_extend(sp, seq, end_seq)) {
			/* An existing block absorbed the range: bubble it to
			 * the front, then merge any blocks it now overlaps.
			 */
			for (; this_sack>0; this_sack--, sp--)
				tcp_sack_swap(sp, sp-1);
			if (cur_sacks > 1)
				tcp_sack_maybe_coalesce(tp);
			return;
		}
	}

	/* No block could be extended.  Here this_sack == cur_sacks and sp
	 * points one past the last used block.  If the list already holds
	 * four blocks, the last one is dropped to make room.
	 */
	if (this_sack >= 4) {
		this_sack--;
		tp->num_sacks--;
		sp--;
	}
	/* Shift the existing blocks up by one, leaving sp at slot 0. */
	for(; this_sack > 0; this_sack--, sp--)
		*sp = *(sp-1);

new_sack:
	/* Build the new head of the list. */
	sp->start_seq = seq;
	sp->end_seq = end_seq;
	tp->num_sacks++;
	tp->eff_sacks = min(tp->num_sacks+tp->dsack, 4-tp->tstamp_ok);
}
3316
3317
3318
3319static void tcp_sack_remove(struct tcp_opt *tp)
3320{
3321 struct tcp_sack_block *sp = &tp->selective_acks[0];
3322 int num_sacks = tp->num_sacks;
3323 int this_sack;
3324
3325
3326 if (skb_queue_len(&tp->out_of_order_queue) == 0) {
3327 tp->num_sacks = 0;
3328 tp->eff_sacks = tp->dsack;
3329 return;
3330 }
3331
3332 for(this_sack = 0; this_sack < num_sacks; ) {
3333
3334 if (!before(tp->rcv_nxt, sp->start_seq)) {
3335 int i;
3336
3337
3338 BUG_TRAP(!before(tp->rcv_nxt, sp->end_seq));
3339
3340
3341 for (i=this_sack+1; i < num_sacks; i++)
3342 tp->selective_acks[i-1] = tp->selective_acks[i];
3343 num_sacks--;
3344 continue;
3345 }
3346 this_sack++;
3347 sp++;
3348 }
3349 if (num_sacks != tp->num_sacks) {
3350 tp->num_sacks = num_sacks;
3351 tp->eff_sacks = min(tp->num_sacks+tp->dsack, 4-tp->tstamp_ok);
3352 }
3353}
3354
3355
3356
3357
3358static void tcp_ofo_queue(struct sock *sk)
3359{
3360 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
3361 __u32 dsack_high = tp->rcv_nxt;
3362 struct sk_buff *skb;
3363
3364 while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
3365 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
3366 break;
3367
3368 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
3369 __u32 dsack = dsack_high;
3370 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
3371 dsack_high = TCP_SKB_CB(skb)->end_seq;
3372 tcp_dsack_extend(tp, TCP_SKB_CB(skb)->seq, dsack);
3373 }
3374
3375 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
3376 SOCK_DEBUG(sk, "ofo packet was already received \n");
3377 __skb_unlink(skb, skb->list);
3378 __kfree_skb(skb);
3379 continue;
3380 }
3381 SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
3382 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
3383 TCP_SKB_CB(skb)->end_seq);
3384
3385 __skb_unlink(skb, skb->list);
3386 __skb_queue_tail(&sk->receive_queue, skb);
3387 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
3388 if(skb->h.th->fin)
3389 tcp_fin(skb, sk, skb->h.th);
3390 }
3391}
3392
3393static inline int tcp_rmem_schedule(struct sock *sk, struct sk_buff *skb)
3394{
3395 return (int)skb->truesize <= sk->forward_alloc ||
3396 tcp_mem_schedule(sk, skb->truesize, 1);
3397}
3398
/* Forward declaration: tcp_data_queue() calls tcp_prune_queue(), which is
 * defined further down in this file.
 */
static int tcp_prune_queue(struct sock *sk);

/*
 * Queue the data carried by a received segment.
 *
 * Three cases are distinguished by comparing the segment's sequence range
 * with tp->rcv_nxt:
 *   - seq == rcv_nxt: in-order data.  It is copied straight to a waiting
 *     reader when possible, otherwise appended to sk->receive_queue; then
 *     the out-of-order queue is drained and SACK state is trimmed.
 *   - end_seq <= rcv_nxt: pure duplicate.  A D-SACK is recorded, an ACK is
 *     scheduled and the skb is freed.
 *   - otherwise: data beyond rcv_nxt (possibly overlapping it).  Segments
 *     outside the receive window are dropped; partially old segments are
 *     queued in-order; the rest are inserted into tp->out_of_order_queue
 *     in sequence order, with overlap handling and SACK bookkeeping.
 *
 * The function takes over the skb: on every path it is either queued or
 * freed.
 */
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th;
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	/* eaten: -1 = no direct copy to the reader was done (or it failed),
	 *         0 = copied, but not the whole payload or a FIN is present,
	 *         1 = whole payload copied and no FIN; skb can be freed.
	 */
	int eaten = -1;

	/* Segment occupies no sequence space: nothing to queue. */
	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
		goto drop;

	/* NOTE(review): th was already initialised to this value above. */
	th = skb->h.th;
	/* Strip the TCP header so skb->len counts payload only. */
	__skb_pull(skb, th->doff*4);

	TCP_ECN_accept_cwr(tp, skb);

	/* Forget a pending D-SACK and recompute the effective SACK count. */
	if (tp->dsack) {
		tp->dsack = 0;
		tp->eff_sacks = min_t(unsigned int, tp->num_sacks, 4-tp->tstamp_ok);
	}

	/* In-sequence segment. */
	if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
		if (tcp_receive_window(tp) == 0)
			goto out_of_window;

		/* Direct copy to the reader: only when the current task is
		 * the one recorded in ucopy, it has consumed everything up
		 * to rcv_nxt, still wants data, holds the socket lock and
		 * no urgent data is pending.
		 */
		if (tp->ucopy.task == current &&
		    tp->copied_seq == tp->rcv_nxt &&
		    tp->ucopy.len &&
		    sk->lock.users &&
		    !tp->urg_data) {
			int chunk = min_t(unsigned int, skb->len, tp->ucopy.len);

			__set_current_state(TASK_RUNNING);

			/* Bottom halves are enabled around the copy. */
			local_bh_enable();
			if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
				tp->ucopy.len -= chunk;
				tp->copied_seq += chunk;
				eaten = (chunk == skb->len && !th->fin);
				tcp_rcv_space_adjust(sk);
			}
			local_bh_disable();
		}

		if (eaten <= 0) {
			/* Also entered via goto for a partially old segment. */
queue_and_out:
			/* Memory is checked only when nothing was copied to
			 * the reader; try pruning once before giving up.
			 */
			if (eaten < 0 &&
			    (atomic_read(&sk->rmem_alloc) > sk->rcvbuf ||
			     !tcp_rmem_schedule(sk, skb))) {
				if (tcp_prune_queue(sk) < 0 || !tcp_rmem_schedule(sk, skb))
					goto drop;
			}
			tcp_set_owner_r(skb, sk);
			__skb_queue_tail(&sk->receive_queue, skb);
		}
		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		if(skb->len)
			tcp_event_data_recv(sk, tp, skb);
		if(th->fin)
			tcp_fin(skb, sk, th);

		if (skb_queue_len(&tp->out_of_order_queue)) {
			/* rcv_nxt advanced: pull in what is now in order. */
			tcp_ofo_queue(sk);

			/* The gap was filled completely: leave pingpong
			 * mode.
			 */
			if (skb_queue_len(&tp->out_of_order_queue) == 0)
				tp->ack.pingpong = 0;
		}

		if(tp->num_sacks)
			tcp_sack_remove(tp);

		tcp_fast_path_check(sk, tp);

		if (eaten > 0) {
			/* Everything went to the reader; skb was not queued. */
			__kfree_skb(skb);
		} else if (!sk->dead)
			sk->data_ready(sk, 0);
		return;
	}

	/* Segment lies entirely at or below rcv_nxt: a duplicate. */
	if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
		NET_INC_STATS_BH(DelayedACKLost);
		tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);

out_of_window:
		tcp_enter_quickack_mode(tp);
		tcp_schedule_ack(tp);
drop:
		__kfree_skb(skb);
		return;
	}

	/* Segment starts at or beyond the right edge of the window. */
	if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt+tcp_receive_window(tp)))
		goto out_of_window;

	tcp_enter_quickack_mode(tp);

	if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
		/* Partial segment: seq < rcv_nxt < end_seq. */
		SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
			   tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
			   TCP_SKB_CB(skb)->end_seq);

		/* Report the already-received head as a D-SACK. */
		tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);

		/* With a closed window the new tail is dropped too, but
		 * only after the D-SACK above has been recorded.
		 */
		if (!tcp_receive_window(tp))
			goto out_of_window;
		goto queue_and_out;
	}

	TCP_ECN_check_ce(tp, skb);

	/* Same memory check as for in-order data: prune once, then drop. */
	if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf ||
	    !tcp_rmem_schedule(sk, skb)) {
		if (tcp_prune_queue(sk) < 0 || !tcp_rmem_schedule(sk, skb))
			goto drop;
	}

	/* Out-of-order data present: clear the header prediction flags. */
	tp->pred_flags = 0;
	tcp_schedule_ack(tp);

	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
		   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);

	tcp_set_owner_r(skb, sk);

	if (skb_peek(&tp->out_of_order_queue) == NULL) {
		/* First out-of-order segment: build a single SACK block. */
		if(tp->sack_ok) {
			tp->num_sacks = 1;
			tp->dsack = 0;
			tp->eff_sacks = 1;
			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
			tp->selective_acks[0].end_seq = TCP_SKB_CB(skb)->end_seq;
		}
		__skb_queue_head(&tp->out_of_order_queue,skb);
	} else {
		/* Start from the last queued segment. */
		struct sk_buff *skb1=tp->out_of_order_queue.prev;
		u32 seq = TCP_SKB_CB(skb)->seq;
		u32 end_seq = TCP_SKB_CB(skb)->end_seq;

		if (seq == TCP_SKB_CB(skb1)->end_seq) {
			/* Directly follows the last segment: append. */
			__skb_append(skb1, skb);

			if (tp->num_sacks == 0 ||
			    tp->selective_acks[0].end_seq != seq)
				goto add_sack;

			/* The first SACK block ends exactly here: just
			 * extend it.
			 */
			tp->selective_acks[0].end_seq = end_seq;
			return;
		}

		/* Walk backwards to the last segment whose seq is not
		 * after ours; the loop ends at the queue head sentinel
		 * when there is none.
		 */
		do {
			if (!after(TCP_SKB_CB(skb1)->seq, seq))
				break;
		} while ((skb1=skb1->prev) != (struct sk_buff*)&tp->out_of_order_queue);

		/* Does the new segment overlap that predecessor? */
		if (skb1 != (struct sk_buff*)&tp->out_of_order_queue &&
		    before(seq, TCP_SKB_CB(skb1)->end_seq)) {
			if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
				/* Fully contained in skb1: drop it and
				 * report the whole range as a D-SACK.
				 */
				__kfree_skb(skb);
				tcp_dsack_set(tp, seq, end_seq);
				goto add_sack;
			}
			if (after(seq, TCP_SKB_CB(skb1)->seq)) {
				/* Partial overlap with the tail of skb1. */
				tcp_dsack_set(tp, seq, TCP_SKB_CB(skb1)->end_seq);
			} else {
				/* Same start as skb1 but longer: insert in
				 * front of it so the loop below removes it.
				 */
				skb1 = skb1->prev;
			}
		}
		__skb_insert(skb, skb1, skb1->next, &tp->out_of_order_queue);

		/* Remove following segments that the new one covers
		 * completely; a partially covered one ends the scan.
		 */
		while ((skb1 = skb->next) != (struct sk_buff*)&tp->out_of_order_queue &&
		       after(end_seq, TCP_SKB_CB(skb1)->seq)) {
			if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
				tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq);
				break;
			}
			__skb_unlink(skb1, skb1->list);
			tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq);
			__kfree_skb(skb1);
		}

add_sack:
		/* Uses the saved seq/end_seq, so this is safe even on the
		 * path where skb itself was freed above.
		 */
		if (tp->sack_ok)
			tcp_sack_new_ofo_skb(sk, seq, end_seq);
	}
}
3607
3608
3609
3610
3611
3612
3613static void
3614tcp_collapse(struct sock *sk, struct sk_buff *head,
3615 struct sk_buff *tail, u32 start, u32 end)
3616{
3617 struct sk_buff *skb;
3618
3619
3620
3621 for (skb = head; skb != tail; ) {
3622
3623 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
3624 struct sk_buff *next = skb->next;
3625 __skb_unlink(skb, skb->list);
3626 __kfree_skb(skb);
3627 NET_INC_STATS_BH(TCPRcvCollapsed);
3628 skb = next;
3629 continue;
3630 }
3631
3632
3633
3634
3635
3636
3637 if (!skb->h.th->syn && !skb->h.th->fin &&
3638 (tcp_win_from_space(skb->truesize) > skb->len ||
3639 before(TCP_SKB_CB(skb)->seq, start) ||
3640 (skb->next != tail &&
3641 TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb->next)->seq)))
3642 break;
3643
3644
3645 start = TCP_SKB_CB(skb)->end_seq;
3646 skb = skb->next;
3647 }
3648 if (skb == tail || skb->h.th->syn || skb->h.th->fin)
3649 return;
3650
3651 while (before(start, end)) {
3652 struct sk_buff *nskb;
3653 int header = skb_headroom(skb);
3654 int copy = SKB_MAX_ORDER(header, 0);
3655
3656
3657 if (copy < 0)
3658 return;
3659 if (end-start < copy)
3660 copy = end-start;
3661 nskb = alloc_skb(copy+header, GFP_ATOMIC);
3662 if (!nskb)
3663 return;
3664 skb_reserve(nskb, header);
3665 memcpy(nskb->head, skb->head, header);
3666 nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head);
3667 nskb->h.raw = nskb->head + (skb->h.raw-skb->head);
3668 nskb->mac.raw = nskb->head + (skb->mac.raw-skb->head);
3669 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
3670 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
3671 __skb_insert(nskb, skb->prev, skb, skb->list);
3672 tcp_set_owner_r(nskb, sk);
3673
3674
3675 while (copy > 0) {
3676 int offset = start - TCP_SKB_CB(skb)->seq;
3677 int size = TCP_SKB_CB(skb)->end_seq - start;
3678
3679 if (offset < 0) BUG();
3680 if (size > 0) {
3681 size = min(copy, size);
3682 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
3683 BUG();
3684 TCP_SKB_CB(nskb)->end_seq += size;
3685 copy -= size;
3686 start += size;
3687 }
3688 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
3689 struct sk_buff *next = skb->next;
3690 __skb_unlink(skb, skb->list);
3691 __kfree_skb(skb);
3692 NET_INC_STATS_BH(TCPRcvCollapsed);
3693 skb = next;
3694 if (skb == tail || skb->h.th->syn || skb->h.th->fin)
3695 return;
3696 }
3697 }
3698 }
3699}
3700
3701
3702
3703
3704static void tcp_collapse_ofo_queue(struct sock *sk)
3705{
3706 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
3707 struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
3708 struct sk_buff *head;
3709 u32 start, end;
3710
3711 if (skb == NULL)
3712 return;
3713
3714 start = TCP_SKB_CB(skb)->seq;
3715 end = TCP_SKB_CB(skb)->end_seq;
3716 head = skb;
3717
3718 for (;;) {
3719 skb = skb->next;
3720
3721
3722
3723 if (skb == (struct sk_buff *)&tp->out_of_order_queue ||
3724 after(TCP_SKB_CB(skb)->seq, end) ||
3725 before(TCP_SKB_CB(skb)->end_seq, start)) {
3726 tcp_collapse(sk, head, skb, start, end);
3727 head = skb;
3728 if (skb == (struct sk_buff *)&tp->out_of_order_queue)
3729 break;
3730
3731 start = TCP_SKB_CB(skb)->seq;
3732 end = TCP_SKB_CB(skb)->end_seq;
3733 } else {
3734 if (before(TCP_SKB_CB(skb)->seq, start))
3735 start = TCP_SKB_CB(skb)->seq;
3736 if (after(TCP_SKB_CB(skb)->end_seq, end))
3737 end = TCP_SKB_CB(skb)->end_seq;
3738 }
3739 }
3740}
3741
3742
3743
3744
3745
3746
3747
3748
3749static int tcp_prune_queue(struct sock *sk)
3750{
3751 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
3752
3753 SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
3754
3755 NET_INC_STATS_BH(PruneCalled);
3756
3757 if (atomic_read(&sk->rmem_alloc) >= sk->rcvbuf)
3758 tcp_clamp_window(sk, tp);
3759 else if (tcp_memory_pressure)
3760 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);
3761
3762 tcp_collapse_ofo_queue(sk);
3763 tcp_collapse(sk, sk->receive_queue.next,
3764 (struct sk_buff*)&sk->receive_queue,
3765 tp->copied_seq, tp->rcv_nxt);
3766 tcp_mem_reclaim(sk);
3767
3768 if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf)
3769 return 0;
3770
3771
3772
3773
3774
3775 if (skb_queue_len(&tp->out_of_order_queue)) {
3776 net_statistics[smp_processor_id()*2].OfoPruned += skb_queue_len(&tp->out_of_order_queue);
3777 __skb_queue_purge(&tp->out_of_order_queue);
3778
3779
3780
3781
3782
3783
3784 if(tp->sack_ok)
3785 tcp_sack_reset(tp);
3786 tcp_mem_reclaim(sk);
3787 }
3788
3789 if(atomic_read(&sk->rmem_alloc) <= sk->rcvbuf)
3790 return 0;
3791
3792
3793
3794
3795
3796 NET_INC_STATS_BH(RcvPruned);
3797
3798
3799 tp->pred_flags = 0;
3800 return -1;
3801}
3802
3803
3804
3805
3806
3807
3808void tcp_cwnd_application_limited(struct sock *sk)
3809{
3810 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
3811
3812 if (tp->ca_state == TCP_CA_Open &&
3813 sk->socket && !test_bit(SOCK_NOSPACE, &sk->socket->flags)) {
3814
3815 u32 win_used = max(tp->snd_cwnd_used, 2U);
3816 if (win_used < tp->snd_cwnd) {
3817 tp->snd_ssthresh = tcp_current_ssthresh(tp);
3818 tp->snd_cwnd = (tp->snd_cwnd+win_used)>>1;
3819 }
3820 tp->snd_cwnd_used = 0;
3821 }
3822 tp->snd_cwnd_stamp = tcp_time_stamp;
3823}
3824
3825
3826
3827
3828
3829
3830static void tcp_new_space(struct sock *sk)
3831{
3832 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
3833
3834 if (tp->packets_out < tp->snd_cwnd &&
3835 !(sk->userlocks&SOCK_SNDBUF_LOCK) &&
3836 !tcp_memory_pressure &&
3837 atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
3838 int sndmem, demanded;
3839
3840 sndmem = tp->mss_clamp+MAX_TCP_HEADER+16+sizeof(struct sk_buff);
3841 demanded = max_t(unsigned int, tp->snd_cwnd, tp->reordering+1);
3842 sndmem *= 2*demanded;
3843 if (sndmem > sk->sndbuf)
3844 sk->sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
3845 tp->snd_cwnd_stamp = tcp_time_stamp;
3846 }
3847
3848 sk->write_space(sk);
3849}
3850
3851static inline void tcp_check_space(struct sock *sk)
3852{
3853 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
3854
3855 if (tp->queue_shrunk) {
3856 tp->queue_shrunk = 0;
3857 if (sk->socket && test_bit(SOCK_NOSPACE, &sk->socket->flags))
3858 tcp_new_space(sk);
3859 }
3860}
3861
3862static void __tcp_data_snd_check(struct sock *sk, struct sk_buff *skb)
3863{
3864 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
3865
3866 if (after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd) ||
3867 tcp_packets_in_flight(tp) >= tp->snd_cwnd ||
3868 tcp_write_xmit(sk, tp->nonagle))
3869 tcp_check_probe_timer(sk, tp);
3870}
3871
3872static __inline__ void tcp_data_snd_check(struct sock *sk)
3873{
3874 struct sk_buff *skb = sk->tp_pinfo.af_tcp.send_head;
3875
3876 if (skb != NULL)
3877 __tcp_data_snd_check(sk, skb);
3878 tcp_check_space(sk);
3879}
3880
3881
3882
3883
3884static __inline__ void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
3885{
3886 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
3887
3888
3889 if (((tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss
3890
3891
3892
3893 && __tcp_select_window(sk) >= tp->rcv_wnd) ||
3894
3895 tcp_in_quickack_mode(tp) ||
3896
3897 (ofo_possible &&
3898 skb_peek(&tp->out_of_order_queue) != NULL)) {
3899
3900 tcp_send_ack(sk);
3901 } else {
3902
3903 tcp_send_delayed_ack(sk);
3904 }
3905}
3906
3907static __inline__ void tcp_ack_snd_check(struct sock *sk)
3908{
3909 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
3910 if (!tcp_ack_scheduled(tp)) {
3911
3912 return;
3913 }
3914 __tcp_ack_snd_check(sk, 1);
3915}
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
3928{
3929 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
3930 u32 ptr = ntohs(th->urg_ptr);
3931
3932 if (ptr && !sysctl_tcp_stdurg)
3933 ptr--;
3934 ptr += ntohl(th->seq);
3935
3936
3937 if (after(tp->copied_seq, ptr))
3938 return;
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950 if (before(ptr, tp->rcv_nxt))
3951 return;
3952
3953
3954 if (tp->urg_data && !after(ptr, tp->urg_seq))
3955 return;
3956
3957
3958 if (sk->proc != 0) {
3959 if (sk->proc > 0)
3960 kill_proc(sk->proc, SIGURG, 1);
3961 else
3962 kill_pg(-sk->proc, SIGURG, 1);
3963 sk_wake_async(sk, 3, POLL_PRI);
3964 }
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
3982 !sk->urginline &&
3983 tp->copied_seq != tp->rcv_nxt) {
3984 struct sk_buff *skb = skb_peek(&sk->receive_queue);
3985 tp->copied_seq++;
3986 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
3987 __skb_unlink(skb, skb->list);
3988 __kfree_skb(skb);
3989 }
3990 }
3991
3992 tp->urg_data = TCP_URG_NOTYET;
3993 tp->urg_seq = ptr;
3994
3995
3996 tp->pred_flags = 0;
3997}
3998
3999
4000static inline void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
4001{
4002 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
4003
4004
4005 if (th->urg)
4006 tcp_check_urg(sk,th);
4007
4008
4009 if (tp->urg_data == TCP_URG_NOTYET) {
4010 u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff*4) - th->syn;
4011
4012
4013 if (ptr < skb->len) {
4014 u8 tmp;
4015 if (skb_copy_bits(skb, ptr, &tmp, 1))
4016 BUG();
4017 tp->urg_data = TCP_URG_VALID | tmp;
4018 if (!sk->dead)
4019 sk->data_ready(sk,0);
4020 }
4021 }
4022}
4023
4024static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
4025{
4026 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
4027 int chunk = skb->len - hlen;
4028 int err;
4029
4030 local_bh_enable();
4031 if (skb->ip_summed==CHECKSUM_UNNECESSARY)
4032 err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
4033 else
4034 err = skb_copy_and_csum_datagram_iovec(skb, hlen, tp->ucopy.iov);
4035
4036 if (!err) {
4037 tp->ucopy.len -= chunk;
4038 tp->copied_seq += chunk;
4039 tcp_rcv_space_adjust(sk);
4040 }
4041
4042 local_bh_disable();
4043 return err;
4044}
4045
4046static int __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
4047{
4048 int result;
4049
4050 if (sk->lock.users) {
4051 local_bh_enable();
4052 result = __tcp_checksum_complete(skb);
4053 local_bh_disable();
4054 } else {
4055 result = __tcp_checksum_complete(skb);
4056 }
4057 return result;
4058}
4059
4060static __inline__ int
4061tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
4062{
4063 return skb->ip_summed != CHECKSUM_UNNECESSARY &&
4064 __tcp_checksum_complete_user(sk, skb);
4065}
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
/*
 * Receive processing for a segment on an established connection.
 *
 * A fast path handles segments whose header flag word matches
 * tp->pred_flags and whose sequence number equals rcv_nxt: pure ACKs are
 * processed and freed, data segments are either copied directly to a
 * waiting reader or appended to the receive queue.  Everything else (and
 * any fast-path check that fails) goes through the slow path: length and
 * checksum validation, PAWS, sequence check, RST and SYN handling, ACK
 * processing, urgent data and tcp_data_queue().
 *
 * @sk:  the socket
 * @skb: the received segment; consumed on every path except the one that
 *       returns 1
 * @th:  TCP header of the segment
 * @len: segment length including the TCP header
 *
 * Returns 0 normally.  Returns 1 after tcp_reset() was called because of
 * an in-window SYN; the skb is not freed on that path.
 */
int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
			struct tcphdr *th, unsigned len)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	tp->saw_tstamp = 0;

	/* Fast path entry: predicted header flags and in-sequence. */
	if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
	    TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
		int tcp_header_len = tp->tcp_header_len;

		/* The connection uses a header of base size plus aligned
		 * timestamp option: parse that option inline.
		 */
		if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
			__u32 *ptr = (__u32 *)(th + 1);

			/* First option word must be NOP, NOP, TIMESTAMP,
			 * length; anything else is left to the slow path.
			 */
			if (*ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
					  | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
				goto slow_path;

			tp->saw_tstamp = 1;
			++ptr;
			tp->rcv_tsval = ntohl(*ptr);
			++ptr;
			tp->rcv_tsecr = ntohl(*ptr);

			/* Timestamp older than ts_recent: slow path. */
			if ((s32)(tp->rcv_tsval - tp->ts_recent) < 0)
				goto slow_path;
		}

		if (len <= tcp_header_len) {
			/* No payload. */
			if (len == tcp_header_len) {
				/* Pure ACK.  ts_recent is updated only when
				 * the timestamp header is in use and
				 * rcv_nxt == rcv_wup.
				 */
				if (tcp_header_len ==
				    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
				    tp->rcv_nxt == tp->rcv_wup)
					tcp_store_ts_recent(tp);

				tcp_rcv_rtt_measure_ts(tp, skb);

				tcp_ack(sk, skb, 0);
				__kfree_skb(skb);
				tcp_data_snd_check(sk);
				return 0;
			} else {
				/* Shorter than the expected header. */
				TCP_INC_STATS_BH(TcpInErrs);
				goto discard;
			}
		} else {
			/* Set to 1 once the payload went straight to the
			 * reader.
			 */
			int eaten = 0;

			/* Direct copy: the current task is the recorded
			 * reader, has consumed everything up to rcv_nxt,
			 * has room for the whole payload and holds the
			 * socket lock.
			 */
			if (tp->ucopy.task == current &&
			    tp->copied_seq == tp->rcv_nxt &&
			    len - tcp_header_len <= tp->ucopy.len &&
			    sk->lock.users) {
				__set_current_state(TASK_RUNNING);

				if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
					if (tcp_header_len ==
					    (sizeof(struct tcphdr) +
					     TCPOLEN_TSTAMP_ALIGNED) &&
					    tp->rcv_nxt == tp->rcv_wup)
						tcp_store_ts_recent(tp);

					tcp_rcv_rtt_measure_ts(tp, skb);

					__skb_pull(skb, tcp_header_len);
					tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
					NET_INC_STATS_BH(TCPHPHitsToUser);
					eaten = 1;
				}
			}
			if (!eaten) {
				/* Not copied (or the copy failed): verify
				 * the checksum and queue the segment.
				 */
				if (tcp_checksum_complete_user(sk, skb))
					goto csum_error;

				if (tcp_header_len ==
				    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
				    tp->rcv_nxt == tp->rcv_wup)
					tcp_store_ts_recent(tp);

				tcp_rcv_rtt_measure_ts(tp, skb);

				/* Not enough forward-allocated memory:
				 * continue at step5 of the slow path,
				 * which does the memory accounting.
				 */
				if ((int)skb->truesize > sk->forward_alloc)
					goto step5;

				NET_INC_STATS_BH(TCPHPHits);

				/* Bulk receive: strip the header and queue. */
				__skb_pull(skb,tcp_header_len);
				__skb_queue_tail(&sk->receive_queue, skb);
				tcp_set_owner_r(skb, sk);
				tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
			}

			tcp_event_data_recv(sk, tp, skb);

			if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
				/* The segment also acknowledges new data. */
				tcp_ack(sk, skb, FLAG_DATA);
				tcp_data_snd_check(sk);
				if (!tcp_ack_scheduled(tp))
					goto no_ack;
			}

			if (eaten) {
				if (tcp_in_quickack_mode(tp)) {
					tcp_send_ack(sk);
				} else {
					tcp_send_delayed_ack(sk);
				}
			} else {
				__tcp_ack_snd_check(sk, 0);
			}

no_ack:
			/* A copied skb was never queued and is freed here;
			 * a queued one is announced to the reader.
			 */
			if (eaten)
				__kfree_skb(skb);
			else
				sk->data_ready(sk, 0);
			return 0;
		}
	}

slow_path:
	/* Header longer than the segment, or bad checksum. */
	if (len < (th->doff<<2) || tcp_checksum_complete_user(sk, skb))
		goto csum_error;

	/* PAWS check: a rejected segment is answered with a duplicate
	 * ACK and discarded, unless it carries RST, in which case
	 * processing continues below.
	 */
	if (tcp_fast_parse_options(skb, th, tp) && tp->saw_tstamp &&
	    tcp_paws_discard(tp, skb)) {
		if (!th->rst) {
			NET_INC_STATS_BH(PAWSEstabRejected);
			tcp_send_dupack(sk, skb);
			goto discard;
		}
	}

	/* Sequence check: unacceptable segments get a duplicate ACK
	 * (unless RST is set) and are discarded.
	 */
	if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
		if (!th->rst)
			tcp_send_dupack(sk, skb);
		goto discard;
	}

	if(th->rst) {
		tcp_reset(sk);
		goto discard;
	}

	tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);

	/* A SYN at or beyond rcv_nxt on an established connection is an
	 * error: reset.
	 */
	if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
		TCP_INC_STATS_BH(TcpInErrs);
		NET_INC_STATS_BH(TCPAbortOnSyn);
		tcp_reset(sk);
		return 1;
	}

step5:
	if(th->ack)
		tcp_ack(sk, skb, FLAG_SLOWPATH);

	tcp_rcv_rtt_measure_ts(tp, skb);

	/* Process urgent data. */
	tcp_urg(sk, skb, th);

	/* Queue the payload; this takes over the skb. */
	tcp_data_queue(sk, skb);

	tcp_data_snd_check(sk);
	tcp_ack_snd_check(sk);
	return 0;

csum_error:
	TCP_INC_STATS_BH(TcpInErrs);

discard:
	__kfree_skb(skb);
	return 0;
}
4339
/*
 * Handle a segment received while the socket is in SYN_SENT.
 *
 * With ACK set, the segment is validated (ack_seq, echoed timestamp, RST,
 * SYN) and, if acceptable, the connection is moved to ESTABLISHED.
 * Without ACK, a bare SYN moves the socket to SYN_RECV (simultaneous
 * open) and a SYN-ACK is sent.  Anything else is discarded after the
 * options parsed from the segment have been cleared and mss_clamp
 * restored.
 *
 * Returns:
 *    0  segment was consumed (freed) here;
 *   -1  connection established and an ACK was sent at once; the skb is
 *       NOT freed, the caller continues processing it;
 *    1  segment rejected (reset_and_undo); the skb is not freed here.
 */
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
					 struct tcphdr *th, unsigned len)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	/* Saved so the undo paths can restore it after option parsing. */
	int saved_clamp = tp->mss_clamp;

	tcp_parse_options(skb, tp, 0);

	if (th->ack) {
		/* The ACK must acknowledge exactly snd_nxt. */
		if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
			goto reset_and_undo;

		/* An echoed timestamp must lie between retrans_stamp and
		 * the current time.
		 */
		if (tp->saw_tstamp && tp->rcv_tsecr &&
		    !between(tp->rcv_tsecr, tp->retrans_stamp, tcp_time_stamp)) {
			NET_INC_STATS_BH(PAWSActiveRejected);
			goto reset_and_undo;
		}

		/* Acceptable ACK carrying RST: reset the connection. */
		if (th->rst) {
			tcp_reset(sk);
			goto discard;
		}

		/* ACK without SYN: drop the segment and undo the option
		 * parsing.
		 */
		if (!th->syn)
			goto discard_and_undo;

		/* SYN-ACK accepted from here on. */
		TCP_ECN_rcv_synack(tp, th);

		tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
		tcp_ack(sk, skb, FLAG_SLOWPATH);

		/* The peer's SYN occupies one sequence number. */
		tp->rcv_nxt = TCP_SKB_CB(skb)->seq+1;
		tp->rcv_wup = TCP_SKB_CB(skb)->seq+1;

		/* The window field is taken unscaled here. */
		tp->snd_wnd = ntohs(th->window);
		tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq);

		if (tp->wscale_ok == 0) {
			tp->snd_wscale = tp->rcv_wscale = 0;
			tp->window_clamp = min(tp->window_clamp, 65535U);
		}

		if (tp->saw_tstamp) {
			tp->tstamp_ok = 1;
			tp->tcp_header_len =
				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
			tcp_store_ts_recent(tp);
		} else {
			tp->tcp_header_len = sizeof(struct tcphdr);
		}

		/* Bit 2 of sack_ok is what the IsFack() macro tests. */
		if (tp->sack_ok && sysctl_tcp_fack)
			tp->sack_ok |= 2;

		tcp_sync_mss(sk, tp->pmtu_cookie);
		tcp_initialize_rcv_mss(sk);
		tcp_init_metrics(sk);
		tcp_init_buffer_space(sk);

		if (sk->keepopen)
			tcp_reset_keepalive_timer(sk, keepalive_time_when(tp));

		/* Header prediction is switched on only when the send
		 * window is unscaled.
		 */
		if (tp->snd_wscale == 0)
			__tcp_fast_path_on(tp, tp->snd_wnd);
		else
			tp->pred_flags = 0;

		/* copied_seq is set, followed by a memory barrier, before
		 * the state change.
		 */
		tp->copied_seq = tp->rcv_nxt;
		mb();
		tcp_set_state(sk, TCP_ESTABLISHED);

		if(!sk->dead) {
			sk->state_change(sk);
			sk_wake_async(sk, 0, POLL_OUT);
		}

		if (tp->write_pending || tp->defer_accept || tp->ack.pingpong) {
			/* Do not ACK immediately: schedule a delayed ACK
			 * and arm the delayed-ACK timer.
			 */
			tcp_schedule_ack(tp);
			tp->ack.lrcvtime = tcp_time_stamp;
			tp->ack.ato = TCP_ATO_MIN;
			tcp_incr_quickack(tp);
			tcp_enter_quickack_mode(tp);
			tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX);

			/* Shared exit: several paths elsewhere in this
			 * function jump to this label inside the block.
			 */
discard:
			__kfree_skb(skb);
			return 0;
		} else {
			tcp_send_ack(sk);
		}
		return -1;
	}

	/* No ACK in the segment. */

	if (th->rst) {
		/* RST without ACK: ignored, only the options are undone. */
		goto discard_and_undo;
	}

	/* PAWS check. */
	if (tp->ts_recent_stamp && tp->saw_tstamp && tcp_paws_check(tp, 0))
		goto discard_and_undo;

	if (th->syn) {
		/* SYN without ACK: move to SYN_RECV and answer with a
		 * SYN-ACK.
		 */
		tcp_set_state(sk, TCP_SYN_RECV);

		if (tp->saw_tstamp) {
			tp->tstamp_ok = 1;
			tcp_store_ts_recent(tp);
			tp->tcp_header_len =
				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
		} else {
			tp->tcp_header_len = sizeof(struct tcphdr);
		}

		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;

		/* The window field is taken unscaled here. */
		tp->snd_wnd = ntohs(th->window);
		tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
		tp->max_window = tp->snd_wnd;

		tcp_sync_mss(sk, tp->pmtu_cookie);
		tcp_initialize_rcv_mss(sk);

		TCP_ECN_rcv_syn(tp, th);

		tcp_send_synack(sk);
#if 0
		/* Compiled out: alternative that would return -1 so the
		 * caller keeps processing the segment.
		 */
		return -1;
#else
		goto discard;
#endif
	}

	/* Neither SYN nor RST: fall through to discard_and_undo. */

discard_and_undo:
	tcp_clear_options(tp);
	tp->mss_clamp = saved_clamp;
	goto discard;

reset_and_undo:
	tcp_clear_options(tp);
	tp->mss_clamp = saved_clamp;
	return 1;
}
4560
4561
4562
4563
4564
4565
4566
4567
/*
 * Receive state machine for segments arriving in the states handled by
 * the switches below: CLOSE, LISTEN, SYN_SENT, SYN_RECV, ESTABLISHED,
 * FIN_WAIT1/2, CLOSE_WAIT, CLOSING and LAST_ACK.
 *
 * @sk:  the socket
 * @skb: the received segment
 * @th:  TCP header of the segment
 * @len: segment length (passed on to the SYN_SENT handler)
 *
 * Returns 0 when the segment was consumed (queued or freed).  Returns a
 * nonzero value when it was rejected; on those paths the skb is not freed
 * here.  NOTE(review): presumably the caller then sends a reset and frees
 * the skb -- confirm against the callers.
 */
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
			  struct tcphdr *th, unsigned len)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	/* Set once tcp_data_queue() has taken over the skb. */
	int queued = 0;

	tp->saw_tstamp = 0;

	switch (sk->state) {
	case TCP_CLOSE:
		goto discard;

	case TCP_LISTEN:
		/* An ACK on a listening socket is rejected. */
		if(th->ack)
			return 1;

		if(th->rst)
			goto discard;

		if(th->syn) {
			/* Hand the SYN to the address-family specific
			 * connection request handler.
			 */
			if(tp->af_specific->conn_request(sk, skb) < 0)
				return 1;

			tcp_init_westwood(sk);
			init_bictcp(tp);

			/* The SYN itself is dropped here. */
			goto discard;
		}
		goto discard;

	case TCP_SYN_SENT:
		tcp_init_westwood(sk);
		init_bictcp(tp);

		queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
		if (queued >= 0)
			return queued;

		/* Negative result: the connection was established and the
		 * skb was left to us.  Handle urgent data, free the skb
		 * and push pending output.
		 */
		tcp_urg(sk, skb, th);
		__kfree_skb(skb);
		tcp_data_snd_check(sk);
		return 0;
	}

	/* All remaining states. */

	/* PAWS check, as in tcp_rcv_established(). */
	if (tcp_fast_parse_options(skb, th, tp) && tp->saw_tstamp &&
	    tcp_paws_discard(tp, skb)) {
		if (!th->rst) {
			NET_INC_STATS_BH(PAWSEstabRejected);
			tcp_send_dupack(sk, skb);
			goto discard;
		}
		/* A RST is processed further despite failing PAWS. */
	}

	/* Sequence number check. */
	if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
		if (!th->rst)
			tcp_send_dupack(sk, skb);
		goto discard;
	}

	/* RST check. */
	if(th->rst) {
		tcp_reset(sk);
		goto discard;
	}

	tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);

	/* A SYN at or beyond rcv_nxt is an error: reset the connection. */
	if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
		NET_INC_STATS_BH(TCPAbortOnSyn);
		tcp_reset(sk);
		return 1;
	}

	/* ACK processing; segments without ACK are discarded. */
	if (th->ack) {
		int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH);

		switch(sk->state) {
		case TCP_SYN_RECV:
			if (acceptable) {
				/* Handshake completed. */
				tp->copied_seq = tp->rcv_nxt;
				mb();
				tcp_set_state(sk, TCP_ESTABLISHED);
				sk->state_change(sk);

				/* Async wakeup only when a struct socket
				 * is attached.
				 */
				if (sk->socket) {
					sk_wake_async(sk,0,POLL_OUT);
				}

				tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
				tp->snd_wnd = ntohs(th->window) << tp->snd_wscale;
				tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq);

				/* Take an RTT sample from the echoed
				 * timestamp if no srtt exists yet.
				 */
				if (tp->saw_tstamp && tp->rcv_tsecr && !tp->srtt)
					tcp_ack_saw_tstamp(tp, 0);

				if (tp->tstamp_ok)
					tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;

				tcp_init_metrics(sk);
				tcp_initialize_rcv_mss(sk);
				tcp_init_buffer_space(sk);
				tcp_fast_path_on(tp);
			} else {
				return 1;
			}
			break;

		case TCP_FIN_WAIT1:
			/* Everything sent, including our FIN, is acked. */
			if (tp->snd_una == tp->write_seq) {
				tcp_set_state(sk, TCP_FIN_WAIT2);
				sk->shutdown |= SEND_SHUTDOWN;
				dst_confirm(sk->dst_cache);

				if (!sk->dead) {
					/* Wake up whoever waits on the socket. */
					sk->state_change(sk);
				} else {
					int tmo;

					/* Dead socket: abort on negative
					 * linger2 or if the segment brings
					 * data beyond rcv_nxt.
					 */
					if (tp->linger2 < 0 ||
					    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
					     after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
						tcp_done(sk);
						NET_INC_STATS_BH(TCPAbortOnData);
						return 1;
					}

					tmo = tcp_fin_time(tp);
					if (tmo > TCP_TIMEWAIT_LEN) {
						tcp_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
					} else if (th->fin || sk->lock.users) {
						/* A FIN is in this segment,
						 * or the socket lock is held
						 * by a user: use the timer
						 * instead of time-wait now.
						 */
						tcp_reset_keepalive_timer(sk, tmo);
					} else {
						tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
						goto discard;
					}
				}
			}
			break;

		case TCP_CLOSING:
			if (tp->snd_una == tp->write_seq) {
				tcp_time_wait(sk, TCP_TIME_WAIT, 0);
				goto discard;
			}
			break;

		case TCP_LAST_ACK:
			if (tp->snd_una == tp->write_seq) {
				tcp_update_metrics(sk);
				tcp_done(sk);
				goto discard;
			}
			break;
		}
	} else
		goto discard;

	/* Process urgent data. */
	tcp_urg(sk, skb, th);

	/* Process the segment text. */
	switch (sk->state) {
	case TCP_CLOSE_WAIT:
	case TCP_CLOSING:
	case TCP_LAST_ACK:
		/* Segments starting at or after rcv_nxt are ignored in
		 * these states; older ones get the check below.
		 */
		if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
			break;
		/* fall through */
	case TCP_FIN_WAIT1:
	case TCP_FIN_WAIT2:
		/* Receiving is shut down but the segment carries new
		 * data: reset the connection.
		 */
		if (sk->shutdown & RCV_SHUTDOWN) {
			if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
			    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
				NET_INC_STATS_BH(TCPAbortOnData);
				tcp_reset(sk);
				return 1;
			}
		}
		/* fall through */
	case TCP_ESTABLISHED:
		tcp_data_queue(sk, skb);
		queued = 1;
		break;
	}

	/* Push pending output and ACKs unless the socket was closed by
	 * the processing above.
	 */
	if (sk->state != TCP_CLOSE) {
		tcp_data_snd_check(sk);
		tcp_ack_snd_check(sk);
	}

	if (!queued) {
		/* Also the target of the 'goto discard' statements above. */
discard:
		__kfree_skb(skb);
	}
	return 0;
}
4810