/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 */
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/kernel.h>
#include <net/dst.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <net/netdma.h>

int sysctl_tcp_timestamps __read_mostly = 1;
int sysctl_tcp_window_scaling __read_mostly = 1;
int sysctl_tcp_sack __read_mostly = 1;
int sysctl_tcp_fack __read_mostly = 1;
int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
int sysctl_tcp_ecn __read_mostly;
int sysctl_tcp_dsack __read_mostly = 1;
int sysctl_tcp_app_win __read_mostly = 31;
int sysctl_tcp_adv_win_scale __read_mostly = 2;

int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
int sysctl_tcp_frto __read_mostly = 2;
int sysctl_tcp_frto_response __read_mostly;
int sysctl_tcp_nometrics_save __read_mostly;

int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
int sysctl_tcp_abc __read_mostly;

#define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
#define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
#define FLAG_DATA_ACKED		0x04 /* This ACK acknowledged new data.		*/
#define FLAG_RETRANS_DATA_ACKED	0x08 /* "" "" some of which was retransmitted.	*/
#define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN.		*/
#define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
#define FLAG_ECE		0x40 /* ECE in this ACK.			*/
#define FLAG_DATA_LOST		0x80 /* SACK detected data lossage.		*/
#define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update. */
#define FLAG_ONLY_ORIG_SACKED	0x200 /* SACKs only non-rexmit sent before RTO */
#define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
#define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
#define FLAG_NONHEAD_RETRANS_ACKED	0x1000 /* Non-head rexmitted data was ACKed */
#define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq */

#define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE)
#define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)
#define FLAG_ANY_PROGRESS	(FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)

#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))

/* Adapt the MSS value used to make delayed ack decision to the
 * real world.
 */
static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const unsigned int lss = icsk->icsk_ack.last_seg_size;
	unsigned int len;

	icsk->icsk_ack.last_seg_size = 0;

	/* skb->len may jitter because of SACKs, even if peer
	 * sends good full-sized frames.
	 */
	len = skb_shinfo(skb)->gso_size ? : skb->len;
	if (len >= icsk->icsk_ack.rcv_mss) {
		icsk->icsk_ack.rcv_mss = len;
	} else {
		/* Otherwise, we make a more careful check taking into
		 * account that SACK blocks are variable.
		 *
		 * "len" is the invariant segment length, including TCP header.
		 */
		len += skb->data - skb_transport_header(skb);
		if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) ||
		    /* If PSH is not set, the packet should be full sized,
		     * provided peer TCP is not badly broken. This observation
		     * (if it is correct 8)) allows us to handle super-low
		     * mtu links fairly.
		     */
		    (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
		     !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
			/* Subtract also the invariant (if peer is RFC
			 * compliant) TCP header plus fixed timestamp option
			 * length.  The resulting "len" is MSS free of SACK
			 * jitter.
			 */
			len -= tcp_sk(sk)->tcp_header_len;
			icsk->icsk_ack.last_seg_size = len;
			if (len == lss) {
				icsk->icsk_ack.rcv_mss = len;
				return;
			}
		}
		if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
		icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
	}
}

static void tcp_incr_quickack(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);

	if (quickacks == 0)
		quickacks = 2;
	if (quickacks > icsk->icsk_ack.quick)
		icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
}

void tcp_enter_quickack_mode(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	tcp_incr_quickack(sk);
	icsk->icsk_ack.pingpong = 0;
	icsk->icsk_ack.ato = TCP_ATO_MIN;
}

/* Send ACKs quickly, if "quick" count is not exhausted
 * and the session is not interactive.
 */
static inline int tcp_in_quickack_mode(const struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
}

static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
{
	if (tp->ecn_flags & TCP_ECN_OK)
		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
}

static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb)
{
	if (tcp_hdr(skb)->cwr)
		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}

static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
{
	tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}

static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
{
	if (tp->ecn_flags & TCP_ECN_OK) {
		if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))
			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
		/* A non-ECT segment on an ECN-capable connection is most
		 * likely a retransmission (ECT is never set on rexmits),
		 * so ACK it quickly to help loss recovery along.
		 */
		else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags)))
			tcp_enter_quickack_mode((struct sock *)tp);
	}
}

static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th)
{
	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
		tp->ecn_flags &= ~TCP_ECN_OK;
}

static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th)
{
	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
		tp->ecn_flags &= ~TCP_ECN_OK;
}

static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
{
	if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
		return 1;
	return 0;
}

/* Buffer size and advertised window tuning.
 *
 * 1. Tuning sk->sk_sndbuf, when connection enters established state.
 */
static void tcp_fixup_sndbuf(struct sock *sk)
{
	int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
		     sizeof(struct sk_buff);

	if (sk->sk_sndbuf < 3 * sndmem)
		sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]);
}

/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
 *
 * Socket memory accounting is soft: the receive queue may hold skbs whose
 * true memory footprint (truesize) is much larger than their payload, so an
 * advertised window sized purely from sk_rcvbuf could over-commit memory.
 * rcv_ssthresh is therefore a per-connection "slow start" threshold for the
 * advertised window: it only grows while incoming segments demonstrate a
 * reasonable payload/overhead ratio, so that the window we advertise can
 * actually be backed by receive buffer space.
 *
 * __tcp_grow_window() decides whether rcv_ssthresh may still be increased
 * for a segment with higher-than-usual overhead, by comparing scaled-down
 * copies of the segment's truesize and of the maximum receive buffer.
 */
static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	/* Optimize this! */
	int truesize = tcp_win_from_space(skb->truesize) >> 1;
	int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;

	while (tp->rcv_ssthresh <= window) {
		if (truesize <= skb->len)
			return 2 * inet_csk(sk)->icsk_ack.rcv_mss;

		truesize >>= 1;
		window >>= 1;
	}
	return 0;
}
302
303static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
304{
305 struct tcp_sock *tp = tcp_sk(sk);
306
307
308 if (tp->rcv_ssthresh < tp->window_clamp &&
309 (int)tp->rcv_ssthresh < tcp_space(sk) &&
310 !tcp_memory_pressure) {
311 int incr;
312
313
314
315
316 if (tcp_win_from_space(skb->truesize) <= skb->len)
317 incr = 2 * tp->advmss;
318 else
319 incr = __tcp_grow_window(sk, skb);
320
321 if (incr) {
322 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
323 tp->window_clamp);
324 inet_csk(sk)->icsk_ack.quick |= 1;
325 }
326 }
327}
328
329
330
331static void tcp_fixup_rcvbuf(struct sock *sk)
332{
333 struct tcp_sock *tp = tcp_sk(sk);
334 int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
335
336
337
338
339
340 while (tcp_win_from_space(rcvmem) < tp->advmss)
341 rcvmem += 128;
342 if (sk->sk_rcvbuf < 4 * rcvmem)
343 sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
344}
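
/* Sizing note on tcp_fixup_rcvbuf() above: with the default
 * tcp_adv_win_scale of 2, tcp_win_from_space() counts 3/4 of every
 * allocated byte as usable window, so the loop bumps the per-segment
 * allowance (payload + MAX_TCP_HEADER + 16 bytes of slack + struct sk_buff
 * overhead) in 128-byte steps until one advertised MSS of payload fits.
 * Reserving roughly four such segments gives the receiver enough socket
 * buffer to advertise a sane initial window.
 */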
345
346
347
348
349static void tcp_init_buffer_space(struct sock *sk)
350{
351 struct tcp_sock *tp = tcp_sk(sk);
352 int maxwin;
353
354 if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
355 tcp_fixup_rcvbuf(sk);
356 if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
357 tcp_fixup_sndbuf(sk);
358
359 tp->rcvq_space.space = tp->rcv_wnd;
360
361 maxwin = tcp_full_space(sk);
362
363 if (tp->window_clamp >= maxwin) {
364 tp->window_clamp = maxwin;
365
366 if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
367 tp->window_clamp = max(maxwin -
368 (maxwin >> sysctl_tcp_app_win),
369 4 * tp->advmss);
370 }
371
372
373 if (sysctl_tcp_app_win &&
374 tp->window_clamp > 2 * tp->advmss &&
375 tp->window_clamp + tp->advmss > maxwin)
376 tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
377
378 tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
379 tp->snd_cwnd_stamp = tcp_time_stamp;
380}
381
382
383static void tcp_clamp_window(struct sock *sk)
384{
385 struct tcp_sock *tp = tcp_sk(sk);
386 struct inet_connection_sock *icsk = inet_csk(sk);
387
388 icsk->icsk_ack.quick = 0;
389
390 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
391 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
392 !tcp_memory_pressure &&
393 atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
394 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
395 sysctl_tcp_rmem[2]);
396 }
397 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
398 tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
399}

/* Initialize RCV_MSS value.
 * RCV_MSS is our guess about the MSS used by the peer.
 * We haven't any direct information about the MSS.
 * It's better to underestimate the RCV_MSS rather than overestimate.
 * Overestimations make us ACKing less frequently than needed.
 * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss().
 */
408void tcp_initialize_rcv_mss(struct sock *sk)
409{
410 struct tcp_sock *tp = tcp_sk(sk);
411 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
412
413 hint = min(hint, tp->rcv_wnd / 2);
414 hint = min(hint, TCP_MIN_RCVMSS);
415 hint = max(hint, TCP_MIN_MSS);
416
417 inet_csk(sk)->icsk_ack.rcv_mss = hint;
418}

/* Receiver "autotuning" code.
 *
 * The algorithm for RTT estimation without timestamps is based on
 * Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL:
 * a sample is taken each time roughly one receive window's worth
 * of data has arrived.
 */
431static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
432{
433 u32 new_sample = tp->rcv_rtt_est.rtt;
434 long m = sample;
435
436 if (m == 0)
437 m = 1;
438
439 if (new_sample != 0) {
440
441
442
443
444
445
446
447
448
449
450 if (!win_dep) {
451 m -= (new_sample >> 3);
452 new_sample += m;
453 } else if (m < new_sample)
454 new_sample = m << 3;
455 } else {
456
457 new_sample = m << 3;
458 }
459
460 if (tp->rcv_rtt_est.rtt != new_sample)
461 tp->rcv_rtt_est.rtt = new_sample;
462}
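
/* In other words: timestamp-based samples (win_dep == 0) feed a standard
 * EWMA, new_rtt = 7/8 * old_rtt + 1/8 * m, with the estimate kept scaled
 * by 8.  Window-based samples (win_dep == 1) are far coarser - they only
 * say "at least one window was received within m jiffies" - so they are
 * used more like a minimum filter and never run through the EWMA.
 */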
463
464static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
465{
466 if (tp->rcv_rtt_est.time == 0)
467 goto new_measure;
468 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
469 return;
470 tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_est.time, 1);
471
472new_measure:
473 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
474 tp->rcv_rtt_est.time = tcp_time_stamp;
475}
476
477static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
478 const struct sk_buff *skb)
479{
480 struct tcp_sock *tp = tcp_sk(sk);
481 if (tp->rx_opt.rcv_tsecr &&
482 (TCP_SKB_CB(skb)->end_seq -
483 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
484 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
485}

/*
 * This function should be called every time data is copied to user space.
 * It calculates the appropriate TCP receive buffer space.
 */
491void tcp_rcv_space_adjust(struct sock *sk)
492{
493 struct tcp_sock *tp = tcp_sk(sk);
494 int time;
495 int space;
496
497 if (tp->rcvq_space.time == 0)
498 goto new_measure;
499
500 time = tcp_time_stamp - tp->rcvq_space.time;
501 if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
502 return;
503
504 space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
505
506 space = max(tp->rcvq_space.space, space);
507
508 if (tp->rcvq_space.space != space) {
509 int rcvmem;
510
511 tp->rcvq_space.space = space;
512
513 if (sysctl_tcp_moderate_rcvbuf &&
514 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
515 int new_clamp = space;
516
517
518
519
520
521 space /= tp->advmss;
522 if (!space)
523 space = 1;
524 rcvmem = (tp->advmss + MAX_TCP_HEADER +
525 16 + sizeof(struct sk_buff));
526 while (tcp_win_from_space(rcvmem) < tp->advmss)
527 rcvmem += 128;
528 space *= rcvmem;
529 space = min(space, sysctl_tcp_rmem[2]);
530 if (space > sk->sk_rcvbuf) {
531 sk->sk_rcvbuf = space;
532
533
534 tp->window_clamp = new_clamp;
535 }
536 }
537 }
538
539new_measure:
540 tp->rcvq_space.seq = tp->copied_seq;
541 tp->rcvq_space.time = tcp_time_stamp;
542}
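
/* The arithmetic above is the Dynamic Right-Sizing idea in miniature:
 * copied_seq - rcvq_space.seq is how much the application consumed during
 * the last measured receive RTT; doubling it leaves headroom for the sender
 * to grow its congestion window, and the result is rounded up to whole
 * per-segment allocations (payload + header + skb overhead) before being
 * clamped to sysctl_tcp_rmem[2] and applied to sk_rcvbuf and window_clamp.
 */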

/* There is something which you must keep in mind when you analyze the
 * behavior of the tp->ato delayed ack timeout interval.  When a
 * connection starts up, we want to ack as quickly as possible.  The
 * problem is that "good" TCPs do slow start at the beginning of data
 * transmission.  That means that until we send the first few ACKs, the
 * sender will sit on his end and only queue most of his data, because
 * he can only send snd_cwnd unacked packets at any given time.  For
 * each ACK we send, he increments snd_cwnd and transmits more of his
 * queue.  -DaveM
 */
554static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
555{
556 struct tcp_sock *tp = tcp_sk(sk);
557 struct inet_connection_sock *icsk = inet_csk(sk);
558 u32 now;
559
560 inet_csk_schedule_ack(sk);
561
562 tcp_measure_rcv_mss(sk, skb);
563
564 tcp_rcv_rtt_measure(tp);
565
566 now = tcp_time_stamp;
567
568 if (!icsk->icsk_ack.ato) {
569
570
571
572 tcp_incr_quickack(sk);
573 icsk->icsk_ack.ato = TCP_ATO_MIN;
574 } else {
575 int m = now - icsk->icsk_ack.lrcvtime;
576
577 if (m <= TCP_ATO_MIN / 2) {
578
579 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
580 } else if (m < icsk->icsk_ack.ato) {
581 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
582 if (icsk->icsk_ack.ato > icsk->icsk_rto)
583 icsk->icsk_ack.ato = icsk->icsk_rto;
584 } else if (m > icsk->icsk_rto) {
585
586
587
588 tcp_incr_quickack(sk);
589 sk_mem_reclaim(sk);
590 }
591 }
592 icsk->icsk_ack.lrcvtime = now;
593
594 TCP_ECN_check_ce(tp, skb);
595
596 if (skb->len >= 128)
597 tcp_grow_window(sk, skb);
598}
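
/* The ato update above works out to: a segment arriving within
 * TCP_ATO_MIN/2 of the previous one pulls ato halfway back toward
 * TCP_ATO_MIN; a gap m shorter than the current ato sets ato to
 * ato/2 + m, capped at the RTO; and a gap larger than the RTO is treated
 * like the start of a new session, so we fall back to quick ACKs and give
 * the allocator a chance to reclaim receive-queue memory.
 */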

/* Called to compute a smoothed rtt estimate. The data fed to this
 * routine either comes from timestamps, or from segments that were
 * known _not_ to have been retransmitted [see Karn/Partridge
 * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
 * piece by Van Jacobson.
 * NOTE: the next three routines used to be one big routine.
 * To save cycles in the RFC 1323 implementation it was better to break
 * it up into three procedures. -- erics
 */
static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
{
	struct tcp_sock *tp = tcp_sk(sk);
	long m = mrtt;	/* RTT */

	/* What follows is essentially Jacobson's SIGCOMM '88 algorithm;
	 * srtt and mdev are scaled versions of the smoothed RTT and mean
	 * deviation (see the note after this function for the exact
	 * recurrences).
	 */
	if (m == 0)
		m = 1;
632 if (tp->srtt != 0) {
633 m -= (tp->srtt >> 3);
634 tp->srtt += m;
635 if (m < 0) {
636 m = -m;
637 m -= (tp->mdev >> 2);
638
639
640
641
642
643
644
645
646 if (m > 0)
647 m >>= 3;
648 } else {
649 m -= (tp->mdev >> 2);
650 }
651 tp->mdev += m;
652 if (tp->mdev > tp->mdev_max) {
653 tp->mdev_max = tp->mdev;
654 if (tp->mdev_max > tp->rttvar)
655 tp->rttvar = tp->mdev_max;
656 }
657 if (after(tp->snd_una, tp->rtt_seq)) {
658 if (tp->mdev_max < tp->rttvar)
659 tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2;
660 tp->rtt_seq = tp->snd_nxt;
661 tp->mdev_max = tcp_rto_min(sk);
662 }
663 } else {
664
665 tp->srtt = m << 3;
666 tp->mdev = m << 1;
667 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
668 tp->rtt_seq = tp->snd_nxt;
669 }
670}
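
/* Written out, the updates above implement the classic Jacobson/Karels
 * estimator.  With srtt stored as 8*SRTT and mdev as roughly 4*MDEV, each
 * new measurement m gives, approximately:
 *
 *	SRTT'  = 7/8 * SRTT + 1/8 * m
 *	MDEV'  = 3/4 * MDEV + 1/4 * |m - SRTT|
 *	RTTVAR = max(MDEV, tcp_rto_min()), decayed toward mdev_max once
 *	         per round trip
 *	RTO    = SRTT + 4*RTTVAR               (computed in tcp_set_rto())
 *
 * The extra "if (m < 0) ... m >>= 3" branch damps mdev growth when the new
 * sample is *below* the smoothed RTT, so that delayed ACKs and other
 * downward spikes do not inflate the retransmission timer.
 */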
/* Calculate rto without backoff.  This is the second half of Van Jacobson's
 * routine referred to above.
 */
static inline void tcp_set_rto(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	/* RTO = SRTT + 4*RTTVAR, in the scaled units used above: srtt is
	 * stored as 8*SRTT and rttvar tracks roughly 4*MDEV, so the plain
	 * sum below is the classic Jacobson retransmission timeout.
	 */
	inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar;

	/* NOTE: clamping at TCP_RTO_MIN is not required, the current
	 * algorithm guarantees that rto is higher, because rttvar never
	 * drops below tcp_rto_min().
	 */
	if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
		inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
}

/* Save metrics learned by this TCP session.
 * This function is called only when the connection finishes (it enters
 * TIME-WAIT or closes), so the values are saved in the destination cache
 * entry for future connections to the same peer.
 */
707void tcp_update_metrics(struct sock *sk)
708{
709 struct tcp_sock *tp = tcp_sk(sk);
710 struct dst_entry *dst = __sk_dst_get(sk);
711
712 if (sysctl_tcp_nometrics_save)
713 return;
714
715 dst_confirm(dst);
716
717 if (dst && (dst->flags & DST_HOST)) {
718 const struct inet_connection_sock *icsk = inet_csk(sk);
719 int m;
720 unsigned long rtt;
721
722 if (icsk->icsk_backoff || !tp->srtt) {
723
724
725
726
727 if (!(dst_metric_locked(dst, RTAX_RTT)))
728 dst->metrics[RTAX_RTT - 1] = 0;
729 return;
730 }
731
732 rtt = dst_metric_rtt(dst, RTAX_RTT);
733 m = rtt - tp->srtt;
734
735
736
737
738
739 if (!(dst_metric_locked(dst, RTAX_RTT))) {
740 if (m <= 0)
741 set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt);
742 else
743 set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3));
744 }
745
746 if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
747 unsigned long var;
748 if (m < 0)
749 m = -m;
750
751
752 m >>= 1;
753 if (m < tp->mdev)
754 m = tp->mdev;
755
756 var = dst_metric_rtt(dst, RTAX_RTTVAR);
757 if (m >= var)
758 var = m;
759 else
760 var -= (var - m) >> 2;
761
762 set_dst_metric_rtt(dst, RTAX_RTTVAR, var);
763 }
764
765 if (tp->snd_ssthresh >= 0xFFFF) {
766
767 if (dst_metric(dst, RTAX_SSTHRESH) &&
768 !dst_metric_locked(dst, RTAX_SSTHRESH) &&
769 (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH))
770 dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1;
771 if (!dst_metric_locked(dst, RTAX_CWND) &&
772 tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
773 dst->metrics[RTAX_CWND - 1] = tp->snd_cwnd;
774 } else if (tp->snd_cwnd > tp->snd_ssthresh &&
775 icsk->icsk_ca_state == TCP_CA_Open) {
776
777 if (!dst_metric_locked(dst, RTAX_SSTHRESH))
778 dst->metrics[RTAX_SSTHRESH-1] =
779 max(tp->snd_cwnd >> 1, tp->snd_ssthresh);
780 if (!dst_metric_locked(dst, RTAX_CWND))
781 dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1;
782 } else {
783
784
785
786 if (!dst_metric_locked(dst, RTAX_CWND))
787 dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1;
788 if (dst_metric(dst, RTAX_SSTHRESH) &&
789 !dst_metric_locked(dst, RTAX_SSTHRESH) &&
790 tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH))
791 dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh;
792 }
793
794 if (!dst_metric_locked(dst, RTAX_REORDERING)) {
795 if (dst_metric(dst, RTAX_REORDERING) < tp->reordering &&
796 tp->reordering != sysctl_tcp_reordering)
797 dst->metrics[RTAX_REORDERING-1] = tp->reordering;
798 }
799 }
800}

/* Numbers are taken from RFC3390.
 *
 * John Heffner states:
 *
 *	The RFC specifies a window of no more than 4380 bytes
 *	unless 2*MSS > 4380.  Reading the pseudocode in the RFC
 *	is a bit misleading because they use a clamp at 4380 bytes
 *	rather than a multiplier in the relevant range.
 */
811__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
812{
813 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
814
815 if (!cwnd) {
816 if (tp->mss_cache > 1460)
817 cwnd = 2;
818 else
819 cwnd = (tp->mss_cache > 1095) ? 3 : 4;
820 }
821 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
822}
823
824
825void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
826{
827 struct tcp_sock *tp = tcp_sk(sk);
828 const struct inet_connection_sock *icsk = inet_csk(sk);
829
830 tp->prior_ssthresh = 0;
831 tp->bytes_acked = 0;
832 if (icsk->icsk_ca_state < TCP_CA_CWR) {
833 tp->undo_marker = 0;
834 if (set_ssthresh)
835 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
836 tp->snd_cwnd = min(tp->snd_cwnd,
837 tcp_packets_in_flight(tp) + 1U);
838 tp->snd_cwnd_cnt = 0;
839 tp->high_seq = tp->snd_nxt;
840 tp->snd_cwnd_stamp = tcp_time_stamp;
841 TCP_ECN_queue_cwr(tp);
842
843 tcp_set_ca_state(sk, TCP_CA_CWR);
844 }
845}
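
/* Note on tcp_enter_cwr(): because prior_ssthresh and undo_marker are
 * cleared before ssthresh is recomputed, a CWR episode (ECE or local
 * device congestion) can never be undone later - unlike a Recovery
 * episode, where the old ssthresh is kept around precisely so that a
 * spurious retransmit can be rolled back.
 */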

/* Packet counting of FACK is based on in-order assumptions, therefore
 * TCP disables it when reordering is detected.
 */
static void tcp_disable_fack(struct tcp_sock *tp)
{
	/* RFC3517 uses different metric in lost marker => reset on change */
	if (tcp_is_fack(tp))
		tp->lost_skb_hint = NULL;
	tp->rx_opt.sack_ok &= ~2;
}

/* Take a notice that peer is sending D-SACKs */
static void tcp_dsack_seen(struct tcp_sock *tp)
{
	tp->rx_opt.sack_ok |= 4;
}

/* Initialize metrics on socket. */
867static void tcp_init_metrics(struct sock *sk)
868{
869 struct tcp_sock *tp = tcp_sk(sk);
870 struct dst_entry *dst = __sk_dst_get(sk);
871
872 if (dst == NULL)
873 goto reset;
874
875 dst_confirm(dst);
876
877 if (dst_metric_locked(dst, RTAX_CWND))
878 tp->snd_cwnd_clamp = dst_metric(dst, RTAX_CWND);
879 if (dst_metric(dst, RTAX_SSTHRESH)) {
880 tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH);
881 if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
882 tp->snd_ssthresh = tp->snd_cwnd_clamp;
883 }
884 if (dst_metric(dst, RTAX_REORDERING) &&
885 tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
886 tcp_disable_fack(tp);
887 tp->reordering = dst_metric(dst, RTAX_REORDERING);
888 }
889
890 if (dst_metric(dst, RTAX_RTT) == 0)
891 goto reset;
892
893 if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
894 goto reset;
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910 if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) {
911 tp->srtt = dst_metric_rtt(dst, RTAX_RTT);
912 tp->rtt_seq = tp->snd_nxt;
913 }
914 if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) {
915 tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR);
916 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
917 }
918 tcp_set_rto(sk);
919 if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
920 goto reset;
921
922cwnd:
923 tp->snd_cwnd = tcp_init_cwnd(tp, dst);
924 tp->snd_cwnd_stamp = tcp_time_stamp;
925 return;
926
927reset:
928
929
930
931
932 if (!tp->rx_opt.saw_tstamp && tp->srtt) {
933 tp->srtt = 0;
934 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
935 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
936 }
937 goto cwnd;
938}
939
940static void tcp_update_reordering(struct sock *sk, const int metric,
941 const int ts)
942{
943 struct tcp_sock *tp = tcp_sk(sk);
944 if (metric > tp->reordering) {
945 int mib_idx;
946
947 tp->reordering = min(TCP_MAX_REORDERING, metric);
948
949
950 if (ts)
951 mib_idx = LINUX_MIB_TCPTSREORDER;
952 else if (tcp_is_reno(tp))
953 mib_idx = LINUX_MIB_TCPRENOREORDER;
954 else if (tcp_is_fack(tp))
955 mib_idx = LINUX_MIB_TCPFACKREORDER;
956 else
957 mib_idx = LINUX_MIB_TCPSACKREORDER;
958
959 NET_INC_STATS_BH(sock_net(sk), mib_idx);
960#if FASTRETRANS_DEBUG > 1
961 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
962 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
963 tp->reordering,
964 tp->fackets_out,
965 tp->sacked_out,
966 tp->undo_marker ? tp->undo_retrans : 0);
967#endif
968 tcp_disable_fack(tp);
969 }
970}
971
972
973static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
974{
975 if ((tp->retransmit_skb_hint == NULL) ||
976 before(TCP_SKB_CB(skb)->seq,
977 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
978 tp->retransmit_skb_hint = skb;
979
980 if (!tp->lost_out ||
981 after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high))
982 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
983}
984
985static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
986{
987 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
988 tcp_verify_retransmit_hint(tp, skb);
989
990 tp->lost_out += tcp_skb_pcount(skb);
991 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
992 }
993}
994
995static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
996 struct sk_buff *skb)
997{
998 tcp_verify_retransmit_hint(tp, skb);
999
1000 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
1001 tp->lost_out += tcp_skb_pcount(skb);
1002 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1003 }
1004}

/* This procedure tags the retransmission queue when SACKs arrive.
 *
 * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
 * Packets in the queue with these bits set are counted in the variables
 * sacked_out, retrans_out and lost_out, correspondingly.
 *
 * Valid combinations are:
 * Tag  InFlight	Description
 * 0	1		- orig segment is in flight.
 * S	0		- nothing flies, orig reached receiver.
 * L	0		- nothing flies, orig lost by net.
 * R	2		- both orig and retransmit are in flight.
 * L|R	1		- orig is lost, retransmit is in flight.
 * S|R	1		- orig reached receiver, retransmit is still in flight.
 * (L|S|R only occurs transiently and is collapsed to S; L|S is invalid,
 *  it would mean -1 packet in flight.)
 *
 * These six states form a finite state machine driven by three events:
 * 1. A new ACK (with SACK) arrives		(tcp_sacktag_write_queue()).
 * 2. A segment is retransmitted		(tcp_retransmit_skb()).
 * 3. A loss-detection heuristic fires: the scoreboard estimator decides a
 *    packet is lost, or a SACK arrives for data sent after a hole that was
 *    never retransmitted, or a retransmission is itself detected as lost.
 * D-SACK adds one extra rule: a D-SACK may change any tag to S.
 *
 * Reordering is estimated from the maximal distance a packet is displaced
 * in the stream: a SACK filling an old, never-retransmitted hole, or a
 * D-SACK for an already-SACKed retransmitted segment, indicates reordering
 * rather than loss.
 *
 * SACK block validation.
 * ----------------------
 * A received SACK block must fit the expected sequence limits, i.e. lie
 * between SND.UNA and SND.NXT; blocks that do not are discarded (they are
 * usually the result of bugs, attacks, or severely delayed ACKs).
 * D-SACK blocks below SND.UNA are accepted only while undo_marker is set,
 * so they can still be used to detect spurious retransmissions, and a block
 * straddling the undo point is accepted only if it spans at most one
 * max_window, to avoid being fooled by sequence-number wrap-around.
 */
1102static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
1103 u32 start_seq, u32 end_seq)
1104{
1105
1106 if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
1107 return 0;
1108
1109
1110 if (!before(start_seq, tp->snd_nxt))
1111 return 0;
1112
1113
1114
1115
1116 if (after(start_seq, tp->snd_una))
1117 return 1;
1118
1119 if (!is_dsack || !tp->undo_marker)
1120 return 0;
1121
1122
1123 if (!after(end_seq, tp->snd_una))
1124 return 0;
1125
1126 if (!before(start_seq, tp->undo_marker))
1127 return 1;
1128
1129
1130 if (!after(end_seq, tp->undo_marker))
1131 return 0;
1132
1133
1134
1135
1136 return !before(start_seq, end_seq - tp->max_window);
1137}
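
/* Worked example with made-up sequence numbers: suppose snd_una = 1000,
 * snd_nxt = 5000 and undo_marker = 500.  A SACK block [1200, 2000) is
 * accepted by the "start_seq is after snd_una" test above.  A D-SACK block
 * [900, 1100) is also accepted: it starts below snd_una, but it still ends
 * above snd_una and does not reach below the undo point, so it may safely
 * contribute to undo_retrans accounting.  A block [200, 400) is rejected
 * as referring to data that has long since been forgotten.
 */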
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148static void tcp_mark_lost_retrans(struct sock *sk)
1149{
1150 const struct inet_connection_sock *icsk = inet_csk(sk);
1151 struct tcp_sock *tp = tcp_sk(sk);
1152 struct sk_buff *skb;
1153 int cnt = 0;
1154 u32 new_low_seq = tp->snd_nxt;
1155 u32 received_upto = tcp_highest_sack_seq(tp);
1156
1157 if (!tcp_is_fack(tp) || !tp->retrans_out ||
1158 !after(received_upto, tp->lost_retrans_low) ||
1159 icsk->icsk_ca_state != TCP_CA_Recovery)
1160 return;
1161
1162 tcp_for_write_queue(skb, sk) {
1163 u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
1164
1165 if (skb == tcp_send_head(sk))
1166 break;
1167 if (cnt == tp->retrans_out)
1168 break;
1169 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1170 continue;
1171
1172 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
1173 continue;
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186 if (after(received_upto, ack_seq)) {
1187 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1188 tp->retrans_out -= tcp_skb_pcount(skb);
1189
1190 tcp_skb_mark_lost_uncond_verify(tp, skb);
1191 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
1192 } else {
1193 if (before(ack_seq, new_low_seq))
1194 new_low_seq = ack_seq;
1195 cnt += tcp_skb_pcount(skb);
1196 }
1197 }
1198
1199 if (tp->retrans_out)
1200 tp->lost_retrans_low = new_low_seq;
1201}
1202
1203static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
1204 struct tcp_sack_block_wire *sp, int num_sacks,
1205 u32 prior_snd_una)
1206{
1207 struct tcp_sock *tp = tcp_sk(sk);
1208 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
1209 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
1210 int dup_sack = 0;
1211
1212 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1213 dup_sack = 1;
1214 tcp_dsack_seen(tp);
1215 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
1216 } else if (num_sacks > 1) {
1217 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
1218 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
1219
1220 if (!after(end_seq_0, end_seq_1) &&
1221 !before(start_seq_0, start_seq_1)) {
1222 dup_sack = 1;
1223 tcp_dsack_seen(tp);
1224 NET_INC_STATS_BH(sock_net(sk),
1225 LINUX_MIB_TCPDSACKOFORECV);
1226 }
1227 }
1228
1229
1230 if (dup_sack &&
1231 !after(end_seq_0, prior_snd_una) &&
1232 after(end_seq_0, tp->undo_marker))
1233 tp->undo_retrans--;
1234
1235 return dup_sack;
1236}
1237
1238struct tcp_sacktag_state {
1239 int reord;
1240 int fack_count;
1241 int flag;
1242};
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1253 u32 start_seq, u32 end_seq)
1254{
1255 int in_sack, err;
1256 unsigned int pkt_len;
1257 unsigned int mss;
1258
1259 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1260 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1261
1262 if (tcp_skb_pcount(skb) > 1 && !in_sack &&
1263 after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
1264 mss = tcp_skb_mss(skb);
1265 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1266
1267 if (!in_sack) {
1268 pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
1269 if (pkt_len < mss)
1270 pkt_len = mss;
1271 } else {
1272 pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
1273 if (pkt_len < mss)
1274 return -EINVAL;
1275 }
1276
1277
1278
1279
1280 if (pkt_len > mss) {
1281 unsigned int new_len = (pkt_len / mss) * mss;
1282 if (!in_sack && new_len < pkt_len) {
1283 new_len += mss;
1284 if (new_len > skb->len)
1285 return 0;
1286 }
1287 pkt_len = new_len;
1288 }
1289 err = tcp_fragment(sk, skb, pkt_len, mss);
1290 if (err < 0)
1291 return err;
1292 }
1293
1294 return in_sack;
1295}
1296
1297static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
1298 struct tcp_sacktag_state *state,
1299 int dup_sack, int pcount)
1300{
1301 struct tcp_sock *tp = tcp_sk(sk);
1302 u8 sacked = TCP_SKB_CB(skb)->sacked;
1303 int fack_count = state->fack_count;
1304
1305
1306 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1307 if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
1308 tp->undo_retrans--;
1309 if (sacked & TCPCB_SACKED_ACKED)
1310 state->reord = min(fack_count, state->reord);
1311 }
1312
1313
1314 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1315 return sacked;
1316
1317 if (!(sacked & TCPCB_SACKED_ACKED)) {
1318 if (sacked & TCPCB_SACKED_RETRANS) {
1319
1320
1321
1322
1323 if (sacked & TCPCB_LOST) {
1324 sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1325 tp->lost_out -= pcount;
1326 tp->retrans_out -= pcount;
1327 }
1328 } else {
1329 if (!(sacked & TCPCB_RETRANS)) {
1330
1331
1332
1333 if (before(TCP_SKB_CB(skb)->seq,
1334 tcp_highest_sack_seq(tp)))
1335 state->reord = min(fack_count,
1336 state->reord);
1337
1338
1339 if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
1340 state->flag |= FLAG_ONLY_ORIG_SACKED;
1341 }
1342
1343 if (sacked & TCPCB_LOST) {
1344 sacked &= ~TCPCB_LOST;
1345 tp->lost_out -= pcount;
1346 }
1347 }
1348
1349 sacked |= TCPCB_SACKED_ACKED;
1350 state->flag |= FLAG_DATA_SACKED;
1351 tp->sacked_out += pcount;
1352
1353 fack_count += pcount;
1354
1355
1356 if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
1357 before(TCP_SKB_CB(skb)->seq,
1358 TCP_SKB_CB(tp->lost_skb_hint)->seq))
1359 tp->lost_cnt_hint += pcount;
1360
1361 if (fack_count > tp->fackets_out)
1362 tp->fackets_out = fack_count;
1363 }
1364
1365
1366
1367
1368
1369 if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
1370 sacked &= ~TCPCB_SACKED_RETRANS;
1371 tp->retrans_out -= pcount;
1372 }
1373
1374 return sacked;
1375}
1376
1377static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1378 struct tcp_sacktag_state *state,
1379 unsigned int pcount, int shifted, int mss,
1380 int dup_sack)
1381{
1382 struct tcp_sock *tp = tcp_sk(sk);
1383 struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
1384
1385 BUG_ON(!pcount);
1386
1387
1388 if (!tcp_is_fack(tp) && tcp_is_sack(tp) && tp->lost_skb_hint &&
1389 !before(TCP_SKB_CB(tp->lost_skb_hint)->seq, TCP_SKB_CB(skb)->seq))
1390 tp->lost_cnt_hint += pcount;
1391
1392 TCP_SKB_CB(prev)->end_seq += shifted;
1393 TCP_SKB_CB(skb)->seq += shifted;
1394
1395 skb_shinfo(prev)->gso_segs += pcount;
1396 BUG_ON(skb_shinfo(skb)->gso_segs < pcount);
1397 skb_shinfo(skb)->gso_segs -= pcount;
1398
1399
1400
1401
1402
1403
1404 if (!skb_shinfo(prev)->gso_size) {
1405 skb_shinfo(prev)->gso_size = mss;
1406 skb_shinfo(prev)->gso_type = sk->sk_gso_type;
1407 }
1408
1409
1410 if (skb_shinfo(skb)->gso_segs <= 1) {
1411 skb_shinfo(skb)->gso_size = 0;
1412 skb_shinfo(skb)->gso_type = 0;
1413 }
1414
1415
1416 tcp_sacktag_one(skb, sk, state, dup_sack, pcount);
1417
1418
1419 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
1420
1421 if (skb->len > 0) {
1422 BUG_ON(!tcp_skb_pcount(skb));
1423 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED);
1424 return 0;
1425 }
1426
1427
1428
1429 if (skb == tp->retransmit_skb_hint)
1430 tp->retransmit_skb_hint = prev;
1431 if (skb == tp->scoreboard_skb_hint)
1432 tp->scoreboard_skb_hint = prev;
1433 if (skb == tp->lost_skb_hint) {
1434 tp->lost_skb_hint = prev;
1435 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
1436 }
1437
1438 TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags;
1439 if (skb == tcp_highest_sack(sk))
1440 tcp_advance_highest_sack(sk, skb);
1441
1442 tcp_unlink_write_queue(skb, sk);
1443 sk_wmem_free_skb(sk, skb);
1444
1445 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED);
1446
1447 return 1;
1448}
1449
1450
1451
1452
1453static int tcp_skb_seglen(struct sk_buff *skb)
1454{
1455 return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
1456}
1457
1458
1459static int skb_can_shift(struct sk_buff *skb)
1460{
1461 return !skb_headlen(skb) && skb_is_nonlinear(skb);
1462}
1463
1464
1465
1466
1467static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1468 struct tcp_sacktag_state *state,
1469 u32 start_seq, u32 end_seq,
1470 int dup_sack)
1471{
1472 struct tcp_sock *tp = tcp_sk(sk);
1473 struct sk_buff *prev;
1474 int mss;
1475 int pcount = 0;
1476 int len;
1477 int in_sack;
1478
1479 if (!sk_can_gso(sk))
1480 goto fallback;
1481
1482
1483 if (!dup_sack &&
1484 (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
1485 goto fallback;
1486 if (!skb_can_shift(skb))
1487 goto fallback;
1488
1489 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1490 goto fallback;
1491
1492
1493 if (unlikely(skb == tcp_write_queue_head(sk)))
1494 goto fallback;
1495 prev = tcp_write_queue_prev(sk, skb);
1496
1497 if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
1498 goto fallback;
1499
1500 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1501 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1502
1503 if (in_sack) {
1504 len = skb->len;
1505 pcount = tcp_skb_pcount(skb);
1506 mss = tcp_skb_seglen(skb);
1507
1508
1509
1510
1511 if (mss != tcp_skb_seglen(prev))
1512 goto fallback;
1513 } else {
1514 if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
1515 goto noop;
1516
1517
1518
1519
1520 if (tcp_skb_pcount(skb) <= 1)
1521 goto noop;
1522
1523 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1524 if (!in_sack) {
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536 goto fallback;
1537 }
1538
1539 len = end_seq - TCP_SKB_CB(skb)->seq;
1540 BUG_ON(len < 0);
1541 BUG_ON(len > skb->len);
1542
1543
1544
1545
1546
1547 mss = tcp_skb_mss(skb);
1548
1549
1550
1551
1552 if (mss != tcp_skb_seglen(prev))
1553 goto fallback;
1554
1555 if (len == mss) {
1556 pcount = 1;
1557 } else if (len < mss) {
1558 goto noop;
1559 } else {
1560 pcount = len / mss;
1561 len = pcount * mss;
1562 }
1563 }
1564
1565 if (!skb_shift(prev, skb, len))
1566 goto fallback;
1567 if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
1568 goto out;
1569
1570
1571
1572
1573 if (prev == tcp_write_queue_tail(sk))
1574 goto out;
1575 skb = tcp_write_queue_next(sk, prev);
1576
1577 if (!skb_can_shift(skb) ||
1578 (skb == tcp_send_head(sk)) ||
1579 ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
1580 (mss != tcp_skb_seglen(skb)))
1581 goto out;
1582
1583 len = skb->len;
1584 if (skb_shift(prev, skb, len)) {
1585 pcount += tcp_skb_pcount(skb);
1586 tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
1587 }
1588
1589out:
1590 state->fack_count += pcount;
1591 return prev;
1592
1593noop:
1594 return skb;
1595
1596fallback:
1597 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
1598 return NULL;
1599}
1600
1601static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1602 struct tcp_sack_block *next_dup,
1603 struct tcp_sacktag_state *state,
1604 u32 start_seq, u32 end_seq,
1605 int dup_sack_in)
1606{
1607 struct tcp_sock *tp = tcp_sk(sk);
1608 struct sk_buff *tmp;
1609
1610 tcp_for_write_queue_from(skb, sk) {
1611 int in_sack = 0;
1612 int dup_sack = dup_sack_in;
1613
1614 if (skb == tcp_send_head(sk))
1615 break;
1616
1617
1618 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
1619 break;
1620
1621 if ((next_dup != NULL) &&
1622 before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
1623 in_sack = tcp_match_skb_to_sack(sk, skb,
1624 next_dup->start_seq,
1625 next_dup->end_seq);
1626 if (in_sack > 0)
1627 dup_sack = 1;
1628 }
1629
1630
1631
1632
1633
1634 if (in_sack <= 0) {
1635 tmp = tcp_shift_skb_data(sk, skb, state,
1636 start_seq, end_seq, dup_sack);
1637 if (tmp != NULL) {
1638 if (tmp != skb) {
1639 skb = tmp;
1640 continue;
1641 }
1642
1643 in_sack = 0;
1644 } else {
1645 in_sack = tcp_match_skb_to_sack(sk, skb,
1646 start_seq,
1647 end_seq);
1648 }
1649 }
1650
1651 if (unlikely(in_sack < 0))
1652 break;
1653
1654 if (in_sack) {
1655 TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk,
1656 state,
1657 dup_sack,
1658 tcp_skb_pcount(skb));
1659
1660 if (!before(TCP_SKB_CB(skb)->seq,
1661 tcp_highest_sack_seq(tp)))
1662 tcp_advance_highest_sack(sk, skb);
1663 }
1664
1665 state->fack_count += tcp_skb_pcount(skb);
1666 }
1667 return skb;
1668}
1669
1670
1671
1672
1673static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
1674 struct tcp_sacktag_state *state,
1675 u32 skip_to_seq)
1676{
1677 tcp_for_write_queue_from(skb, sk) {
1678 if (skb == tcp_send_head(sk))
1679 break;
1680
1681 if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
1682 break;
1683
1684 state->fack_count += tcp_skb_pcount(skb);
1685 }
1686 return skb;
1687}
1688
1689static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1690 struct sock *sk,
1691 struct tcp_sack_block *next_dup,
1692 struct tcp_sacktag_state *state,
1693 u32 skip_to_seq)
1694{
1695 if (next_dup == NULL)
1696 return skb;
1697
1698 if (before(next_dup->start_seq, skip_to_seq)) {
1699 skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq);
1700 skb = tcp_sacktag_walk(skb, sk, NULL, state,
1701 next_dup->start_seq, next_dup->end_seq,
1702 1);
1703 }
1704
1705 return skb;
1706}
1707
1708static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache)
1709{
1710 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1711}
1712
1713static int
1714tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
1715 u32 prior_snd_una)
1716{
1717 const struct inet_connection_sock *icsk = inet_csk(sk);
1718 struct tcp_sock *tp = tcp_sk(sk);
1719 unsigned char *ptr = (skb_transport_header(ack_skb) +
1720 TCP_SKB_CB(ack_skb)->sacked);
1721 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
1722 struct tcp_sack_block sp[TCP_NUM_SACKS];
1723 struct tcp_sack_block *cache;
1724 struct tcp_sacktag_state state;
1725 struct sk_buff *skb;
1726 int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
1727 int used_sacks;
1728 int found_dup_sack = 0;
1729 int i, j;
1730 int first_sack_index;
1731
1732 state.flag = 0;
1733 state.reord = tp->packets_out;
1734
1735 if (!tp->sacked_out) {
1736 if (WARN_ON(tp->fackets_out))
1737 tp->fackets_out = 0;
1738 tcp_highest_sack_reset(sk);
1739 }
1740
1741 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
1742 num_sacks, prior_snd_una);
1743 if (found_dup_sack)
1744 state.flag |= FLAG_DSACKING_ACK;
1745
1746
1747
1748
1749
1750 if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
1751 return 0;
1752
1753 if (!tp->packets_out)
1754 goto out;
1755
1756 used_sacks = 0;
1757 first_sack_index = 0;
1758 for (i = 0; i < num_sacks; i++) {
1759 int dup_sack = !i && found_dup_sack;
1760
1761 sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
1762 sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);
1763
1764 if (!tcp_is_sackblock_valid(tp, dup_sack,
1765 sp[used_sacks].start_seq,
1766 sp[used_sacks].end_seq)) {
1767 int mib_idx;
1768
1769 if (dup_sack) {
1770 if (!tp->undo_marker)
1771 mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
1772 else
1773 mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
1774 } else {
1775
1776 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
1777 !after(sp[used_sacks].end_seq, tp->snd_una))
1778 continue;
1779 mib_idx = LINUX_MIB_TCPSACKDISCARD;
1780 }
1781
1782 NET_INC_STATS_BH(sock_net(sk), mib_idx);
1783 if (i == 0)
1784 first_sack_index = -1;
1785 continue;
1786 }
1787
1788
1789 if (!after(sp[used_sacks].end_seq, prior_snd_una))
1790 continue;
1791
1792 used_sacks++;
1793 }
1794
1795
1796 for (i = used_sacks - 1; i > 0; i--) {
1797 for (j = 0; j < i; j++) {
1798 if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
1799 swap(sp[j], sp[j + 1]);
1800
1801
1802 if (j == first_sack_index)
1803 first_sack_index = j + 1;
1804 }
1805 }
1806 }
1807
1808 skb = tcp_write_queue_head(sk);
1809 state.fack_count = 0;
1810 i = 0;
1811
1812 if (!tp->sacked_out) {
1813
1814 cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1815 } else {
1816 cache = tp->recv_sack_cache;
1817
1818 while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
1819 !cache->end_seq)
1820 cache++;
1821 }
1822
1823 while (i < used_sacks) {
1824 u32 start_seq = sp[i].start_seq;
1825 u32 end_seq = sp[i].end_seq;
1826 int dup_sack = (found_dup_sack && (i == first_sack_index));
1827 struct tcp_sack_block *next_dup = NULL;
1828
1829 if (found_dup_sack && ((i + 1) == first_sack_index))
1830 next_dup = &sp[i + 1];
1831
1832
1833 if (after(end_seq, tp->high_seq))
1834 state.flag |= FLAG_DATA_LOST;
1835
1836
1837 while (tcp_sack_cache_ok(tp, cache) &&
1838 !before(start_seq, cache->end_seq))
1839 cache++;
1840
1841
1842 if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
1843 after(end_seq, cache->start_seq)) {
1844
1845
1846 if (before(start_seq, cache->start_seq)) {
1847 skb = tcp_sacktag_skip(skb, sk, &state,
1848 start_seq);
1849 skb = tcp_sacktag_walk(skb, sk, next_dup,
1850 &state,
1851 start_seq,
1852 cache->start_seq,
1853 dup_sack);
1854 }
1855
1856
1857 if (!after(end_seq, cache->end_seq))
1858 goto advance_sp;
1859
1860 skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
1861 &state,
1862 cache->end_seq);
1863
1864
1865 if (tcp_highest_sack_seq(tp) == cache->end_seq) {
1866
1867 skb = tcp_highest_sack(sk);
1868 if (skb == NULL)
1869 break;
1870 state.fack_count = tp->fackets_out;
1871 cache++;
1872 goto walk;
1873 }
1874
1875 skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
1876
1877 cache++;
1878 continue;
1879 }
1880
1881 if (!before(start_seq, tcp_highest_sack_seq(tp))) {
1882 skb = tcp_highest_sack(sk);
1883 if (skb == NULL)
1884 break;
1885 state.fack_count = tp->fackets_out;
1886 }
1887 skb = tcp_sacktag_skip(skb, sk, &state, start_seq);
1888
1889walk:
1890 skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
1891 start_seq, end_seq, dup_sack);
1892
1893advance_sp:
1894
1895
1896
1897 if (after(end_seq, tp->frto_highmark))
1898 state.flag &= ~FLAG_ONLY_ORIG_SACKED;
1899
1900 i++;
1901 }
1902
1903
1904 for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
1905 tp->recv_sack_cache[i].start_seq = 0;
1906 tp->recv_sack_cache[i].end_seq = 0;
1907 }
1908 for (j = 0; j < used_sacks; j++)
1909 tp->recv_sack_cache[i++] = sp[j];
1910
1911 tcp_mark_lost_retrans(sk);
1912
1913 tcp_verify_left_out(tp);
1914
1915 if ((state.reord < tp->fackets_out) &&
1916 ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
1917 (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
1918 tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
1919
1920out:
1921
1922#if FASTRETRANS_DEBUG > 0
1923 WARN_ON((int)tp->sacked_out < 0);
1924 WARN_ON((int)tp->lost_out < 0);
1925 WARN_ON((int)tp->retrans_out < 0);
1926 WARN_ON((int)tcp_packets_in_flight(tp) < 0);
1927#endif
1928 return state.flag;
1929}
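
/* Walk summary: recv_sack_cache remembers the SACK blocks from the previous
 * ACK, so tcp_sacktag_write_queue() only has to re-walk the parts of each
 * new block that were not already covered last time; fully cached ranges
 * are stepped over via tcp_sacktag_skip()/tcp_maybe_skipping_dsack(), which
 * keeps SACK processing cheap even with a long retransmit queue.
 */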
1930
1931
1932
1933
1934static int tcp_limit_reno_sacked(struct tcp_sock *tp)
1935{
1936 u32 holes;
1937
1938 holes = max(tp->lost_out, 1U);
1939 holes = min(holes, tp->packets_out);
1940
1941 if ((tp->sacked_out + holes) > tp->packets_out) {
1942 tp->sacked_out = tp->packets_out - holes;
1943 return 1;
1944 }
1945 return 0;
1946}
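
/* For plain Reno (no SACK), sacked_out is really a duplicate-ACK counter:
 * every dupack is treated as if exactly one segment beyond the hole had
 * been "sacked".  The clamp above keeps that emulation honest by ensuring
 * sacked_out + lost_out never exceeds packets_out; when it would, the
 * excess dupacks are taken as evidence of reordering instead (see
 * tcp_check_reno_reordering() below).
 */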
1947
1948
1949
1950
1951
1952static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1953{
1954 struct tcp_sock *tp = tcp_sk(sk);
1955 if (tcp_limit_reno_sacked(tp))
1956 tcp_update_reordering(sk, tp->packets_out + addend, 0);
1957}
1958
1959
1960
1961static void tcp_add_reno_sack(struct sock *sk)
1962{
1963 struct tcp_sock *tp = tcp_sk(sk);
1964 tp->sacked_out++;
1965 tcp_check_reno_reordering(sk, 0);
1966 tcp_verify_left_out(tp);
1967}
1968
1969
1970
1971static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1972{
1973 struct tcp_sock *tp = tcp_sk(sk);
1974
1975 if (acked > 0) {
1976
1977 if (acked - 1 >= tp->sacked_out)
1978 tp->sacked_out = 0;
1979 else
1980 tp->sacked_out -= acked - 1;
1981 }
1982 tcp_check_reno_reordering(sk, acked);
1983 tcp_verify_left_out(tp);
1984}
1985
1986static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
1987{
1988 tp->sacked_out = 0;
1989}
1990
1991static int tcp_is_sackfrto(const struct tcp_sock *tp)
1992{
1993 return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp);
1994}
1995
1996
1997
1998
1999int tcp_use_frto(struct sock *sk)
2000{
2001 const struct tcp_sock *tp = tcp_sk(sk);
2002 const struct inet_connection_sock *icsk = inet_csk(sk);
2003 struct sk_buff *skb;
2004
2005 if (!sysctl_tcp_frto)
2006 return 0;
2007
2008
2009 if (icsk->icsk_mtup.probe_size)
2010 return 0;
2011
2012 if (tcp_is_sackfrto(tp))
2013 return 1;
2014
2015
2016 if (tp->retrans_out > 1)
2017 return 0;
2018
2019 skb = tcp_write_queue_head(sk);
2020 if (tcp_skb_is_last(sk, skb))
2021 return 1;
2022 skb = tcp_write_queue_next(sk, skb);
2023 tcp_for_write_queue_from(skb, sk) {
2024 if (skb == tcp_send_head(sk))
2025 break;
2026 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
2027 return 0;
2028
2029 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2030 break;
2031 }
2032 return 1;
2033}

/* RTO occurred, but do not yet enter Loss state.  Instead, defer the RTO
 * recovery a bit and use heuristics in tcp_process_frto() to detect if
 * the RTO was spurious.
 *
 * F-RTO is implemented (mainly) in four functions:
 *   - tcp_use_frto() determines whether F-RTO can be used at all
 *   - tcp_enter_frto() prepares the TCP state on RTO when F-RTO is used
 *   - tcp_process_frto() handles incoming ACKs during the F-RTO algorithm
 *   - tcp_enter_frto_loss() is called when there is not enough evidence
 *     that the RTO was spurious; it hands control back to conventional
 *     RTO recovery
 */
2047void tcp_enter_frto(struct sock *sk)
2048{
2049 const struct inet_connection_sock *icsk = inet_csk(sk);
2050 struct tcp_sock *tp = tcp_sk(sk);
2051 struct sk_buff *skb;
2052
2053 if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
2054 tp->snd_una == tp->high_seq ||
2055 ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
2056 !icsk->icsk_retransmits)) {
2057 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067 if (tp->frto_counter) {
2068 u32 stored_cwnd;
2069 stored_cwnd = tp->snd_cwnd;
2070 tp->snd_cwnd = 2;
2071 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2072 tp->snd_cwnd = stored_cwnd;
2073 } else {
2074 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2075 }
2076
2077
2078
2079
2080
2081
2082
2083 tcp_ca_event(sk, CA_EVENT_FRTO);
2084 }
2085
2086 tp->undo_marker = tp->snd_una;
2087 tp->undo_retrans = 0;
2088
2089 skb = tcp_write_queue_head(sk);
2090 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
2091 tp->undo_marker = 0;
2092 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2093 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2094 tp->retrans_out -= tcp_skb_pcount(skb);
2095 }
2096 tcp_verify_left_out(tp);
2097
2098
2099 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
2100
2101
2102
2103
2104 if (tcp_is_sackfrto(tp) && (tp->frto_counter ||
2105 ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
2106 after(tp->high_seq, tp->snd_una)) {
2107 tp->frto_highmark = tp->high_seq;
2108 } else {
2109 tp->frto_highmark = tp->snd_nxt;
2110 }
2111 tcp_set_ca_state(sk, TCP_CA_Disorder);
2112 tp->high_seq = tp->snd_nxt;
2113 tp->frto_counter = 1;
2114}
2115
2116
2117
2118
2119
2120static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
2121{
2122 struct tcp_sock *tp = tcp_sk(sk);
2123 struct sk_buff *skb;
2124
2125 tp->lost_out = 0;
2126 tp->retrans_out = 0;
2127 if (tcp_is_reno(tp))
2128 tcp_reset_reno_sack(tp);
2129
2130 tcp_for_write_queue(skb, sk) {
2131 if (skb == tcp_send_head(sk))
2132 break;
2133
2134 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2135
2136
2137
2138
2139 if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) {
2140
2141 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
2142 tp->retrans_out += tcp_skb_pcount(skb);
2143
2144 flag |= FLAG_DATA_ACKED;
2145 } else {
2146 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
2147 tp->undo_marker = 0;
2148 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2149 }
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2161 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
2162 tp->lost_out += tcp_skb_pcount(skb);
2163 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
2164 }
2165 }
2166 tcp_verify_left_out(tp);
2167
2168 tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
2169 tp->snd_cwnd_cnt = 0;
2170 tp->snd_cwnd_stamp = tcp_time_stamp;
2171 tp->frto_counter = 0;
2172 tp->bytes_acked = 0;
2173
2174 tp->reordering = min_t(unsigned int, tp->reordering,
2175 sysctl_tcp_reordering);
2176 tcp_set_ca_state(sk, TCP_CA_Loss);
2177 tp->high_seq = tp->snd_nxt;
2178 TCP_ECN_queue_cwr(tp);
2179
2180 tcp_clear_all_retrans_hints(tp);
2181}
2182
2183static void tcp_clear_retrans_partial(struct tcp_sock *tp)
2184{
2185 tp->retrans_out = 0;
2186 tp->lost_out = 0;
2187
2188 tp->undo_marker = 0;
2189 tp->undo_retrans = 0;
2190}
2191
2192void tcp_clear_retrans(struct tcp_sock *tp)
2193{
2194 tcp_clear_retrans_partial(tp);
2195
2196 tp->fackets_out = 0;
2197 tp->sacked_out = 0;
2198}
2199
2200
2201
2202
2203
2204void tcp_enter_loss(struct sock *sk, int how)
2205{
2206 const struct inet_connection_sock *icsk = inet_csk(sk);
2207 struct tcp_sock *tp = tcp_sk(sk);
2208 struct sk_buff *skb;
2209
2210
2211 if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
2212 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
2213 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2214 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2215 tcp_ca_event(sk, CA_EVENT_LOSS);
2216 }
2217 tp->snd_cwnd = 1;
2218 tp->snd_cwnd_cnt = 0;
2219 tp->snd_cwnd_stamp = tcp_time_stamp;
2220
2221 tp->bytes_acked = 0;
2222 tcp_clear_retrans_partial(tp);
2223
2224 if (tcp_is_reno(tp))
2225 tcp_reset_reno_sack(tp);
2226
2227 if (!how) {
2228
2229
2230 tp->undo_marker = tp->snd_una;
2231 } else {
2232 tp->sacked_out = 0;
2233 tp->fackets_out = 0;
2234 }
2235 tcp_clear_all_retrans_hints(tp);
2236
2237 tcp_for_write_queue(skb, sk) {
2238 if (skb == tcp_send_head(sk))
2239 break;
2240
2241 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
2242 tp->undo_marker = 0;
2243 TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
2244 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
2245 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
2246 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
2247 tp->lost_out += tcp_skb_pcount(skb);
2248 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
2249 }
2250 }
2251 tcp_verify_left_out(tp);
2252
2253 tp->reordering = min_t(unsigned int, tp->reordering,
2254 sysctl_tcp_reordering);
2255 tcp_set_ca_state(sk, TCP_CA_Loss);
2256 tp->high_seq = tp->snd_nxt;
2257 TCP_ECN_queue_cwr(tp);
2258
2259 tp->frto_counter = 0;
2260}
2261
2262
2263
2264
2265
2266
2267
2268static int tcp_check_sack_reneging(struct sock *sk, int flag)
2269{
2270 if (flag & FLAG_SACK_RENEGING) {
2271 struct inet_connection_sock *icsk = inet_csk(sk);
2272 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
2273
2274 tcp_enter_loss(sk, 1);
2275 icsk->icsk_retransmits++;
2276 tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
2277 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2278 icsk->icsk_rto, TCP_RTO_MAX);
2279 return 1;
2280 }
2281 return 0;
2282}
2283
2284static inline int tcp_fackets_out(struct tcp_sock *tp)
2285{
2286 return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
2287}
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304static inline int tcp_dupack_heurestics(struct tcp_sock *tp)
2305{
2306 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
2307}
2308
2309static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
2310{
2311 return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
2312}
2313
2314static inline int tcp_head_timedout(struct sock *sk)
2315{
2316 struct tcp_sock *tp = tcp_sk(sk);
2317
2318 return tp->packets_out &&
2319 tcp_skb_timedout(sk, tcp_write_queue_head(sk));
2320}

/* Linux NewReno/SACK/FACK/ECN state machine.
 * -------------------------------------------
 *
 * "Open"	Normal state, no dubious events, fast path.
 * "Disorder"	In all respects it is "Open", but it requires a bit more
 *		attention. It is entered when we see some SACKs or dupacks.
 *		It is split from "Open" mainly to move some processing from
 *		the fast path to the slow one.
 * "CWR"	The congestion window was reduced due to a congestion
 *		notification event (ECE, local device congestion).
 * "Recovery"	The window was reduced, we are fast-retransmitting.
 * "Loss"	The window was reduced due to an RTO timeout or SACK reneging.
 *
 * tcp_fastretrans_alert() is entered for each incoming ACK when the state
 * is not "Open", and for any unusual ACK (SACK, duplicate ACK, ECN ECE).
 *
 * Counting packets in flight is simple:
 *
 *	in_flight = packets_out - left_out + retrans_out
 *	left_out  = sacked_out + lost_out
 *
 * packets_out is SND.NXT-SND.UNA counted in packets, retrans_out is the
 * number of retransmitted segments, sacked_out counts segments that reached
 * the receiver out of order (with SACK it is the amount of SACKed data,
 * without SACK it is estimated by counting duplicate ACKs), and lost_out is
 * our guess at the number of packets dropped by the network.
 *
 * Two heuristics estimate lost packets:
 *
 *	FACK: as soon as something is deemed lost, every not-yet-SACKed
 *	segment up to the most forward SACK is considered lost.  This is
 *	exact if the network does not reorder and wrong otherwise, so it is
 *	used only until reordering is suspected on the path.
 *
 *	NewReno: on entering Recovery we assume one segment is lost, and
 *	each partial ACK during Recovery implies one more lost packet.
 *
 * CWND is the real congestion window and is never inflated; it changes only
 * according to the classic Van Jacobson rules.  The genuinely tricky parts
 * are tcp_time_to_recover(), which decides *when* a hole is loss rather
 * than reordering, tcp_xmit_retransmit_queue(), which decides *what* to
 * retransmit, and the tcp_try_undo_*() heuristics, which detect (via
 * timestamps and D-SACKs) that a retransmit was spurious and roll the
 * window reduction back.
 */
2415static int tcp_time_to_recover(struct sock *sk)
2416{
2417 struct tcp_sock *tp = tcp_sk(sk);
2418 __u32 packets_out;
2419
2420
2421 if (tp->frto_counter)
2422 return 0;
2423
2424
2425 if (tp->lost_out)
2426 return 1;
2427
2428
2429 if (tcp_dupack_heurestics(tp) > tp->reordering)
2430 return 1;
2431
2432
2433
2434
2435 if (tcp_is_fack(tp) && tcp_head_timedout(sk))
2436 return 1;
2437
2438
2439
2440
2441 packets_out = tp->packets_out;
2442 if (packets_out <= tp->reordering &&
2443 tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
2444 !tcp_may_send_now(sk)) {
2445
2446
2447
2448 return 1;
2449 }
2450
2451 return 0;
2452}
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466static void tcp_timeout_skbs(struct sock *sk)
2467{
2468 struct tcp_sock *tp = tcp_sk(sk);
2469 struct sk_buff *skb;
2470
2471 if (!tcp_is_fack(tp) || !tcp_head_timedout(sk))
2472 return;
2473
2474 skb = tp->scoreboard_skb_hint;
2475 if (tp->scoreboard_skb_hint == NULL)
2476 skb = tcp_write_queue_head(sk);
2477
2478 tcp_for_write_queue_from(skb, sk) {
2479 if (skb == tcp_send_head(sk))
2480 break;
2481 if (!tcp_skb_timedout(sk, skb))
2482 break;
2483
2484 tcp_skb_mark_lost(tp, skb);
2485 }
2486
2487 tp->scoreboard_skb_hint = skb;
2488
2489 tcp_verify_left_out(tp);
2490}
2491
2492
2493
2494
2495static void tcp_mark_head_lost(struct sock *sk, int packets)
2496{
2497 struct tcp_sock *tp = tcp_sk(sk);
2498 struct sk_buff *skb;
2499 int cnt, oldcnt;
2500 int err;
2501 unsigned int mss;
2502
2503 WARN_ON(packets > tp->packets_out);
2504 if (tp->lost_skb_hint) {
2505 skb = tp->lost_skb_hint;
2506 cnt = tp->lost_cnt_hint;
2507 } else {
2508 skb = tcp_write_queue_head(sk);
2509 cnt = 0;
2510 }
2511
2512 tcp_for_write_queue_from(skb, sk) {
2513 if (skb == tcp_send_head(sk))
2514 break;
2515
2516
2517 tp->lost_skb_hint = skb;
2518 tp->lost_cnt_hint = cnt;
2519
2520 if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
2521 break;
2522
2523 oldcnt = cnt;
2524 if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
2525 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2526 cnt += tcp_skb_pcount(skb);
2527
2528 if (cnt > packets) {
2529 if (tcp_is_sack(tp) || (oldcnt >= packets))
2530 break;
2531
2532 mss = skb_shinfo(skb)->gso_size;
2533 err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
2534 if (err < 0)
2535 break;
2536 cnt = packets;
2537 }
2538
2539 tcp_skb_mark_lost(tp, skb);
2540 }
2541 tcp_verify_left_out(tp);
2542}
2543
2544
2545
2546static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2547{
2548 struct tcp_sock *tp = tcp_sk(sk);
2549
2550 if (tcp_is_reno(tp)) {
2551 tcp_mark_head_lost(sk, 1);
2552 } else if (tcp_is_fack(tp)) {
2553 int lost = tp->fackets_out - tp->reordering;
2554 if (lost <= 0)
2555 lost = 1;
2556 tcp_mark_head_lost(sk, lost);
2557 } else {
2558 int sacked_upto = tp->sacked_out - tp->reordering;
2559 if (sacked_upto < fast_rexmit)
2560 sacked_upto = fast_rexmit;
2561 tcp_mark_head_lost(sk, sacked_upto);
2562 }
2563
2564 tcp_timeout_skbs(sk);
2565}
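
/* Marking policy recap: with FACK, everything more than the reordering
 * window below the highest SACK is considered lost; with plain RFC 3517
 * SACK, only sacked_out - reordering segments (at least one on a fast
 * retransmit) are marked; with Reno, just the head segment is marked.
 * Head-timeout marking in tcp_timeout_skbs() then runs on top for FACK
 * connections.
 */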
2566
2567
2568
2569
2570static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
2571{
2572 tp->snd_cwnd = min(tp->snd_cwnd,
2573 tcp_packets_in_flight(tp) + tcp_max_burst(tp));
2574 tp->snd_cwnd_stamp = tcp_time_stamp;
2575}
2576
2577
2578
2579
2580static inline u32 tcp_cwnd_min(const struct sock *sk)
2581{
2582 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
2583
2584 return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh;
2585}
2586
2587
2588static void tcp_cwnd_down(struct sock *sk, int flag)
2589{
2590 struct tcp_sock *tp = tcp_sk(sk);
2591 int decr = tp->snd_cwnd_cnt + 1;
2592
2593 if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
2594 (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
2595 tp->snd_cwnd_cnt = decr & 1;
2596 decr >>= 1;
2597
2598 if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
2599 tp->snd_cwnd -= decr;
2600
2601 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
2602 tp->snd_cwnd_stamp = tcp_time_stamp;
2603 }
2604}
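
/* Rate halving: during CWR/Recovery the window is walked down by one
 * segment for every second qualifying ACK (snd_cwnd_cnt toggles the
 * decrement), rather than being cut in half in one step.  The decrement
 * stops at the congestion module's min_cwnd (snd_ssthresh when the module
 * does not provide one), and the result is additionally capped at
 * packets in flight + 1.
 */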
2605
2606
2607
2608
2609static inline int tcp_packet_delayed(struct tcp_sock *tp)
2610{
2611 return !tp->retrans_stamp ||
2612 (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2613 before(tp->rx_opt.rcv_tsecr, tp->retrans_stamp));
2614}
2615
2616
2617
2618#if FASTRETRANS_DEBUG > 1
2619static void DBGUNDO(struct sock *sk, const char *msg)
2620{
2621 struct tcp_sock *tp = tcp_sk(sk);
2622 struct inet_sock *inet = inet_sk(sk);
2623
2624 if (sk->sk_family == AF_INET) {
2625 printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
2626 msg,
2627 &inet->daddr, ntohs(inet->dport),
2628 tp->snd_cwnd, tcp_left_out(tp),
2629 tp->snd_ssthresh, tp->prior_ssthresh,
2630 tp->packets_out);
2631 }
2632#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
2633 else if (sk->sk_family == AF_INET6) {
2634 struct ipv6_pinfo *np = inet6_sk(sk);
2635 printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
2636 msg,
2637 &np->daddr, ntohs(inet->dport),
2638 tp->snd_cwnd, tcp_left_out(tp),
2639 tp->snd_ssthresh, tp->prior_ssthresh,
2640 tp->packets_out);
2641 }
2642#endif
2643}
2644#else
2645#define DBGUNDO(x...) do { } while (0)
2646#endif
2647
2648static void tcp_undo_cwr(struct sock *sk, const int undo)
2649{
2650 struct tcp_sock *tp = tcp_sk(sk);
2651
2652 if (tp->prior_ssthresh) {
2653 const struct inet_connection_sock *icsk = inet_csk(sk);
2654
2655 if (icsk->icsk_ca_ops->undo_cwnd)
2656 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2657 else
2658 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
2659
2660 if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
2661 tp->snd_ssthresh = tp->prior_ssthresh;
2662 TCP_ECN_withdraw_cwr(tp);
2663 }
2664 } else {
2665 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
2666 }
2667 tcp_moderate_cwnd(tp);
2668 tp->snd_cwnd_stamp = tcp_time_stamp;
2669}
2670
2671static inline int tcp_may_undo(struct tcp_sock *tp)
2672{
2673 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2674}
2675
2676
2677static int tcp_try_undo_recovery(struct sock *sk)
2678{
2679 struct tcp_sock *tp = tcp_sk(sk);
2680
2681 if (tcp_may_undo(tp)) {
2682 int mib_idx;
2683
2684
2685
2686
2687 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2688 tcp_undo_cwr(sk, 1);
2689 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2690 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2691 else
2692 mib_idx = LINUX_MIB_TCPFULLUNDO;
2693
2694 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2695 tp->undo_marker = 0;
2696 }
2697 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
2698
2699
2700
2701 tcp_moderate_cwnd(tp);
2702 return 1;
2703 }
2704 tcp_set_ca_state(sk, TCP_CA_Open);
2705 return 0;
2706}
2707
2708
2709static void tcp_try_undo_dsack(struct sock *sk)
2710{
2711 struct tcp_sock *tp = tcp_sk(sk);
2712
2713 if (tp->undo_marker && !tp->undo_retrans) {
2714 DBGUNDO(sk, "D-SACK");
2715 tcp_undo_cwr(sk, 1);
2716 tp->undo_marker = 0;
2717 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2718 }
2719}
2720
2721
2722
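/* Undo during fast recovery after a partial ACK.  Returns non-zero when the
 * caller should still go on marking packets as lost.
 */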
2723static int tcp_try_undo_partial(struct sock *sk, int acked)
2724{
2725 struct tcp_sock *tp = tcp_sk(sk);
2726
2727 int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
2728
2729 if (tcp_may_undo(tp)) {
2730
2731
2732
2733 if (tp->retrans_out == 0)
2734 tp->retrans_stamp = 0;
2735
2736 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
2737
2738 DBGUNDO(sk, "Hoe");
2739 tcp_undo_cwr(sk, 0);
2740 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2741
2742
2743
2744
2745
2746 failed = 0;
2747 }
2748 return failed;
2749}
2750
2751
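/* Undo during loss recovery after a (partial) ACK: clear every LOST mark and
 * restore the pre-timeout window when the retransmissions prove unnecessary.
 */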
2752static int tcp_try_undo_loss(struct sock *sk)
2753{
2754 struct tcp_sock *tp = tcp_sk(sk);
2755
2756 if (tcp_may_undo(tp)) {
2757 struct sk_buff *skb;
2758 tcp_for_write_queue(skb, sk) {
2759 if (skb == tcp_send_head(sk))
2760 break;
2761 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2762 }
2763
2764 tcp_clear_all_retrans_hints(tp);
2765
2766 DBGUNDO(sk, "partial loss");
2767 tp->lost_out = 0;
2768 tcp_undo_cwr(sk, 1);
2769 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2770 inet_csk(sk)->icsk_retransmits = 0;
2771 tp->undo_marker = 0;
2772 if (tcp_is_sack(tp))
2773 tcp_set_ca_state(sk, TCP_CA_Open);
2774 return 1;
2775 }
2776 return 0;
2777}
2778
2779static inline void tcp_complete_cwr(struct sock *sk)
2780{
2781 struct tcp_sock *tp = tcp_sk(sk);
2782 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
2783 tp->snd_cwnd_stamp = tcp_time_stamp;
2784 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2785}
2786
2787static void tcp_try_keep_open(struct sock *sk)
2788{
2789 struct tcp_sock *tp = tcp_sk(sk);
2790 int state = TCP_CA_Open;
2791
2792 if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker)
2793 state = TCP_CA_Disorder;
2794
2795 if (inet_csk(sk)->icsk_ca_state != state) {
2796 tcp_set_ca_state(sk, state);
2797 tp->high_seq = tp->snd_nxt;
2798 }
2799}
2800
2801static void tcp_try_to_open(struct sock *sk, int flag)
2802{
2803 struct tcp_sock *tp = tcp_sk(sk);
2804
2805 tcp_verify_left_out(tp);
2806
2807 if (!tp->frto_counter && tp->retrans_out == 0)
2808 tp->retrans_stamp = 0;
2809
2810 if (flag & FLAG_ECE)
2811 tcp_enter_cwr(sk, 1);
2812
2813 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2814 tcp_try_keep_open(sk);
2815 tcp_moderate_cwnd(tp);
2816 } else {
2817 tcp_cwnd_down(sk, flag);
2818 }
2819}
2820
2821static void tcp_mtup_probe_failed(struct sock *sk)
2822{
2823 struct inet_connection_sock *icsk = inet_csk(sk);
2824
2825 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
2826 icsk->icsk_mtup.probe_size = 0;
2827}
2828
2829static void tcp_mtup_probe_success(struct sock *sk)
2830{
2831 struct tcp_sock *tp = tcp_sk(sk);
2832 struct inet_connection_sock *icsk = inet_csk(sk);
2833
2834
2835 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2836 tp->snd_cwnd = tp->snd_cwnd *
2837 tcp_mss_to_mtu(sk, tp->mss_cache) /
2838 icsk->icsk_mtup.probe_size;
2839 tp->snd_cwnd_cnt = 0;
2840 tp->snd_cwnd_stamp = tcp_time_stamp;
2841 tp->rcv_ssthresh = tcp_current_ssthresh(sk);
2842
2843 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2844 icsk->icsk_mtup.probe_size = 0;
2845 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2846}
2847
2848
2849
2850
2851
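/* Do a simple retransmit without using the backoff mechanisms in tcp_timer:
 * segments larger than the current MSS that were never SACKed are marked lost
 * and retransmitted.  Used after a path-MTU reduction; the socket is already
 * locked here.
 */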
2852void tcp_simple_retransmit(struct sock *sk)
2853{
2854 const struct inet_connection_sock *icsk = inet_csk(sk);
2855 struct tcp_sock *tp = tcp_sk(sk);
2856 struct sk_buff *skb;
2857 unsigned int mss = tcp_current_mss(sk);
2858 u32 prior_lost = tp->lost_out;
2859
2860 tcp_for_write_queue(skb, sk) {
2861 if (skb == tcp_send_head(sk))
2862 break;
2863 if (tcp_skb_seglen(skb) > mss &&
2864 !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2865 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2866 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2867 tp->retrans_out -= tcp_skb_pcount(skb);
2868 }
2869 tcp_skb_mark_lost_uncond_verify(tp, skb);
2870 }
2871 }
2872
2873 tcp_clear_retrans_hints_partial(tp);
2874
2875 if (prior_lost == tp->lost_out)
2876 return;
2877
2878 if (tcp_is_reno(tp))
2879 tcp_limit_reno_sacked(tp);
2880
2881 tcp_verify_left_out(tp);
2882
2883
2884
2885
2886
2887
2888 if (icsk->icsk_ca_state != TCP_CA_Loss) {
2889 tp->high_seq = tp->snd_nxt;
2890 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2891 tp->prior_ssthresh = 0;
2892 tp->undo_marker = 0;
2893 tcp_set_ca_state(sk, TCP_CA_Loss);
2894 }
2895 tcp_xmit_retransmit_queue(sk);
2896}
2897
2898
/* Process an event which changes the amount of packets in flight in a
 * non-trivial way (SACK blocks, duplicate or partial ACKs, ECE, ...).
 * The main job is to keep the estimate of lost and left-out packets up to
 * date, to move the connection between the Open, Disorder, CWR, Recovery
 * and Loss states, to reduce the congestion window, and to run the various
 * undo checks.
 *
 * It does not decide what to send; that is done afterwards by
 * tcp_xmit_retransmit_queue().
 */
2909static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2910{
2911 struct inet_connection_sock *icsk = inet_csk(sk);
2912 struct tcp_sock *tp = tcp_sk(sk);
2913 int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
2914 int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
2915 (tcp_fackets_out(tp) > tp->reordering));
2916 int fast_rexmit = 0, mib_idx;
2917
2918 if (WARN_ON(!tp->packets_out && tp->sacked_out))
2919 tp->sacked_out = 0;
2920 if (WARN_ON(!tp->sacked_out && tp->fackets_out))
2921 tp->fackets_out = 0;
2922
2923
2924
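/* An ECN-Echo means the cwnd reduction was genuinely required, so forbid
 * undoing it.
 */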
2925 if (flag & FLAG_ECE)
2926 tp->prior_ssthresh = 0;
2927
2928
2929 if (tcp_check_sack_reneging(sk, flag))
2930 return;
2931
2932
2933 if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
2934 before(tp->snd_una, tp->high_seq) &&
2935 icsk->icsk_ca_state != TCP_CA_Open &&
2936 tp->fackets_out > tp->reordering) {
2937 tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
2938 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
2939 }
2940
2941
2942 tcp_verify_left_out(tp);
2943
2944
2945
2946 if (icsk->icsk_ca_state == TCP_CA_Open) {
2947 WARN_ON(tp->retrans_out != 0);
2948 tp->retrans_stamp = 0;
2949 } else if (!before(tp->snd_una, tp->high_seq)) {
2950 switch (icsk->icsk_ca_state) {
2951 case TCP_CA_Loss:
2952 icsk->icsk_retransmits = 0;
2953 if (tcp_try_undo_recovery(sk))
2954 return;
2955 break;
2956
2957 case TCP_CA_CWR:
2958
2959
2960 if (tp->snd_una != tp->high_seq) {
2961 tcp_complete_cwr(sk);
2962 tcp_set_ca_state(sk, TCP_CA_Open);
2963 }
2964 break;
2965
2966 case TCP_CA_Disorder:
2967 tcp_try_undo_dsack(sk);
2968 if (!tp->undo_marker ||
2969
2970
2971 tcp_is_reno(tp) || tp->snd_una != tp->high_seq) {
2972 tp->undo_marker = 0;
2973 tcp_set_ca_state(sk, TCP_CA_Open);
2974 }
2975 break;
2976
2977 case TCP_CA_Recovery:
2978 if (tcp_is_reno(tp))
2979 tcp_reset_reno_sack(tp);
2980 if (tcp_try_undo_recovery(sk))
2981 return;
2982 tcp_complete_cwr(sk);
2983 break;
2984 }
2985 }
2986
2987
2988 switch (icsk->icsk_ca_state) {
2989 case TCP_CA_Recovery:
2990 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
2991 if (tcp_is_reno(tp) && is_dupack)
2992 tcp_add_reno_sack(sk);
2993 } else
2994 do_lost = tcp_try_undo_partial(sk, pkts_acked);
2995 break;
2996 case TCP_CA_Loss:
2997 if (flag & FLAG_DATA_ACKED)
2998 icsk->icsk_retransmits = 0;
2999 if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
3000 tcp_reset_reno_sack(tp);
3001 if (!tcp_try_undo_loss(sk)) {
3002 tcp_moderate_cwnd(tp);
3003 tcp_xmit_retransmit_queue(sk);
3004 return;
3005 }
3006 if (icsk->icsk_ca_state != TCP_CA_Open)
3007 return;
3008
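		/* Loss state is undone; fall through to processing in Open state. */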
3009 default:
3010 if (tcp_is_reno(tp)) {
3011 if (flag & FLAG_SND_UNA_ADVANCED)
3012 tcp_reset_reno_sack(tp);
3013 if (is_dupack)
3014 tcp_add_reno_sack(sk);
3015 }
3016
3017 if (icsk->icsk_ca_state == TCP_CA_Disorder)
3018 tcp_try_undo_dsack(sk);
3019
3020 if (!tcp_time_to_recover(sk)) {
3021 tcp_try_to_open(sk, flag);
3022 return;
3023 }
3024
3025
3026 if (icsk->icsk_ca_state < TCP_CA_CWR &&
3027 icsk->icsk_mtup.probe_size &&
3028 tp->snd_una == tp->mtu_probe.probe_seq_start) {
3029 tcp_mtup_probe_failed(sk);
3030
3031 tp->snd_cwnd++;
3032 tcp_simple_retransmit(sk);
3033 return;
3034 }
3035
3036
3037
3038 if (tcp_is_reno(tp))
3039 mib_idx = LINUX_MIB_TCPRENORECOVERY;
3040 else
3041 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
3042
3043 NET_INC_STATS_BH(sock_net(sk), mib_idx);
3044
3045 tp->high_seq = tp->snd_nxt;
3046 tp->prior_ssthresh = 0;
3047 tp->undo_marker = tp->snd_una;
3048 tp->undo_retrans = tp->retrans_out;
3049
3050 if (icsk->icsk_ca_state < TCP_CA_CWR) {
3051 if (!(flag & FLAG_ECE))
3052 tp->prior_ssthresh = tcp_current_ssthresh(sk);
3053 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
3054 TCP_ECN_queue_cwr(tp);
3055 }
3056
3057 tp->bytes_acked = 0;
3058 tp->snd_cwnd_cnt = 0;
3059 tcp_set_ca_state(sk, TCP_CA_Recovery);
3060 fast_rexmit = 1;
3061 }
3062
3063 if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
3064 tcp_update_scoreboard(sk, fast_rexmit);
3065 tcp_cwnd_down(sk, flag);
3066 tcp_xmit_retransmit_queue(sk);
3067}
3068
3069static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
3070{
3071 tcp_rtt_estimator(sk, seq_rtt);
3072 tcp_set_rto(sk);
3073 inet_csk(sk)->icsk_backoff = 0;
3074}
3075
3076
3077
3078
3079static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
3080{
	/* RTTM rule: a TSecr value may be used to update the smoothed RTT
	 * only when the segment acknowledges new data, i.e. when it advances
	 * the left edge of the send window (RFC 1323).  The RTO backoff is
	 * reset as soon as a valid sample is taken in tcp_valid_rtt_meas().
	 */
3096 struct tcp_sock *tp = tcp_sk(sk);
3097
3098 tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
3099}
3100
3101static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
3102{
	/* Karn's rule: never take an RTT sample from a segment that was
	 * retransmitted, since we cannot tell which transmission this ACK
	 * actually acknowledges.
	 */
3112 if (flag & FLAG_RETRANS_DATA_ACKED)
3113 return;
3114
3115 tcp_valid_rtt_meas(sk, seq_rtt);
3116}
3117
3118static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
3119 const s32 seq_rtt)
3120{
3121 const struct tcp_sock *tp = tcp_sk(sk);
3122
3123 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
3124 tcp_ack_saw_tstamp(sk, flag);
3125 else if (seq_rtt >= 0)
3126 tcp_ack_no_tstamp(sk, seq_rtt, flag);
3127}
3128
3129static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
3130{
3131 const struct inet_connection_sock *icsk = inet_csk(sk);
3132 icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight);
3133 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
3134}
3135
3136
3137
3138
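/* Restart the retransmit timer after forward progress on the connection;
 * RFC 2988 recommends re-arming it to now + RTO.
 */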
3139static void tcp_rearm_rto(struct sock *sk)
3140{
3141 struct tcp_sock *tp = tcp_sk(sk);
3142
3143 if (!tp->packets_out) {
3144 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
3145 } else {
3146 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3147 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
3148 }
3149}
3150
3151
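/* A partial ACK fell inside a TSO segment: trim the acknowledged head off
 * the skb and return how many sub-packets it covered.
 */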
3152static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3153{
3154 struct tcp_sock *tp = tcp_sk(sk);
3155 u32 packets_acked;
3156
3157 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
3158
3159 packets_acked = tcp_skb_pcount(skb);
3160 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
3161 return 0;
3162 packets_acked -= tcp_skb_pcount(skb);
3163
3164 if (packets_acked) {
3165 BUG_ON(tcp_skb_pcount(skb) == 0);
3166 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
3167 }
3168
3169 return packets_acked;
3170}
3171
3172
3173
3174
3175
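/* Remove acknowledged frames from the retransmission queue.  Segments that
 * end at or before snd_una are confirmed delivered and freed; a segment that
 * is only partially covered is trimmed by tcp_tso_acked() and left queued.
 */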
3176static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3177 u32 prior_snd_una)
3178{
3179 struct tcp_sock *tp = tcp_sk(sk);
3180 const struct inet_connection_sock *icsk = inet_csk(sk);
3181 struct sk_buff *skb;
3182 u32 now = tcp_time_stamp;
3183 int fully_acked = 1;
3184 int flag = 0;
3185 u32 pkts_acked = 0;
3186 u32 reord = tp->packets_out;
3187 u32 prior_sacked = tp->sacked_out;
3188 s32 seq_rtt = -1;
3189 s32 ca_seq_rtt = -1;
3190 ktime_t last_ackt = net_invalid_timestamp();
3191
3192 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
3193 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3194 u32 acked_pcount;
3195 u8 sacked = scb->sacked;
3196
3197
3198 if (after(scb->end_seq, tp->snd_una)) {
3199 if (tcp_skb_pcount(skb) == 1 ||
3200 !after(tp->snd_una, scb->seq))
3201 break;
3202
3203 acked_pcount = tcp_tso_acked(sk, skb);
3204 if (!acked_pcount)
3205 break;
3206
3207 fully_acked = 0;
3208 } else {
3209 acked_pcount = tcp_skb_pcount(skb);
3210 }
3211
3212 if (sacked & TCPCB_RETRANS) {
3213 if (sacked & TCPCB_SACKED_RETRANS)
3214 tp->retrans_out -= acked_pcount;
3215 flag |= FLAG_RETRANS_DATA_ACKED;
3216 ca_seq_rtt = -1;
3217 seq_rtt = -1;
3218 if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
3219 flag |= FLAG_NONHEAD_RETRANS_ACKED;
3220 } else {
3221 ca_seq_rtt = now - scb->when;
3222 last_ackt = skb->tstamp;
3223 if (seq_rtt < 0) {
3224 seq_rtt = ca_seq_rtt;
3225 }
3226 if (!(sacked & TCPCB_SACKED_ACKED))
3227 reord = min(pkts_acked, reord);
3228 }
3229
3230 if (sacked & TCPCB_SACKED_ACKED)
3231 tp->sacked_out -= acked_pcount;
3232 if (sacked & TCPCB_LOST)
3233 tp->lost_out -= acked_pcount;
3234
3235 tp->packets_out -= acked_pcount;
3236 pkts_acked += acked_pcount;
3237
3238
3239
3240
3241
3242
3243
3244
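		/* The initial outgoing SYN sits on the write queue like
		 * ordinary data, but it must not be reported as acked data,
		 * or we would leave slow start one packet too early.
		 */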
3245 if (!(scb->flags & TCPCB_FLAG_SYN)) {
3246 flag |= FLAG_DATA_ACKED;
3247 } else {
3248 flag |= FLAG_SYN_ACKED;
3249 tp->retrans_stamp = 0;
3250 }
3251
3252 if (!fully_acked)
3253 break;
3254
3255 tcp_unlink_write_queue(skb, sk);
3256 sk_wmem_free_skb(sk, skb);
3257 tp->scoreboard_skb_hint = NULL;
3258 if (skb == tp->retransmit_skb_hint)
3259 tp->retransmit_skb_hint = NULL;
3260 if (skb == tp->lost_skb_hint)
3261 tp->lost_skb_hint = NULL;
3262 }
3263
3264 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3265 tp->snd_up = tp->snd_una;
3266
3267 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
3268 flag |= FLAG_SACK_RENEGING;
3269
3270 if (flag & FLAG_ACKED) {
3271 const struct tcp_congestion_ops *ca_ops
3272 = inet_csk(sk)->icsk_ca_ops;
3273
3274 if (unlikely(icsk->icsk_mtup.probe_size &&
3275 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3276 tcp_mtup_probe_success(sk);
3277 }
3278
3279 tcp_ack_update_rtt(sk, flag, seq_rtt);
3280 tcp_rearm_rto(sk);
3281
3282 if (tcp_is_reno(tp)) {
3283 tcp_remove_reno_sacks(sk, pkts_acked);
3284 } else {
3285 int delta;
3286
3287
3288 if (reord < prior_fackets)
3289 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
3290
3291 delta = tcp_is_fack(tp) ? pkts_acked :
3292 prior_sacked - tp->sacked_out;
3293 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3294 }
3295
3296 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
3297
3298 if (ca_ops->pkts_acked) {
3299 s32 rtt_us = -1;
3300
3301
3302 if (!(flag & FLAG_RETRANS_DATA_ACKED)) {
3303
3304 if (ca_ops->flags & TCP_CONG_RTT_STAMP &&
3305 !ktime_equal(last_ackt,
3306 net_invalid_timestamp()))
3307 rtt_us = ktime_us_delta(ktime_get_real(),
3308 last_ackt);
3309 else if (ca_seq_rtt > 0)
3310 rtt_us = jiffies_to_usecs(ca_seq_rtt);
3311 }
3312
3313 ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
3314 }
3315 }
3316
3317#if FASTRETRANS_DEBUG > 0
3318 WARN_ON((int)tp->sacked_out < 0);
3319 WARN_ON((int)tp->lost_out < 0);
3320 WARN_ON((int)tp->retrans_out < 0);
3321 if (!tp->packets_out && tcp_is_sack(tp)) {
3322 icsk = inet_csk(sk);
3323 if (tp->lost_out) {
3324 printk(KERN_DEBUG "Leak l=%u %d\n",
3325 tp->lost_out, icsk->icsk_ca_state);
3326 tp->lost_out = 0;
3327 }
3328 if (tp->sacked_out) {
3329 printk(KERN_DEBUG "Leak s=%u %d\n",
3330 tp->sacked_out, icsk->icsk_ca_state);
3331 tp->sacked_out = 0;
3332 }
3333 if (tp->retrans_out) {
3334 printk(KERN_DEBUG "Leak r=%u %d\n",
3335 tp->retrans_out, icsk->icsk_ca_state);
3336 tp->retrans_out = 0;
3337 }
3338 }
3339#endif
3340 return flag;
3341}
3342
3343static void tcp_ack_probe(struct sock *sk)
3344{
3345 const struct tcp_sock *tp = tcp_sk(sk);
3346 struct inet_connection_sock *icsk = inet_csk(sk);
3347
3348
3349
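	/* Did the window open enough for the head segment?  If so the
	 * zero-window probe timer can be stopped; otherwise it is backed off.
	 */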
3350 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
3351 icsk->icsk_backoff = 0;
3352 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
3353
3354
3355
3356 } else {
3357 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3358 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
3359 TCP_RTO_MAX);
3360 }
3361}
3362
3363static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag)
3364{
3365 return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3366 inet_csk(sk)->icsk_ca_state != TCP_CA_Open);
3367}
3368
3369static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3370{
3371 const struct tcp_sock *tp = tcp_sk(sk);
3372 return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
3373 !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
3374}
3375
3376
3377
3378
3379static inline int tcp_may_update_window(const struct tcp_sock *tp,
3380 const u32 ack, const u32 ack_seq,
3381 const u32 nwin)
3382{
3383 return (after(ack, tp->snd_una) ||
3384 after(ack_seq, tp->snd_wl1) ||
3385 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd));
3386}
3387
3388
3389
3390
3391
3392
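/* Update our send window according to the incoming ACK, following the
 * RFC 793 / RFC 1122 window update rules.
 */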
3393static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
3394 u32 ack_seq)
3395{
3396 struct tcp_sock *tp = tcp_sk(sk);
3397 int flag = 0;
3398 u32 nwin = ntohs(tcp_hdr(skb)->window);
3399
3400 if (likely(!tcp_hdr(skb)->syn))
3401 nwin <<= tp->rx_opt.snd_wscale;
3402
3403 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3404 flag |= FLAG_WIN_UPDATE;
3405 tcp_update_wl(tp, ack_seq);
3406
3407 if (tp->snd_wnd != nwin) {
3408 tp->snd_wnd = nwin;
3409
3410
3411
3412
3413 tp->pred_flags = 0;
3414 tcp_fast_path_check(sk);
3415
3416 if (nwin > tp->max_window) {
3417 tp->max_window = nwin;
3418 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3419 }
3420 }
3421 }
3422
3423 tp->snd_una = ack;
3424
3425 return flag;
3426}
3427
3428
3429
3430
3431static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
3432{
3433 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
3434 tp->snd_cwnd_cnt = 0;
3435 tp->bytes_acked = 0;
3436 TCP_ECN_queue_cwr(tp);
3437 tcp_moderate_cwnd(tp);
3438}
3439
3440
3441
3442
3443static void tcp_ratehalving_spur_to_response(struct sock *sk)
3444{
3445 tcp_enter_cwr(sk, 0);
3446}
3447
3448static void tcp_undo_spur_to_response(struct sock *sk, int flag)
3449{
3450 if (flag & FLAG_ECE)
3451 tcp_ratehalving_spur_to_response(sk);
3452 else
3453 tcp_undo_cwr(sk, 1);
3454}
3455
3456
/* F-RTO: sender-side detection of spurious retransmission timeouts
 * (RFC 4138, with a SACK-enhanced variant).  After a retransmission timeout
 * the sender transmits new data when possible instead of blindly
 * retransmitting the whole window, and the ACKs that follow reveal whether
 * the timeout was spurious.
 *
 * tcp_process_frto() is called for every ACK while frto_counter is non-zero.
 * It returns non-zero while F-RTO is still manipulating the congestion
 * window, in which case the caller skips normal congestion avoidance for
 * this ACK.  When the RTO is judged spurious, the response selected by
 * sysctl_tcp_frto_response is applied; otherwise tcp_enter_frto_loss()
 * falls back to conventional loss recovery.
 */
3486static int tcp_process_frto(struct sock *sk, int flag)
3487{
3488 struct tcp_sock *tp = tcp_sk(sk);
3489
3490 tcp_verify_left_out(tp);
3491
3492
3493 if (flag & FLAG_DATA_ACKED)
3494 inet_csk(sk)->icsk_retransmits = 0;
3495
3496 if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
3497 ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
3498 tp->undo_marker = 0;
3499
3500 if (!before(tp->snd_una, tp->frto_highmark)) {
3501 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
3502 return 1;
3503 }
3504
3505 if (!tcp_is_sackfrto(tp)) {
3506
3507
3508
3509
3510 if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP))
3511 return 1;
3512
3513 if (!(flag & FLAG_DATA_ACKED)) {
3514 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
3515 flag);
3516 return 1;
3517 }
3518 } else {
3519 if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
3520
3521 tp->snd_cwnd = min(tp->snd_cwnd,
3522 tcp_packets_in_flight(tp));
3523 return 1;
3524 }
3525
3526 if ((tp->frto_counter >= 2) &&
3527 (!(flag & FLAG_FORWARD_PROGRESS) ||
3528 ((flag & FLAG_DATA_SACKED) &&
3529 !(flag & FLAG_ONLY_ORIG_SACKED)))) {
3530
3531 if (!(flag & FLAG_FORWARD_PROGRESS) &&
3532 (flag & FLAG_NOT_DUP))
3533 return 1;
3534
3535 tcp_enter_frto_loss(sk, 3, flag);
3536 return 1;
3537 }
3538 }
3539
3540 if (tp->frto_counter == 1) {
3541
3542 tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
3543 tp->frto_counter = 2;
3544
3545 if (!tcp_may_send_now(sk))
3546 tcp_enter_frto_loss(sk, 2, flag);
3547
3548 return 1;
3549 } else {
3550 switch (sysctl_tcp_frto_response) {
3551 case 2:
3552 tcp_undo_spur_to_response(sk, flag);
3553 break;
3554 case 1:
3555 tcp_conservative_spur_to_response(tp);
3556 break;
3557 default:
3558 tcp_ratehalving_spur_to_response(sk);
3559 break;
3560 }
3561 tp->frto_counter = 0;
3562 tp->undo_marker = 0;
3563 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS);
3564 }
3565 return 0;
3566}
3567
3568
3569static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3570{
3571 struct inet_connection_sock *icsk = inet_csk(sk);
3572 struct tcp_sock *tp = tcp_sk(sk);
3573 u32 prior_snd_una = tp->snd_una;
3574 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3575 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3576 u32 prior_in_flight;
3577 u32 prior_fackets;
3578 int prior_packets;
3579 int frto_cwnd = 0;
3580
3581
3582
3583
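	/* If the ack is older than previous acks it brings no new
	 * information and is handled separately below.
	 */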
3584 if (before(ack, prior_snd_una))
3585 goto old_ack;
3586
3587
3588
3589
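	/* If the ack covers data we have not sent yet, the segment is
	 * invalid and must be discarded (RFC 793).
	 */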
3590 if (after(ack, tp->snd_nxt))
3591 goto invalid_ack;
3592
3593 if (after(ack, prior_snd_una))
3594 flag |= FLAG_SND_UNA_ADVANCED;
3595
3596 if (sysctl_tcp_abc) {
3597 if (icsk->icsk_ca_state < TCP_CA_CWR)
3598 tp->bytes_acked += ack - prior_snd_una;
3599 else if (icsk->icsk_ca_state == TCP_CA_Loss)
3600
3601 tp->bytes_acked += min(ack - prior_snd_una,
3602 tp->mss_cache);
3603 }
3604
3605 prior_fackets = tp->fackets_out;
3606 prior_in_flight = tcp_packets_in_flight(tp);
3607
3608 if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
3609
3610
3611
3612
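		/* Fast path: the window is unchanged and snd_una simply
		 * advances, so no further checks are required.
		 */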
3613 tcp_update_wl(tp, ack_seq);
3614 tp->snd_una = ack;
3615 flag |= FLAG_WIN_UPDATE;
3616
3617 tcp_ca_event(sk, CA_EVENT_FAST_ACK);
3618
3619 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
3620 } else {
3621 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3622 flag |= FLAG_DATA;
3623 else
3624 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3625
3626 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3627
3628 if (TCP_SKB_CB(skb)->sacked)
3629 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3630
3631 if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
3632 flag |= FLAG_ECE;
3633
3634 tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
3635 }
3636
3637
3638
3639
3640 sk->sk_err_soft = 0;
3641 icsk->icsk_probes_out = 0;
3642 tp->rcv_tstamp = tcp_time_stamp;
3643 prior_packets = tp->packets_out;
3644 if (!prior_packets)
3645 goto no_queue;
3646
3647
3648 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
3649
3650 if (tp->frto_counter)
3651 frto_cwnd = tcp_process_frto(sk, flag);
3652
3653 if (before(tp->frto_highmark, tp->snd_una))
3654 tp->frto_highmark = 0;
3655
3656 if (tcp_ack_is_dubious(sk, flag)) {
3657
3658 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
3659 tcp_may_raise_cwnd(sk, flag))
3660 tcp_cong_avoid(sk, ack, prior_in_flight);
3661 tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
3662 flag);
3663 } else {
3664 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
3665 tcp_cong_avoid(sk, ack, prior_in_flight);
3666 }
3667
3668 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3669 dst_confirm(sk->sk_dst_cache);
3670
3671 return 1;
3672
3673no_queue:
3674
3675
3676
3677
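	/* Nothing is outstanding, but if data is queued for transmission the
	 * zero-window probe timer may need to be stopped or re-armed.
	 */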
3678 if (tcp_send_head(sk))
3679 tcp_ack_probe(sk);
3680 return 1;
3681
3682invalid_ack:
3683 SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3684 return -1;
3685
3686old_ack:
3687 if (TCP_SKB_CB(skb)->sacked) {
3688 tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3689 if (icsk->icsk_ca_state == TCP_CA_Open)
3690 tcp_try_keep_open(sk);
3691 }
3692
3693 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3694 return 0;
3695}
3696
3697
3698
3699
3700
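/* Parse TCP options from the header into @opt_rx.  @estab says whether the
 * connection is already established, which restricts which options are
 * accepted (MSS, window scale and SACK-permitted are honoured on SYNs only).
 */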
3701void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3702 int estab)
3703{
3704 unsigned char *ptr;
3705 struct tcphdr *th = tcp_hdr(skb);
3706 int length = (th->doff * 4) - sizeof(struct tcphdr);
3707
3708 ptr = (unsigned char *)(th + 1);
3709 opt_rx->saw_tstamp = 0;
3710
3711 while (length > 0) {
3712 int opcode = *ptr++;
3713 int opsize;
3714
3715 switch (opcode) {
3716 case TCPOPT_EOL:
3717 return;
3718 case TCPOPT_NOP:
3719 length--;
3720 continue;
3721 default:
3722 opsize = *ptr++;
3723 if (opsize < 2)
3724 return;
3725 if (opsize > length)
3726 return;
3727 switch (opcode) {
3728 case TCPOPT_MSS:
3729 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
3730 u16 in_mss = get_unaligned_be16(ptr);
3731 if (in_mss) {
3732 if (opt_rx->user_mss &&
3733 opt_rx->user_mss < in_mss)
3734 in_mss = opt_rx->user_mss;
3735 opt_rx->mss_clamp = in_mss;
3736 }
3737 }
3738 break;
3739 case TCPOPT_WINDOW:
3740 if (opsize == TCPOLEN_WINDOW && th->syn &&
3741 !estab && sysctl_tcp_window_scaling) {
3742 __u8 snd_wscale = *(__u8 *)ptr;
3743 opt_rx->wscale_ok = 1;
3744 if (snd_wscale > 14) {
3745 if (net_ratelimit())
3746 printk(KERN_INFO "tcp_parse_options: Illegal window "
3747 "scaling value %d >14 received.\n",
3748 snd_wscale);
3749 snd_wscale = 14;
3750 }
3751 opt_rx->snd_wscale = snd_wscale;
3752 }
3753 break;
3754 case TCPOPT_TIMESTAMP:
3755 if ((opsize == TCPOLEN_TIMESTAMP) &&
3756 ((estab && opt_rx->tstamp_ok) ||
3757 (!estab && sysctl_tcp_timestamps))) {
3758 opt_rx->saw_tstamp = 1;
3759 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3760 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
3761 }
3762 break;
3763 case TCPOPT_SACK_PERM:
3764 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3765 !estab && sysctl_tcp_sack) {
3766 opt_rx->sack_ok = 1;
3767 tcp_sack_reset(opt_rx);
3768 }
3769 break;
3770
3771 case TCPOPT_SACK:
3772 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3773 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3774 opt_rx->sack_ok) {
3775 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3776 }
3777 break;
3778#ifdef CONFIG_TCP_MD5SIG
3779 case TCPOPT_MD5SIG:
3780
3781
3782
3783
3784 break;
3785#endif
3786 }
3787
3788 ptr += opsize - 2;
3789 length -= opsize;
3790 }
3791 }
3792}
3793
3794static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
3795{
3796 __be32 *ptr = (__be32 *)(th + 1);
3797
3798 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
3799 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
3800 tp->rx_opt.saw_tstamp = 1;
3801 ++ptr;
3802 tp->rx_opt.rcv_tsval = ntohl(*ptr);
3803 ++ptr;
3804 tp->rx_opt.rcv_tsecr = ntohl(*ptr);
3805 return 1;
3806 }
3807 return 0;
3808}
3809
3810
3811
3812
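/* Fast options parsing for established sockets: handle the no-options and
 * aligned-timestamp-only layouts inline, and fall back to the full parser
 * for anything else.
 */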
3813static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
3814 struct tcp_sock *tp)
3815{
3816 if (th->doff == sizeof(struct tcphdr) >> 2) {
3817 tp->rx_opt.saw_tstamp = 0;
3818 return 0;
3819 } else if (tp->rx_opt.tstamp_ok &&
3820 th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
3821 if (tcp_parse_aligned_timestamp(tp, th))
3822 return 1;
3823 }
3824 tcp_parse_options(skb, &tp->rx_opt, 1);
3825 return 1;
3826}
3827
3828#ifdef CONFIG_TCP_MD5SIG
3829
3830
3831
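/* Scan the TCP option space for an MD5 signature option and return a pointer
 * to its value, or NULL if it is absent or malformed.
 */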
3832u8 *tcp_parse_md5sig_option(struct tcphdr *th)
3833{
3834 int length = (th->doff << 2) - sizeof (*th);
3835 u8 *ptr = (u8 *)(th + 1);
3836
3837
3838 if (length < TCPOLEN_MD5SIG)
3839 return NULL;
3840
3841 while (length > 0) {
3842 int opcode = *ptr++;
3843 int opsize;
3844
3845 switch (opcode) {
3846 case TCPOPT_EOL:
3847 return NULL;
3848 case TCPOPT_NOP:
3849 length--;
3850 continue;
3851 default:
3852 opsize = *ptr++;
3853 if (opsize < 2 || opsize > length)
3854 return NULL;
3855 if (opcode == TCPOPT_MD5SIG)
3856 return ptr;
3857 }
3858 ptr += opsize - 2;
3859 length -= opsize;
3860 }
3861 return NULL;
3862}
3863#endif
3864
3865static inline void tcp_store_ts_recent(struct tcp_sock *tp)
3866{
3867 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3868 tp->rx_opt.ts_recent_stamp = get_seconds();
3869}
3870
3871static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3872{
3873 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
3874
3875
3876
3877
3878
3879
3880
3881 if (tcp_paws_check(&tp->rx_opt, 0))
3882 tcp_store_ts_recent(tp);
3883 }
3884}
3885
3886
/* PAWS as specified is problematic for pure ACKs: an ACK carrying an old
 * timestamp may still be perfectly harmless.  This predicate recognizes a
 * "disordered" ACK that changes no critical state: it carries no data,
 * duplicates the current ack and sequence numbers, does not update the
 * window, and its timestamp is at most about one RTO older than the latest
 * one we have seen.  Such an ACK may be accepted by tcp_paws_discard() even
 * though the plain PAWS check fails.
 */
3909static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
3910{
3911 struct tcp_sock *tp = tcp_sk(sk);
3912 struct tcphdr *th = tcp_hdr(skb);
3913 u32 seq = TCP_SKB_CB(skb)->seq;
3914 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3915
3916 return (
3917 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
3918
3919
3920 ack == tp->snd_una &&
3921
3922
3923 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
3924
3925
3926 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
3927}
3928
3929static inline int tcp_paws_discard(const struct sock *sk,
3930 const struct sk_buff *skb)
3931{
3932 const struct tcp_sock *tp = tcp_sk(sk);
3933
3934 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
3935 !tcp_disordered_ack(sk, skb);
3936}
3937
/* Check the segment sequence numbers for validity.
 *
 * A segment is acceptable if it still overlaps the receive window after
 * truncation.  The lower edge is rcv_wup rather than rcv_nxt, so that
 * controls (an RST, for instance) sent by a peer that has not yet seen our
 * delayed ACK are still accepted.
 */
3951static inline int tcp_sequence(struct tcp_sock *tp, u32 seq, u32 end_seq)
3952{
3953 return !before(end_seq, tp->rcv_wup) &&
3954 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
3955}
3956
3957
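/* When we get a reset: map the connection state to an error code, wake up
 * the user and tear the connection down.
 */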
3958static void tcp_reset(struct sock *sk)
3959{
3960
3961 switch (sk->sk_state) {
3962 case TCP_SYN_SENT:
3963 sk->sk_err = ECONNREFUSED;
3964 break;
3965 case TCP_CLOSE_WAIT:
3966 sk->sk_err = EPIPE;
3967 break;
3968 case TCP_CLOSE:
3969 return;
3970 default:
3971 sk->sk_err = ECONNRESET;
3972 }
3973
3974 if (!sock_flag(sk, SOCK_DEAD))
3975 sk->sk_error_report(sk);
3976
3977 tcp_done(sk);
3978}
3979
3980
/* Process the FIN bit.  The FIN only takes effect once it is a valid part of
 * the sequence space, i.e. when there are no holes before it.
 *
 * ESTABLISHED moves to CLOSE-WAIT, FIN-WAIT-1 moves to CLOSING (simultaneous
 * close) and FIN-WAIT-2 moves to TIME-WAIT; in the remaining states the FIN
 * is either a retransmission or has already been accounted for.
 */
3994static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
3995{
3996 struct tcp_sock *tp = tcp_sk(sk);
3997
3998 inet_csk_schedule_ack(sk);
3999
4000 sk->sk_shutdown |= RCV_SHUTDOWN;
4001 sock_set_flag(sk, SOCK_DONE);
4002
4003 switch (sk->sk_state) {
4004 case TCP_SYN_RECV:
4005 case TCP_ESTABLISHED:
4006
4007 tcp_set_state(sk, TCP_CLOSE_WAIT);
4008 inet_csk(sk)->icsk_ack.pingpong = 1;
4009 break;
4010
4011 case TCP_CLOSE_WAIT:
4012 case TCP_CLOSING:
4013
4014
4015
4016 break;
4017 case TCP_LAST_ACK:
4018
4019 break;
4020
4021 case TCP_FIN_WAIT1:
4022
4023
4024
4025
4026 tcp_send_ack(sk);
4027 tcp_set_state(sk, TCP_CLOSING);
4028 break;
4029 case TCP_FIN_WAIT2:
4030
4031 tcp_send_ack(sk);
4032 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
4033 break;
4034 default:
4035
4036
4037
4038 printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
4039 __func__, sk->sk_state);
4040 break;
4041 }
4042
4043
4044
4045
4046 __skb_queue_purge(&tp->out_of_order_queue);
4047 if (tcp_is_sack(tp))
4048 tcp_sack_reset(&tp->rx_opt);
4049 sk_mem_reclaim(sk);
4050
4051 if (!sock_flag(sk, SOCK_DEAD)) {
4052 sk->sk_state_change(sk);
4053
4054
4055 if (sk->sk_shutdown == SHUTDOWN_MASK ||
4056 sk->sk_state == TCP_CLOSE)
4057 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
4058 else
4059 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
4060 }
4061}
4062
4063static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4064 u32 end_seq)
4065{
4066 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
4067 if (before(seq, sp->start_seq))
4068 sp->start_seq = seq;
4069 if (after(end_seq, sp->end_seq))
4070 sp->end_seq = end_seq;
4071 return 1;
4072 }
4073 return 0;
4074}
4075
4076static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4077{
4078 struct tcp_sock *tp = tcp_sk(sk);
4079
4080 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4081 int mib_idx;
4082
4083 if (before(seq, tp->rcv_nxt))
4084 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
4085 else
4086 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
4087
4088 NET_INC_STATS_BH(sock_net(sk), mib_idx);
4089
4090 tp->rx_opt.dsack = 1;
4091 tp->duplicate_sack[0].start_seq = seq;
4092 tp->duplicate_sack[0].end_seq = end_seq;
4093 }
4094}
4095
4096static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4097{
4098 struct tcp_sock *tp = tcp_sk(sk);
4099
4100 if (!tp->rx_opt.dsack)
4101 tcp_dsack_set(sk, seq, end_seq);
4102 else
4103 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
4104}
4105
4106static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
4107{
4108 struct tcp_sock *tp = tcp_sk(sk);
4109
4110 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4111 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4112 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4113 tcp_enter_quickack_mode(sk);
4114
4115 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4116 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4117
4118 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
4119 end_seq = tp->rcv_nxt;
4120 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
4121 }
4122 }
4123
4124 tcp_send_ack(sk);
4125}
4126
4127
4128
4129
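/* See whether later SACK blocks can now be folded into the first one, and
 * compact the block array if so.
 */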
4130static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4131{
4132 int this_sack;
4133 struct tcp_sack_block *sp = &tp->selective_acks[0];
4134 struct tcp_sack_block *swalk = sp + 1;
4135
4136
4137
4138
4139 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
4140 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
4141 int i;
4142
4143
4144
4145
4146 tp->rx_opt.num_sacks--;
4147 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4148 sp[i] = sp[i + 1];
4149 continue;
4150 }
4151 this_sack++, swalk++;
4152 }
4153}
4154
4155static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4156{
4157 struct tcp_sock *tp = tcp_sk(sk);
4158 struct tcp_sack_block *sp = &tp->selective_acks[0];
4159 int cur_sacks = tp->rx_opt.num_sacks;
4160 int this_sack;
4161
4162 if (!cur_sacks)
4163 goto new_sack;
4164
4165 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
4166 if (tcp_sack_extend(sp, seq, end_seq)) {
4167
4168 for (; this_sack > 0; this_sack--, sp--)
4169 swap(*sp, *(sp - 1));
4170 if (cur_sacks > 1)
4171 tcp_sack_maybe_coalesce(tp);
4172 return;
4173 }
4174 }
4175
4176
4177
4178
4179
4180
4181
4182 if (this_sack >= TCP_NUM_SACKS) {
4183 this_sack--;
4184 tp->rx_opt.num_sacks--;
4185 sp--;
4186 }
4187 for (; this_sack > 0; this_sack--, sp--)
4188 *sp = *(sp - 1);
4189
4190new_sack:
4191
4192 sp->start_seq = seq;
4193 sp->end_seq = end_seq;
4194 tp->rx_opt.num_sacks++;
4195}
4196
4197
4198
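/* rcv_nxt advanced: drop SACK blocks now covered by the cumulative ACK; when
 * the out-of-order queue drains completely, all blocks are dropped.
 */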
4199static void tcp_sack_remove(struct tcp_sock *tp)
4200{
4201 struct tcp_sack_block *sp = &tp->selective_acks[0];
4202 int num_sacks = tp->rx_opt.num_sacks;
4203 int this_sack;
4204
4205
4206 if (skb_queue_empty(&tp->out_of_order_queue)) {
4207 tp->rx_opt.num_sacks = 0;
4208 return;
4209 }
4210
4211 for (this_sack = 0; this_sack < num_sacks;) {
4212
4213 if (!before(tp->rcv_nxt, sp->start_seq)) {
4214 int i;
4215
4216
4217 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4218
4219
4220 for (i = this_sack + 1; i < num_sacks; i++)
4221 tp->selective_acks[i - 1] = tp->selective_acks[i];
4222 num_sacks--;
4223 continue;
4224 }
4225 this_sack++;
4226 sp++;
4227 }
4228 tp->rx_opt.num_sacks = num_sacks;
4229}
4230
4231
4232
4233
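/* Move whatever we can from the out-of-order queue to the receive queue now
 * that rcv_nxt has advanced, generating D-SACK info for duplicate data.
 */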
4234static void tcp_ofo_queue(struct sock *sk)
4235{
4236 struct tcp_sock *tp = tcp_sk(sk);
4237 __u32 dsack_high = tp->rcv_nxt;
4238 struct sk_buff *skb;
4239
4240 while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
4241 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
4242 break;
4243
4244 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
4245 __u32 dsack = dsack_high;
4246 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
4247 dsack_high = TCP_SKB_CB(skb)->end_seq;
4248 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
4249 }
4250
4251 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4252 SOCK_DEBUG(sk, "ofo packet was already received\n");
4253 __skb_unlink(skb, &tp->out_of_order_queue);
4254 __kfree_skb(skb);
4255 continue;
4256 }
4257 SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
4258 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4259 TCP_SKB_CB(skb)->end_seq);
4260
4261 __skb_unlink(skb, &tp->out_of_order_queue);
4262 __skb_queue_tail(&sk->sk_receive_queue, skb);
4263 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4264 if (tcp_hdr(skb)->fin)
4265 tcp_fin(skb, sk, tcp_hdr(skb));
4266 }
4267}
4268
4269static int tcp_prune_ofo_queue(struct sock *sk);
4270static int tcp_prune_queue(struct sock *sk);
4271
4272static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
4273{
4274 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4275 !sk_rmem_schedule(sk, size)) {
4276
4277 if (tcp_prune_queue(sk) < 0)
4278 return -1;
4279
4280 if (!sk_rmem_schedule(sk, size)) {
4281 if (!tcp_prune_ofo_queue(sk))
4282 return -1;
4283
4284 if (!sk_rmem_schedule(sk, size))
4285 return -1;
4286 }
4287 }
4288 return 0;
4289}
4290
4291static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4292{
4293 struct tcphdr *th = tcp_hdr(skb);
4294 struct tcp_sock *tp = tcp_sk(sk);
4295 int eaten = -1;
4296
4297 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
4298 goto drop;
4299
4300 __skb_pull(skb, th->doff * 4);
4301
4302 TCP_ECN_accept_cwr(tp, skb);
4303
4304 tp->rx_opt.dsack = 0;
4305
4306
4307
4308
4309
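	/* Queue data for delivery to the user.  In-sequence segments go
	 * straight to the receive queue; out-of-sequence segments go to the
	 * out_of_order_queue.
	 */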
4310 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
4311 if (tcp_receive_window(tp) == 0)
4312 goto out_of_window;
4313
4314
4315 if (tp->ucopy.task == current &&
4316 tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
4317 sock_owned_by_user(sk) && !tp->urg_data) {
4318 int chunk = min_t(unsigned int, skb->len,
4319 tp->ucopy.len);
4320
4321 __set_current_state(TASK_RUNNING);
4322
4323 local_bh_enable();
4324 if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
4325 tp->ucopy.len -= chunk;
4326 tp->copied_seq += chunk;
4327 eaten = (chunk == skb->len && !th->fin);
4328 tcp_rcv_space_adjust(sk);
4329 }
4330 local_bh_disable();
4331 }
4332
4333 if (eaten <= 0) {
4334queue_and_out:
4335 if (eaten < 0 &&
4336 tcp_try_rmem_schedule(sk, skb->truesize))
4337 goto drop;
4338
4339 skb_set_owner_r(skb, sk);
4340 __skb_queue_tail(&sk->sk_receive_queue, skb);
4341 }
4342 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4343 if (skb->len)
4344 tcp_event_data_recv(sk, skb);
4345 if (th->fin)
4346 tcp_fin(skb, sk, th);
4347
4348 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4349 tcp_ofo_queue(sk);
4350
4351
4352
4353
4354 if (skb_queue_empty(&tp->out_of_order_queue))
4355 inet_csk(sk)->icsk_ack.pingpong = 0;
4356 }
4357
4358 if (tp->rx_opt.num_sacks)
4359 tcp_sack_remove(tp);
4360
4361 tcp_fast_path_check(sk);
4362
4363 if (eaten > 0)
4364 __kfree_skb(skb);
4365 else if (!sock_flag(sk, SOCK_DEAD))
4366 sk->sk_data_ready(sk, 0);
4367 return;
4368 }
4369
4370 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4371
4372 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4373 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4374
4375out_of_window:
4376 tcp_enter_quickack_mode(sk);
4377 inet_csk_schedule_ack(sk);
4378drop:
4379 __kfree_skb(skb);
4380 return;
4381 }
4382
4383
4384 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
4385 goto out_of_window;
4386
4387 tcp_enter_quickack_mode(sk);
4388
4389 if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4390
4391 SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
4392 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4393 TCP_SKB_CB(skb)->end_seq);
4394
4395 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
4396
4397
4398
4399
4400 if (!tcp_receive_window(tp))
4401 goto out_of_window;
4402 goto queue_and_out;
4403 }
4404
4405 TCP_ECN_check_ce(tp, skb);
4406
4407 if (tcp_try_rmem_schedule(sk, skb->truesize))
4408 goto drop;
4409
4410
4411 tp->pred_flags = 0;
4412 inet_csk_schedule_ack(sk);
4413
4414 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
4415 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4416
4417 skb_set_owner_r(skb, sk);
4418
4419 if (!skb_peek(&tp->out_of_order_queue)) {
4420
4421 if (tcp_is_sack(tp)) {
4422 tp->rx_opt.num_sacks = 1;
4423 tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
4424 tp->selective_acks[0].end_seq =
4425 TCP_SKB_CB(skb)->end_seq;
4426 }
4427 __skb_queue_head(&tp->out_of_order_queue, skb);
4428 } else {
4429 struct sk_buff *skb1 = tp->out_of_order_queue.prev;
4430 u32 seq = TCP_SKB_CB(skb)->seq;
4431 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4432
4433 if (seq == TCP_SKB_CB(skb1)->end_seq) {
4434 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4435
4436 if (!tp->rx_opt.num_sacks ||
4437 tp->selective_acks[0].end_seq != seq)
4438 goto add_sack;
4439
4440
4441 tp->selective_acks[0].end_seq = end_seq;
4442 return;
4443 }
4444
4445
4446 do {
4447 if (!after(TCP_SKB_CB(skb1)->seq, seq))
4448 break;
4449 } while ((skb1 = skb1->prev) !=
4450 (struct sk_buff *)&tp->out_of_order_queue);
4451
4452
4453 if (skb1 != (struct sk_buff *)&tp->out_of_order_queue &&
4454 before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4455 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4456
4457 __kfree_skb(skb);
4458 tcp_dsack_set(sk, seq, end_seq);
4459 goto add_sack;
4460 }
4461 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
4462