#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/kernel.h>
#include <net/dst.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <net/netdma.h>

int sysctl_tcp_timestamps __read_mostly = 1;
int sysctl_tcp_window_scaling __read_mostly = 1;
int sysctl_tcp_sack __read_mostly = 1;
int sysctl_tcp_fack __read_mostly = 1;
int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
EXPORT_SYMBOL(sysctl_tcp_reordering);
int sysctl_tcp_ecn __read_mostly = 2;
EXPORT_SYMBOL(sysctl_tcp_ecn);
int sysctl_tcp_dsack __read_mostly = 1;
int sysctl_tcp_app_win __read_mostly = 31;
int sysctl_tcp_adv_win_scale __read_mostly = 2;
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);

int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
int sysctl_tcp_frto __read_mostly = 2;
int sysctl_tcp_frto_response __read_mostly;
int sysctl_tcp_nometrics_save __read_mostly;

int sysctl_tcp_thin_dupack __read_mostly;

int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
int sysctl_tcp_abc __read_mostly;

#define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
#define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
#define FLAG_DATA_ACKED		0x04 /* This ACK acknowledged new data.		*/
#define FLAG_RETRANS_DATA_ACKED	0x08 /* "" "" some of which was retransmitted.	*/
#define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN.		*/
#define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
#define FLAG_ECE		0x40 /* ECE in this ACK				*/
#define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update. */
#define FLAG_ONLY_ORIG_SACKED	0x200 /* SACKs only non-rexmit sent before RTO	*/
#define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
#define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info	*/
#define FLAG_NONHEAD_RETRANS_ACKED	0x1000 /* Non-head rexmitted data was ACKed */
#define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq	*/

#define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE)
#define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)
#define FLAG_ANY_PROGRESS	(FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)

#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))

/* Adapt the MSS value used to make delayed ack decision to the
 * real world.
 */
static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const unsigned int lss = icsk->icsk_ack.last_seg_size;
	unsigned int len;

	icsk->icsk_ack.last_seg_size = 0;

	/* skb->len may jitter because of SACKs, even if peer
	 * sends good full-sized frames.
	 */
	len = skb_shinfo(skb)->gso_size ? : skb->len;
	if (len >= icsk->icsk_ack.rcv_mss) {
		icsk->icsk_ack.rcv_mss = len;
	} else {
		/* Otherwise, we make more careful check taking into account,
		 * that SACKs block is variable.
		 *
		 * "len" is invariant segment length, including TCP header.
		 */
		len += skb->data - skb_transport_header(skb);
		if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
		    /* If PSH is not set, packet should be full sized,
		     * provided peer TCP is not badly broken.  This
		     * observation allows handling of super-low mtu
		     * links fairly.
		     */
		    (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
		     !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
			/* Subtract also invariant (if peer is RFC compliant)
			 * TCP header plus fixed timestamp option length.
			 * Resulting "len" is MSS free of SACK jitter.
			 */
			len -= tcp_sk(sk)->tcp_header_len;
			icsk->icsk_ack.last_seg_size = len;
			if (len == lss) {
				icsk->icsk_ack.rcv_mss = len;
				return;
			}
		}
		if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
		icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
	}
}

static void tcp_incr_quickack(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);

	if (quickacks == 0)
		quickacks = 2;
	if (quickacks > icsk->icsk_ack.quick)
		icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
}

static void tcp_enter_quickack_mode(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	tcp_incr_quickack(sk);
	icsk->icsk_ack.pingpong = 0;
	icsk->icsk_ack.ato = TCP_ATO_MIN;
}

/* Send ACKs quickly, if the "quick" count is not exhausted
 * and the session is not interactive.
 */
static inline int tcp_in_quickack_mode(const struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
}

static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
{
	if (tp->ecn_flags & TCP_ECN_OK)
		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
}

static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
{
	if (tcp_hdr(skb)->cwr)
		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}

static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
{
	tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}

static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
{
	if (!(tp->ecn_flags & TCP_ECN_OK))
		return;

	switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
	case INET_ECN_NOT_ECT:
		/* If ECT is not set on this segment but we have already
		 * seen ECT on a previous segment, it is probably a
		 * retransmit: ACK it quickly.
		 */
		if (tp->ecn_flags & TCP_ECN_SEEN)
			tcp_enter_quickack_mode((struct sock *)tp);
		break;
	case INET_ECN_CE:
		tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
		/* fall through */
	default:
		tp->ecn_flags |= TCP_ECN_SEEN;
	}
}

static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
{
	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
		tp->ecn_flags &= ~TCP_ECN_OK;
}

static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
{
	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
		tp->ecn_flags &= ~TCP_ECN_OK;
}

static inline int TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
{
	if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
		return 1;
	return 0;
}

/* Buffer size and advertised window tuning.
 *
 * 1. Tuning sk->sk_sndbuf, when connection enters established state.
 */
static void tcp_fixup_sndbuf(struct sock *sk)
{
	int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);

	sndmem *= TCP_INIT_CWND;
	if (sk->sk_sndbuf < sndmem)
		sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
}

/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
 *
 * rcv_ssthresh is a slow-start threshold for the advertised receive
 * window: it is raised per received skb only while the skb carries
 * enough payload to justify its true memory cost.
 */
static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	int truesize = tcp_win_from_space(skb->truesize) >> 1;
	int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;

	while (tp->rcv_ssthresh <= window) {
		if (truesize <= skb->len)
			return 2 * inet_csk(sk)->icsk_ack.rcv_mss;

		truesize >>= 1;
		window >>= 1;
	}
	return 0;
}
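
/* Explanatory note (added commentary, not part of the original source):
 * __tcp_grow_window() decides whether rcv_ssthresh may keep growing given
 * the payload/overhead ratio of the skb just received.  It halves the
 * skb's usable space and the maximal window together, so growth is
 * allowed roughly when
 *
 *	tcp_win_from_space(skb->truesize) * rcv_ssthresh
 *		<= skb->len * tcp_win_from_space(sysctl_tcp_rmem[2])
 *
 * Illustrative numbers only: if tcp_win_from_space() maps tcp_rmem[2] to
 * 4 MB and skb->truesize to 4 KB, then with rcv_ssthresh = 1 MB and a
 * 1448-byte payload the loop succeeds after two halvings (1 KB <= 1448),
 * so the window may grow by 2 * rcv_mss; a mostly-overhead skb fails the
 * test and returns 0.
 */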

static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Check #1 */
	if (tp->rcv_ssthresh < tp->window_clamp &&
	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
	    !sk_under_memory_pressure(sk)) {
		int incr;

		/* Check #2. Increase window, if skb with such overhead
		 * will fit to rcvbuf in future.
		 */
		if (tcp_win_from_space(skb->truesize) <= skb->len)
			incr = 2 * tp->advmss;
		else
			incr = __tcp_grow_window(sk, skb);

		if (incr) {
			tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
					       tp->window_clamp);
			inet_csk(sk)->icsk_ack.quick |= 1;
		}
	}
}

/* 3. Tuning rcvbuf, when connection enters established state. */
static void tcp_fixup_rcvbuf(struct sock *sk)
{
	u32 mss = tcp_sk(sk)->advmss;
	u32 icwnd = TCP_DEFAULT_INIT_RCVWND;
	int rcvmem;

	/* Limit to 10 segments if mss <= 1460,
	 * or 14600/mss segments, with a minimum of two segments.
	 */
	if (mss > 1460)
		icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);

	rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER);
	while (tcp_win_from_space(rcvmem) < mss)
		rcvmem += 128;

	rcvmem *= icwnd;

	if (sk->sk_rcvbuf < rcvmem)
		sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
}

/* 4. Try to fixup all. It is made immediately after connection enters
 *    established state.
 */
static void tcp_init_buffer_space(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int maxwin;

	if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
		tcp_fixup_rcvbuf(sk);
	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
		tcp_fixup_sndbuf(sk);

	tp->rcvq_space.space = tp->rcv_wnd;

	maxwin = tcp_full_space(sk);

	if (tp->window_clamp >= maxwin) {
		tp->window_clamp = maxwin;

		if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
			tp->window_clamp = max(maxwin -
					       (maxwin >> sysctl_tcp_app_win),
					       4 * tp->advmss);
	}

	/* Force reservation of one segment. */
	if (sysctl_tcp_app_win &&
	    tp->window_clamp > 2 * tp->advmss &&
	    tp->window_clamp + tp->advmss > maxwin)
		tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);

	tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
	tp->snd_cwnd_stamp = tcp_time_stamp;
}

/* 5. Recalculate window clamp after socket hit its memory bounds. */
static void tcp_clamp_window(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_ack.quick = 0;

	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
	    !sk_under_memory_pressure(sk) &&
	    sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
				    sysctl_tcp_rmem[2]);
	}
	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
		tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
}

/* Initialize RCV_MSS value.
 * RCV_MSS is our guess about the MSS used by the peer; we have no direct
 * information about it, and it is better to underestimate than to
 * overestimate: overestimating makes us ACK less frequently than needed,
 * while underestimates are easy to detect and fix in tcp_measure_rcv_mss().
 */
void tcp_initialize_rcv_mss(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);

	hint = min(hint, tp->rcv_wnd / 2);
	hint = min(hint, TCP_MSS_DEFAULT);
	hint = max(hint, TCP_MIN_MSS);

	inet_csk(sk)->icsk_ack.rcv_mss = hint;
}
EXPORT_SYMBOL(tcp_initialize_rcv_mss);

/* Receiver "autotuning" code.
 *
 * The algorithm for RTT estimation w/o timestamps is based on
 * Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of Los Alamos
 * National Laboratory.
 */
static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
{
	u32 new_sample = tp->rcv_rtt_est.rtt;
	long m = sample;

	if (m == 0)
		m = 1;

	if (new_sample != 0) {
		/* If we sample in larger samples in the non-timestamp
		 * case, we could grossly overestimate the RTT especially
		 * with chatty applications or bulk transfers stalled on
		 * filesystem I/O.
		 *
		 * Since we only take one sample per window in that case,
		 * we do not smooth the estimate on the win_dep path.
		 */
		if (!win_dep) {
			m -= (new_sample >> 3);
			new_sample += m;
		} else if (m < new_sample)
			new_sample = m << 3;
	} else {
		/* No previous measure. */
		new_sample = m << 3;
	}

	if (tp->rcv_rtt_est.rtt != new_sample)
		tp->rcv_rtt_est.rtt = new_sample;
}
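
/* Explanatory note (added commentary, not part of the original source):
 * rcv_rtt_est.rtt is kept left-shifted by 3, mirroring how srtt is stored
 * on the sender side.  For win_dep == 0 (per-segment timestamp samples)
 * the update above is the usual exponentially weighted moving average
 *
 *	rtt <- rtt + (m - rtt/8)   i.e.   RTT <- 7/8 * RTT + 1/8 * m
 *
 * For win_dep == 1 (at most one sample per window when timestamps are
 * unavailable) no averaging is done: the raw sample, scaled by 8,
 * overwrites the estimate whenever it is smaller than the current
 * stored value.
 */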

static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
{
	if (tp->rcv_rtt_est.time == 0)
		goto new_measure;
	if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
		return;
	tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_est.time, 1);

new_measure:
	tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
	tp->rcv_rtt_est.time = tcp_time_stamp;
}

static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
					  const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	if (tp->rx_opt.rcv_tsecr &&
	    (TCP_SKB_CB(skb)->end_seq -
	     TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
		tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
}
508
509
510
511
512
513void tcp_rcv_space_adjust(struct sock *sk)
514{
515 struct tcp_sock *tp = tcp_sk(sk);
516 int time;
517 int space;
518
519 if (tp->rcvq_space.time == 0)
520 goto new_measure;
521
522 time = tcp_time_stamp - tp->rcvq_space.time;
523 if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
524 return;
525
526 space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
527
528 space = max(tp->rcvq_space.space, space);
529
530 if (tp->rcvq_space.space != space) {
531 int rcvmem;
532
533 tp->rcvq_space.space = space;
534
535 if (sysctl_tcp_moderate_rcvbuf &&
536 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
537 int new_clamp = space;
538
539
540
541
542
543 space /= tp->advmss;
544 if (!space)
545 space = 1;
546 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
547 while (tcp_win_from_space(rcvmem) < tp->advmss)
548 rcvmem += 128;
549 space *= rcvmem;
550 space = min(space, sysctl_tcp_rmem[2]);
551 if (space > sk->sk_rcvbuf) {
552 sk->sk_rcvbuf = space;
553
554
555 tp->window_clamp = new_clamp;
556 }
557 }
558 }
559
560new_measure:
561 tp->rcvq_space.seq = tp->copied_seq;
562 tp->rcvq_space.time = tcp_time_stamp;
563}
564
565
566
567
568
569
570
571
572
573
574
575static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
576{
577 struct tcp_sock *tp = tcp_sk(sk);
578 struct inet_connection_sock *icsk = inet_csk(sk);
579 u32 now;
580
581 inet_csk_schedule_ack(sk);
582
583 tcp_measure_rcv_mss(sk, skb);
584
585 tcp_rcv_rtt_measure(tp);
586
587 now = tcp_time_stamp;
588
589 if (!icsk->icsk_ack.ato) {
590
591
592
593 tcp_incr_quickack(sk);
594 icsk->icsk_ack.ato = TCP_ATO_MIN;
595 } else {
596 int m = now - icsk->icsk_ack.lrcvtime;
597
598 if (m <= TCP_ATO_MIN / 2) {
599
600 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
601 } else if (m < icsk->icsk_ack.ato) {
602 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
603 if (icsk->icsk_ack.ato > icsk->icsk_rto)
604 icsk->icsk_ack.ato = icsk->icsk_rto;
605 } else if (m > icsk->icsk_rto) {
606
607
608
609 tcp_incr_quickack(sk);
610 sk_mem_reclaim(sk);
611 }
612 }
613 icsk->icsk_ack.lrcvtime = now;
614
615 TCP_ECN_check_ce(tp, skb);
616
617 if (skb->len >= 128)
618 tcp_grow_window(sk, skb);
619}

/* Called to compute a smoothed rtt estimate. The data fed to this
 * routine either comes from timestamps, or from segments that were
 * known _not_ to have been retransmitted [see Karn/Partridge
 * Proceedings SIGCOMM 87].  The algorithm is from the SIGCOMM 88
 * piece by Van Jacobson.
 */
static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
{
	struct tcp_sock *tp = tcp_sk(sk);
	long m = mrtt;	/* RTT */

	/* srtt is stored scaled by 8 and mdev scaled by 4, so the shifts
	 * below implement 1/8 and 1/4 gain EWMAs on the measurement m
	 * (see the note following this function).
	 */
	if (m == 0)
		m = 1;
	if (tp->srtt != 0) {
		m -= (tp->srtt >> 3);	/* m is now error in rtt est */
		tp->srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
		if (m < 0) {
			m = -m;		/* m is now abs(error) */
			m -= (tp->mdev >> 2);   /* similar update on mdev */
			/* Similar to one of the Eifel findings: Eifel
			 * blocks mdev updates when rtt decreases.  Here a
			 * finer gain is used instead, which still prevents
			 * rto growth but also limits overly fast rto
			 * decreases.
			 */
			if (m > 0)
				m >>= 3;
		} else {
			m -= (tp->mdev >> 2);   /* similar update on mdev */
		}
		tp->mdev += m;		/* mdev = 3/4 mdev + 1/4 new */
		if (tp->mdev > tp->mdev_max) {
			tp->mdev_max = tp->mdev;
			if (tp->mdev_max > tp->rttvar)
				tp->rttvar = tp->mdev_max;
		}
		if (after(tp->snd_una, tp->rtt_seq)) {
			if (tp->mdev_max < tp->rttvar)
				tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2;
			tp->rtt_seq = tp->snd_nxt;
			tp->mdev_max = tcp_rto_min(sk);
		}
	} else {
		/* no previous measure. */
		tp->srtt = m << 3;	/* take the measured time to be rtt */
		tp->mdev = m << 1;	/* make sure rto = 3*rtt */
		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
		tp->rtt_seq = tp->snd_nxt;
	}
}
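
/* Explanatory note (added commentary, not part of the original source):
 * with srtt stored as 8*SRTT and mdev as 4*MDEV, the code above is the
 * classic Van Jacobson / RFC 6298 estimator pair
 *
 *	SRTT <- 7/8 * SRTT + 1/8 * m
 *	MDEV <- 3/4 * MDEV + 1/4 * |m - SRTT|
 *
 * with one Linux-specific refinement: when the sample comes in below the
 * current SRTT, any resulting increase of MDEV is additionally scaled
 * down by 1/8, so a burst of unusually fast ACKs drives rttvar (and
 * hence the RTO) up only slowly.  rttvar itself follows the maximum mdev
 * seen over about one round trip (mdev_max) and decays toward it by 1/4
 * once per window of acknowledged data.
 */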

/* Calculate rto without backoff.  This is the second half of Van Jacobson's
 * routine referred to above.
 */
static inline void tcp_set_rto(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	/* Set the retransmit timer from the current srtt/rttvar estimate. */
	inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);

	/* NOTE: clamping at TCP_RTO_MIN is not required here; the
	 * estimator above already keeps rttvar at or above the rto_min
	 * floor.  The upper bound is applied by tcp_bound_rto().
	 */
	tcp_bound_rto(sk);
}
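
/* Explanatory note (added commentary, not part of the original source):
 * __tcp_set_rto() (include/net/tcp.h) computes (tp->srtt >> 3) + tp->rttvar,
 * i.e. SRTT plus rttvar, where rttvar carries 4*MDEV bounded below by the
 * rto_min floor.  That matches the RFC 6298 rule
 *
 *	RTO = SRTT + max(G, 4 * RTTVAR)
 *
 * with the granularity/minimum term folded into rttvar, and
 * tcp_bound_rto() then caps the result at TCP_RTO_MAX.
 */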
722
723
724
725
726
727void tcp_update_metrics(struct sock *sk)
728{
729 struct tcp_sock *tp = tcp_sk(sk);
730 struct dst_entry *dst = __sk_dst_get(sk);
731
732 if (sysctl_tcp_nometrics_save)
733 return;
734
735 dst_confirm(dst);
736
737 if (dst && (dst->flags & DST_HOST)) {
738 const struct inet_connection_sock *icsk = inet_csk(sk);
739 int m;
740 unsigned long rtt;
741
742 if (icsk->icsk_backoff || !tp->srtt) {
743
744
745
746
747 if (!(dst_metric_locked(dst, RTAX_RTT)))
748 dst_metric_set(dst, RTAX_RTT, 0);
749 return;
750 }
751
752 rtt = dst_metric_rtt(dst, RTAX_RTT);
753 m = rtt - tp->srtt;
754
755
756
757
758
759 if (!(dst_metric_locked(dst, RTAX_RTT))) {
760 if (m <= 0)
761 set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt);
762 else
763 set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3));
764 }
765
766 if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
767 unsigned long var;
768 if (m < 0)
769 m = -m;
770
771
772 m >>= 1;
773 if (m < tp->mdev)
774 m = tp->mdev;
775
776 var = dst_metric_rtt(dst, RTAX_RTTVAR);
777 if (m >= var)
778 var = m;
779 else
780 var -= (var - m) >> 2;
781
782 set_dst_metric_rtt(dst, RTAX_RTTVAR, var);
783 }
784
785 if (tcp_in_initial_slowstart(tp)) {
786
787 if (dst_metric(dst, RTAX_SSTHRESH) &&
788 !dst_metric_locked(dst, RTAX_SSTHRESH) &&
789 (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH))
790 dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1);
791 if (!dst_metric_locked(dst, RTAX_CWND) &&
792 tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
793 dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd);
794 } else if (tp->snd_cwnd > tp->snd_ssthresh &&
795 icsk->icsk_ca_state == TCP_CA_Open) {
796
797 if (!dst_metric_locked(dst, RTAX_SSTHRESH))
798 dst_metric_set(dst, RTAX_SSTHRESH,
799 max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
800 if (!dst_metric_locked(dst, RTAX_CWND))
801 dst_metric_set(dst, RTAX_CWND,
802 (dst_metric(dst, RTAX_CWND) +
803 tp->snd_cwnd) >> 1);
804 } else {
805
806
807
808 if (!dst_metric_locked(dst, RTAX_CWND))
809 dst_metric_set(dst, RTAX_CWND,
810 (dst_metric(dst, RTAX_CWND) +
811 tp->snd_ssthresh) >> 1);
812 if (dst_metric(dst, RTAX_SSTHRESH) &&
813 !dst_metric_locked(dst, RTAX_SSTHRESH) &&
814 tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH))
815 dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh);
816 }
817
818 if (!dst_metric_locked(dst, RTAX_REORDERING)) {
819 if (dst_metric(dst, RTAX_REORDERING) < tp->reordering &&
820 tp->reordering != sysctl_tcp_reordering)
821 dst_metric_set(dst, RTAX_REORDERING, tp->reordering);
822 }
823 }
824}
825
826__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
827{
828 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
829
830 if (!cwnd)
831 cwnd = TCP_INIT_CWND;
832 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
833}
834
835
836void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
837{
838 struct tcp_sock *tp = tcp_sk(sk);
839 const struct inet_connection_sock *icsk = inet_csk(sk);
840
841 tp->prior_ssthresh = 0;
842 tp->bytes_acked = 0;
843 if (icsk->icsk_ca_state < TCP_CA_CWR) {
844 tp->undo_marker = 0;
845 if (set_ssthresh)
846 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
847 tp->snd_cwnd = min(tp->snd_cwnd,
848 tcp_packets_in_flight(tp) + 1U);
849 tp->snd_cwnd_cnt = 0;
850 tp->high_seq = tp->snd_nxt;
851 tp->snd_cwnd_stamp = tcp_time_stamp;
852 TCP_ECN_queue_cwr(tp);
853
854 tcp_set_ca_state(sk, TCP_CA_CWR);
855 }
856}
857
858
859
860
861
862static void tcp_disable_fack(struct tcp_sock *tp)
863{
864
865 if (tcp_is_fack(tp))
866 tp->lost_skb_hint = NULL;
867 tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
868}
869
870
871static void tcp_dsack_seen(struct tcp_sock *tp)
872{
873 tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
874}
875
876
877
878static void tcp_init_metrics(struct sock *sk)
879{
880 struct tcp_sock *tp = tcp_sk(sk);
881 struct dst_entry *dst = __sk_dst_get(sk);
882
883 if (dst == NULL)
884 goto reset;
885
886 dst_confirm(dst);
887
888 if (dst_metric_locked(dst, RTAX_CWND))
889 tp->snd_cwnd_clamp = dst_metric(dst, RTAX_CWND);
890 if (dst_metric(dst, RTAX_SSTHRESH)) {
891 tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH);
892 if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
893 tp->snd_ssthresh = tp->snd_cwnd_clamp;
894 } else {
895
896
897
898 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
899 }
900 if (dst_metric(dst, RTAX_REORDERING) &&
901 tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
902 tcp_disable_fack(tp);
903 tp->reordering = dst_metric(dst, RTAX_REORDERING);
904 }
905
906 if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0)
907 goto reset;
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923 if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) {
924 tp->srtt = dst_metric_rtt(dst, RTAX_RTT);
925 tp->rtt_seq = tp->snd_nxt;
926 }
927 if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) {
928 tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR);
929 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
930 }
931 tcp_set_rto(sk);
932reset:
933 if (tp->srtt == 0) {
934
935
936
937
938
939
940 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK;
941 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
942 }
943
944
945
946
947
948 if (tp->total_retrans > 1)
949 tp->snd_cwnd = 1;
950 else
951 tp->snd_cwnd = tcp_init_cwnd(tp, dst);
952 tp->snd_cwnd_stamp = tcp_time_stamp;
953}
954
955static void tcp_update_reordering(struct sock *sk, const int metric,
956 const int ts)
957{
958 struct tcp_sock *tp = tcp_sk(sk);
959 if (metric > tp->reordering) {
960 int mib_idx;
961
962 tp->reordering = min(TCP_MAX_REORDERING, metric);
963
964
965 if (ts)
966 mib_idx = LINUX_MIB_TCPTSREORDER;
967 else if (tcp_is_reno(tp))
968 mib_idx = LINUX_MIB_TCPRENOREORDER;
969 else if (tcp_is_fack(tp))
970 mib_idx = LINUX_MIB_TCPFACKREORDER;
971 else
972 mib_idx = LINUX_MIB_TCPSACKREORDER;
973
974 NET_INC_STATS_BH(sock_net(sk), mib_idx);
975#if FASTRETRANS_DEBUG > 1
976 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
977 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
978 tp->reordering,
979 tp->fackets_out,
980 tp->sacked_out,
981 tp->undo_marker ? tp->undo_retrans : 0);
982#endif
983 tcp_disable_fack(tp);
984 }
985}
986
987
988static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
989{
990 if ((tp->retransmit_skb_hint == NULL) ||
991 before(TCP_SKB_CB(skb)->seq,
992 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
993 tp->retransmit_skb_hint = skb;
994
995 if (!tp->lost_out ||
996 after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high))
997 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
998}
999
1000static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
1001{
1002 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
1003 tcp_verify_retransmit_hint(tp, skb);
1004
1005 tp->lost_out += tcp_skb_pcount(skb);
1006 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1007 }
1008}
1009
1010static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
1011 struct sk_buff *skb)
1012{
1013 tcp_verify_retransmit_hint(tp, skb);
1014
1015 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
1016 tp->lost_out += tcp_skb_pcount(skb);
1017 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1018 }
1019}
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
1116 u32 start_seq, u32 end_seq)
1117{
1118
1119 if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
1120 return 0;
1121
1122
1123 if (!before(start_seq, tp->snd_nxt))
1124 return 0;
1125
1126
1127
1128
1129 if (after(start_seq, tp->snd_una))
1130 return 1;
1131
1132 if (!is_dsack || !tp->undo_marker)
1133 return 0;
1134
1135
1136 if (after(end_seq, tp->snd_una))
1137 return 0;
1138
1139 if (!before(start_seq, tp->undo_marker))
1140 return 1;
1141
1142
1143 if (!after(end_seq, tp->undo_marker))
1144 return 0;
1145
1146
1147
1148
1149 return !before(start_seq, end_seq - tp->max_window);
1150}
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161static void tcp_mark_lost_retrans(struct sock *sk)
1162{
1163 const struct inet_connection_sock *icsk = inet_csk(sk);
1164 struct tcp_sock *tp = tcp_sk(sk);
1165 struct sk_buff *skb;
1166 int cnt = 0;
1167 u32 new_low_seq = tp->snd_nxt;
1168 u32 received_upto = tcp_highest_sack_seq(tp);
1169
1170 if (!tcp_is_fack(tp) || !tp->retrans_out ||
1171 !after(received_upto, tp->lost_retrans_low) ||
1172 icsk->icsk_ca_state != TCP_CA_Recovery)
1173 return;
1174
1175 tcp_for_write_queue(skb, sk) {
1176 u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
1177
1178 if (skb == tcp_send_head(sk))
1179 break;
1180 if (cnt == tp->retrans_out)
1181 break;
1182 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1183 continue;
1184
1185 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
1186 continue;
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199 if (after(received_upto, ack_seq)) {
1200 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1201 tp->retrans_out -= tcp_skb_pcount(skb);
1202
1203 tcp_skb_mark_lost_uncond_verify(tp, skb);
1204 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
1205 } else {
1206 if (before(ack_seq, new_low_seq))
1207 new_low_seq = ack_seq;
1208 cnt += tcp_skb_pcount(skb);
1209 }
1210 }
1211
1212 if (tp->retrans_out)
1213 tp->lost_retrans_low = new_low_seq;
1214}
1215
1216static int tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1217 struct tcp_sack_block_wire *sp, int num_sacks,
1218 u32 prior_snd_una)
1219{
1220 struct tcp_sock *tp = tcp_sk(sk);
1221 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
1222 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
1223 int dup_sack = 0;
1224
1225 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1226 dup_sack = 1;
1227 tcp_dsack_seen(tp);
1228 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
1229 } else if (num_sacks > 1) {
1230 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
1231 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
1232
1233 if (!after(end_seq_0, end_seq_1) &&
1234 !before(start_seq_0, start_seq_1)) {
1235 dup_sack = 1;
1236 tcp_dsack_seen(tp);
1237 NET_INC_STATS_BH(sock_net(sk),
1238 LINUX_MIB_TCPDSACKOFORECV);
1239 }
1240 }
1241
1242
1243 if (dup_sack && tp->undo_marker && tp->undo_retrans &&
1244 !after(end_seq_0, prior_snd_una) &&
1245 after(end_seq_0, tp->undo_marker))
1246 tp->undo_retrans--;
1247
1248 return dup_sack;
1249}
1250
1251struct tcp_sacktag_state {
1252 int reord;
1253 int fack_count;
1254 int flag;
1255};
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1266 u32 start_seq, u32 end_seq)
1267{
1268 int in_sack, err;
1269 unsigned int pkt_len;
1270 unsigned int mss;
1271
1272 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1273 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1274
1275 if (tcp_skb_pcount(skb) > 1 && !in_sack &&
1276 after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
1277 mss = tcp_skb_mss(skb);
1278 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1279
1280 if (!in_sack) {
1281 pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
1282 if (pkt_len < mss)
1283 pkt_len = mss;
1284 } else {
1285 pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
1286 if (pkt_len < mss)
1287 return -EINVAL;
1288 }
1289
1290
1291
1292
1293 if (pkt_len > mss) {
1294 unsigned int new_len = (pkt_len / mss) * mss;
1295 if (!in_sack && new_len < pkt_len) {
1296 new_len += mss;
1297 if (new_len > skb->len)
1298 return 0;
1299 }
1300 pkt_len = new_len;
1301 }
1302 err = tcp_fragment(sk, skb, pkt_len, mss);
1303 if (err < 0)
1304 return err;
1305 }
1306
1307 return in_sack;
1308}
1309
1310
1311static u8 tcp_sacktag_one(struct sock *sk,
1312 struct tcp_sacktag_state *state, u8 sacked,
1313 u32 start_seq, u32 end_seq,
1314 int dup_sack, int pcount)
1315{
1316 struct tcp_sock *tp = tcp_sk(sk);
1317 int fack_count = state->fack_count;
1318
1319
1320 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1321 if (tp->undo_marker && tp->undo_retrans &&
1322 after(end_seq, tp->undo_marker))
1323 tp->undo_retrans--;
1324 if (sacked & TCPCB_SACKED_ACKED)
1325 state->reord = min(fack_count, state->reord);
1326 }
1327
1328
1329 if (!after(end_seq, tp->snd_una))
1330 return sacked;
1331
1332 if (!(sacked & TCPCB_SACKED_ACKED)) {
1333 if (sacked & TCPCB_SACKED_RETRANS) {
1334
1335
1336
1337
1338 if (sacked & TCPCB_LOST) {
1339 sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1340 tp->lost_out -= pcount;
1341 tp->retrans_out -= pcount;
1342 }
1343 } else {
1344 if (!(sacked & TCPCB_RETRANS)) {
1345
1346
1347
1348 if (before(start_seq,
1349 tcp_highest_sack_seq(tp)))
1350 state->reord = min(fack_count,
1351 state->reord);
1352
1353
1354 if (!after(end_seq, tp->frto_highmark))
1355 state->flag |= FLAG_ONLY_ORIG_SACKED;
1356 }
1357
1358 if (sacked & TCPCB_LOST) {
1359 sacked &= ~TCPCB_LOST;
1360 tp->lost_out -= pcount;
1361 }
1362 }
1363
1364 sacked |= TCPCB_SACKED_ACKED;
1365 state->flag |= FLAG_DATA_SACKED;
1366 tp->sacked_out += pcount;
1367
1368 fack_count += pcount;
1369
1370
1371 if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
1372 before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
1373 tp->lost_cnt_hint += pcount;
1374
1375 if (fack_count > tp->fackets_out)
1376 tp->fackets_out = fack_count;
1377 }
1378
1379
1380
1381
1382
1383 if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
1384 sacked &= ~TCPCB_SACKED_RETRANS;
1385 tp->retrans_out -= pcount;
1386 }
1387
1388 return sacked;
1389}
1390
1391
1392
1393
1394static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1395 struct tcp_sacktag_state *state,
1396 unsigned int pcount, int shifted, int mss,
1397 int dup_sack)
1398{
1399 struct tcp_sock *tp = tcp_sk(sk);
1400 struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
1401 u32 start_seq = TCP_SKB_CB(skb)->seq;
1402 u32 end_seq = start_seq + shifted;
1403
1404 BUG_ON(!pcount);
1405
1406
1407
1408
1409
1410
1411
1412 tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
1413 start_seq, end_seq, dup_sack, pcount);
1414
1415 if (skb == tp->lost_skb_hint)
1416 tp->lost_cnt_hint += pcount;
1417
1418 TCP_SKB_CB(prev)->end_seq += shifted;
1419 TCP_SKB_CB(skb)->seq += shifted;
1420
1421 skb_shinfo(prev)->gso_segs += pcount;
1422 BUG_ON(skb_shinfo(skb)->gso_segs < pcount);
1423 skb_shinfo(skb)->gso_segs -= pcount;
1424
1425
1426
1427
1428
1429
1430 if (!skb_shinfo(prev)->gso_size) {
1431 skb_shinfo(prev)->gso_size = mss;
1432 skb_shinfo(prev)->gso_type = sk->sk_gso_type;
1433 }
1434
1435
1436 if (skb_shinfo(skb)->gso_segs <= 1) {
1437 skb_shinfo(skb)->gso_size = 0;
1438 skb_shinfo(skb)->gso_type = 0;
1439 }
1440
1441
1442 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
1443
1444 if (skb->len > 0) {
1445 BUG_ON(!tcp_skb_pcount(skb));
1446 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED);
1447 return 0;
1448 }
1449
1450
1451
1452 if (skb == tp->retransmit_skb_hint)
1453 tp->retransmit_skb_hint = prev;
1454 if (skb == tp->scoreboard_skb_hint)
1455 tp->scoreboard_skb_hint = prev;
1456 if (skb == tp->lost_skb_hint) {
1457 tp->lost_skb_hint = prev;
1458 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
1459 }
1460
1461 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags;
1462 if (skb == tcp_highest_sack(sk))
1463 tcp_advance_highest_sack(sk, skb);
1464
1465 tcp_unlink_write_queue(skb, sk);
1466 sk_wmem_free_skb(sk, skb);
1467
1468 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED);
1469
1470 return 1;
1471}
1472
1473
1474
1475
1476static int tcp_skb_seglen(const struct sk_buff *skb)
1477{
1478 return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
1479}
1480
1481
1482static int skb_can_shift(const struct sk_buff *skb)
1483{
1484 return !skb_headlen(skb) && skb_is_nonlinear(skb);
1485}
1486
1487
1488
1489
1490static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1491 struct tcp_sacktag_state *state,
1492 u32 start_seq, u32 end_seq,
1493 int dup_sack)
1494{
1495 struct tcp_sock *tp = tcp_sk(sk);
1496 struct sk_buff *prev;
1497 int mss;
1498 int pcount = 0;
1499 int len;
1500 int in_sack;
1501
1502 if (!sk_can_gso(sk))
1503 goto fallback;
1504
1505
1506 if (!dup_sack &&
1507 (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
1508 goto fallback;
1509 if (!skb_can_shift(skb))
1510 goto fallback;
1511
1512 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1513 goto fallback;
1514
1515
1516 if (unlikely(skb == tcp_write_queue_head(sk)))
1517 goto fallback;
1518 prev = tcp_write_queue_prev(sk, skb);
1519
1520 if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
1521 goto fallback;
1522
1523 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1524 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1525
1526 if (in_sack) {
1527 len = skb->len;
1528 pcount = tcp_skb_pcount(skb);
1529 mss = tcp_skb_seglen(skb);
1530
1531
1532
1533
1534 if (mss != tcp_skb_seglen(prev))
1535 goto fallback;
1536 } else {
1537 if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
1538 goto noop;
1539
1540
1541
1542
1543 if (tcp_skb_pcount(skb) <= 1)
1544 goto noop;
1545
1546 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1547 if (!in_sack) {
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559 goto fallback;
1560 }
1561
1562 len = end_seq - TCP_SKB_CB(skb)->seq;
1563 BUG_ON(len < 0);
1564 BUG_ON(len > skb->len);
1565
1566
1567
1568
1569
1570 mss = tcp_skb_mss(skb);
1571
1572
1573
1574
1575 if (mss != tcp_skb_seglen(prev))
1576 goto fallback;
1577
1578 if (len == mss) {
1579 pcount = 1;
1580 } else if (len < mss) {
1581 goto noop;
1582 } else {
1583 pcount = len / mss;
1584 len = pcount * mss;
1585 }
1586 }
1587
1588
1589 if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
1590 goto fallback;
1591
1592 if (!skb_shift(prev, skb, len))
1593 goto fallback;
1594 if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
1595 goto out;
1596
1597
1598
1599
1600 if (prev == tcp_write_queue_tail(sk))
1601 goto out;
1602 skb = tcp_write_queue_next(sk, prev);
1603
1604 if (!skb_can_shift(skb) ||
1605 (skb == tcp_send_head(sk)) ||
1606 ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
1607 (mss != tcp_skb_seglen(skb)))
1608 goto out;
1609
1610 len = skb->len;
1611 if (skb_shift(prev, skb, len)) {
1612 pcount += tcp_skb_pcount(skb);
1613 tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
1614 }
1615
1616out:
1617 state->fack_count += pcount;
1618 return prev;
1619
1620noop:
1621 return skb;
1622
1623fallback:
1624 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
1625 return NULL;
1626}
1627
1628static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1629 struct tcp_sack_block *next_dup,
1630 struct tcp_sacktag_state *state,
1631 u32 start_seq, u32 end_seq,
1632 int dup_sack_in)
1633{
1634 struct tcp_sock *tp = tcp_sk(sk);
1635 struct sk_buff *tmp;
1636
1637 tcp_for_write_queue_from(skb, sk) {
1638 int in_sack = 0;
1639 int dup_sack = dup_sack_in;
1640
1641 if (skb == tcp_send_head(sk))
1642 break;
1643
1644
1645 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
1646 break;
1647
1648 if ((next_dup != NULL) &&
1649 before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
1650 in_sack = tcp_match_skb_to_sack(sk, skb,
1651 next_dup->start_seq,
1652 next_dup->end_seq);
1653 if (in_sack > 0)
1654 dup_sack = 1;
1655 }
1656
1657
1658
1659
1660
1661 if (in_sack <= 0) {
1662 tmp = tcp_shift_skb_data(sk, skb, state,
1663 start_seq, end_seq, dup_sack);
1664 if (tmp != NULL) {
1665 if (tmp != skb) {
1666 skb = tmp;
1667 continue;
1668 }
1669
1670 in_sack = 0;
1671 } else {
1672 in_sack = tcp_match_skb_to_sack(sk, skb,
1673 start_seq,
1674 end_seq);
1675 }
1676 }
1677
1678 if (unlikely(in_sack < 0))
1679 break;
1680
1681 if (in_sack) {
1682 TCP_SKB_CB(skb)->sacked =
1683 tcp_sacktag_one(sk,
1684 state,
1685 TCP_SKB_CB(skb)->sacked,
1686 TCP_SKB_CB(skb)->seq,
1687 TCP_SKB_CB(skb)->end_seq,
1688 dup_sack,
1689 tcp_skb_pcount(skb));
1690
1691 if (!before(TCP_SKB_CB(skb)->seq,
1692 tcp_highest_sack_seq(tp)))
1693 tcp_advance_highest_sack(sk, skb);
1694 }
1695
1696 state->fack_count += tcp_skb_pcount(skb);
1697 }
1698 return skb;
1699}
1700
1701
1702
1703
1704static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
1705 struct tcp_sacktag_state *state,
1706 u32 skip_to_seq)
1707{
1708 tcp_for_write_queue_from(skb, sk) {
1709 if (skb == tcp_send_head(sk))
1710 break;
1711
1712 if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
1713 break;
1714
1715 state->fack_count += tcp_skb_pcount(skb);
1716 }
1717 return skb;
1718}
1719
1720static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1721 struct sock *sk,
1722 struct tcp_sack_block *next_dup,
1723 struct tcp_sacktag_state *state,
1724 u32 skip_to_seq)
1725{
1726 if (next_dup == NULL)
1727 return skb;
1728
1729 if (before(next_dup->start_seq, skip_to_seq)) {
1730 skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq);
1731 skb = tcp_sacktag_walk(skb, sk, NULL, state,
1732 next_dup->start_seq, next_dup->end_seq,
1733 1);
1734 }
1735
1736 return skb;
1737}
1738
1739static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
1740{
1741 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1742}
1743
1744static int
1745tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1746 u32 prior_snd_una)
1747{
1748 const struct inet_connection_sock *icsk = inet_csk(sk);
1749 struct tcp_sock *tp = tcp_sk(sk);
1750 const unsigned char *ptr = (skb_transport_header(ack_skb) +
1751 TCP_SKB_CB(ack_skb)->sacked);
1752 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
1753 struct tcp_sack_block sp[TCP_NUM_SACKS];
1754 struct tcp_sack_block *cache;
1755 struct tcp_sacktag_state state;
1756 struct sk_buff *skb;
1757 int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
1758 int used_sacks;
1759 int found_dup_sack = 0;
1760 int i, j;
1761 int first_sack_index;
1762
1763 state.flag = 0;
1764 state.reord = tp->packets_out;
1765
1766 if (!tp->sacked_out) {
1767 if (WARN_ON(tp->fackets_out))
1768 tp->fackets_out = 0;
1769 tcp_highest_sack_reset(sk);
1770 }
1771
1772 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
1773 num_sacks, prior_snd_una);
1774 if (found_dup_sack)
1775 state.flag |= FLAG_DSACKING_ACK;
1776
1777
1778
1779
1780
1781 if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
1782 return 0;
1783
1784 if (!tp->packets_out)
1785 goto out;
1786
1787 used_sacks = 0;
1788 first_sack_index = 0;
1789 for (i = 0; i < num_sacks; i++) {
1790 int dup_sack = !i && found_dup_sack;
1791
1792 sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
1793 sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);
1794
1795 if (!tcp_is_sackblock_valid(tp, dup_sack,
1796 sp[used_sacks].start_seq,
1797 sp[used_sacks].end_seq)) {
1798 int mib_idx;
1799
1800 if (dup_sack) {
1801 if (!tp->undo_marker)
1802 mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
1803 else
1804 mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
1805 } else {
1806
1807 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
1808 !after(sp[used_sacks].end_seq, tp->snd_una))
1809 continue;
1810 mib_idx = LINUX_MIB_TCPSACKDISCARD;
1811 }
1812
1813 NET_INC_STATS_BH(sock_net(sk), mib_idx);
1814 if (i == 0)
1815 first_sack_index = -1;
1816 continue;
1817 }
1818
1819
1820 if (!after(sp[used_sacks].end_seq, prior_snd_una))
1821 continue;
1822
1823 used_sacks++;
1824 }
1825
1826
1827 for (i = used_sacks - 1; i > 0; i--) {
1828 for (j = 0; j < i; j++) {
1829 if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
1830 swap(sp[j], sp[j + 1]);
1831
1832
1833 if (j == first_sack_index)
1834 first_sack_index = j + 1;
1835 }
1836 }
1837 }
1838
1839 skb = tcp_write_queue_head(sk);
1840 state.fack_count = 0;
1841 i = 0;
1842
1843 if (!tp->sacked_out) {
1844
1845 cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1846 } else {
1847 cache = tp->recv_sack_cache;
1848
1849 while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
1850 !cache->end_seq)
1851 cache++;
1852 }
1853
1854 while (i < used_sacks) {
1855 u32 start_seq = sp[i].start_seq;
1856 u32 end_seq = sp[i].end_seq;
1857 int dup_sack = (found_dup_sack && (i == first_sack_index));
1858 struct tcp_sack_block *next_dup = NULL;
1859
1860 if (found_dup_sack && ((i + 1) == first_sack_index))
1861 next_dup = &sp[i + 1];
1862
1863
1864 while (tcp_sack_cache_ok(tp, cache) &&
1865 !before(start_seq, cache->end_seq))
1866 cache++;
1867
1868
1869 if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
1870 after(end_seq, cache->start_seq)) {
1871
1872
1873 if (before(start_seq, cache->start_seq)) {
1874 skb = tcp_sacktag_skip(skb, sk, &state,
1875 start_seq);
1876 skb = tcp_sacktag_walk(skb, sk, next_dup,
1877 &state,
1878 start_seq,
1879 cache->start_seq,
1880 dup_sack);
1881 }
1882
1883
1884 if (!after(end_seq, cache->end_seq))
1885 goto advance_sp;
1886
1887 skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
1888 &state,
1889 cache->end_seq);
1890
1891
1892 if (tcp_highest_sack_seq(tp) == cache->end_seq) {
1893
1894 skb = tcp_highest_sack(sk);
1895 if (skb == NULL)
1896 break;
1897 state.fack_count = tp->fackets_out;
1898 cache++;
1899 goto walk;
1900 }
1901
1902 skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
1903
1904 cache++;
1905 continue;
1906 }
1907
1908 if (!before(start_seq, tcp_highest_sack_seq(tp))) {
1909 skb = tcp_highest_sack(sk);
1910 if (skb == NULL)
1911 break;
1912 state.fack_count = tp->fackets_out;
1913 }
1914 skb = tcp_sacktag_skip(skb, sk, &state, start_seq);
1915
1916walk:
1917 skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
1918 start_seq, end_seq, dup_sack);
1919
1920advance_sp:
1921
1922
1923
1924 if (after(end_seq, tp->frto_highmark))
1925 state.flag &= ~FLAG_ONLY_ORIG_SACKED;
1926
1927 i++;
1928 }
1929
1930
1931 for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
1932 tp->recv_sack_cache[i].start_seq = 0;
1933 tp->recv_sack_cache[i].end_seq = 0;
1934 }
1935 for (j = 0; j < used_sacks; j++)
1936 tp->recv_sack_cache[i++] = sp[j];
1937
1938 tcp_mark_lost_retrans(sk);
1939
1940 tcp_verify_left_out(tp);
1941
1942 if ((state.reord < tp->fackets_out) &&
1943 ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
1944 (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
1945 tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
1946
1947out:
1948
1949#if FASTRETRANS_DEBUG > 0
1950 WARN_ON((int)tp->sacked_out < 0);
1951 WARN_ON((int)tp->lost_out < 0);
1952 WARN_ON((int)tp->retrans_out < 0);
1953 WARN_ON((int)tcp_packets_in_flight(tp) < 0);
1954#endif
1955 return state.flag;
1956}
1957
1958
1959
1960
1961static int tcp_limit_reno_sacked(struct tcp_sock *tp)
1962{
1963 u32 holes;
1964
1965 holes = max(tp->lost_out, 1U);
1966 holes = min(holes, tp->packets_out);
1967
1968 if ((tp->sacked_out + holes) > tp->packets_out) {
1969 tp->sacked_out = tp->packets_out - holes;
1970 return 1;
1971 }
1972 return 0;
1973}
1974
1975
1976
1977
1978
1979static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1980{
1981 struct tcp_sock *tp = tcp_sk(sk);
1982 if (tcp_limit_reno_sacked(tp))
1983 tcp_update_reordering(sk, tp->packets_out + addend, 0);
1984}
1985
1986
1987
1988static void tcp_add_reno_sack(struct sock *sk)
1989{
1990 struct tcp_sock *tp = tcp_sk(sk);
1991 tp->sacked_out++;
1992 tcp_check_reno_reordering(sk, 0);
1993 tcp_verify_left_out(tp);
1994}
1995
1996
1997
1998static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1999{
2000 struct tcp_sock *tp = tcp_sk(sk);
2001
2002 if (acked > 0) {
2003
2004 if (acked - 1 >= tp->sacked_out)
2005 tp->sacked_out = 0;
2006 else
2007 tp->sacked_out -= acked - 1;
2008 }
2009 tcp_check_reno_reordering(sk, acked);
2010 tcp_verify_left_out(tp);
2011}
2012
2013static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
2014{
2015 tp->sacked_out = 0;
2016}
2017
2018static int tcp_is_sackfrto(const struct tcp_sock *tp)
2019{
2020 return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp);
2021}
2022
2023
2024
2025
2026int tcp_use_frto(struct sock *sk)
2027{
2028 const struct tcp_sock *tp = tcp_sk(sk);
2029 const struct inet_connection_sock *icsk = inet_csk(sk);
2030 struct sk_buff *skb;
2031
2032 if (!sysctl_tcp_frto)
2033 return 0;
2034
2035
2036 if (icsk->icsk_mtup.probe_size)
2037 return 0;
2038
2039 if (tcp_is_sackfrto(tp))
2040 return 1;
2041
2042
2043 if (tp->retrans_out > 1)
2044 return 0;
2045
2046 skb = tcp_write_queue_head(sk);
2047 if (tcp_skb_is_last(sk, skb))
2048 return 1;
2049 skb = tcp_write_queue_next(sk, skb);
2050 tcp_for_write_queue_from(skb, sk) {
2051 if (skb == tcp_send_head(sk))
2052 break;
2053 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
2054 return 0;
2055
2056 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2057 break;
2058 }
2059 return 1;
2060}
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074void tcp_enter_frto(struct sock *sk)
2075{
2076 const struct inet_connection_sock *icsk = inet_csk(sk);
2077 struct tcp_sock *tp = tcp_sk(sk);
2078 struct sk_buff *skb;
2079
2080 if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
2081 tp->snd_una == tp->high_seq ||
2082 ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
2083 !icsk->icsk_retransmits)) {
2084 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094 if (tp->frto_counter) {
2095 u32 stored_cwnd;
2096 stored_cwnd = tp->snd_cwnd;
2097 tp->snd_cwnd = 2;
2098 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2099 tp->snd_cwnd = stored_cwnd;
2100 } else {
2101 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2102 }
2103
2104
2105
2106
2107
2108
2109
2110 tcp_ca_event(sk, CA_EVENT_FRTO);
2111 }
2112
2113 tp->undo_marker = tp->snd_una;
2114 tp->undo_retrans = 0;
2115
2116 skb = tcp_write_queue_head(sk);
2117 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
2118 tp->undo_marker = 0;
2119 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2120 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2121 tp->retrans_out -= tcp_skb_pcount(skb);
2122 }
2123 tcp_verify_left_out(tp);
2124
2125
2126 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
2127
2128
2129
2130
2131 if (tcp_is_sackfrto(tp) && (tp->frto_counter ||
2132 ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
2133 after(tp->high_seq, tp->snd_una)) {
2134 tp->frto_highmark = tp->high_seq;
2135 } else {
2136 tp->frto_highmark = tp->snd_nxt;
2137 }
2138 tcp_set_ca_state(sk, TCP_CA_Disorder);
2139 tp->high_seq = tp->snd_nxt;
2140 tp->frto_counter = 1;
2141}
2142
2143
2144
2145
2146
2147static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
2148{
2149 struct tcp_sock *tp = tcp_sk(sk);
2150 struct sk_buff *skb;
2151
2152 tp->lost_out = 0;
2153 tp->retrans_out = 0;
2154 if (tcp_is_reno(tp))
2155 tcp_reset_reno_sack(tp);
2156
2157 tcp_for_write_queue(skb, sk) {
2158 if (skb == tcp_send_head(sk))
2159 break;
2160
2161 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2162
2163
2164
2165
2166 if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) {
2167
2168 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
2169 tp->retrans_out += tcp_skb_pcount(skb);
2170
2171 flag |= FLAG_DATA_ACKED;
2172 } else {
2173 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
2174 tp->undo_marker = 0;
2175 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2176 }
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2188 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
2189 tp->lost_out += tcp_skb_pcount(skb);
2190 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
2191 }
2192 }
2193 tcp_verify_left_out(tp);
2194
2195 tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
2196 tp->snd_cwnd_cnt = 0;
2197 tp->snd_cwnd_stamp = tcp_time_stamp;
2198 tp->frto_counter = 0;
2199 tp->bytes_acked = 0;
2200
2201 tp->reordering = min_t(unsigned int, tp->reordering,
2202 sysctl_tcp_reordering);
2203 tcp_set_ca_state(sk, TCP_CA_Loss);
2204 tp->high_seq = tp->snd_nxt;
2205 TCP_ECN_queue_cwr(tp);
2206
2207 tcp_clear_all_retrans_hints(tp);
2208}
2209
2210static void tcp_clear_retrans_partial(struct tcp_sock *tp)
2211{
2212 tp->retrans_out = 0;
2213 tp->lost_out = 0;
2214
2215 tp->undo_marker = 0;
2216 tp->undo_retrans = 0;
2217}
2218
2219void tcp_clear_retrans(struct tcp_sock *tp)
2220{
2221 tcp_clear_retrans_partial(tp);
2222
2223 tp->fackets_out = 0;
2224 tp->sacked_out = 0;
2225}
2226
2227
2228
2229
2230
2231void tcp_enter_loss(struct sock *sk, int how)
2232{
2233 const struct inet_connection_sock *icsk = inet_csk(sk);
2234 struct tcp_sock *tp = tcp_sk(sk);
2235 struct sk_buff *skb;
2236
2237
2238 if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
2239 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
2240 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2241 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2242 tcp_ca_event(sk, CA_EVENT_LOSS);
2243 }
2244 tp->snd_cwnd = 1;
2245 tp->snd_cwnd_cnt = 0;
2246 tp->snd_cwnd_stamp = tcp_time_stamp;
2247
2248 tp->bytes_acked = 0;
2249 tcp_clear_retrans_partial(tp);
2250
2251 if (tcp_is_reno(tp))
2252 tcp_reset_reno_sack(tp);
2253
2254 if (!how) {
2255
2256
2257 tp->undo_marker = tp->snd_una;
2258 } else {
2259 tp->sacked_out = 0;
2260 tp->fackets_out = 0;
2261 }
2262 tcp_clear_all_retrans_hints(tp);
2263
2264 tcp_for_write_queue(skb, sk) {
2265 if (skb == tcp_send_head(sk))
2266 break;
2267
2268 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
2269 tp->undo_marker = 0;
2270 TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
2271 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
2272 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
2273 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
2274 tp->lost_out += tcp_skb_pcount(skb);
2275 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
2276 }
2277 }
2278 tcp_verify_left_out(tp);
2279
2280 tp->reordering = min_t(unsigned int, tp->reordering,
2281 sysctl_tcp_reordering);
2282 tcp_set_ca_state(sk, TCP_CA_Loss);
2283 tp->high_seq = tp->snd_nxt;
2284 TCP_ECN_queue_cwr(tp);
2285
2286 tp->frto_counter = 0;
2287}
2288
2289
2290
2291
2292
2293
2294
2295static int tcp_check_sack_reneging(struct sock *sk, int flag)
2296{
2297 if (flag & FLAG_SACK_RENEGING) {
2298 struct inet_connection_sock *icsk = inet_csk(sk);
2299 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
2300
2301 tcp_enter_loss(sk, 1);
2302 icsk->icsk_retransmits++;
2303 tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
2304 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2305 icsk->icsk_rto, TCP_RTO_MAX);
2306 return 1;
2307 }
2308 return 0;
2309}
2310
2311static inline int tcp_fackets_out(const struct tcp_sock *tp)
2312{
2313 return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
2314}
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
2332{
2333 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
2334}
2335
2336static inline int tcp_skb_timedout(const struct sock *sk,
2337 const struct sk_buff *skb)
2338{
2339 return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
2340}
2341
2342static inline int tcp_head_timedout(const struct sock *sk)
2343{
2344 const struct tcp_sock *tp = tcp_sk(sk);
2345
2346 return tp->packets_out &&
2347 tcp_skb_timedout(sk, tcp_write_queue_head(sk));
2348}
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443static int tcp_time_to_recover(struct sock *sk)
2444{
2445 struct tcp_sock *tp = tcp_sk(sk);
2446 __u32 packets_out;
2447
2448
2449 if (tp->frto_counter)
2450 return 0;
2451
2452
2453 if (tp->lost_out)
2454 return 1;
2455
2456
2457 if (tcp_dupack_heuristics(tp) > tp->reordering)
2458 return 1;
2459
2460
2461
2462
2463 if (tcp_is_fack(tp) && tcp_head_timedout(sk))
2464 return 1;
2465
2466
2467
2468
2469 packets_out = tp->packets_out;
2470 if (packets_out <= tp->reordering &&
2471 tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
2472 !tcp_may_send_now(sk)) {
2473
2474
2475
2476 return 1;
2477 }
2478
2479
2480
2481
2482
2483
2484 if ((tp->thin_dupack || sysctl_tcp_thin_dupack) &&
2485 tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 &&
2486 tcp_is_sack(tp) && !tcp_send_head(sk))
2487 return 1;
2488
2489 return 0;
2490}
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504static void tcp_timeout_skbs(struct sock *sk)
2505{
2506 struct tcp_sock *tp = tcp_sk(sk);
2507 struct sk_buff *skb;
2508
2509 if (!tcp_is_fack(tp) || !tcp_head_timedout(sk))
2510 return;
2511
2512 skb = tp->scoreboard_skb_hint;
2513 if (tp->scoreboard_skb_hint == NULL)
2514 skb = tcp_write_queue_head(sk);
2515
2516 tcp_for_write_queue_from(skb, sk) {
2517 if (skb == tcp_send_head(sk))
2518 break;
2519 if (!tcp_skb_timedout(sk, skb))
2520 break;
2521
2522 tcp_skb_mark_lost(tp, skb);
2523 }
2524
2525 tp->scoreboard_skb_hint = skb;
2526
2527 tcp_verify_left_out(tp);
2528}
2529
2530
2531
2532
2533
2534
2535
2536static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2537{
2538 struct tcp_sock *tp = tcp_sk(sk);
2539 struct sk_buff *skb;
2540 int cnt, oldcnt;
2541 int err;
2542 unsigned int mss;
2543
2544 const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
2545
2546 WARN_ON(packets > tp->packets_out);
2547 if (tp->lost_skb_hint) {
2548 skb = tp->lost_skb_hint;
2549 cnt = tp->lost_cnt_hint;
2550
2551 if (mark_head && skb != tcp_write_queue_head(sk))
2552 return;
2553 } else {
2554 skb = tcp_write_queue_head(sk);
2555 cnt = 0;
2556 }
2557
2558 tcp_for_write_queue_from(skb, sk) {
2559 if (skb == tcp_send_head(sk))
2560 break;
2561
2562
2563 tp->lost_skb_hint = skb;
2564 tp->lost_cnt_hint = cnt;
2565
2566 if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
2567 break;
2568
2569 oldcnt = cnt;
2570 if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
2571 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2572 cnt += tcp_skb_pcount(skb);
2573
2574 if (cnt > packets) {
2575 if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
2576 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
2577 (oldcnt >= packets))
2578 break;
2579
2580 mss = skb_shinfo(skb)->gso_size;
2581 err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
2582 if (err < 0)
2583 break;
2584 cnt = packets;
2585 }
2586
2587 tcp_skb_mark_lost(tp, skb);
2588
2589 if (mark_head)
2590 break;
2591 }
2592 tcp_verify_left_out(tp);
2593}
2594
2595
2596
2597static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2598{
2599 struct tcp_sock *tp = tcp_sk(sk);
2600
2601 if (tcp_is_reno(tp)) {
2602 tcp_mark_head_lost(sk, 1, 1);
2603 } else if (tcp_is_fack(tp)) {
2604 int lost = tp->fackets_out - tp->reordering;
2605 if (lost <= 0)
2606 lost = 1;
2607 tcp_mark_head_lost(sk, lost, 0);
2608 } else {
2609 int sacked_upto = tp->sacked_out - tp->reordering;
2610 if (sacked_upto >= 0)
2611 tcp_mark_head_lost(sk, sacked_upto, 0);
2612 else if (fast_rexmit)
2613 tcp_mark_head_lost(sk, 1, 1);
2614 }
2615
2616 tcp_timeout_skbs(sk);
2617}
2618
2619
2620
2621
2622static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
2623{
2624 tp->snd_cwnd = min(tp->snd_cwnd,
2625 tcp_packets_in_flight(tp) + tcp_max_burst(tp));
2626 tp->snd_cwnd_stamp = tcp_time_stamp;
2627}
2628
2629
2630
2631
2632static inline u32 tcp_cwnd_min(const struct sock *sk)
2633{
2634 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
2635
2636 return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh;
2637}
2638
2639
2640static void tcp_cwnd_down(struct sock *sk, int flag)
2641{
2642 struct tcp_sock *tp = tcp_sk(sk);
2643 int decr = tp->snd_cwnd_cnt + 1;
2644
2645 if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
2646 (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
2647 tp->snd_cwnd_cnt = decr & 1;
2648 decr >>= 1;
2649
2650 if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
2651 tp->snd_cwnd -= decr;
2652
2653 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
2654 tp->snd_cwnd_stamp = tcp_time_stamp;
2655 }
2656}
2657
2658
2659
2660
2661static inline int tcp_packet_delayed(const struct tcp_sock *tp)
2662{
2663 return !tp->retrans_stamp ||
2664 (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2665 before(tp->rx_opt.rcv_tsecr, tp->retrans_stamp));
2666}
2667
2668
2669
2670#if FASTRETRANS_DEBUG > 1
2671static void DBGUNDO(struct sock *sk, const char *msg)
2672{
2673 struct tcp_sock *tp = tcp_sk(sk);
2674 struct inet_sock *inet = inet_sk(sk);
2675
2676 if (sk->sk_family == AF_INET) {
2677 printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
2678 msg,
2679 &inet->inet_daddr, ntohs(inet->inet_dport),
2680 tp->snd_cwnd, tcp_left_out(tp),
2681 tp->snd_ssthresh, tp->prior_ssthresh,
2682 tp->packets_out);
2683 }
2684#if IS_ENABLED(CONFIG_IPV6)
2685 else if (sk->sk_family == AF_INET6) {
2686 struct ipv6_pinfo *np = inet6_sk(sk);
2687 printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
2688 msg,
2689 &np->daddr, ntohs(inet->inet_dport),
2690 tp->snd_cwnd, tcp_left_out(tp),
2691 tp->snd_ssthresh, tp->prior_ssthresh,
2692 tp->packets_out);
2693 }
2694#endif
2695}
2696#else
2697#define DBGUNDO(x...) do { } while (0)
2698#endif
2699
2700static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
2701{
2702 struct tcp_sock *tp = tcp_sk(sk);
2703
2704 if (tp->prior_ssthresh) {
2705 const struct inet_connection_sock *icsk = inet_csk(sk);
2706
2707 if (icsk->icsk_ca_ops->undo_cwnd)
2708 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2709 else
2710 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
2711
2712 if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) {
2713 tp->snd_ssthresh = tp->prior_ssthresh;
2714 TCP_ECN_withdraw_cwr(tp);
2715 }
2716 } else {
2717 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
2718 }
2719 tp->snd_cwnd_stamp = tcp_time_stamp;
2720}
2721
2722static inline int tcp_may_undo(const struct tcp_sock *tp)
2723{
2724 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2725}
2726
2727
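/* Try to undo a whole recovery or loss episode once it proves spurious; returns 1 if the state must be held a little longer (Reno case). */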
2728static int tcp_try_undo_recovery(struct sock *sk)
2729{
2730 struct tcp_sock *tp = tcp_sk(sk);
2731
2732 if (tcp_may_undo(tp)) {
2733 int mib_idx;
2734
2735
2736
2737
2738 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2739 tcp_undo_cwr(sk, true);
2740 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2741 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2742 else
2743 mib_idx = LINUX_MIB_TCPFULLUNDO;
2744
2745 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2746 tp->undo_marker = 0;
2747 }
2748 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
2749
2750
2751
2752 tcp_moderate_cwnd(tp);
2753 return 1;
2754 }
2755 tcp_set_ca_state(sk, TCP_CA_Open);
2756 return 0;
2757}
2758
2759
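/* Try to undo the cwnd reduction when D-SACKs show that all retransmitted data was received anyway. */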
2760static void tcp_try_undo_dsack(struct sock *sk)
2761{
2762 struct tcp_sock *tp = tcp_sk(sk);
2763
2764 if (tp->undo_marker && !tp->undo_retrans) {
2765 DBGUNDO(sk, "D-SACK");
2766 tcp_undo_cwr(sk, true);
2767 tp->undo_marker = 0;
2768 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2769 }
2770}
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
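/* Report whether any retransmitted data may still be outstanding: retrans_out is non-zero or the head of the write queue was ever retransmitted. */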
2786static int tcp_any_retrans_done(const struct sock *sk)
2787{
2788 const struct tcp_sock *tp = tcp_sk(sk);
2789 struct sk_buff *skb;
2790
2791 if (tp->retrans_out)
2792 return 1;
2793
2794 skb = tcp_write_queue_head(sk);
2795 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2796 return 1;
2797
2798 return 0;
2799}
2800
2801
2802
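/* Undo during fast recovery after a partial ACK. */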
2803static int tcp_try_undo_partial(struct sock *sk, int acked)
2804{
2805 struct tcp_sock *tp = tcp_sk(sk);
2806
2807 int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
2808
2809 if (tcp_may_undo(tp)) {
2810
2811
2812
2813 if (!tcp_any_retrans_done(sk))
2814 tp->retrans_stamp = 0;
2815
2816 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
2817
2818 DBGUNDO(sk, "Hoe");
2819 tcp_undo_cwr(sk, false);
2820 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2821
2822
2823
2824
2825
2826 failed = 0;
2827 }
2828 return failed;
2829}
2830
2831
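/* Undo during loss recovery after a (partial) ACK shows the RTO was spurious. */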
2832static int tcp_try_undo_loss(struct sock *sk)
2833{
2834 struct tcp_sock *tp = tcp_sk(sk);
2835
2836 if (tcp_may_undo(tp)) {
2837 struct sk_buff *skb;
2838 tcp_for_write_queue(skb, sk) {
2839 if (skb == tcp_send_head(sk))
2840 break;
2841 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2842 }
2843
2844 tcp_clear_all_retrans_hints(tp);
2845
2846 DBGUNDO(sk, "partial loss");
2847 tp->lost_out = 0;
2848 tcp_undo_cwr(sk, true);
2849 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2850 inet_csk(sk)->icsk_retransmits = 0;
2851 tp->undo_marker = 0;
2852 if (tcp_is_sack(tp))
2853 tcp_set_ca_state(sk, TCP_CA_Open);
2854 return 1;
2855 }
2856 return 0;
2857}
2858
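/* Complete the cwnd reduction: unless it was already undone, lower cwnd to ssthresh and signal CA_EVENT_COMPLETE_CWR to the congestion control module. */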
2859static inline void tcp_complete_cwr(struct sock *sk)
2860{
2861 struct tcp_sock *tp = tcp_sk(sk);
2862
2863
2864 if (tp->undo_marker) {
2865 if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR)
2866 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
2867 else
2868 tp->snd_cwnd = tp->snd_ssthresh;
2869 tp->snd_cwnd_stamp = tcp_time_stamp;
2870 }
2871 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2872}
2873
2874static void tcp_try_keep_open(struct sock *sk)
2875{
2876 struct tcp_sock *tp = tcp_sk(sk);
2877 int state = TCP_CA_Open;
2878
2879 if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
2880 state = TCP_CA_Disorder;
2881
2882 if (inet_csk(sk)->icsk_ca_state != state) {
2883 tcp_set_ca_state(sk, state);
2884 tp->high_seq = tp->snd_nxt;
2885 }
2886}
2887
2888static void tcp_try_to_open(struct sock *sk, int flag)
2889{
2890 struct tcp_sock *tp = tcp_sk(sk);
2891
2892 tcp_verify_left_out(tp);
2893
2894 if (!tp->frto_counter && !tcp_any_retrans_done(sk))
2895 tp->retrans_stamp = 0;
2896
2897 if (flag & FLAG_ECE)
2898 tcp_enter_cwr(sk, 1);
2899
2900 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2901 tcp_try_keep_open(sk);
2902 if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
2903 tcp_moderate_cwnd(tp);
2904 } else {
2905 tcp_cwnd_down(sk, flag);
2906 }
2907}
2908
2909static void tcp_mtup_probe_failed(struct sock *sk)
2910{
2911 struct inet_connection_sock *icsk = inet_csk(sk);
2912
2913 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
2914 icsk->icsk_mtup.probe_size = 0;
2915}
2916
2917static void tcp_mtup_probe_success(struct sock *sk)
2918{
2919 struct tcp_sock *tp = tcp_sk(sk);
2920 struct inet_connection_sock *icsk = inet_csk(sk);
2921
2922
2923 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2924 tp->snd_cwnd = tp->snd_cwnd *
2925 tcp_mss_to_mtu(sk, tp->mss_cache) /
2926 icsk->icsk_mtup.probe_size;
2927 tp->snd_cwnd_cnt = 0;
2928 tp->snd_cwnd_stamp = tcp_time_stamp;
2929 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2930
2931 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2932 icsk->icsk_mtup.probe_size = 0;
2933 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2934}
2935
2936
2937
2938
2939
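/* Do a simple retransmit without the usual timer backoff; used for path MTU discovery when queued segments no longer fit the new MSS. The socket is already locked. */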
2940void tcp_simple_retransmit(struct sock *sk)
2941{
2942 const struct inet_connection_sock *icsk = inet_csk(sk);
2943 struct tcp_sock *tp = tcp_sk(sk);
2944 struct sk_buff *skb;
2945 unsigned int mss = tcp_current_mss(sk);
2946 u32 prior_lost = tp->lost_out;
2947
2948 tcp_for_write_queue(skb, sk) {
2949 if (skb == tcp_send_head(sk))
2950 break;
2951 if (tcp_skb_seglen(skb) > mss &&
2952 !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2953 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2954 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2955 tp->retrans_out -= tcp_skb_pcount(skb);
2956 }
2957 tcp_skb_mark_lost_uncond_verify(tp, skb);
2958 }
2959 }
2960
2961 tcp_clear_retrans_hints_partial(tp);
2962
2963 if (prior_lost == tp->lost_out)
2964 return;
2965
2966 if (tcp_is_reno(tp))
2967 tcp_limit_reno_sacked(tp);
2968
2969 tcp_verify_left_out(tp);
2970
2971
2972
2973
2974
2975
2976 if (icsk->icsk_ca_state != TCP_CA_Loss) {
2977 tp->high_seq = tp->snd_nxt;
2978 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2979 tp->prior_ssthresh = 0;
2980 tp->undo_marker = 0;
2981 tcp_set_ca_state(sk, TCP_CA_Loss);
2982 }
2983 tcp_xmit_retransmit_queue(sk);
2984}
2985EXPORT_SYMBOL(tcp_simple_retransmit);
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
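/* Proportional Rate Reduction (PRR): while in recovery, release new segments so that cwnd converges towards ssthresh instead of collapsing or bursting. */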
2998static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
2999 int fast_rexmit, int flag)
3000{
3001 struct tcp_sock *tp = tcp_sk(sk);
3002 int sndcnt = 0;
3003 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
3004
3005 if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
3006 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
3007 tp->prior_cwnd - 1;
3008 sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
3009 } else {
3010 sndcnt = min_t(int, delta,
3011 max_t(int, tp->prr_delivered - tp->prr_out,
3012 newly_acked_sacked) + 1);
3013 }
3014
3015 sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
3016 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
3017}
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
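/* The heart of the loss recovery state machine: decides on entering and leaving CWR/Recovery/Loss, marks lost segments and triggers retransmissions. */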
3030static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
3031 int newly_acked_sacked, bool is_dupack,
3032 int flag)
3033{
3034 struct inet_connection_sock *icsk = inet_csk(sk);
3035 struct tcp_sock *tp = tcp_sk(sk);
3036 int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
3037 (tcp_fackets_out(tp) > tp->reordering));
3038 int fast_rexmit = 0, mib_idx;
3039
3040 if (WARN_ON(!tp->packets_out && tp->sacked_out))
3041 tp->sacked_out = 0;
3042 if (WARN_ON(!tp->sacked_out && tp->fackets_out))
3043 tp->fackets_out = 0;
3044
3045
3046
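	/* An ECN echo forbids undoing the cwnd reduction. */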
3047 if (flag & FLAG_ECE)
3048 tp->prior_ssthresh = 0;
3049
3050
3051 if (tcp_check_sack_reneging(sk, flag))
3052 return;
3053
3054
3055 tcp_verify_left_out(tp);
3056
3057
3058
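	/* Check state exit conditions: the state can be left once snd_una has reached high_seq. */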
3059 if (icsk->icsk_ca_state == TCP_CA_Open) {
3060 WARN_ON(tp->retrans_out != 0);
3061 tp->retrans_stamp = 0;
3062 } else if (!before(tp->snd_una, tp->high_seq)) {
3063 switch (icsk->icsk_ca_state) {
3064 case TCP_CA_Loss:
3065 icsk->icsk_retransmits = 0;
3066 if (tcp_try_undo_recovery(sk))
3067 return;
3068 break;
3069
3070 case TCP_CA_CWR:
3071
3072
3073 if (tp->snd_una != tp->high_seq) {
3074 tcp_complete_cwr(sk);
3075 tcp_set_ca_state(sk, TCP_CA_Open);
3076 }
3077 break;
3078
3079 case TCP_CA_Recovery:
3080 if (tcp_is_reno(tp))
3081 tcp_reset_reno_sack(tp);
3082 if (tcp_try_undo_recovery(sk))
3083 return;
3084 tcp_complete_cwr(sk);
3085 break;
3086 }
3087 }
3088
3089
3090 switch (icsk->icsk_ca_state) {
3091 case TCP_CA_Recovery:
3092 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
3093 if (tcp_is_reno(tp) && is_dupack)
3094 tcp_add_reno_sack(sk);
3095 } else
3096 do_lost = tcp_try_undo_partial(sk, pkts_acked);
3097 break;
3098 case TCP_CA_Loss:
3099 if (flag & FLAG_DATA_ACKED)
3100 icsk->icsk_retransmits = 0;
3101 if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
3102 tcp_reset_reno_sack(tp);
3103 if (!tcp_try_undo_loss(sk)) {
3104 tcp_moderate_cwnd(tp);
3105 tcp_xmit_retransmit_queue(sk);
3106 return;
3107 }
3108 if (icsk->icsk_ca_state != TCP_CA_Open)
3109 return;
3110
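		/* Loss is undone; fall through to processing in Open state. */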
3111 default:
3112 if (tcp_is_reno(tp)) {
3113 if (flag & FLAG_SND_UNA_ADVANCED)
3114 tcp_reset_reno_sack(tp);
3115 if (is_dupack)
3116 tcp_add_reno_sack(sk);
3117 }
3118
3119 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
3120 tcp_try_undo_dsack(sk);
3121
3122 if (!tcp_time_to_recover(sk)) {
3123 tcp_try_to_open(sk, flag);
3124 return;
3125 }
3126
3127
3128 if (icsk->icsk_ca_state < TCP_CA_CWR &&
3129 icsk->icsk_mtup.probe_size &&
3130 tp->snd_una == tp->mtu_probe.probe_seq_start) {
3131 tcp_mtup_probe_failed(sk);
3132
3133 tp->snd_cwnd++;
3134 tcp_simple_retransmit(sk);
3135 return;
3136 }
3137
3138
3139
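		/* Otherwise enter fast recovery. */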
3140 if (tcp_is_reno(tp))
3141 mib_idx = LINUX_MIB_TCPRENORECOVERY;
3142 else
3143 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
3144
3145 NET_INC_STATS_BH(sock_net(sk), mib_idx);
3146
3147 tp->high_seq = tp->snd_nxt;
3148 tp->prior_ssthresh = 0;
3149 tp->undo_marker = tp->snd_una;
3150 tp->undo_retrans = tp->retrans_out;
3151
3152 if (icsk->icsk_ca_state < TCP_CA_CWR) {
3153 if (!(flag & FLAG_ECE))
3154 tp->prior_ssthresh = tcp_current_ssthresh(sk);
3155 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
3156 TCP_ECN_queue_cwr(tp);
3157 }
3158
3159 tp->bytes_acked = 0;
3160 tp->snd_cwnd_cnt = 0;
3161 tp->prior_cwnd = tp->snd_cwnd;
3162 tp->prr_delivered = 0;
3163 tp->prr_out = 0;
3164 tcp_set_ca_state(sk, TCP_CA_Recovery);
3165 fast_rexmit = 1;
3166 }
3167
3168 if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
3169 tcp_update_scoreboard(sk, fast_rexmit);
3170 tp->prr_delivered += newly_acked_sacked;
3171 tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
3172 tcp_xmit_retransmit_queue(sk);
3173}
3174
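/* Feed a new RTT sample into the estimator, recompute the RTO and clear the backoff. */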
3175void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
3176{
3177 tcp_rtt_estimator(sk, seq_rtt);
3178 tcp_set_rto(sk);
3179 inet_csk(sk)->icsk_backoff = 0;
3180}
3181EXPORT_SYMBOL(tcp_valid_rtt_meas);
3182
3183
3184
3185
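/* RTT measurement taken from the echoed timestamp (TSecr). */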
3186static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
3187{
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203 struct tcp_sock *tp = tcp_sk(sk);
3204
3205 tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
3206}
3207
3208static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
3209{
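	/* Karn's rule: never sample RTT from a segment that was retransmitted, since the ACK may be for either transmission. */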
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219 if (flag & FLAG_RETRANS_DATA_ACKED)
3220 return;
3221
3222 tcp_valid_rtt_meas(sk, seq_rtt);
3223}
3224
3225static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
3226 const s32 seq_rtt)
3227{
3228 const struct tcp_sock *tp = tcp_sk(sk);
3229
3230 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
3231 tcp_ack_saw_tstamp(sk, flag);
3232 else if (seq_rtt >= 0)
3233 tcp_ack_no_tstamp(sk, seq_rtt, flag);
3234}
3235
3236static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
3237{
3238 const struct inet_connection_sock *icsk = inet_csk(sk);
3239 icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight);
3240 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
3241}
3242
3243
3244
3245
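/* Restart the retransmission timer after forward progress, or clear it once everything has been acknowledged. */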
3246static void tcp_rearm_rto(struct sock *sk)
3247{
3248 const struct tcp_sock *tp = tcp_sk(sk);
3249
3250 if (!tp->packets_out) {
3251 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
3252 } else {
3253 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3254 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
3255 }
3256}
3257
3258
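/* Trim a partially acknowledged TSO segment and return how many packets within it were acked. */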
3259static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3260{
3261 struct tcp_sock *tp = tcp_sk(sk);
3262 u32 packets_acked;
3263
3264 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
3265
3266 packets_acked = tcp_skb_pcount(skb);
3267 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
3268 return 0;
3269 packets_acked -= tcp_skb_pcount(skb);
3270
3271 if (packets_acked) {
3272 BUG_ON(tcp_skb_pcount(skb) == 0);
3273 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
3274 }
3275
3276 return packets_acked;
3277}
3278
3279
3280
3281
3282
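/* Remove acknowledged frames from the retransmission queue and update SACK/FACK bookkeeping, RTT samples and MTU probing state along the way. */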
3283static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3284 u32 prior_snd_una)
3285{
3286 struct tcp_sock *tp = tcp_sk(sk);
3287 const struct inet_connection_sock *icsk = inet_csk(sk);
3288 struct sk_buff *skb;
3289 u32 now = tcp_time_stamp;
3290 int fully_acked = 1;
3291 int flag = 0;
3292 u32 pkts_acked = 0;
3293 u32 reord = tp->packets_out;
3294 u32 prior_sacked = tp->sacked_out;
3295 s32 seq_rtt = -1;
3296 s32 ca_seq_rtt = -1;
3297 ktime_t last_ackt = net_invalid_timestamp();
3298
3299 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
3300 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3301 u32 acked_pcount;
3302 u8 sacked = scb->sacked;
3303
3304
3305 if (after(scb->end_seq, tp->snd_una)) {
3306 if (tcp_skb_pcount(skb) == 1 ||
3307 !after(tp->snd_una, scb->seq))
3308 break;
3309
3310 acked_pcount = tcp_tso_acked(sk, skb);
3311 if (!acked_pcount)
3312 break;
3313
3314 fully_acked = 0;
3315 } else {
3316 acked_pcount = tcp_skb_pcount(skb);
3317 }
3318
3319 if (sacked & TCPCB_RETRANS) {
3320 if (sacked & TCPCB_SACKED_RETRANS)
3321 tp->retrans_out -= acked_pcount;
3322 flag |= FLAG_RETRANS_DATA_ACKED;
3323 ca_seq_rtt = -1;
3324 seq_rtt = -1;
3325 if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
3326 flag |= FLAG_NONHEAD_RETRANS_ACKED;
3327 } else {
3328 ca_seq_rtt = now - scb->when;
3329 last_ackt = skb->tstamp;
3330 if (seq_rtt < 0) {
3331 seq_rtt = ca_seq_rtt;
3332 }
3333 if (!(sacked & TCPCB_SACKED_ACKED))
3334 reord = min(pkts_acked, reord);
3335 }
3336
3337 if (sacked & TCPCB_SACKED_ACKED)
3338 tp->sacked_out -= acked_pcount;
3339 if (sacked & TCPCB_LOST)
3340 tp->lost_out -= acked_pcount;
3341
3342 tp->packets_out -= acked_pcount;
3343 pkts_acked += acked_pcount;
3344
3345
3346
3347
3348
3349
3350
3351
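		/* A SYN on the write queue is not real data: flag it separately so connection setup is not mistaken for acked payload. */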
3352 if (!(scb->tcp_flags & TCPHDR_SYN)) {
3353 flag |= FLAG_DATA_ACKED;
3354 } else {
3355 flag |= FLAG_SYN_ACKED;
3356 tp->retrans_stamp = 0;
3357 }
3358
3359 if (!fully_acked)
3360 break;
3361
3362 tcp_unlink_write_queue(skb, sk);
3363 sk_wmem_free_skb(sk, skb);
3364 tp->scoreboard_skb_hint = NULL;
3365 if (skb == tp->retransmit_skb_hint)
3366 tp->retransmit_skb_hint = NULL;
3367 if (skb == tp->lost_skb_hint)
3368 tp->lost_skb_hint = NULL;
3369 }
3370
3371 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3372 tp->snd_up = tp->snd_una;
3373
3374 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
3375 flag |= FLAG_SACK_RENEGING;
3376
3377 if (flag & FLAG_ACKED) {
3378 const struct tcp_congestion_ops *ca_ops
3379 = inet_csk(sk)->icsk_ca_ops;
3380
3381 if (unlikely(icsk->icsk_mtup.probe_size &&
3382 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3383 tcp_mtup_probe_success(sk);
3384 }
3385
3386 tcp_ack_update_rtt(sk, flag, seq_rtt);
3387 tcp_rearm_rto(sk);
3388
3389 if (tcp_is_reno(tp)) {
3390 tcp_remove_reno_sacks(sk, pkts_acked);
3391 } else {
3392 int delta;
3393
3394
3395 if (reord < prior_fackets)
3396 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
3397
3398 delta = tcp_is_fack(tp) ? pkts_acked :
3399 prior_sacked - tp->sacked_out;
3400 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3401 }
3402
3403 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
3404
3405 if (ca_ops->pkts_acked) {
3406 s32 rtt_us = -1;
3407
3408
3409 if (!(flag & FLAG_RETRANS_DATA_ACKED)) {
3410
3411 if (ca_ops->flags & TCP_CONG_RTT_STAMP &&
3412 !ktime_equal(last_ackt,
3413 net_invalid_timestamp()))
3414 rtt_us = ktime_us_delta(ktime_get_real(),
3415 last_ackt);
3416 else if (ca_seq_rtt >= 0)
3417 rtt_us = jiffies_to_usecs(ca_seq_rtt);
3418 }
3419
3420 ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
3421 }
3422 }
3423
3424#if FASTRETRANS_DEBUG > 0
3425 WARN_ON((int)tp->sacked_out < 0);
3426 WARN_ON((int)tp->lost_out < 0);
3427 WARN_ON((int)tp->retrans_out < 0);
3428 if (!tp->packets_out && tcp_is_sack(tp)) {
3429 icsk = inet_csk(sk);
3430 if (tp->lost_out) {
3431 printk(KERN_DEBUG "Leak l=%u %d\n",
3432 tp->lost_out, icsk->icsk_ca_state);
3433 tp->lost_out = 0;
3434 }
3435 if (tp->sacked_out) {
3436 printk(KERN_DEBUG "Leak s=%u %d\n",
3437 tp->sacked_out, icsk->icsk_ca_state);
3438 tp->sacked_out = 0;
3439 }
3440 if (tp->retrans_out) {
3441 printk(KERN_DEBUG "Leak r=%u %d\n",
3442 tp->retrans_out, icsk->icsk_ca_state);
3443 tp->retrans_out = 0;
3444 }
3445 }
3446#endif
3447 return flag;
3448}
3449
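/* A window probe was acknowledged: clear the probe timer if the head segment now fits in the window, otherwise re-arm it with the current backoff. */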
3450static void tcp_ack_probe(struct sock *sk)
3451{
3452 const struct tcp_sock *tp = tcp_sk(sk);
3453 struct inet_connection_sock *icsk = inet_csk(sk);
3454
3455
3456
3457 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
3458 icsk->icsk_backoff = 0;
3459 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
3460
3461
3462
3463 } else {
3464 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3465 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
3466 TCP_RTO_MAX);
3467 }
3468}
3469
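/* An ACK is "dubious" if it is a duplicate, carries a congestion alert, or arrives while we are not in the Open state. */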
3470static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag)
3471{
3472 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3473 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3474}
3475
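/* May cwnd be raised? Not during CWR/Recovery, and not on an ECN echo unless we are still below ssthresh. */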
3476static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3477{
3478 const struct tcp_sock *tp = tcp_sk(sk);
3479 return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
3480 !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
3481}
3482
3483
3484
3485
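/* Check that the window update is acceptable; callers guarantee snd_una <= ack <= snd_nxt. */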
3486static inline int tcp_may_update_window(const struct tcp_sock *tp,
3487 const u32 ack, const u32 ack_seq,
3488 const u32 nwin)
3489{
3490 return after(ack, tp->snd_una) ||
3491 after(ack_seq, tp->snd_wl1) ||
3492 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
3493}
3494
3495
3496
3497
3498
3499
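/* Update our send window from the incoming segment and advance snd_una. */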
3500static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
3501 u32 ack_seq)
3502{
3503 struct tcp_sock *tp = tcp_sk(sk);
3504 int flag = 0;
3505 u32 nwin = ntohs(tcp_hdr(skb)->window);
3506
3507 if (likely(!tcp_hdr(skb)->syn))
3508 nwin <<= tp->rx_opt.snd_wscale;
3509
3510 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3511 flag |= FLAG_WIN_UPDATE;
3512 tcp_update_wl(tp, ack_seq);
3513
3514 if (tp->snd_wnd != nwin) {
3515 tp->snd_wnd = nwin;
3516
3517
3518
3519
3520 tp->pred_flags = 0;
3521 tcp_fast_path_check(sk);
3522
3523 if (nwin > tp->max_window) {
3524 tp->max_window = nwin;
3525 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3526 }
3527 }
3528 }
3529
3530 tp->snd_una = ack;
3531
3532 return flag;
3533}
3534
3535
3536
3537
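/* Conservative response to a spurious RTO: keep cwnd at ssthresh and signal CWR. */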
3538static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
3539{
3540 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
3541 tp->snd_cwnd_cnt = 0;
3542 tp->bytes_acked = 0;
3543 TCP_ECN_queue_cwr(tp);
3544 tcp_moderate_cwnd(tp);
3545}
3546
3547
3548
3549
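/* Rate-halving response to a spurious RTO: enter CWR as for a normal congestion signal. */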
3550static void tcp_ratehalving_spur_to_response(struct sock *sk)
3551{
3552 tcp_enter_cwr(sk, 0);
3553}
3554
3555static void tcp_undo_spur_to_response(struct sock *sk, int flag)
3556{
3557 if (flag & FLAG_ECE)
3558 tcp_ratehalving_spur_to_response(sk);
3559 else
3560 tcp_undo_cwr(sk, true);
3561}
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
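/* F-RTO (RFC 4138) spurious RTO detection: inspect the first ACKs after a retransmission timeout to decide between continuing normally, falling back to conventional loss recovery, or applying the sysctl_tcp_frto_response policy. */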
3593static int tcp_process_frto(struct sock *sk, int flag)
3594{
3595 struct tcp_sock *tp = tcp_sk(sk);
3596
3597 tcp_verify_left_out(tp);
3598
3599
3600 if (flag & FLAG_DATA_ACKED)
3601 inet_csk(sk)->icsk_retransmits = 0;
3602
3603 if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
3604 ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
3605 tp->undo_marker = 0;
3606
3607 if (!before(tp->snd_una, tp->frto_highmark)) {
3608 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
3609 return 1;
3610 }
3611
3612 if (!tcp_is_sackfrto(tp)) {
3613
3614
3615
3616
3617 if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP))
3618 return 1;
3619
3620 if (!(flag & FLAG_DATA_ACKED)) {
3621 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
3622 flag);
3623 return 1;
3624 }
3625 } else {
3626 if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
3627
3628 tp->snd_cwnd = min(tp->snd_cwnd,
3629 tcp_packets_in_flight(tp));
3630 return 1;
3631 }
3632
3633 if ((tp->frto_counter >= 2) &&
3634 (!(flag & FLAG_FORWARD_PROGRESS) ||
3635 ((flag & FLAG_DATA_SACKED) &&
3636 !(flag & FLAG_ONLY_ORIG_SACKED)))) {
3637
3638 if (!(flag & FLAG_FORWARD_PROGRESS) &&
3639 (flag & FLAG_NOT_DUP))
3640 return 1;
3641
3642 tcp_enter_frto_loss(sk, 3, flag);
3643 return 1;
3644 }
3645 }
3646
3647 if (tp->frto_counter == 1) {
3648
3649 tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
3650 tp->frto_counter = 2;
3651
3652 if (!tcp_may_send_now(sk))
3653 tcp_enter_frto_loss(sk, 2, flag);
3654
3655 return 1;
3656 } else {
3657 switch (sysctl_tcp_frto_response) {
3658 case 2:
3659 tcp_undo_spur_to_response(sk, flag);
3660 break;
3661 case 1:
3662 tcp_conservative_spur_to_response(tp);
3663 break;
3664 default:
3665 tcp_ratehalving_spur_to_response(sk);
3666 break;
3667 }
3668 tp->frto_counter = 0;
3669 tp->undo_marker = 0;
3670 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS);
3671 }
3672 return 0;
3673}
3674
3675
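/* This routine deals with incoming ACKs, but not outgoing ones. */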
3676static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3677{
3678 struct inet_connection_sock *icsk = inet_csk(sk);
3679 struct tcp_sock *tp = tcp_sk(sk);
3680 u32 prior_snd_una = tp->snd_una;
3681 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3682 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3683 bool is_dupack = false;
3684 u32 prior_in_flight;
3685 u32 prior_fackets;
3686 int prior_packets;
3687 int prior_sacked = tp->sacked_out;
3688 int pkts_acked = 0;
3689 int newly_acked_sacked = 0;
3690 int frto_cwnd = 0;
3691
3692
3693
3694
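	/* If the ACK is older than previous ACKs then we can probably ignore it. */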
3695 if (before(ack, prior_snd_una))
3696 goto old_ack;
3697
3698
3699
3700
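	/* If the ACK acknowledges something we have not sent yet, discard it (RFC 793, Section 3.9). */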
3701 if (after(ack, tp->snd_nxt))
3702 goto invalid_ack;
3703
3704 if (after(ack, prior_snd_una))
3705 flag |= FLAG_SND_UNA_ADVANCED;
3706
3707 if (sysctl_tcp_abc) {
3708 if (icsk->icsk_ca_state < TCP_CA_CWR)
3709 tp->bytes_acked += ack - prior_snd_una;
3710 else if (icsk->icsk_ca_state == TCP_CA_Loss)
3711
3712 tp->bytes_acked += min(ack - prior_snd_una,
3713 tp->mss_cache);
3714 }
3715
3716 prior_fackets = tp->fackets_out;
3717 prior_in_flight = tcp_packets_in_flight(tp);
3718
3719 if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
3720
3721
3722
3723
3724 tcp_update_wl(tp, ack_seq);
3725 tp->snd_una = ack;
3726 flag |= FLAG_WIN_UPDATE;
3727
3728 tcp_ca_event(sk, CA_EVENT_FAST_ACK);
3729
3730 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
3731 } else {
3732 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3733 flag |= FLAG_DATA;
3734 else
3735 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3736
3737 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3738
3739 if (TCP_SKB_CB(skb)->sacked)
3740 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3741
3742 if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
3743 flag |= FLAG_ECE;
3744
3745 tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
3746 }
3747
3748
3749
3750
3751 sk->sk_err_soft = 0;
3752 icsk->icsk_probes_out = 0;
3753 tp->rcv_tstamp = tcp_time_stamp;
3754 prior_packets = tp->packets_out;
3755 if (!prior_packets)
3756 goto no_queue;
3757
3758
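	/* See if we can take anything off of the retransmit queue. */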
3759 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
3760
3761 pkts_acked = prior_packets - tp->packets_out;
3762 newly_acked_sacked = (prior_packets - prior_sacked) -
3763 (tp->packets_out - tp->sacked_out);
3764
3765 if (tp->frto_counter)
3766 frto_cwnd = tcp_process_frto(sk, flag);
3767
3768 if (before(tp->frto_highmark, tp->snd_una))
3769 tp->frto_highmark = 0;
3770
3771 if (tcp_ack_is_dubious(sk, flag)) {
3772
3773 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
3774 tcp_may_raise_cwnd(sk, flag))
3775 tcp_cong_avoid(sk, ack, prior_in_flight);
3776 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3777 tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
3778 is_dupack, flag);
3779 } else {
3780 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
3781 tcp_cong_avoid(sk, ack, prior_in_flight);
3782 }
3783
3784 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3785 dst_confirm(__sk_dst_get(sk));
3786
3787 return 1;
3788
3789no_queue:
3790
3791 if (flag & FLAG_DSACKING_ACK)
3792 tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
3793 is_dupack, flag);
3794
3795
3796
3797
3798 if (tcp_send_head(sk))
3799 tcp_ack_probe(sk);
3800 return 1;
3801
3802invalid_ack:
3803 SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3804 return -1;
3805
3806old_ack:
3807
3808
3809
3810 if (TCP_SKB_CB(skb)->sacked) {
3811 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3812 newly_acked_sacked = tp->sacked_out - prior_sacked;
3813 tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
3814 is_dupack, flag);
3815 }
3816
3817 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3818 return 0;
3819}
3820
3821
3822
3823
3824
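/* Look for TCP options. Normally only called on SYN and SYN-ACK packets, but also on established-flow packets when the fast parsing below fails. */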
3825void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx,
3826 const u8 **hvpp, int estab)
3827{
3828 const unsigned char *ptr;
3829 const struct tcphdr *th = tcp_hdr(skb);
3830 int length = (th->doff * 4) - sizeof(struct tcphdr);
3831
3832 ptr = (const unsigned char *)(th + 1);
3833 opt_rx->saw_tstamp = 0;
3834
3835 while (length > 0) {
3836 int opcode = *ptr++;
3837 int opsize;
3838
3839 switch (opcode) {
3840 case TCPOPT_EOL:
3841 return;
3842 case TCPOPT_NOP:
3843 length--;
3844 continue;
3845 default:
3846 opsize = *ptr++;
3847 if (opsize < 2)
3848 return;
3849 if (opsize > length)
3850 return;
3851 switch (opcode) {
3852 case TCPOPT_MSS:
3853 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
3854 u16 in_mss = get_unaligned_be16(ptr);
3855 if (in_mss) {
3856 if (opt_rx->user_mss &&
3857 opt_rx->user_mss < in_mss)
3858 in_mss = opt_rx->user_mss;
3859 opt_rx->mss_clamp = in_mss;
3860 }
3861 }
3862 break;
3863 case TCPOPT_WINDOW:
3864 if (opsize == TCPOLEN_WINDOW && th->syn &&
3865 !estab && sysctl_tcp_window_scaling) {
3866 __u8 snd_wscale = *(__u8 *)ptr;
3867 opt_rx->wscale_ok = 1;
3868 if (snd_wscale > 14) {
3869 if (net_ratelimit())
3870 printk(KERN_INFO "tcp_parse_options: Illegal window "
3871 "scaling value %d >14 received.\n",
3872 snd_wscale);
3873 snd_wscale = 14;
3874 }
3875 opt_rx->snd_wscale = snd_wscale;
3876 }
3877 break;
3878 case TCPOPT_TIMESTAMP:
3879 if ((opsize == TCPOLEN_TIMESTAMP) &&
3880 ((estab && opt_rx->tstamp_ok) ||
3881 (!estab && sysctl_tcp_timestamps))) {
3882 opt_rx->saw_tstamp = 1;
3883 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3884 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
3885 }
3886 break;
3887 case TCPOPT_SACK_PERM:
3888 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3889 !estab && sysctl_tcp_sack) {
3890 opt_rx->sack_ok = TCP_SACK_SEEN;
3891 tcp_sack_reset(opt_rx);
3892 }
3893 break;
3894
3895 case TCPOPT_SACK:
3896 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3897 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3898 opt_rx->sack_ok) {
3899 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3900 }
3901 break;
3902#ifdef CONFIG_TCP_MD5SIG
3903 case TCPOPT_MD5SIG:
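			/* The MD5 signature is verified elsewhere in the receive path; nothing to parse here. */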
3904
3905
3906
3907
3908 break;
3909#endif
3910 case TCPOPT_COOKIE:
3911
3912
3913 switch (opsize) {
3914 case TCPOLEN_COOKIE_BASE:
3915
3916 break;
3917 case TCPOLEN_COOKIE_PAIR:
3918
3919 break;
3920 case TCPOLEN_COOKIE_MIN+0:
3921 case TCPOLEN_COOKIE_MIN+2:
3922 case TCPOLEN_COOKIE_MIN+4:
3923 case TCPOLEN_COOKIE_MIN+6:
3924 case TCPOLEN_COOKIE_MAX:
3925
3926 opt_rx->cookie_plus = opsize;
3927 *hvpp = ptr;
3928 break;
3929 default:
3930
3931 break;
3932 }
3933 break;
3934 }
3935
3936 ptr += opsize-2;
3937 length -= opsize;
3938 }
3939 }
3940}
3941EXPORT_SYMBOL(tcp_parse_options);
3942
3943static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
3944{
3945 const __be32 *ptr = (const __be32 *)(th + 1);
3946
3947 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
3948 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
3949 tp->rx_opt.saw_tstamp = 1;
3950 ++ptr;
3951 tp->rx_opt.rcv_tsval = ntohl(*ptr);
3952 ++ptr;
3953 tp->rx_opt.rcv_tsecr = ntohl(*ptr);
3954 return 1;
3955 }
3956 return 0;
3957}
3958
3959
3960
3961
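/* Fast parse of options: expects either no options or just a timestamp, and falls back to the full parser otherwise. */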
3962static int tcp_fast_parse_options(const struct sk_buff *skb,
3963 const struct tcphdr *th,
3964 struct tcp_sock *tp, const u8 **hvpp)
3965{
3966
3967
3968
3969 if (th->doff == (sizeof(*th) / 4)) {
3970 tp->rx_opt.saw_tstamp = 0;
3971 return 0;
3972 } else if (tp->rx_opt.tstamp_ok &&
3973 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
3974 if (tcp_parse_aligned_timestamp(tp, th))
3975 return 1;
3976 }
3977 tcp_parse_options(skb, &tp->rx_opt, hvpp, 1);
3978 return 1;
3979}
3980
3981#ifdef CONFIG_TCP_MD5SIG
3982
3983
3984
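/* Parse only the MD5 Signature option and return a pointer to its digest if the option is present and well formed. */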
3985const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
3986{
3987 int length = (th->doff << 2) - sizeof(*th);
3988 const u8 *ptr = (const u8 *)(th + 1);
3989
3990
3991 if (length < TCPOLEN_MD5SIG)
3992 return NULL;
3993
3994 while (length > 0) {
3995 int opcode = *ptr++;
3996 int opsize;
3997
3998 switch(opcode) {
3999 case TCPOPT_EOL:
4000 return NULL;
4001 case TCPOPT_NOP:
4002 length--;
4003 continue;
4004 default:
4005 opsize = *ptr++;
4006 if (opsize < 2 || opsize > length)
4007 return NULL;
4008 if (opcode == TCPOPT_MD5SIG)
4009 return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
4010 }
4011 ptr += opsize - 2;
4012 length -= opsize;
4013 }
4014 return NULL;
4015}
4016EXPORT_SYMBOL(tcp_parse_md5sig_option);
4017#endif
4018
4019static inline void tcp_store_ts_recent(struct tcp_sock *tp)
4020{
4021 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
4022 tp->rx_opt.ts_recent_stamp = get_seconds();
4023}
4024
4025static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
4026{
4027 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
4028
4029
4030
4031
4032
4033
4034
4035 if (tcp_paws_check(&tp->rx_opt, 0))
4036 tcp_store_ts_recent(tp);
4037 }
4038}
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
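/* A "disordered" ACK: a pure, in-sequence duplicate ACK whose timestamp is only slightly old. Such ACKs are tolerated by the PAWS check below. */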
4063static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
4064{
4065 const struct tcp_sock *tp = tcp_sk(sk);
4066 const struct tcphdr *th = tcp_hdr(skb);
4067 u32 seq = TCP_SKB_CB(skb)->seq;
4068 u32 ack = TCP_SKB_CB(skb)->ack_seq;
4069
4070 return (
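		/* ... it is a pure ACK, carrying no data and in sequence ... */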
4071 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
4072
4073
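		/* ... it acknowledges nothing new (duplicate ACK) ... */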
4074 ack == tp->snd_una &&
4075
4076
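		/* ... it does not update the send window ... */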
4077 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
4078
4079
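		/* ... and its timestamp is stale by at most roughly one RTO. */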
4080 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
4081}
4082
4083static inline int tcp_paws_discard(const struct sock *sk,
4084 const struct sk_buff *skb)
4085{
4086 const struct tcp_sock *tp = tcp_sk(sk);
4087
4088 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
4089 !tcp_disordered_ack(sk, skb);
4090}
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
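/* Validate the incoming segment's sequence numbers against the current receive window. */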
4105static inline int tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
4106{
4107 return !before(end_seq, tp->rcv_wup) &&
4108 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
4109}
4110
4111
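/* We have received an acceptable RST: report the proper error to the user and close the socket. */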
4112static void tcp_reset(struct sock *sk)
4113{
4114
4115 switch (sk->sk_state) {
4116 case TCP_SYN_SENT:
4117 sk->sk_err = ECONNREFUSED;
4118 break;
4119 case TCP_CLOSE_WAIT:
4120 sk->sk_err = EPIPE;
4121 break;
4122 case TCP_CLOSE:
4123 return;
4124 default:
4125 sk->sk_err = ECONNRESET;
4126 }
4127
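	/* This barrier pairs with the read barrier in tcp_poll(). */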
4128 smp_wmb();
4129
4130 if (!sock_flag(sk, SOCK_DEAD))
4131 sk->sk_error_report(sk);
4132
4133 tcp_done(sk);
4134}
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
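/* Process the FIN bit: schedule an ACK, shut down the receive side and move the connection state machine according to the current state. */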
4150static void tcp_fin(struct sock *sk)
4151{
4152 struct tcp_sock *tp = tcp_sk(sk);
4153
4154 inet_csk_schedule_ack(sk);
4155
4156 sk->sk_shutdown |= RCV_SHUTDOWN;
4157 sock_set_flag(sk, SOCK_DONE);
4158
4159 switch (sk->sk_state) {
4160 case TCP_SYN_RECV:
4161 case TCP_ESTABLISHED:
4162
4163 tcp_set_state(sk, TCP_CLOSE_WAIT);
4164 inet_csk(sk)->icsk_ack.pingpong = 1;
4165 break;
4166
4167 case TCP_CLOSE_WAIT:
4168 case TCP_CLOSING:
4169
4170
4171
4172 break;
4173 case TCP_LAST_ACK:
4174
4175 break;
4176
4177 case TCP_FIN_WAIT1:
4178
4179
4180
4181
4182 tcp_send_ack(sk);
4183 tcp_set_state(sk, TCP_CLOSING);
4184 break;
4185 case TCP_FIN_WAIT2:
4186
4187 tcp_send_ack(sk);
4188 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
4189 break;
4190 default:
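		/* The remaining states should never see a FIN here. */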
4191
4192
4193
4194 printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
4195 __func__, sk->sk_state);
4196 break;
4197 }
4198
4199
4200
4201
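	/* It is possible that the out-of-order queue holds data beyond the FIN; just drop it. */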
4202 __skb_queue_purge(&tp->out_of_order_queue);
4203 if (tcp_is_sack(tp))
4204 tcp_sack_reset(&tp->rx_opt);
4205 sk_mem_reclaim(sk);
4206
4207 if (!sock_flag(sk, SOCK_DEAD)) {
4208 sk->sk_state_change(sk);
4209
4210
4211 if (sk->sk_shutdown == SHUTDOWN_MASK ||
4212 sk->sk_state == TCP_CLOSE)
4213 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
4214 else
4215 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
4216 }
4217}
4218
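/* Grow an existing SACK block to cover [seq, end_seq] when the ranges overlap or are adjacent. */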
4219static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4220 u32 end_seq)
4221{
4222 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
4223 if (before(seq, sp->start_seq))
4224 sp->start_seq = seq;
4225 if (after(end_seq, sp->end_seq))
4226 sp->end_seq = end_seq;
4227 return 1;
4228 }
4229 return 0;
4230}
4231
4232static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4233{
4234 struct tcp_sock *tp = tcp_sk(sk);
4235
4236 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4237 int mib_idx;
4238
4239 if (before(seq, tp->rcv_nxt))
4240 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
4241 else
4242 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
4243
4244 NET_INC_STATS_BH(sock_net(sk), mib_idx);
4245
4246 tp->rx_opt.dsack = 1;
4247 tp->duplicate_sack[0].start_seq = seq;
4248 tp->duplicate_sack[0].end_seq = end_seq;
4249 }
4250}
4251
4252static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4253{
4254 struct tcp_sock *tp = tcp_sk(sk);
4255
4256 if (!tp->rx_opt.dsack)
4257 tcp_dsack_set(sk, seq, end_seq);
4258 else
4259 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
4260}
4261
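/* A duplicate or out-of-window segment arrived: ACK immediately, and when it (partly) duplicates already received data switch to quick ACKs and record a D-SACK if enabled. */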
4262static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
4263{
4264 struct tcp_sock *tp = tcp_sk(sk);
4265
4266 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4267 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4268 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4269 tcp_enter_quickack_mode(sk);
4270
4271 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4272 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4273
4274 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
4275 end_seq = tp->rcv_nxt;
4276 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
4277 }
4278 }
4279
4280 tcp_send_ack(sk);
4281}
4282
4283
4284
4285
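/* These routines update the SACK blocks as out-of-order data arrives or in-order data closes up the sequence space. */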
4286static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4287{
4288 int this_sack;
4289 struct tcp_sack_block *sp = &tp->selective_acks[0];
4290 struct tcp_sack_block *swalk = sp + 1;
4291
4292
4293
4294
4295 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
4296 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
4297 int i;
4298
4299
4300
4301
4302 tp->rx_opt.num_sacks--;
4303 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4304 sp[i] = sp[i + 1];
4305 continue;
4306 }
4307 this_sack++, swalk++;
4308 }
4309}
4310
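/* Record a SACK block for newly arrived out-of-order data, extending or re-ordering the existing blocks as needed. */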
4311static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4312{
4313 struct tcp_sock *tp = tcp_sk(sk);
4314 struct tcp_sack_block *sp = &tp->selective_acks[0];
4315 int cur_sacks = tp->rx_opt.num_sacks;
4316 int this_sack;
4317
4318 if (!cur_sacks)
4319 goto new_sack;
4320
4321 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
4322 if (tcp_sack_extend(sp, seq, end_seq)) {
4323
4324 for (; this_sack > 0; this_sack--, sp--)
4325 swap(*sp, *(sp - 1));
4326 if (cur_sacks > 1)
4327 tcp_sack_maybe_coalesce(tp);
4328 return;
4329 }
4330 }
4331
4332
4333
4334
4335
4336
4337
4338 if (this_sack >= TCP_NUM_SACKS) {
4339 this_sack--;
4340 tp->rx_opt.num_sacks--;
4341 sp--;
4342 }
4343 for (; this_sack > 0; this_sack--, sp--)
4344 *sp = *(sp - 1);
4345
4346new_sack:
4347
4348 sp->start_seq = seq;
4349 sp->end_seq = end_seq;
4350 tp->rx_opt.num_sacks++;
4351}
4352
4353
4354
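/* RCV.NXT advanced: drop SACK blocks now covered by the left edge of the window; if the out-of-order queue is empty, drop them all. */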
4355static void tcp_sack_remove(struct tcp_sock *tp)
4356{
4357 struct tcp_sack_block *sp = &tp->selective_acks[0];
4358 int num_sacks = tp->rx_opt.num_sacks;
4359 int this_sack;
4360
4361
4362 if (skb_queue_empty(&tp->out_of_order_queue)) {
4363 tp->rx_opt.num_sacks = 0;
4364 return;
4365 }
4366
4367 for (this_sack = 0; this_sack < num_sacks;) {
4368
4369 if (!before(tp->rcv_nxt, sp->start_seq)) {
4370 int i;
4371
4372
4373 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4374
4375
4376 for (i=this_sack+1; i < num_sacks; i++)
4377 tp->selective_acks[i-1] = tp->selective_acks[i];
4378 num_sacks--;
4379 continue;
4380 }
4381 this_sack++;
4382 sp++;
4383 }
4384 tp->rx_opt.num_sacks = num_sacks;
4385}
4386
4387
4388
4389
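/* Move whatever has become in-sequence from the out-of-order queue into the receive queue. */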
4390static void tcp_ofo_queue(struct sock *sk)
4391{
4392 struct tcp_sock *tp = tcp_sk(sk);
4393 __u32 dsack_high = tp->rcv_nxt;
4394 struct sk_buff *skb;
4395
4396 while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
4397 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
4398 break;
4399
4400 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
4401 __u32 dsack = dsack_high;
4402 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
4403 dsack_high = TCP_SKB_CB(skb)->end_seq;
4404 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->