1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64#include <linux/mm.h>
65#include <linux/module.h>
66#include <linux/sysctl.h>
67#include <linux/kernel.h>
68#include <net/dst.h>
69#include <net/tcp.h>
70#include <net/inet_common.h>
71#include <linux/ipsec.h>
72#include <asm/unaligned.h>
73#include <net/netdma.h>
74
75int sysctl_tcp_timestamps __read_mostly = 1;
76int sysctl_tcp_window_scaling __read_mostly = 1;
77int sysctl_tcp_sack __read_mostly = 1;
78int sysctl_tcp_fack __read_mostly = 1;
79int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
80int sysctl_tcp_ecn __read_mostly = 2;
81int sysctl_tcp_dsack __read_mostly = 1;
82int sysctl_tcp_app_win __read_mostly = 31;
83int sysctl_tcp_adv_win_scale __read_mostly = 2;
84
85int sysctl_tcp_stdurg __read_mostly;
86int sysctl_tcp_rfc1337 __read_mostly;
87int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
88int sysctl_tcp_frto __read_mostly = 2;
89int sysctl_tcp_frto_response __read_mostly;
90int sysctl_tcp_nometrics_save __read_mostly;
91
92int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
93int sysctl_tcp_abc __read_mostly;
94
/*
 * Single-bit flags collected in an int while an incoming ACK is being
 * processed (see the "flag" member of struct tcp_sacktag_state).
 */
#define FLAG_DATA			0x01
#define FLAG_WIN_UPDATE			0x02
#define FLAG_DATA_ACKED			0x04
#define FLAG_RETRANS_DATA_ACKED		0x08
#define FLAG_SYN_ACKED			0x10
#define FLAG_DATA_SACKED		0x20	/* set when a segment gets newly SACKed */
#define FLAG_ECE			0x40
#define FLAG_DATA_LOST			0x80
#define FLAG_SLOWPATH			0x100
#define FLAG_ONLY_ORIG_SACKED		0x200	/* SACKed a never-retransmitted skb below frto_highmark */
#define FLAG_SND_UNA_ADVANCED		0x400
#define FLAG_DSACKING_ACK		0x800
#define FLAG_NONHEAD_RETRANS_ACKED	0x1000
#define FLAG_SACK_RENEGING		0x2000

/* Convenience unions of the bits above. */
#define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE)
#define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)
#define FLAG_ANY_PROGRESS	(FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)

/* Header-flag masks; TCP_REMNANT is tested in tcp_measure_rcv_mss(). */
#define TCP_REMNANT	(TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS	(~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
118
119
120
121
122static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
123{
124 struct inet_connection_sock *icsk = inet_csk(sk);
125 const unsigned int lss = icsk->icsk_ack.last_seg_size;
126 unsigned int len;
127
128 icsk->icsk_ack.last_seg_size = 0;
129
130
131
132
133 len = skb_shinfo(skb)->gso_size ? : skb->len;
134 if (len >= icsk->icsk_ack.rcv_mss) {
135 icsk->icsk_ack.rcv_mss = len;
136 } else {
137
138
139
140
141
142 len += skb->data - skb_transport_header(skb);
143 if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) ||
144
145
146
147
148
149 (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
150 !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
151
152
153
154
155 len -= tcp_sk(sk)->tcp_header_len;
156 icsk->icsk_ack.last_seg_size = len;
157 if (len == lss) {
158 icsk->icsk_ack.rcv_mss = len;
159 return;
160 }
161 }
162 if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
163 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
164 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
165 }
166}
167
168static void tcp_incr_quickack(struct sock *sk)
169{
170 struct inet_connection_sock *icsk = inet_csk(sk);
171 unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
172
173 if (quickacks == 0)
174 quickacks = 2;
175 if (quickacks > icsk->icsk_ack.quick)
176 icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
177}
178
179void tcp_enter_quickack_mode(struct sock *sk)
180{
181 struct inet_connection_sock *icsk = inet_csk(sk);
182 tcp_incr_quickack(sk);
183 icsk->icsk_ack.pingpong = 0;
184 icsk->icsk_ack.ato = TCP_ATO_MIN;
185}
186
187
188
189
190
191static inline int tcp_in_quickack_mode(const struct sock *sk)
192{
193 const struct inet_connection_sock *icsk = inet_csk(sk);
194 return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
195}
196
197static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
198{
199 if (tp->ecn_flags & TCP_ECN_OK)
200 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
201}
202
203static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb)
204{
205 if (tcp_hdr(skb)->cwr)
206 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
207}
208
209static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
210{
211 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
212}
213
214static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
215{
216 if (tp->ecn_flags & TCP_ECN_OK) {
217 if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))
218 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
219
220
221
222 else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags)))
223 tcp_enter_quickack_mode((struct sock *)tp);
224 }
225}
226
227static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th)
228{
229 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
230 tp->ecn_flags &= ~TCP_ECN_OK;
231}
232
233static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th)
234{
235 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
236 tp->ecn_flags &= ~TCP_ECN_OK;
237}
238
239static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
240{
241 if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
242 return 1;
243 return 0;
244}
245
246
247
248
249
250
251static void tcp_fixup_sndbuf(struct sock *sk)
252{
253 int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
254 sizeof(struct sk_buff);
255
256 if (sk->sk_sndbuf < 3 * sndmem)
257 sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]);
258}
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
287{
288 struct tcp_sock *tp = tcp_sk(sk);
289
290 int truesize = tcp_win_from_space(skb->truesize) >> 1;
291 int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
292
293 while (tp->rcv_ssthresh <= window) {
294 if (truesize <= skb->len)
295 return 2 * inet_csk(sk)->icsk_ack.rcv_mss;
296
297 truesize >>= 1;
298 window >>= 1;
299 }
300 return 0;
301}
302
303static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
304{
305 struct tcp_sock *tp = tcp_sk(sk);
306
307
308 if (tp->rcv_ssthresh < tp->window_clamp &&
309 (int)tp->rcv_ssthresh < tcp_space(sk) &&
310 !tcp_memory_pressure) {
311 int incr;
312
313
314
315
316 if (tcp_win_from_space(skb->truesize) <= skb->len)
317 incr = 2 * tp->advmss;
318 else
319 incr = __tcp_grow_window(sk, skb);
320
321 if (incr) {
322 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
323 tp->window_clamp);
324 inet_csk(sk)->icsk_ack.quick |= 1;
325 }
326 }
327}
328
329
330
331static void tcp_fixup_rcvbuf(struct sock *sk)
332{
333 struct tcp_sock *tp = tcp_sk(sk);
334 int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
335
336
337
338
339
340 while (tcp_win_from_space(rcvmem) < tp->advmss)
341 rcvmem += 128;
342 if (sk->sk_rcvbuf < 4 * rcvmem)
343 sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
344}
345
346
347
348
349static void tcp_init_buffer_space(struct sock *sk)
350{
351 struct tcp_sock *tp = tcp_sk(sk);
352 int maxwin;
353
354 if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
355 tcp_fixup_rcvbuf(sk);
356 if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
357 tcp_fixup_sndbuf(sk);
358
359 tp->rcvq_space.space = tp->rcv_wnd;
360
361 maxwin = tcp_full_space(sk);
362
363 if (tp->window_clamp >= maxwin) {
364 tp->window_clamp = maxwin;
365
366 if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
367 tp->window_clamp = max(maxwin -
368 (maxwin >> sysctl_tcp_app_win),
369 4 * tp->advmss);
370 }
371
372
373 if (sysctl_tcp_app_win &&
374 tp->window_clamp > 2 * tp->advmss &&
375 tp->window_clamp + tp->advmss > maxwin)
376 tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
377
378 tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
379 tp->snd_cwnd_stamp = tcp_time_stamp;
380}
381
382
383static void tcp_clamp_window(struct sock *sk)
384{
385 struct tcp_sock *tp = tcp_sk(sk);
386 struct inet_connection_sock *icsk = inet_csk(sk);
387
388 icsk->icsk_ack.quick = 0;
389
390 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
391 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
392 !tcp_memory_pressure &&
393 atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
394 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
395 sysctl_tcp_rmem[2]);
396 }
397 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
398 tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
399}
400
401
402
403
404
405
406
407
408void tcp_initialize_rcv_mss(struct sock *sk)
409{
410 struct tcp_sock *tp = tcp_sk(sk);
411 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
412
413 hint = min(hint, tp->rcv_wnd / 2);
414 hint = min(hint, TCP_MIN_RCVMSS);
415 hint = max(hint, TCP_MIN_MSS);
416
417 inet_csk(sk)->icsk_ack.rcv_mss = hint;
418}
419
420
421
422
423
424
425
426
427
428
429
430
431static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
432{
433 u32 new_sample = tp->rcv_rtt_est.rtt;
434 long m = sample;
435
436 if (m == 0)
437 m = 1;
438
439 if (new_sample != 0) {
440
441
442
443
444
445
446
447
448
449
450 if (!win_dep) {
451 m -= (new_sample >> 3);
452 new_sample += m;
453 } else if (m < new_sample)
454 new_sample = m << 3;
455 } else {
456
457 new_sample = m << 3;
458 }
459
460 if (tp->rcv_rtt_est.rtt != new_sample)
461 tp->rcv_rtt_est.rtt = new_sample;
462}
463
464static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
465{
466 if (tp->rcv_rtt_est.time == 0)
467 goto new_measure;
468 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
469 return;
470 tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_est.time, 1);
471
472new_measure:
473 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
474 tp->rcv_rtt_est.time = tcp_time_stamp;
475}
476
477static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
478 const struct sk_buff *skb)
479{
480 struct tcp_sock *tp = tcp_sk(sk);
481 if (tp->rx_opt.rcv_tsecr &&
482 (TCP_SKB_CB(skb)->end_seq -
483 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
484 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
485}
486
487
488
489
490
491void tcp_rcv_space_adjust(struct sock *sk)
492{
493 struct tcp_sock *tp = tcp_sk(sk);
494 int time;
495 int space;
496
497 if (tp->rcvq_space.time == 0)
498 goto new_measure;
499
500 time = tcp_time_stamp - tp->rcvq_space.time;
501 if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
502 return;
503
504 space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
505
506 space = max(tp->rcvq_space.space, space);
507
508 if (tp->rcvq_space.space != space) {
509 int rcvmem;
510
511 tp->rcvq_space.space = space;
512
513 if (sysctl_tcp_moderate_rcvbuf &&
514 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
515 int new_clamp = space;
516
517
518
519
520
521 space /= tp->advmss;
522 if (!space)
523 space = 1;
524 rcvmem = (tp->advmss + MAX_TCP_HEADER +
525 16 + sizeof(struct sk_buff));
526 while (tcp_win_from_space(rcvmem) < tp->advmss)
527 rcvmem += 128;
528 space *= rcvmem;
529 space = min(space, sysctl_tcp_rmem[2]);
530 if (space > sk->sk_rcvbuf) {
531 sk->sk_rcvbuf = space;
532
533
534 tp->window_clamp = new_clamp;
535 }
536 }
537 }
538
539new_measure:
540 tp->rcvq_space.seq = tp->copied_seq;
541 tp->rcvq_space.time = tcp_time_stamp;
542}
543
544
545
546
547
548
549
550
551
552
553
554static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
555{
556 struct tcp_sock *tp = tcp_sk(sk);
557 struct inet_connection_sock *icsk = inet_csk(sk);
558 u32 now;
559
560 inet_csk_schedule_ack(sk);
561
562 tcp_measure_rcv_mss(sk, skb);
563
564 tcp_rcv_rtt_measure(tp);
565
566 now = tcp_time_stamp;
567
568 if (!icsk->icsk_ack.ato) {
569
570
571
572 tcp_incr_quickack(sk);
573 icsk->icsk_ack.ato = TCP_ATO_MIN;
574 } else {
575 int m = now - icsk->icsk_ack.lrcvtime;
576
577 if (m <= TCP_ATO_MIN / 2) {
578
579 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
580 } else if (m < icsk->icsk_ack.ato) {
581 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
582 if (icsk->icsk_ack.ato > icsk->icsk_rto)
583 icsk->icsk_ack.ato = icsk->icsk_rto;
584 } else if (m > icsk->icsk_rto) {
585
586
587
588 tcp_incr_quickack(sk);
589 sk_mem_reclaim(sk);
590 }
591 }
592 icsk->icsk_ack.lrcvtime = now;
593
594 TCP_ECN_check_ce(tp, skb);
595
596 if (skb->len >= 128)
597 tcp_grow_window(sk, skb);
598}
599
600
601
602
603
604
605
606
607
608
609static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
610{
611 struct tcp_sock *tp = tcp_sk(sk);
612 long m = mrtt;
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630 if (m == 0)
631 m = 1;
632 if (tp->srtt != 0) {
633 m -= (tp->srtt >> 3);
634 tp->srtt += m;
635 if (m < 0) {
636 m = -m;
637 m -= (tp->mdev >> 2);
638
639
640
641
642
643
644
645
646 if (m > 0)
647 m >>= 3;
648 } else {
649 m -= (tp->mdev >> 2);
650 }
651 tp->mdev += m;
652 if (tp->mdev > tp->mdev_max) {
653 tp->mdev_max = tp->mdev;
654 if (tp->mdev_max > tp->rttvar)
655 tp->rttvar = tp->mdev_max;
656 }
657 if (after(tp->snd_una, tp->rtt_seq)) {
658 if (tp->mdev_max < tp->rttvar)
659 tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2;
660 tp->rtt_seq = tp->snd_nxt;
661 tp->mdev_max = tcp_rto_min(sk);
662 }
663 } else {
664
665 tp->srtt = m << 3;
666 tp->mdev = m << 1;
667 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
668 tp->rtt_seq = tp->snd_nxt;
669 }
670}
671
672
673
674
675static inline void tcp_set_rto(struct sock *sk)
676{
677 const struct tcp_sock *tp = tcp_sk(sk);
678
679
680
681
682
683
684
685
686
687
688 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
689
690
691
692
693
694
695
696
697
698
699 tcp_bound_rto(sk);
700}
701
702
703
704
705
706void tcp_update_metrics(struct sock *sk)
707{
708 struct tcp_sock *tp = tcp_sk(sk);
709 struct dst_entry *dst = __sk_dst_get(sk);
710
711 if (sysctl_tcp_nometrics_save)
712 return;
713
714 dst_confirm(dst);
715
716 if (dst && (dst->flags & DST_HOST)) {
717 const struct inet_connection_sock *icsk = inet_csk(sk);
718 int m;
719 unsigned long rtt;
720
721 if (icsk->icsk_backoff || !tp->srtt) {
722
723
724
725
726 if (!(dst_metric_locked(dst, RTAX_RTT)))
727 dst->metrics[RTAX_RTT - 1] = 0;
728 return;
729 }
730
731 rtt = dst_metric_rtt(dst, RTAX_RTT);
732 m = rtt - tp->srtt;
733
734
735
736
737
738 if (!(dst_metric_locked(dst, RTAX_RTT))) {
739 if (m <= 0)
740 set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt);
741 else
742 set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3));
743 }
744
745 if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
746 unsigned long var;
747 if (m < 0)
748 m = -m;
749
750
751 m >>= 1;
752 if (m < tp->mdev)
753 m = tp->mdev;
754
755 var = dst_metric_rtt(dst, RTAX_RTTVAR);
756 if (m >= var)
757 var = m;
758 else
759 var -= (var - m) >> 2;
760
761 set_dst_metric_rtt(dst, RTAX_RTTVAR, var);
762 }
763
764 if (tcp_in_initial_slowstart(tp)) {
765
766 if (dst_metric(dst, RTAX_SSTHRESH) &&
767 !dst_metric_locked(dst, RTAX_SSTHRESH) &&
768 (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH))
769 dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1;
770 if (!dst_metric_locked(dst, RTAX_CWND) &&
771 tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
772 dst->metrics[RTAX_CWND - 1] = tp->snd_cwnd;
773 } else if (tp->snd_cwnd > tp->snd_ssthresh &&
774 icsk->icsk_ca_state == TCP_CA_Open) {
775
776 if (!dst_metric_locked(dst, RTAX_SSTHRESH))
777 dst->metrics[RTAX_SSTHRESH-1] =
778 max(tp->snd_cwnd >> 1, tp->snd_ssthresh);
779 if (!dst_metric_locked(dst, RTAX_CWND))
780 dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1;
781 } else {
782
783
784
785 if (!dst_metric_locked(dst, RTAX_CWND))
786 dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1;
787 if (dst_metric(dst, RTAX_SSTHRESH) &&
788 !dst_metric_locked(dst, RTAX_SSTHRESH) &&
789 tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH))
790 dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh;
791 }
792
793 if (!dst_metric_locked(dst, RTAX_REORDERING)) {
794 if (dst_metric(dst, RTAX_REORDERING) < tp->reordering &&
795 tp->reordering != sysctl_tcp_reordering)
796 dst->metrics[RTAX_REORDERING-1] = tp->reordering;
797 }
798 }
799}
800
801
802
803
804
805
806
807
808
809
810__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
811{
812 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
813
814 if (!cwnd) {
815 if (tp->mss_cache > 1460)
816 cwnd = 2;
817 else
818 cwnd = (tp->mss_cache > 1095) ? 3 : 4;
819 }
820 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
821}
822
823
824void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
825{
826 struct tcp_sock *tp = tcp_sk(sk);
827 const struct inet_connection_sock *icsk = inet_csk(sk);
828
829 tp->prior_ssthresh = 0;
830 tp->bytes_acked = 0;
831 if (icsk->icsk_ca_state < TCP_CA_CWR) {
832 tp->undo_marker = 0;
833 if (set_ssthresh)
834 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
835 tp->snd_cwnd = min(tp->snd_cwnd,
836 tcp_packets_in_flight(tp) + 1U);
837 tp->snd_cwnd_cnt = 0;
838 tp->high_seq = tp->snd_nxt;
839 tp->snd_cwnd_stamp = tcp_time_stamp;
840 TCP_ECN_queue_cwr(tp);
841
842 tcp_set_ca_state(sk, TCP_CA_CWR);
843 }
844}
845
846
847
848
849
850static void tcp_disable_fack(struct tcp_sock *tp)
851{
852
853 if (tcp_is_fack(tp))
854 tp->lost_skb_hint = NULL;
855 tp->rx_opt.sack_ok &= ~2;
856}
857
858
859static void tcp_dsack_seen(struct tcp_sock *tp)
860{
861 tp->rx_opt.sack_ok |= 4;
862}
863
864
865
866static void tcp_init_metrics(struct sock *sk)
867{
868 struct tcp_sock *tp = tcp_sk(sk);
869 struct dst_entry *dst = __sk_dst_get(sk);
870
871 if (dst == NULL)
872 goto reset;
873
874 dst_confirm(dst);
875
876 if (dst_metric_locked(dst, RTAX_CWND))
877 tp->snd_cwnd_clamp = dst_metric(dst, RTAX_CWND);
878 if (dst_metric(dst, RTAX_SSTHRESH)) {
879 tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH);
880 if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
881 tp->snd_ssthresh = tp->snd_cwnd_clamp;
882 }
883 if (dst_metric(dst, RTAX_REORDERING) &&
884 tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
885 tcp_disable_fack(tp);
886 tp->reordering = dst_metric(dst, RTAX_REORDERING);
887 }
888
889 if (dst_metric(dst, RTAX_RTT) == 0)
890 goto reset;
891
892 if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
893 goto reset;
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909 if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) {
910 tp->srtt = dst_metric_rtt(dst, RTAX_RTT);
911 tp->rtt_seq = tp->snd_nxt;
912 }
913 if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) {
914 tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR);
915 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
916 }
917 tcp_set_rto(sk);
918 if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
919 goto reset;
920
921cwnd:
922 tp->snd_cwnd = tcp_init_cwnd(tp, dst);
923 tp->snd_cwnd_stamp = tcp_time_stamp;
924 return;
925
926reset:
927
928
929
930
931 if (!tp->rx_opt.saw_tstamp && tp->srtt) {
932 tp->srtt = 0;
933 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
934 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
935 }
936 goto cwnd;
937}
938
939static void tcp_update_reordering(struct sock *sk, const int metric,
940 const int ts)
941{
942 struct tcp_sock *tp = tcp_sk(sk);
943 if (metric > tp->reordering) {
944 int mib_idx;
945
946 tp->reordering = min(TCP_MAX_REORDERING, metric);
947
948
949 if (ts)
950 mib_idx = LINUX_MIB_TCPTSREORDER;
951 else if (tcp_is_reno(tp))
952 mib_idx = LINUX_MIB_TCPRENOREORDER;
953 else if (tcp_is_fack(tp))
954 mib_idx = LINUX_MIB_TCPFACKREORDER;
955 else
956 mib_idx = LINUX_MIB_TCPSACKREORDER;
957
958 NET_INC_STATS_BH(sock_net(sk), mib_idx);
959#if FASTRETRANS_DEBUG > 1
960 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
961 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
962 tp->reordering,
963 tp->fackets_out,
964 tp->sacked_out,
965 tp->undo_marker ? tp->undo_retrans : 0);
966#endif
967 tcp_disable_fack(tp);
968 }
969}
970
971
972static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
973{
974 if ((tp->retransmit_skb_hint == NULL) ||
975 before(TCP_SKB_CB(skb)->seq,
976 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
977 tp->retransmit_skb_hint = skb;
978
979 if (!tp->lost_out ||
980 after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high))
981 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
982}
983
984static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
985{
986 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
987 tcp_verify_retransmit_hint(tp, skb);
988
989 tp->lost_out += tcp_skb_pcount(skb);
990 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
991 }
992}
993
994static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
995 struct sk_buff *skb)
996{
997 tcp_verify_retransmit_hint(tp, skb);
998
999 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
1000 tp->lost_out += tcp_skb_pcount(skb);
1001 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1002 }
1003}
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
1102 u32 start_seq, u32 end_seq)
1103{
1104
1105 if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
1106 return 0;
1107
1108
1109 if (!before(start_seq, tp->snd_nxt))
1110 return 0;
1111
1112
1113
1114
1115 if (after(start_seq, tp->snd_una))
1116 return 1;
1117
1118 if (!is_dsack || !tp->undo_marker)
1119 return 0;
1120
1121
1122 if (!after(end_seq, tp->snd_una))
1123 return 0;
1124
1125 if (!before(start_seq, tp->undo_marker))
1126 return 1;
1127
1128
1129 if (!after(end_seq, tp->undo_marker))
1130 return 0;
1131
1132
1133
1134
1135 return !before(start_seq, end_seq - tp->max_window);
1136}
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147static void tcp_mark_lost_retrans(struct sock *sk)
1148{
1149 const struct inet_connection_sock *icsk = inet_csk(sk);
1150 struct tcp_sock *tp = tcp_sk(sk);
1151 struct sk_buff *skb;
1152 int cnt = 0;
1153 u32 new_low_seq = tp->snd_nxt;
1154 u32 received_upto = tcp_highest_sack_seq(tp);
1155
1156 if (!tcp_is_fack(tp) || !tp->retrans_out ||
1157 !after(received_upto, tp->lost_retrans_low) ||
1158 icsk->icsk_ca_state != TCP_CA_Recovery)
1159 return;
1160
1161 tcp_for_write_queue(skb, sk) {
1162 u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
1163
1164 if (skb == tcp_send_head(sk))
1165 break;
1166 if (cnt == tp->retrans_out)
1167 break;
1168 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1169 continue;
1170
1171 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
1172 continue;
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185 if (after(received_upto, ack_seq)) {
1186 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1187 tp->retrans_out -= tcp_skb_pcount(skb);
1188
1189 tcp_skb_mark_lost_uncond_verify(tp, skb);
1190 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
1191 } else {
1192 if (before(ack_seq, new_low_seq))
1193 new_low_seq = ack_seq;
1194 cnt += tcp_skb_pcount(skb);
1195 }
1196 }
1197
1198 if (tp->retrans_out)
1199 tp->lost_retrans_low = new_low_seq;
1200}
1201
1202static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
1203 struct tcp_sack_block_wire *sp, int num_sacks,
1204 u32 prior_snd_una)
1205{
1206 struct tcp_sock *tp = tcp_sk(sk);
1207 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
1208 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
1209 int dup_sack = 0;
1210
1211 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1212 dup_sack = 1;
1213 tcp_dsack_seen(tp);
1214 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
1215 } else if (num_sacks > 1) {
1216 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
1217 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
1218
1219 if (!after(end_seq_0, end_seq_1) &&
1220 !before(start_seq_0, start_seq_1)) {
1221 dup_sack = 1;
1222 tcp_dsack_seen(tp);
1223 NET_INC_STATS_BH(sock_net(sk),
1224 LINUX_MIB_TCPDSACKOFORECV);
1225 }
1226 }
1227
1228
1229 if (dup_sack &&
1230 !after(end_seq_0, prior_snd_una) &&
1231 after(end_seq_0, tp->undo_marker))
1232 tp->undo_retrans--;
1233
1234 return dup_sack;
1235}
1236
/* Running state shared by the SACK tagging helpers during one walk. */
struct tcp_sacktag_state {
	int reord;	/* lowest fack_count at which reordering was seen */
	int fack_count;	/* packets counted so far while walking the queue */
	int flag;	/* FLAG_* bits collected while tagging */
};
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1252 u32 start_seq, u32 end_seq)
1253{
1254 int in_sack, err;
1255 unsigned int pkt_len;
1256 unsigned int mss;
1257
1258 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1259 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1260
1261 if (tcp_skb_pcount(skb) > 1 && !in_sack &&
1262 after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
1263 mss = tcp_skb_mss(skb);
1264 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1265
1266 if (!in_sack) {
1267 pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
1268 if (pkt_len < mss)
1269 pkt_len = mss;
1270 } else {
1271 pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
1272 if (pkt_len < mss)
1273 return -EINVAL;
1274 }
1275
1276
1277
1278
1279 if (pkt_len > mss) {
1280 unsigned int new_len = (pkt_len / mss) * mss;
1281 if (!in_sack && new_len < pkt_len) {
1282 new_len += mss;
1283 if (new_len > skb->len)
1284 return 0;
1285 }
1286 pkt_len = new_len;
1287 }
1288 err = tcp_fragment(sk, skb, pkt_len, mss);
1289 if (err < 0)
1290 return err;
1291 }
1292
1293 return in_sack;
1294}
1295
1296static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
1297 struct tcp_sacktag_state *state,
1298 int dup_sack, int pcount)
1299{
1300 struct tcp_sock *tp = tcp_sk(sk);
1301 u8 sacked = TCP_SKB_CB(skb)->sacked;
1302 int fack_count = state->fack_count;
1303
1304
1305 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1306 if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
1307 tp->undo_retrans--;
1308 if (sacked & TCPCB_SACKED_ACKED)
1309 state->reord = min(fack_count, state->reord);
1310 }
1311
1312
1313 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1314 return sacked;
1315
1316 if (!(sacked & TCPCB_SACKED_ACKED)) {
1317 if (sacked & TCPCB_SACKED_RETRANS) {
1318
1319
1320
1321
1322 if (sacked & TCPCB_LOST) {
1323 sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1324 tp->lost_out -= pcount;
1325 tp->retrans_out -= pcount;
1326 }
1327 } else {
1328 if (!(sacked & TCPCB_RETRANS)) {
1329
1330
1331
1332 if (before(TCP_SKB_CB(skb)->seq,
1333 tcp_highest_sack_seq(tp)))
1334 state->reord = min(fack_count,
1335 state->reord);
1336
1337
1338 if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
1339 state->flag |= FLAG_ONLY_ORIG_SACKED;
1340 }
1341
1342 if (sacked & TCPCB_LOST) {
1343 sacked &= ~TCPCB_LOST;
1344 tp->lost_out -= pcount;
1345 }
1346 }
1347
1348 sacked |= TCPCB_SACKED_ACKED;
1349 state->flag |= FLAG_DATA_SACKED;
1350 tp->sacked_out += pcount;
1351
1352 fack_count += pcount;
1353
1354
1355 if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
1356 before(TCP_SKB_CB(skb)->seq,
1357 TCP_SKB_CB(tp->lost_skb_hint)->seq))
1358 tp->lost_cnt_hint += pcount;
1359
1360 if (fack_count > tp->fackets_out)
1361 tp->fackets_out = fack_count;
1362 }
1363
1364
1365
1366
1367
1368 if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
1369 sacked &= ~TCPCB_SACKED_RETRANS;
1370 tp->retrans_out -= pcount;
1371 }
1372
1373 return sacked;
1374}
1375
1376static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1377 struct tcp_sacktag_state *state,
1378 unsigned int pcount, int shifted, int mss,
1379 int dup_sack)
1380{
1381 struct tcp_sock *tp = tcp_sk(sk);
1382 struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
1383
1384 BUG_ON(!pcount);
1385
1386
1387 if (!tcp_is_fack(tp) && tcp_is_sack(tp) && tp->lost_skb_hint &&
1388 !before(TCP_SKB_CB(tp->lost_skb_hint)->seq, TCP_SKB_CB(skb)->seq))
1389 tp->lost_cnt_hint += pcount;
1390
1391 TCP_SKB_CB(prev)->end_seq += shifted;
1392 TCP_SKB_CB(skb)->seq += shifted;
1393
1394 skb_shinfo(prev)->gso_segs += pcount;
1395 BUG_ON(skb_shinfo(skb)->gso_segs < pcount);
1396 skb_shinfo(skb)->gso_segs -= pcount;
1397
1398
1399
1400
1401
1402
1403 if (!skb_shinfo(prev)->gso_size) {
1404 skb_shinfo(prev)->gso_size = mss;
1405 skb_shinfo(prev)->gso_type = sk->sk_gso_type;
1406 }
1407
1408
1409 if (skb_shinfo(skb)->gso_segs <= 1) {
1410 skb_shinfo(skb)->gso_size = 0;
1411 skb_shinfo(skb)->gso_type = 0;
1412 }
1413
1414
1415 tcp_sacktag_one(skb, sk, state, dup_sack, pcount);
1416
1417
1418 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
1419
1420 if (skb->len > 0) {
1421 BUG_ON(!tcp_skb_pcount(skb));
1422 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED);
1423 return 0;
1424 }
1425
1426
1427
1428 if (skb == tp->retransmit_skb_hint)
1429 tp->retransmit_skb_hint = prev;
1430 if (skb == tp->scoreboard_skb_hint)
1431 tp->scoreboard_skb_hint = prev;
1432 if (skb == tp->lost_skb_hint) {
1433 tp->lost_skb_hint = prev;
1434 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
1435 }
1436
1437 TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags;
1438 if (skb == tcp_highest_sack(sk))
1439 tcp_advance_highest_sack(sk, skb);
1440
1441 tcp_unlink_write_queue(skb, sk);
1442 sk_wmem_free_skb(sk, skb);
1443
1444 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED);
1445
1446 return 1;
1447}
1448
1449
1450
1451
1452static int tcp_skb_seglen(struct sk_buff *skb)
1453{
1454 return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
1455}
1456
1457
/*
 * An skb is eligible for shifting only when it has no linear header data
 * and is non-linear.  Returns 1 if so, 0 otherwise.
 */
static int skb_can_shift(struct sk_buff *skb)
{
	if (skb_headlen(skb))
		return 0;

	return skb_is_nonlinear(skb) != 0;
}
1462
1463
1464
1465
1466static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1467 struct tcp_sacktag_state *state,
1468 u32 start_seq, u32 end_seq,
1469 int dup_sack)
1470{
1471 struct tcp_sock *tp = tcp_sk(sk);
1472 struct sk_buff *prev;
1473 int mss;
1474 int pcount = 0;
1475 int len;
1476 int in_sack;
1477
1478 if (!sk_can_gso(sk))
1479 goto fallback;
1480
1481
1482 if (!dup_sack &&
1483 (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
1484 goto fallback;
1485 if (!skb_can_shift(skb))
1486 goto fallback;
1487
1488 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1489 goto fallback;
1490
1491
1492 if (unlikely(skb == tcp_write_queue_head(sk)))
1493 goto fallback;
1494 prev = tcp_write_queue_prev(sk, skb);
1495
1496 if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
1497 goto fallback;
1498
1499 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1500 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1501
1502 if (in_sack) {
1503 len = skb->len;
1504 pcount = tcp_skb_pcount(skb);
1505 mss = tcp_skb_seglen(skb);
1506
1507
1508
1509
1510 if (mss != tcp_skb_seglen(prev))
1511 goto fallback;
1512 } else {
1513 if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
1514 goto noop;
1515
1516
1517
1518
1519 if (tcp_skb_pcount(skb) <= 1)
1520 goto noop;
1521
1522 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1523 if (!in_sack) {
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535 goto fallback;
1536 }
1537
1538 len = end_seq - TCP_SKB_CB(skb)->seq;
1539 BUG_ON(len < 0);
1540 BUG_ON(len > skb->len);
1541
1542
1543
1544
1545
1546 mss = tcp_skb_mss(skb);
1547
1548
1549
1550
1551 if (mss != tcp_skb_seglen(prev))
1552 goto fallback;
1553
1554 if (len == mss) {
1555 pcount = 1;
1556 } else if (len < mss) {
1557 goto noop;
1558 } else {
1559 pcount = len / mss;
1560 len = pcount * mss;
1561 }
1562 }
1563
1564 if (!skb_shift(prev, skb, len))
1565 goto fallback;
1566 if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
1567 goto out;
1568
1569
1570
1571
1572 if (prev == tcp_write_queue_tail(sk))
1573 goto out;
1574 skb = tcp_write_queue_next(sk, prev);
1575
1576 if (!skb_can_shift(skb) ||
1577 (skb == tcp_send_head(sk)) ||
1578 ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
1579 (mss != tcp_skb_seglen(skb)))
1580 goto out;
1581
1582 len = skb->len;
1583 if (skb_shift(prev, skb, len)) {
1584 pcount += tcp_skb_pcount(skb);
1585 tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
1586 }
1587
1588out:
1589 state->fack_count += pcount;
1590 return prev;
1591
1592noop:
1593 return skb;
1594
1595fallback:
1596 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
1597 return NULL;
1598}
1599
1600static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1601 struct tcp_sack_block *next_dup,
1602 struct tcp_sacktag_state *state,
1603 u32 start_seq, u32 end_seq,
1604 int dup_sack_in)
1605{
1606 struct tcp_sock *tp = tcp_sk(sk);
1607 struct sk_buff *tmp;
1608
1609 tcp_for_write_queue_from(skb, sk) {
1610 int in_sack = 0;
1611 int dup_sack = dup_sack_in;
1612
1613 if (skb == tcp_send_head(sk))
1614 break;
1615
1616
1617 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
1618 break;
1619
1620 if ((next_dup != NULL) &&
1621 before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
1622 in_sack = tcp_match_skb_to_sack(sk, skb,
1623 next_dup->start_seq,
1624 next_dup->end_seq);
1625 if (in_sack > 0)
1626 dup_sack = 1;
1627 }
1628
1629
1630
1631
1632
1633 if (in_sack <= 0) {
1634 tmp = tcp_shift_skb_data(sk, skb, state,
1635 start_seq, end_seq, dup_sack);
1636 if (tmp != NULL) {
1637 if (tmp != skb) {
1638 skb = tmp;
1639 continue;
1640 }
1641
1642 in_sack = 0;
1643 } else {
1644 in_sack = tcp_match_skb_to_sack(sk, skb,
1645 start_seq,
1646 end_seq);
1647 }
1648 }
1649
1650 if (unlikely(in_sack < 0))
1651 break;
1652
1653 if (in_sack) {
1654 TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk,
1655 state,
1656 dup_sack,
1657 tcp_skb_pcount(skb));
1658
1659 if (!before(TCP_SKB_CB(skb)->seq,
1660 tcp_highest_sack_seq(tp)))
1661 tcp_advance_highest_sack(sk, skb);
1662 }
1663
1664 state->fack_count += tcp_skb_pcount(skb);
1665 }
1666 return skb;
1667}
1668
1669
1670
1671
1672static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
1673 struct tcp_sacktag_state *state,
1674 u32 skip_to_seq)
1675{
1676 tcp_for_write_queue_from(skb, sk) {
1677 if (skb == tcp_send_head(sk))
1678 break;
1679
1680 if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
1681 break;
1682
1683 state->fack_count += tcp_skb_pcount(skb);
1684 }
1685 return skb;
1686}
1687
1688static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1689 struct sock *sk,
1690 struct tcp_sack_block *next_dup,
1691 struct tcp_sacktag_state *state,
1692 u32 skip_to_seq)
1693{
1694 if (next_dup == NULL)
1695 return skb;
1696
1697 if (before(next_dup->start_seq, skip_to_seq)) {
1698 skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq);
1699 skb = tcp_sacktag_walk(skb, sk, NULL, state,
1700 next_dup->start_seq, next_dup->end_seq,
1701 1);
1702 }
1703
1704 return skb;
1705}
1706
1707static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache)
1708{
1709 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1710}
1711
1712static int
1713tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
1714 u32 prior_snd_una)
1715{
1716 const struct inet_connection_sock *icsk = inet_csk(sk);
1717 struct tcp_sock *tp = tcp_sk(sk);
1718 unsigned char *ptr = (skb_transport_header(ack_skb) +
1719 TCP_SKB_CB(ack_skb)->sacked);
1720 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
1721 struct tcp_sack_block sp[TCP_NUM_SACKS];
1722 struct tcp_sack_block *cache;
1723 struct tcp_sacktag_state state;
1724 struct sk_buff *skb;
1725 int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
1726 int used_sacks;
1727 int found_dup_sack = 0;
1728 int i, j;
1729 int first_sack_index;
1730
1731 state.flag = 0;
1732 state.reord = tp->packets_out;
1733
1734 if (!tp->sacked_out) {
1735 if (WARN_ON(tp->fackets_out))
1736 tp->fackets_out = 0;
1737 tcp_highest_sack_reset(sk);
1738 }
1739
1740 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
1741 num_sacks, prior_snd_una);
1742 if (found_dup_sack)
1743 state.flag |= FLAG_DSACKING_ACK;
1744
1745
1746
1747
1748
1749 if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
1750 return 0;
1751
1752 if (!tp->packets_out)
1753 goto out;
1754
1755 used_sacks = 0;
1756 first_sack_index = 0;
1757 for (i = 0; i < num_sacks; i++) {
1758 int dup_sack = !i && found_dup_sack;
1759
1760 sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
1761 sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);
1762
1763 if (!tcp_is_sackblock_valid(tp, dup_sack,
1764 sp[used_sacks].start_seq,
1765 sp[used_sacks].end_seq)) {
1766 int mib_idx;
1767
1768 if (dup_sack) {
1769 if (!tp->undo_marker)
1770 mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
1771 else
1772 mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
1773 } else {
1774
1775 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
1776 !after(sp[used_sacks].end_seq, tp->snd_una))
1777 continue;
1778 mib_idx = LINUX_MIB_TCPSACKDISCARD;
1779 }
1780
1781 NET_INC_STATS_BH(sock_net(sk), mib_idx);
1782 if (i == 0)
1783 first_sack_index = -1;
1784 continue;
1785 }
1786
1787
1788 if (!after(sp[used_sacks].end_seq, prior_snd_una))
1789 continue;
1790
1791 used_sacks++;
1792 }
1793
1794
1795 for (i = used_sacks - 1; i > 0; i--) {
1796 for (j = 0; j < i; j++) {
1797 if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
1798 swap(sp[j], sp[j + 1]);
1799
1800
1801 if (j == first_sack_index)
1802 first_sack_index = j + 1;
1803 }
1804 }
1805 }
1806
1807 skb = tcp_write_queue_head(sk);
1808 state.fack_count = 0;
1809 i = 0;
1810
1811 if (!tp->sacked_out) {
1812
1813 cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1814 } else {
1815 cache = tp->recv_sack_cache;
1816
1817 while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
1818 !cache->end_seq)
1819 cache++;
1820 }
1821
1822 while (i < used_sacks) {
1823 u32 start_seq = sp[i].start_seq;
1824 u32 end_seq = sp[i].end_seq;
1825 int dup_sack = (found_dup_sack && (i == first_sack_index));
1826 struct tcp_sack_block *next_dup = NULL;
1827
1828 if (found_dup_sack && ((i + 1) == first_sack_index))
1829 next_dup = &sp[i + 1];
1830
1831
1832 if (after(end_seq, tp->high_seq))
1833 state.flag |= FLAG_DATA_LOST;
1834
1835
1836 while (tcp_sack_cache_ok(tp, cache) &&
1837 !before(start_seq, cache->end_seq))
1838 cache++;
1839
1840
1841 if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
1842 after(end_seq, cache->start_seq)) {
1843
1844
1845 if (before(start_seq, cache->start_seq)) {
1846 skb = tcp_sacktag_skip(skb, sk, &state,
1847 start_seq);
1848 skb = tcp_sacktag_walk(skb, sk, next_dup,
1849 &state,
1850 start_seq,
1851 cache->start_seq,
1852 dup_sack);
1853 }
1854
1855
1856 if (!after(end_seq, cache->end_seq))
1857 goto advance_sp;
1858
1859 skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
1860 &state,
1861 cache->end_seq);
1862
1863
1864 if (tcp_highest_sack_seq(tp) == cache->end_seq) {
1865
1866 skb = tcp_highest_sack(sk);
1867 if (skb == NULL)
1868 break;
1869 state.fack_count = tp->fackets_out;
1870 cache++;
1871 goto walk;
1872 }
1873
1874 skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
1875
1876 cache++;
1877 continue;
1878 }
1879
1880 if (!before(start_seq, tcp_highest_sack_seq(tp))) {
1881 skb = tcp_highest_sack(sk);
1882 if (skb == NULL)
1883 break;
1884 state.fack_count = tp->fackets_out;
1885 }
1886 skb = tcp_sacktag_skip(skb, sk, &state, start_seq);
1887
1888walk:
1889 skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
1890 start_seq, end_seq, dup_sack);
1891
1892advance_sp:
1893
1894
1895
1896 if (after(end_seq, tp->frto_highmark))
1897 state.flag &= ~FLAG_ONLY_ORIG_SACKED;
1898
1899 i++;
1900 }
1901
1902
1903 for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
1904 tp->recv_sack_cache[i].start_seq = 0;
1905 tp->recv_sack_cache[i].end_seq = 0;
1906 }
1907 for (j = 0; j < used_sacks; j++)
1908 tp->recv_sack_cache[i++] = sp[j];
1909
1910 tcp_mark_lost_retrans(sk);
1911
1912 tcp_verify_left_out(tp);
1913
1914 if ((state.reord < tp->fackets_out) &&
1915 ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
1916 (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
1917 tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
1918
1919out:
1920
1921#if FASTRETRANS_DEBUG > 0
1922 WARN_ON((int)tp->sacked_out < 0);
1923 WARN_ON((int)tp->lost_out < 0);
1924 WARN_ON((int)tp->retrans_out < 0);
1925 WARN_ON((int)tcp_packets_in_flight(tp) < 0);
1926#endif
1927 return state.flag;
1928}
1929
1930
1931
1932
1933static int tcp_limit_reno_sacked(struct tcp_sock *tp)
1934{
1935 u32 holes;
1936
1937 holes = max(tp->lost_out, 1U);
1938 holes = min(holes, tp->packets_out);
1939
1940 if ((tp->sacked_out + holes) > tp->packets_out) {
1941 tp->sacked_out = tp->packets_out - holes;
1942 return 1;
1943 }
1944 return 0;
1945}
1946
1947
1948
1949
1950
1951static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1952{
1953 struct tcp_sock *tp = tcp_sk(sk);
1954 if (tcp_limit_reno_sacked(tp))
1955 tcp_update_reordering(sk, tp->packets_out + addend, 0);
1956}
1957
1958
1959
1960static void tcp_add_reno_sack(struct sock *sk)
1961{
1962 struct tcp_sock *tp = tcp_sk(sk);
1963 tp->sacked_out++;
1964 tcp_check_reno_reordering(sk, 0);
1965 tcp_verify_left_out(tp);
1966}
1967
1968
1969
1970static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1971{
1972 struct tcp_sock *tp = tcp_sk(sk);
1973
1974 if (acked > 0) {
1975
1976 if (acked - 1 >= tp->sacked_out)
1977 tp->sacked_out = 0;
1978 else
1979 tp->sacked_out -= acked - 1;
1980 }
1981 tcp_check_reno_reordering(sk, acked);
1982 tcp_verify_left_out(tp);
1983}
1984
1985static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
1986{
1987 tp->sacked_out = 0;
1988}
1989
1990static int tcp_is_sackfrto(const struct tcp_sock *tp)
1991{
1992 return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp);
1993}
1994
1995
1996
1997
1998int tcp_use_frto(struct sock *sk)
1999{
2000 const struct tcp_sock *tp = tcp_sk(sk);
2001 const struct inet_connection_sock *icsk = inet_csk(sk);
2002 struct sk_buff *skb;
2003
2004 if (!sysctl_tcp_frto)
2005 return 0;
2006
2007
2008 if (icsk->icsk_mtup.probe_size)
2009 return 0;
2010
2011 if (tcp_is_sackfrto(tp))
2012 return 1;
2013
2014
2015 if (tp->retrans_out > 1)
2016 return 0;
2017
2018 skb = tcp_write_queue_head(sk);
2019 if (tcp_skb_is_last(sk, skb))
2020 return 1;
2021 skb = tcp_write_queue_next(sk, skb);
2022 tcp_for_write_queue_from(skb, sk) {
2023 if (skb == tcp_send_head(sk))
2024 break;
2025 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
2026 return 0;
2027
2028 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2029 break;
2030 }
2031 return 1;
2032}
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046void tcp_enter_frto(struct sock *sk)
2047{
2048 const struct inet_connection_sock *icsk = inet_csk(sk);
2049 struct tcp_sock *tp = tcp_sk(sk);
2050 struct sk_buff *skb;
2051
2052 if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
2053 tp->snd_una == tp->high_seq ||
2054 ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
2055 !icsk->icsk_retransmits)) {
2056 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066 if (tp->frto_counter) {
2067 u32 stored_cwnd;
2068 stored_cwnd = tp->snd_cwnd;
2069 tp->snd_cwnd = 2;
2070 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2071 tp->snd_cwnd = stored_cwnd;
2072 } else {
2073 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2074 }
2075
2076
2077
2078
2079
2080
2081
2082 tcp_ca_event(sk, CA_EVENT_FRTO);
2083 }
2084
2085 tp->undo_marker = tp->snd_una;
2086 tp->undo_retrans = 0;
2087
2088 skb = tcp_write_queue_head(sk);
2089 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
2090 tp->undo_marker = 0;
2091 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2092 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2093 tp->retrans_out -= tcp_skb_pcount(skb);
2094 }
2095 tcp_verify_left_out(tp);
2096
2097
2098 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
2099
2100
2101
2102
2103 if (tcp_is_sackfrto(tp) && (tp->frto_counter ||
2104 ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
2105 after(tp->high_seq, tp->snd_una)) {
2106 tp->frto_highmark = tp->high_seq;
2107 } else {
2108 tp->frto_highmark = tp->snd_nxt;
2109 }
2110 tcp_set_ca_state(sk, TCP_CA_Disorder);
2111 tp->high_seq = tp->snd_nxt;
2112 tp->frto_counter = 1;
2113}
2114
2115
2116
2117
2118
2119static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
2120{
2121 struct tcp_sock *tp = tcp_sk(sk);
2122 struct sk_buff *skb;
2123
2124 tp->lost_out = 0;
2125 tp->retrans_out = 0;
2126 if (tcp_is_reno(tp))
2127 tcp_reset_reno_sack(tp);
2128
2129 tcp_for_write_queue(skb, sk) {
2130 if (skb == tcp_send_head(sk))
2131 break;
2132
2133 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2134
2135
2136
2137
2138 if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) {
2139
2140 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
2141 tp->retrans_out += tcp_skb_pcount(skb);
2142
2143 flag |= FLAG_DATA_ACKED;
2144 } else {
2145 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
2146 tp->undo_marker = 0;
2147 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2148 }
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2160 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
2161 tp->lost_out += tcp_skb_pcount(skb);
2162 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
2163 }
2164 }
2165 tcp_verify_left_out(tp);
2166
2167 tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
2168 tp->snd_cwnd_cnt = 0;
2169 tp->snd_cwnd_stamp = tcp_time_stamp;
2170 tp->frto_counter = 0;
2171 tp->bytes_acked = 0;
2172
2173 tp->reordering = min_t(unsigned int, tp->reordering,
2174 sysctl_tcp_reordering);
2175 tcp_set_ca_state(sk, TCP_CA_Loss);
2176 tp->high_seq = tp->snd_nxt;
2177 TCP_ECN_queue_cwr(tp);
2178
2179 tcp_clear_all_retrans_hints(tp);
2180}
2181
2182static void tcp_clear_retrans_partial(struct tcp_sock *tp)
2183{
2184 tp->retrans_out = 0;
2185 tp->lost_out = 0;
2186
2187 tp->undo_marker = 0;
2188 tp->undo_retrans = 0;
2189}
2190
2191void tcp_clear_retrans(struct tcp_sock *tp)
2192{
2193 tcp_clear_retrans_partial(tp);
2194
2195 tp->fackets_out = 0;
2196 tp->sacked_out = 0;
2197}
2198
2199
2200
2201
2202
2203void tcp_enter_loss(struct sock *sk, int how)
2204{
2205 const struct inet_connection_sock *icsk = inet_csk(sk);
2206 struct tcp_sock *tp = tcp_sk(sk);
2207 struct sk_buff *skb;
2208
2209
2210 if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
2211 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
2212 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2213 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2214 tcp_ca_event(sk, CA_EVENT_LOSS);
2215 }
2216 tp->snd_cwnd = 1;
2217 tp->snd_cwnd_cnt = 0;
2218 tp->snd_cwnd_stamp = tcp_time_stamp;
2219
2220 tp->bytes_acked = 0;
2221 tcp_clear_retrans_partial(tp);
2222
2223 if (tcp_is_reno(tp))
2224 tcp_reset_reno_sack(tp);
2225
2226 if (!how) {
2227
2228
2229 tp->undo_marker = tp->snd_una;
2230 } else {
2231 tp->sacked_out = 0;
2232 tp->fackets_out = 0;
2233 }
2234 tcp_clear_all_retrans_hints(tp);
2235
2236 tcp_for_write_queue(skb, sk) {
2237 if (skb == tcp_send_head(sk))
2238 break;
2239
2240 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
2241 tp->undo_marker = 0;
2242 TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
2243 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
2244 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
2245 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
2246 tp->lost_out += tcp_skb_pcount(skb);
2247 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
2248 }
2249 }
2250 tcp_verify_left_out(tp);
2251
2252 tp->reordering = min_t(unsigned int, tp->reordering,
2253 sysctl_tcp_reordering);
2254 tcp_set_ca_state(sk, TCP_CA_Loss);
2255 tp->high_seq = tp->snd_nxt;
2256 TCP_ECN_queue_cwr(tp);
2257
2258 tp->frto_counter = 0;
2259}
2260
2261
2262
2263
2264
2265
2266
2267static int tcp_check_sack_reneging(struct sock *sk, int flag)
2268{
2269 if (flag & FLAG_SACK_RENEGING) {
2270 struct inet_connection_sock *icsk = inet_csk(sk);
2271 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
2272
2273 tcp_enter_loss(sk, 1);
2274 icsk->icsk_retransmits++;
2275 tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
2276 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2277 icsk->icsk_rto, TCP_RTO_MAX);
2278 return 1;
2279 }
2280 return 0;
2281}
2282
2283static inline int tcp_fackets_out(struct tcp_sock *tp)
2284{
2285 return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
2286}
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303static inline int tcp_dupack_heurestics(struct tcp_sock *tp)
2304{
2305 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
2306}
2307
2308static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
2309{
2310 return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
2311}
2312
2313static inline int tcp_head_timedout(struct sock *sk)
2314{
2315 struct tcp_sock *tp = tcp_sk(sk);
2316
2317 return tp->packets_out &&
2318 tcp_skb_timedout(sk, tcp_write_queue_head(sk));
2319}
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414static int tcp_time_to_recover(struct sock *sk)
2415{
2416 struct tcp_sock *tp = tcp_sk(sk);
2417 __u32 packets_out;
2418
2419
2420 if (tp->frto_counter)
2421 return 0;
2422
2423
2424 if (tp->lost_out)
2425 return 1;
2426
2427
2428 if (tcp_dupack_heurestics(tp) > tp->reordering)
2429 return 1;
2430
2431
2432
2433
2434 if (tcp_is_fack(tp) && tcp_head_timedout(sk))
2435 return 1;
2436
2437
2438
2439
2440 packets_out = tp->packets_out;
2441 if (packets_out <= tp->reordering &&
2442 tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
2443 !tcp_may_send_now(sk)) {
2444
2445
2446
2447 return 1;
2448 }
2449
2450 return 0;
2451}
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465static void tcp_timeout_skbs(struct sock *sk)
2466{
2467 struct tcp_sock *tp = tcp_sk(sk);
2468 struct sk_buff *skb;
2469
2470 if (!tcp_is_fack(tp) || !tcp_head_timedout(sk))
2471 return;
2472
2473 skb = tp->scoreboard_skb_hint;
2474 if (tp->scoreboard_skb_hint == NULL)
2475 skb = tcp_write_queue_head(sk);
2476
2477 tcp_for_write_queue_from(skb, sk) {
2478 if (skb == tcp_send_head(sk))
2479 break;
2480 if (!tcp_skb_timedout(sk, skb))
2481 break;
2482
2483 tcp_skb_mark_lost(tp, skb);
2484 }
2485
2486 tp->scoreboard_skb_hint = skb;
2487
2488 tcp_verify_left_out(tp);
2489}
2490
2491
2492
2493
2494static void tcp_mark_head_lost(struct sock *sk, int packets)
2495{
2496 struct tcp_sock *tp = tcp_sk(sk);
2497 struct sk_buff *skb;
2498 int cnt, oldcnt;
2499 int err;
2500 unsigned int mss;
2501
2502 WARN_ON(packets > tp->packets_out);
2503 if (tp->lost_skb_hint) {
2504 skb = tp->lost_skb_hint;
2505 cnt = tp->lost_cnt_hint;
2506 } else {
2507 skb = tcp_write_queue_head(sk);
2508 cnt = 0;
2509 }
2510
2511 tcp_for_write_queue_from(skb, sk) {
2512 if (skb == tcp_send_head(sk))
2513 break;
2514
2515
2516 tp->lost_skb_hint = skb;
2517 tp->lost_cnt_hint = cnt;
2518
2519 if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
2520 break;
2521
2522 oldcnt = cnt;
2523 if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
2524 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2525 cnt += tcp_skb_pcount(skb);
2526
2527 if (cnt > packets) {
2528 if (tcp_is_sack(tp) || (oldcnt >= packets))
2529 break;
2530
2531 mss = skb_shinfo(skb)->gso_size;
2532 err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
2533 if (err < 0)
2534 break;
2535 cnt = packets;
2536 }
2537
2538 tcp_skb_mark_lost(tp, skb);
2539 }
2540 tcp_verify_left_out(tp);
2541}
2542
2543
2544
2545static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2546{
2547 struct tcp_sock *tp = tcp_sk(sk);
2548
2549 if (tcp_is_reno(tp)) {
2550 tcp_mark_head_lost(sk, 1);
2551 } else if (tcp_is_fack(tp)) {
2552 int lost = tp->fackets_out - tp->reordering;
2553 if (lost <= 0)
2554 lost = 1;
2555 tcp_mark_head_lost(sk, lost);
2556 } else {
2557 int sacked_upto = tp->sacked_out - tp->reordering;
2558 if (sacked_upto < fast_rexmit)
2559 sacked_upto = fast_rexmit;
2560 tcp_mark_head_lost(sk, sacked_upto);
2561 }
2562
2563 tcp_timeout_skbs(sk);
2564}
2565
2566
2567
2568
2569static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
2570{
2571 tp->snd_cwnd = min(tp->snd_cwnd,
2572 tcp_packets_in_flight(tp) + tcp_max_burst(tp));
2573 tp->snd_cwnd_stamp = tcp_time_stamp;
2574}
2575
2576
2577
2578
2579static inline u32 tcp_cwnd_min(const struct sock *sk)
2580{
2581 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
2582
2583 return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh;
2584}
2585
2586
2587static void tcp_cwnd_down(struct sock *sk, int flag)
2588{
2589 struct tcp_sock *tp = tcp_sk(sk);
2590 int decr = tp->snd_cwnd_cnt + 1;
2591
2592 if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
2593 (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
2594 tp->snd_cwnd_cnt = decr & 1;
2595 decr >>= 1;
2596
2597 if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
2598 tp->snd_cwnd -= decr;
2599
2600 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
2601 tp->snd_cwnd_stamp = tcp_time_stamp;
2602 }
2603}
2604
2605
2606
2607
2608static inline int tcp_packet_delayed(struct tcp_sock *tp)
2609{
2610 return !tp->retrans_stamp ||
2611 (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2612 before(tp->rx_opt.rcv_tsecr, tp->retrans_stamp));
2613}
2614
2615
2616
#if FASTRETRANS_DEBUG > 1
/*
 * Debug helper: log an undo event tagged @msg together with the peer
 * address/port, cwnd, left_out, ssthresh/prior_ssthresh and packets_out.
 * Compiles to nothing unless FASTRETRANS_DEBUG > 1.
 */
static void DBGUNDO(struct sock *sk, const char *msg)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_sock *inet = inet_sk(sk);

	switch (sk->sk_family) {
	case AF_INET:
		printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
		       msg,
		       &inet->daddr, ntohs(inet->dport),
		       tp->snd_cwnd, tcp_left_out(tp),
		       tp->snd_ssthresh, tp->prior_ssthresh,
		       tp->packets_out);
		break;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	case AF_INET6: {
		struct ipv6_pinfo *np = inet6_sk(sk);

		printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
		       msg,
		       &np->daddr, ntohs(inet->dport),
		       tp->snd_cwnd, tcp_left_out(tp),
		       tp->snd_ssthresh, tp->prior_ssthresh,
		       tp->packets_out);
		break;
	}
#endif
	default:
		break;
	}
}
#else
#define DBGUNDO(x...) do { } while (0)
#endif
2646
2647static void tcp_undo_cwr(struct sock *sk, const int undo)
2648{
2649 struct tcp_sock *tp = tcp_sk(sk);
2650
2651 if (tp->prior_ssthresh) {
2652 const struct inet_connection_sock *icsk = inet_csk(sk);
2653
2654 if (icsk->icsk_ca_ops->undo_cwnd)
2655 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2656 else
2657 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
2658
2659 if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
2660 tp->snd_ssthresh = tp->prior_ssthresh;
2661 TCP_ECN_withdraw_cwr(tp);
2662 }
2663 } else {
2664 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
2665 }
2666 tcp_moderate_cwnd(tp);
2667 tp->snd_cwnd_stamp = tcp_time_stamp;
2668}
2669
2670static inline int tcp_may_undo(struct tcp_sock *tp)
2671{
2672 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2673}
2674
2675
2676static int tcp_try_undo_recovery(struct sock *sk)
2677{
2678 struct tcp_sock *tp = tcp_sk(sk);
2679
2680 if (tcp_may_undo(tp)) {
2681 int mib_idx;
2682
2683
2684
2685
2686 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2687 tcp_undo_cwr(sk, 1);
2688 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2689 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2690 else
2691 mib_idx = LINUX_MIB_TCPFULLUNDO;
2692
2693 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2694 tp->undo_marker = 0;
2695 }
2696 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
2697
2698
2699
2700 tcp_moderate_cwnd(tp);
2701 return 1;
2702 }
2703 tcp_set_ca_state(sk, TCP_CA_Open);
2704 return 0;
2705}
2706
2707
2708static void tcp_try_undo_dsack(struct sock *sk)
2709{
2710 struct tcp_sock *tp = tcp_sk(sk);
2711
2712 if (tp->undo_marker && !tp->undo_retrans) {
2713 DBGUNDO(sk, "D-SACK");
2714 tcp_undo_cwr(sk, 1);
2715 tp->undo_marker = 0;
2716 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2717 }
2718}
2719
2720
2721
2722static int tcp_try_undo_partial(struct sock *sk, int acked)
2723{
2724 struct tcp_sock *tp = tcp_sk(sk);
2725
2726 int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
2727
2728 if (tcp_may_undo(tp)) {
2729
2730
2731
2732 if (tp->retrans_out == 0)
2733 tp->retrans_stamp = 0;
2734
2735 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
2736
2737 DBGUNDO(sk, "Hoe");
2738 tcp_undo_cwr(sk, 0);
2739 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2740
2741
2742
2743
2744
2745 failed = 0;
2746 }
2747 return failed;
2748}
2749
2750
2751static int tcp_try_undo_loss(struct sock *sk)
2752{
2753 struct tcp_sock *tp = tcp_sk(sk);
2754
2755 if (tcp_may_undo(tp)) {
2756 struct sk_buff *skb;
2757 tcp_for_write_queue(skb, sk) {
2758 if (skb == tcp_send_head(sk))
2759 break;
2760 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2761 }
2762
2763 tcp_clear_all_retrans_hints(tp);
2764
2765 DBGUNDO(sk, "partial loss");
2766 tp->lost_out = 0;
2767 tcp_undo_cwr(sk, 1);
2768 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2769 inet_csk(sk)->icsk_retransmits = 0;
2770 tp->undo_marker = 0;
2771 if (tcp_is_sack(tp))
2772 tcp_set_ca_state(sk, TCP_CA_Open);
2773 return 1;
2774 }
2775 return 0;
2776}
2777
2778static inline void tcp_complete_cwr(struct sock *sk)
2779{
2780 struct tcp_sock *tp = tcp_sk(sk);
2781 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
2782 tp->snd_cwnd_stamp = tcp_time_stamp;
2783 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2784}
2785
2786static void tcp_try_keep_open(struct sock *sk)
2787{
2788 struct tcp_sock *tp = tcp_sk(sk);
2789 int state = TCP_CA_Open;
2790
2791 if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker)
2792 state = TCP_CA_Disorder;
2793
2794 if (inet_csk(sk)->icsk_ca_state != state) {
2795 tcp_set_ca_state(sk, state);
2796 tp->high_seq = tp->snd_nxt;
2797 }
2798}
2799
2800static void tcp_try_to_open(struct sock *sk, int flag)
2801{
2802 struct tcp_sock *tp = tcp_sk(sk);
2803
2804 tcp_verify_left_out(tp);
2805
2806 if (!tp->frto_counter && tp->retrans_out == 0)
2807 tp->retrans_stamp = 0;
2808
2809 if (flag & FLAG_ECE)
2810 tcp_enter_cwr(sk, 1);
2811
2812 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2813 tcp_try_keep_open(sk);
2814 tcp_moderate_cwnd(tp);
2815 } else {
2816 tcp_cwnd_down(sk, flag);
2817 }
2818}
2819
2820static void tcp_mtup_probe_failed(struct sock *sk)
2821{
2822 struct inet_connection_sock *icsk = inet_csk(sk);
2823
2824 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
2825 icsk->icsk_mtup.probe_size = 0;
2826}
2827
2828static void tcp_mtup_probe_success(struct sock *sk)
2829{
2830 struct tcp_sock *tp = tcp_sk(sk);
2831 struct inet_connection_sock *icsk = inet_csk(sk);
2832
2833
2834 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2835 tp->snd_cwnd = tp->snd_cwnd *
2836 tcp_mss_to_mtu(sk, tp->mss_cache) /
2837 icsk->icsk_mtup.probe_size;
2838 tp->snd_cwnd_cnt = 0;
2839 tp->snd_cwnd_stamp = tcp_time_stamp;
2840 tp->rcv_ssthresh = tcp_current_ssthresh(sk);
2841
2842 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2843 icsk->icsk_mtup.probe_size = 0;
2844 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2845}
2846
2847
2848
2849
2850
2851void tcp_simple_retransmit(struct sock *sk)
2852{
2853 const struct inet_connection_sock *icsk = inet_csk(sk);
2854 struct tcp_sock *tp = tcp_sk(sk);
2855 struct sk_buff *skb;
2856 unsigned int mss = tcp_current_mss(sk);
2857 u32 prior_lost = tp->lost_out;
2858
2859 tcp_for_write_queue(skb, sk) {
2860 if (skb == tcp_send_head(sk))
2861 break;
2862 if (tcp_skb_seglen(skb) > mss &&
2863 !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2864 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2865 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2866 tp->retrans_out -= tcp_skb_pcount(skb);
2867 }
2868 tcp_skb_mark_lost_uncond_verify(tp, skb);
2869 }
2870 }
2871
2872 tcp_clear_retrans_hints_partial(tp);
2873
2874 if (prior_lost == tp->lost_out)
2875 return;
2876
2877 if (tcp_is_reno(tp))
2878 tcp_limit_reno_sacked(tp);
2879
2880 tcp_verify_left_out(tp);
2881
2882
2883
2884
2885
2886
2887 if (icsk->icsk_ca_state != TCP_CA_Loss) {
2888 tp->high_seq = tp->snd_nxt;
2889 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2890 tp->prior_ssthresh = 0;
2891 tp->undo_marker = 0;
2892 tcp_set_ca_state(sk, TCP_CA_Loss);
2893 }
2894 tcp_xmit_retransmit_queue(sk);
2895}
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2909{
2910 struct inet_connection_sock *icsk = inet_csk(sk);
2911 struct tcp_sock *tp = tcp_sk(sk);
2912 int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
2913 int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
2914 (tcp_fackets_out(tp) > tp->reordering));
2915 int fast_rexmit = 0, mib_idx;
2916
2917 if (WARN_ON(!tp->packets_out && tp->sacked_out))
2918 tp->sacked_out = 0;
2919 if (WARN_ON(!tp->sacked_out && tp->fackets_out))
2920 tp->fackets_out = 0;
2921
2922
2923
2924 if (flag & FLAG_ECE)
2925 tp->prior_ssthresh = 0;
2926
2927
2928 if (tcp_check_sack_reneging(sk, flag))
2929 return;
2930
2931
2932 if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
2933 before(tp->snd_una, tp->high_seq) &&
2934 icsk->icsk_ca_state != TCP_CA_Open &&
2935 tp->fackets_out > tp->reordering) {
2936 tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
2937 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
2938 }
2939
2940
2941 tcp_verify_left_out(tp);
2942
2943
2944
2945 if (icsk->icsk_ca_state == TCP_CA_Open) {
2946 WARN_ON(tp->retrans_out != 0);
2947 tp->retrans_stamp = 0;
2948 } else if (!before(tp->snd_una, tp->high_seq)) {
2949 switch (icsk->icsk_ca_state) {
2950 case TCP_CA_Loss:
2951 icsk->icsk_retransmits = 0;
2952 if (tcp_try_undo_recovery(sk))
2953 return;
2954 break;
2955
2956 case TCP_CA_CWR:
2957
2958
2959 if (tp->snd_una != tp->high_seq) {
2960 tcp_complete_cwr(sk);
2961 tcp_set_ca_state(sk, TCP_CA_Open);
2962 }
2963 break;
2964
2965 case TCP_CA_Disorder:
2966 tcp_try_undo_dsack(sk);
2967 if (!tp->undo_marker ||
2968
2969
2970 tcp_is_reno(tp) || tp->snd_una != tp->high_seq) {
2971 tp->undo_marker = 0;
2972 tcp_set_ca_state(sk, TCP_CA_Open);
2973 }
2974 break;
2975
2976 case TCP_CA_Recovery:
2977 if (tcp_is_reno(tp))
2978 tcp_reset_reno_sack(tp);
2979 if (tcp_try_undo_recovery(sk))
2980 return;
2981 tcp_complete_cwr(sk);
2982 break;
2983 }
2984 }
2985
2986
2987 switch (icsk->icsk_ca_state) {
2988 case TCP_CA_Recovery:
2989 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
2990 if (tcp_is_reno(tp) && is_dupack)
2991 tcp_add_reno_sack(sk);
2992 } else
2993 do_lost = tcp_try_undo_partial(sk, pkts_acked);
2994 break;
2995 case TCP_CA_Loss:
2996 if (flag & FLAG_DATA_ACKED)
2997 icsk->icsk_retransmits = 0;
2998 if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
2999 tcp_reset_reno_sack(tp);
3000 if (!tcp_try_undo_loss(sk)) {
3001 tcp_moderate_cwnd(tp);
3002 tcp_xmit_retransmit_queue(sk);
3003 return;
3004 }
3005 if (icsk->icsk_ca_state != TCP_CA_Open)
3006 return;
3007
3008 default:
3009 if (tcp_is_reno(tp)) {
3010 if (flag & FLAG_SND_UNA_ADVANCED)
3011 tcp_reset_reno_sack(tp);
3012 if (is_dupack)
3013 tcp_add_reno_sack(sk);
3014 }
3015
3016 if (icsk->icsk_ca_state == TCP_CA_Disorder)
3017 tcp_try_undo_dsack(sk);
3018
3019 if (!tcp_time_to_recover(sk)) {
3020 tcp_try_to_open(sk, flag);
3021 return;
3022 }
3023
3024
3025 if (icsk->icsk_ca_state < TCP_CA_CWR &&
3026 icsk->icsk_mtup.probe_size &&
3027 tp->snd_una == tp->mtu_probe.probe_seq_start) {
3028 tcp_mtup_probe_failed(sk);
3029
3030 tp->snd_cwnd++;
3031 tcp_simple_retransmit(sk);
3032 return;
3033 }
3034
3035
3036
3037 if (tcp_is_reno(tp))
3038 mib_idx = LINUX_MIB_TCPRENORECOVERY;
3039 else
3040 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
3041
3042 NET_INC_STATS_BH(sock_net(sk), mib_idx);
3043
3044 tp->high_seq = tp->snd_nxt;
3045 tp->prior_ssthresh = 0;
3046 tp->undo_marker = tp->snd_una;
3047 tp->undo_retrans = tp->retrans_out;
3048
3049 if (icsk->icsk_ca_state < TCP_CA_CWR) {
3050 if (!(flag & FLAG_ECE))
3051 tp->prior_ssthresh = tcp_current_ssthresh(sk);
3052 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
3053 TCP_ECN_queue_cwr(tp);
3054 }
3055
3056 tp->bytes_acked = 0;
3057 tp->snd_cwnd_cnt = 0;
3058 tcp_set_ca_state(sk, TCP_CA_Recovery);
3059 fast_rexmit = 1;
3060 }
3061
3062 if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
3063 tcp_update_scoreboard(sk, fast_rexmit);
3064 tcp_cwnd_down(sk, flag);
3065 tcp_xmit_retransmit_queue(sk);
3066}
3067
3068static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
3069{
3070 tcp_rtt_estimator(sk, seq_rtt);
3071 tcp_set_rto(sk);
3072 inet_csk(sk)->icsk_backoff = 0;
3073}
3074
3075
3076
3077
3078static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
3079{
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095 struct tcp_sock *tp = tcp_sk(sk);
3096
3097 tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
3098}
3099
3100static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
3101{
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111 if (flag & FLAG_RETRANS_DATA_ACKED)
3112 return;
3113
3114 tcp_valid_rtt_meas(sk, seq_rtt);
3115}
3116
3117static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
3118 const s32 seq_rtt)
3119{
3120 const struct tcp_sock *tp = tcp_sk(sk);
3121
3122 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
3123 tcp_ack_saw_tstamp(sk, flag);
3124 else if (seq_rtt >= 0)
3125 tcp_ack_no_tstamp(sk, seq_rtt, flag);
3126}
3127
3128static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
3129{
3130 const struct inet_connection_sock *icsk = inet_csk(sk);
3131 icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight);
3132 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
3133}
3134
3135
3136
3137
3138static void tcp_rearm_rto(struct sock *sk)
3139{
3140 struct tcp_sock *tp = tcp_sk(sk);
3141
3142 if (!tp->packets_out) {
3143 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
3144 } else {
3145 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3146 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
3147 }
3148}
3149
3150
3151static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3152{
3153 struct tcp_sock *tp = tcp_sk(sk);
3154 u32 packets_acked;
3155
3156 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
3157
3158 packets_acked = tcp_skb_pcount(skb);
3159 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
3160 return 0;
3161 packets_acked -= tcp_skb_pcount(skb);
3162
3163 if (packets_acked) {
3164 BUG_ON(tcp_skb_pcount(skb) == 0);
3165 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
3166 }
3167
3168 return packets_acked;
3169}
3170
3171
3172
3173
3174
/* Remove acknowledged segments from the head of the write queue.
 *
 * Walks the write queue from its head up to (not including) the send head,
 * freeing every skb that lies entirely at or below snd_una and trimming a
 * partially acked multi-packet skb. While doing so it updates packets_out,
 * sacked_out, lost_out and retrans_out, collects RTT samples, and afterwards
 * updates RTT/RTO, MTU probing, reordering and the congestion-control
 * pkts_acked hook.
 *
 * @prior_fackets: tp->fackets_out as sampled by the caller before this ACK.
 * @prior_snd_una: snd_una before this ACK was processed.
 *
 * Returns a mask of FLAG_* bits describing what this ACK acknowledged.
 */
3175 static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3176 u32 prior_snd_una)
3177 {
3178 struct tcp_sock *tp = tcp_sk(sk);
3179 const struct inet_connection_sock *icsk = inet_csk(sk);
3180 struct sk_buff *skb;
3181 u32 now = tcp_time_stamp;
3182 int fully_acked = 1;
3183 int flag = 0;
3184 u32 pkts_acked = 0;
3185 u32 reord = tp->packets_out;
3186 u32 prior_sacked = tp->sacked_out;
3187 s32 seq_rtt = -1;
3188 s32 ca_seq_rtt = -1;
3189 ktime_t last_ackt = net_invalid_timestamp();
3190
3191 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
3192 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3193 u32 acked_pcount;
3194 u8 sacked = scb->sacked;
3195
3196
/* The skb extends beyond snd_una: stop unless it is a multi-packet skb
 * whose head is acked, in which case only the acked head is trimmed.
 */
3197 if (after(scb->end_seq, tp->snd_una)) {
3198 if (tcp_skb_pcount(skb) == 1 ||
3199 !after(tp->snd_una, scb->seq))
3200 break;
3201
3202 acked_pcount = tcp_tso_acked(sk, skb);
3203 if (!acked_pcount)
3204 break;
3205
3206 fully_acked = 0;
3207 } else {
3208 acked_pcount = tcp_skb_pcount(skb);
3209 }
3210
/* A retransmitted skb invalidates the RTT samples collected so far;
 * otherwise remember the RTT of this skb (seq_rtt keeps the first sample,
 * ca_seq_rtt and last_ackt the most recent one).
 */
3211 if (sacked & TCPCB_RETRANS) {
3212 if (sacked & TCPCB_SACKED_RETRANS)
3213 tp->retrans_out -= acked_pcount;
3214 flag |= FLAG_RETRANS_DATA_ACKED;
3215 ca_seq_rtt = -1;
3216 seq_rtt = -1;
3217 if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
3218 flag |= FLAG_NONHEAD_RETRANS_ACKED;
3219 } else {
3220 ca_seq_rtt = now - scb->when;
3221 last_ackt = skb->tstamp;
3222 if (seq_rtt < 0) {
3223 seq_rtt = ca_seq_rtt;
3224 }
3225 if (!(sacked & TCPCB_SACKED_ACKED))
3226 reord = min(pkts_acked, reord);
3227 }
3228
/* Undo the per-skb contributions to the SACK/loss counters. */
3229 if (sacked & TCPCB_SACKED_ACKED)
3230 tp->sacked_out -= acked_pcount;
3231 if (sacked & TCPCB_LOST)
3232 tp->lost_out -= acked_pcount;
3233
3234 tp->packets_out -= acked_pcount;
3235 pkts_acked += acked_pcount;
3236
3237
3238
3239
3240
3241
3242
3243
/* Distinguish an acked SYN from acked data; an acked SYN also clears
 * retrans_stamp.
 */
3244 if (!(scb->flags & TCPCB_FLAG_SYN)) {
3245 flag |= FLAG_DATA_ACKED;
3246 } else {
3247 flag |= FLAG_SYN_ACKED;
3248 tp->retrans_stamp = 0;
3249 }
3250
/* A trimmed skb stays on the queue and ends the walk. */
3251 if (!fully_acked)
3252 break;
3253
/* Free the skb and drop any cached hint that may point at it. */
3254 tcp_unlink_write_queue(skb, sk);
3255 sk_wmem_free_skb(sk, skb);
3256 tp->scoreboard_skb_hint = NULL;
3257 if (skb == tp->retransmit_skb_hint)
3258 tp->retransmit_skb_hint = NULL;
3259 if (skb == tp->lost_skb_hint)
3260 tp->lost_skb_hint = NULL;
3261 }
3262
/* Pull snd_up forward to snd_una when it lies in [prior_snd_una, snd_una]. */
3263 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3264 tp->snd_up = tp->snd_una;
3265
/* The skb left at the head was already SACKed yet is not cumulatively
 * acked: report it via FLAG_SACK_RENEGING.
 */
3266 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
3267 flag |= FLAG_SACK_RENEGING;
3268
3269 if (flag & FLAG_ACKED) {
3270 const struct tcp_congestion_ops *ca_ops
3271 = inet_csk(sk)->icsk_ca_ops;
3272
/* An MTU probe is in progress and its last byte is now acked. */
3273 if (unlikely(icsk->icsk_mtup.probe_size &&
3274 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3275 tcp_mtup_probe_success(sk);
3276 }
3277
3278 tcp_ack_update_rtt(sk, flag, seq_rtt);
3279 tcp_rearm_rto(sk);
3280
3281 if (tcp_is_reno(tp)) {
3282 tcp_remove_reno_sacks(sk, pkts_acked);
3283 } else {
3284 int delta;
3285
3286
/* A never-SACKed, never-retransmitted skb was acked below the
 * previous fackets_out: update the reordering estimate.
 */
3287 if (reord < prior_fackets)
3288 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
3289
3290 delta = tcp_is_fack(tp) ? pkts_acked :
3291 prior_sacked - tp->sacked_out;
3292 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3293 }
3294
3295 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
3296
3297 if (ca_ops->pkts_acked) {
3298 s32 rtt_us = -1;
3299
3300
/* Only report an RTT to the congestion module when no retransmitted
 * data was acked; prefer the skb timestamp when the module asks for
 * it (TCP_CONG_RTT_STAMP) and a valid timestamp exists.
 */
3301 if (!(flag & FLAG_RETRANS_DATA_ACKED)) {
3302
3303 if (ca_ops->flags & TCP_CONG_RTT_STAMP &&
3304 !ktime_equal(last_ackt,
3305 net_invalid_timestamp()))
3306 rtt_us = ktime_us_delta(ktime_get_real(),
3307 last_ackt);
3308 else if (ca_seq_rtt > 0)
3309 rtt_us = jiffies_to_usecs(ca_seq_rtt);
3310 }
3311
3312 ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
3313 }
3314 }
3315
/* Debug-only consistency checks: warn on negative counters and, when
 * nothing is in flight on a SACK connection, report and reset any
 * leftover lost/sacked/retrans counts.
 */
3316 #if FASTRETRANS_DEBUG > 0
3317 WARN_ON((int)tp->sacked_out < 0);
3318 WARN_ON((int)tp->lost_out < 0);
3319 WARN_ON((int)tp->retrans_out < 0);
3320 if (!tp->packets_out && tcp_is_sack(tp)) {
3321 icsk = inet_csk(sk);
3322 if (tp->lost_out) {
3323 printk(KERN_DEBUG "Leak l=%u %d\n",
3324 tp->lost_out, icsk->icsk_ca_state);
3325 tp->lost_out = 0;
3326 }
3327 if (tp->sacked_out) {
3328 printk(KERN_DEBUG "Leak s=%u %d\n",
3329 tp->sacked_out, icsk->icsk_ca_state);
3330 tp->sacked_out = 0;
3331 }
3332 if (tp->retrans_out) {
3333 printk(KERN_DEBUG "Leak r=%u %d\n",
3334 tp->retrans_out, icsk->icsk_ca_state);
3335 tp->retrans_out = 0;
3336 }
3337 }
3338 #endif
3339 return flag;
3340 }
3341
3342static void tcp_ack_probe(struct sock *sk)
3343{
3344 const struct tcp_sock *tp = tcp_sk(sk);
3345 struct inet_connection_sock *icsk = inet_csk(sk);
3346
3347
3348
3349 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
3350 icsk->icsk_backoff = 0;
3351 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
3352
3353
3354
3355 } else {
3356 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3357 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
3358 TCP_RTO_MAX);
3359 }
3360}
3361
3362static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag)
3363{
3364 return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3365 inet_csk(sk)->icsk_ca_state != TCP_CA_Open);
3366}
3367
3368static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3369{
3370 const struct tcp_sock *tp = tcp_sk(sk);
3371 return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
3372 !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
3373}
3374
3375
3376
3377
3378static inline int tcp_may_update_window(const struct tcp_sock *tp,
3379 const u32 ack, const u32 ack_seq,
3380 const u32 nwin)
3381{
3382 return (after(ack, tp->snd_una) ||
3383 after(ack_seq, tp->snd_wl1) ||
3384 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd));
3385}
3386
3387
3388
3389
3390
3391
3392static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
3393 u32 ack_seq)
3394{
3395 struct tcp_sock *tp = tcp_sk(sk);
3396 int flag = 0;
3397 u32 nwin = ntohs(tcp_hdr(skb)->window);
3398
3399 if (likely(!tcp_hdr(skb)->syn))
3400 nwin <<= tp->rx_opt.snd_wscale;
3401
3402 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3403 flag |= FLAG_WIN_UPDATE;
3404 tcp_update_wl(tp, ack_seq);
3405
3406 if (tp->snd_wnd != nwin) {
3407 tp->snd_wnd = nwin;
3408
3409
3410
3411
3412 tp->pred_flags = 0;
3413 tcp_fast_path_check(sk);
3414
3415 if (nwin > tp->max_window) {
3416 tp->max_window = nwin;
3417 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3418 }
3419 }
3420 }
3421
3422 tp->snd_una = ack;
3423
3424 return flag;
3425}
3426
3427
3428
3429
3430static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
3431{
3432 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
3433 tp->snd_cwnd_cnt = 0;
3434 tp->bytes_acked = 0;
3435 TCP_ECN_queue_cwr(tp);
3436 tcp_moderate_cwnd(tp);
3437}
3438
3439
3440
3441
/* Rate-halving response to a spurious RTO: enter the CWR state via
 * tcp_enter_cwr(), passing 0 as its second argument.
 */
static void tcp_ratehalving_spur_to_response(struct sock *sk)
{
	tcp_enter_cwr(sk, 0);
}
3446
3447static void tcp_undo_spur_to_response(struct sock *sk, int flag)
3448{
3449 if (flag & FLAG_ECE)
3450 tcp_ratehalving_spur_to_response(sk);
3451 else
3452 tcp_undo_cwr(sk, 1);
3453}
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
/* Process one ACK while F-RTO detection is in progress (the caller invokes
 * this only when tp->frto_counter is non-zero).
 *
 * Depending on what the ACK acknowledged, this either falls back to
 * conventional loss recovery (tcp_enter_frto_loss), advances the F-RTO
 * step (frto_counter 1 -> 2), or declares the RTO spurious and applies the
 * response selected by sysctl_tcp_frto_response.
 *
 * Returns 1 in every case except the spurious-RTO case, which returns 0.
 * The caller uses a non-zero return to skip tcp_cong_avoid() for this ACK.
 */
3485 static int tcp_process_frto(struct sock *sk, int flag)
3486 {
3487 struct tcp_sock *tp = tcp_sk(sk);
3488
3489 tcp_verify_left_out(tp);
3490
3491
/* Any acked data resets the retransmit counter. */
3492 if (flag & FLAG_DATA_ACKED)
3493 inet_csk(sk)->icsk_retransmits = 0;
3494
/* Acked retransmissions (non-head, or any once step 2 is reached) make
 * an undo impossible: clear the undo marker.
 */
3495 if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
3496 ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
3497 tp->undo_marker = 0;
3498
/* snd_una reached frto_highmark: give up on F-RTO and enter loss recovery. */
3499 if (!before(tp->snd_una, tp->frto_highmark)) {
3500 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
3501 return 1;
3502 }
3503
3504 if (!tcp_is_sackfrto(tp)) {
3505
3506
3507
3508
/* Non-SACK variant: an ACK that is not a duplicate but made no
 * progress is ignored; an ACK that acked no data triggers loss recovery.
 */
3509 if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP))
3510 return 1;
3511
3512 if (!(flag & FLAG_DATA_ACKED)) {
3513 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
3514 flag);
3515 return 1;
3516 }
3517 } else {
/* SACK variant, step 1 without acked data: only clamp cwnd to the
 * packets currently in flight.
 */
3518 if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
3519
3520 tp->snd_cwnd = min(tp->snd_cwnd,
3521 tcp_packets_in_flight(tp));
3522 return 1;
3523 }
3524
/* SACK variant, step 2: no forward progress, or SACKed data that was
 * not exclusively original transmissions, means conventional recovery,
 * except for non-duplicate ACKs without progress, which are ignored.
 */
3525 if ((tp->frto_counter >= 2) &&
3526 (!(flag & FLAG_FORWARD_PROGRESS) ||
3527 ((flag & FLAG_DATA_SACKED) &&
3528 !(flag & FLAG_ONLY_ORIG_SACKED)))) {
3529
3530 if (!(flag & FLAG_FORWARD_PROGRESS) &&
3531 (flag & FLAG_NOT_DUP))
3532 return 1;
3533
3534 tcp_enter_frto_loss(sk, 3, flag);
3535 return 1;
3536 }
3537 }
3538
3539 if (tp->frto_counter == 1) {
3540
/* Step 1 passed: open cwnd for two more segments and move to step 2;
 * fall back to loss recovery if nothing can be sent right now.
 */
3541 tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
3542 tp->frto_counter = 2;
3543
3544 if (!tcp_may_send_now(sk))
3545 tcp_enter_frto_loss(sk, 2, flag);
3546
3547 return 1;
3548 } else {
/* RTO judged spurious: apply the configured response, leave F-RTO
 * and count the event.
 */
3549 switch (sysctl_tcp_frto_response) {
3550 case 2:
3551 tcp_undo_spur_to_response(sk, flag);
3552 break;
3553 case 1:
3554 tcp_conservative_spur_to_response(tp);
3555 break;
3556 default:
3557 tcp_ratehalving_spur_to_response(sk);
3558 break;
3559 }
3560 tp->frto_counter = 0;
3561 tp->undo_marker = 0;
3562 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS);
3563 }
3564 return 0;
3565 }
3566
3567
/* Process the acknowledgment carried by an incoming segment.
 *
 * @sk:   the socket
 * @skb:  the received segment
 * @flag: initial FLAG_* bits from the caller (FLAG_SLOWPATH selects the
 *        slow path even when the ACK advances snd_una)
 *
 * Returns 1 when the ACK was accepted, 0 when it is older than snd_una
 * (SACK information in it is still processed), and -1 when it acknowledges
 * data beyond snd_nxt.
 */
3568 static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3569 {
3570 struct inet_connection_sock *icsk = inet_csk(sk);
3571 struct tcp_sock *tp = tcp_sk(sk);
3572 u32 prior_snd_una = tp->snd_una;
3573 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3574 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3575 u32 prior_in_flight;
3576 u32 prior_fackets;
3577 int prior_packets;
3578 int frto_cwnd = 0;
3579
3580
3581
3582
/* ACK is older than snd_una: handle it as an old ACK. */
3583 if (before(ack, prior_snd_una))
3584 goto old_ack;
3585
3586
3587
3588
/* ACK covers data that has not been sent yet: reject it. */
3589 if (after(ack, tp->snd_nxt))
3590 goto invalid_ack;
3591
3592 if (after(ack, prior_snd_una))
3593 flag |= FLAG_SND_UNA_ADVANCED;
3594
/* Byte counting (sysctl_tcp_abc): count the newly acked bytes below
 * the CWR state, and at most one MSS per ACK in the Loss state.
 */
3595 if (sysctl_tcp_abc) {
3596 if (icsk->icsk_ca_state < TCP_CA_CWR)
3597 tp->bytes_acked += ack - prior_snd_una;
3598 else if (icsk->icsk_ca_state == TCP_CA_Loss)
3599
3600 tp->bytes_acked += min(ack - prior_snd_una,
3601 tp->mss_cache);
3602 }
3603
3604 prior_fackets = tp->fackets_out;
3605 prior_in_flight = tcp_packets_in_flight(tp);
3606
3607 if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
3608
3609
3610
3611
/* Fast path: the window is not re-evaluated; just record the
 * sequence number and advance snd_una.
 */
3612 tcp_update_wl(tp, ack_seq);
3613 tp->snd_una = ack;
3614 flag |= FLAG_WIN_UPDATE;
3615
3616 tcp_ca_event(sk, CA_EVENT_FAST_ACK);
3617
3618 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
3619 } else {
/* Slow path: note whether the segment carries data, update the
 * window, process SACK blocks and check for an ECN echo.
 */
3620 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3621 flag |= FLAG_DATA;
3622 else
3623 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3624
3625 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3626
3627 if (TCP_SKB_CB(skb)->sacked)
3628 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3629
3630 if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
3631 flag |= FLAG_ECE;
3632
3633 tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
3634 }
3635
3636
3637
3638
/* A valid ACK arrived: clear the soft error and the probe counter and
 * remember when it was received.
 */
3639 sk->sk_err_soft = 0;
3640 icsk->icsk_probes_out = 0;
3641 tp->rcv_tstamp = tcp_time_stamp;
3642 prior_packets = tp->packets_out;
3643 if (!prior_packets)
3644 goto no_queue;
3645
3646
/* Free the acknowledged part of the retransmit queue. */
3647 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
3648
3649 if (tp->frto_counter)
3650 frto_cwnd = tcp_process_frto(sk, flag);
3651
/* snd_una moved past frto_highmark: the mark is no longer needed. */
3652 if (before(tp->frto_highmark, tp->snd_una))
3653 tp->frto_highmark = 0;
3654
3655 if (tcp_ack_is_dubious(sk, flag)) {
3656
/* Dubious ACK: grow cwnd only when permitted, then run the
 * fast-retransmit state machine with the number of newly acked packets.
 */
3657 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
3658 tcp_may_raise_cwnd(sk, flag))
3659 tcp_cong_avoid(sk, ack, prior_in_flight);
3660 tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
3661 flag);
3662 } else {
3663 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
3664 tcp_cong_avoid(sk, ack, prior_in_flight);
3665 }
3666
/* Forward progress or a duplicate ACK confirms the cached route. */
3667 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3668 dst_confirm(sk->sk_dst_cache);
3669
3670 return 1;
3671
3672 no_queue:
3673
3674
3675
3676
/* Nothing was in flight: if data is waiting to be sent, re-evaluate
 * the zero-window probe timer.
 */
3677 if (tcp_send_head(sk))
3678 tcp_ack_probe(sk);
3679 return 1;
3680
3681 invalid_ack:
3682 SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3683 return -1;
3684
3685 old_ack:
/* An old ACK may still carry SACK blocks worth processing. */
3686 if (TCP_SKB_CB(skb)->sacked) {
3687 tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3688 if (icsk->icsk_ca_state == TCP_CA_Open)
3689 tcp_try_keep_open(sk);
3690 }
3691
3692 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3693 return 0;
3694 }
3695
3696
3697
3698
3699
3700void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3701 int estab)
3702{
3703 unsigned char *ptr;
3704 struct tcphdr *th = tcp_hdr(skb);
3705 int length = (th->doff * 4) - sizeof(struct tcphdr);
3706
3707 ptr = (unsigned char *)(th + 1);
3708 opt_rx->saw_tstamp = 0;
3709
3710 while (length > 0) {
3711 int opcode = *ptr++;
3712 int opsize;
3713
3714 switch (opcode) {
3715 case TCPOPT_EOL:
3716 return;
3717 case TCPOPT_NOP:
3718 length--;
3719 continue;
3720 default:
3721 opsize = *ptr++;
3722 if (opsize < 2)
3723 return;
3724 if (opsize > length)
3725 return;
3726 switch (opcode) {
3727 case TCPOPT_MSS:
3728 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
3729 u16 in_mss = get_unaligned_be16(ptr);
3730 if (in_mss) {
3731 if (opt_rx->user_mss &&
3732 opt_rx->user_mss < in_mss)
3733 in_mss = opt_rx->user_mss;
3734 opt_rx->mss_clamp = in_mss;
3735 }
3736 }
3737 break;
3738 case TCPOPT_WINDOW:
3739 if (opsize == TCPOLEN_WINDOW && th->syn &&
3740 !estab && sysctl_tcp_window_scaling) {
3741 __u8 snd_wscale = *(__u8 *)ptr;
3742 opt_rx->wscale_ok = 1;
3743 if (snd_wscale > 14) {
3744 if (net_ratelimit())
3745 printk(KERN_INFO "tcp_parse_options: Illegal window "
3746 "scaling value %d >14 received.\n",
3747 snd_wscale);
3748 snd_wscale = 14;
3749 }
3750 opt_rx->snd_wscale = snd_wscale;
3751 }
3752 break;
3753 case TCPOPT_TIMESTAMP:
3754 if ((opsize == TCPOLEN_TIMESTAMP) &&
3755 ((estab && opt_rx->tstamp_ok) ||
3756 (!estab && sysctl_tcp_timestamps))) {
3757 opt_rx->saw_tstamp = 1;
3758 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3759 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
3760 }
3761 break;
3762 case TCPOPT_SACK_PERM:
3763 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3764 !estab && sysctl_tcp_sack) {
3765 opt_rx->sack_ok = 1;
3766 tcp_sack_reset(opt_rx);
3767 }
3768 break;
3769
3770 case TCPOPT_SACK:
3771 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3772 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3773 opt_rx->sack_ok) {
3774 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3775 }
3776 break;
3777#ifdef CONFIG_TCP_MD5SIG
3778 case TCPOPT_MD5SIG:
3779
3780
3781
3782
3783 break;
3784#endif
3785 }
3786
3787 ptr += opsize-2;
3788 length -= opsize;
3789 }
3790 }
3791}
3792
3793static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
3794{
3795 __be32 *ptr = (__be32 *)(th + 1);
3796
3797 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
3798 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
3799 tp->rx_opt.saw_tstamp = 1;
3800 ++ptr;
3801 tp->rx_opt.rcv_tsval = ntohl(*ptr);
3802 ++ptr;
3803 tp->rx_opt.rcv_tsecr = ntohl(*ptr);
3804 return 1;
3805 }
3806 return 0;
3807}
3808
3809
3810
3811
3812static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
3813 struct tcp_sock *tp)
3814{
3815 if (th->doff == sizeof(struct tcphdr) >> 2) {
3816 tp->rx_opt.saw_tstamp = 0;
3817 return 0;
3818 } else if (tp->rx_opt.tstamp_ok &&
3819 th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
3820 if (tcp_parse_aligned_timestamp(tp, th))
3821 return 1;
3822 }
3823 tcp_parse_options(skb, &tp->rx_opt, 1);
3824 return 1;
3825}
3826
3827#ifdef CONFIG_TCP_MD5SIG
3828
3829
3830
3831u8 *tcp_parse_md5sig_option(struct tcphdr *th)
3832{
3833 int length = (th->doff << 2) - sizeof (*th);
3834 u8 *ptr = (u8*)(th + 1);
3835
3836
3837 if (length < TCPOLEN_MD5SIG)
3838 return NULL;
3839
3840 while (length > 0) {
3841 int opcode = *ptr++;
3842 int opsize;
3843
3844 switch(opcode) {
3845 case TCPOPT_EOL:
3846 return NULL;
3847 case TCPOPT_NOP:
3848 length--;
3849 continue;
3850 default:
3851 opsize = *ptr++;
3852 if (opsize < 2 || opsize > length)
3853 return NULL;
3854 if (opcode == TCPOPT_MD5SIG)
3855 return ptr;
3856 }
3857 ptr += opsize - 2;
3858 length -= opsize;
3859 }
3860 return NULL;
3861}
3862#endif
3863
3864static inline void tcp_store_ts_recent(struct tcp_sock *tp)
3865{
3866 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3867 tp->rx_opt.ts_recent_stamp = get_seconds();
3868}
3869
3870static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3871{
3872 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
3873
3874
3875
3876
3877
3878
3879
3880 if (tcp_paws_check(&tp->rx_opt, 0))
3881 tcp_store_ts_recent(tp);
3882 }
3883}
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
3909{
3910 struct tcp_sock *tp = tcp_sk(sk);
3911 struct tcphdr *th = tcp_hdr(skb);
3912 u32 seq = TCP_SKB_CB(skb)->seq;
3913 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3914
3915 return (
3916 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
3917
3918
3919 ack == tp->snd_una &&
3920
3921
3922 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
3923
3924
3925 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
3926}
3927
3928static inline int tcp_paws_discard(const struct sock *sk,
3929 const struct sk_buff *skb)
3930{
3931 const struct tcp_sock *tp = tcp_sk(sk);
3932
3933 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
3934 !tcp_disordered_ack(sk, skb);
3935}
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950static inline int tcp_sequence(struct tcp_sock *tp, u32 seq, u32 end_seq)
3951{
3952 return !before(end_seq, tp->rcv_wup) &&
3953 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
3954}
3955
3956
/* Handle a received reset: set the socket error according to the current
 * state, report it unless the socket is dead, and finish the connection.
 * A socket that is already closed is left untouched.
 */
static void tcp_reset(struct sock *sk)
{
	const int state = sk->sk_state;

	if (state == TCP_CLOSE)
		return;

	if (state == TCP_SYN_SENT)
		sk->sk_err = ECONNREFUSED;
	else if (state == TCP_CLOSE_WAIT)
		sk->sk_err = EPIPE;
	else
		sk->sk_err = ECONNRESET;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_error_report(sk);

	tcp_done(sk);
}
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
/* Process a received FIN.
 *
 * Schedules an ACK, marks the receive side as shut down, advances the
 * connection state machine, discards the out-of-order queue (and SACK
 * state), and wakes up anyone waiting on the socket.
 *
 * @skb and @th are not referenced in the body.
 */
3993 static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
3994 {
3995 struct tcp_sock *tp = tcp_sk(sk);
3996
3997 inet_csk_schedule_ack(sk);
3998
3999 sk->sk_shutdown |= RCV_SHUTDOWN;
4000 sock_set_flag(sk, SOCK_DONE);
4001
4002 switch (sk->sk_state) {
4003 case TCP_SYN_RECV:
4004 case TCP_ESTABLISHED:
4005
/* Peer closed first: move to CLOSE_WAIT and set the pingpong flag. */
4006 tcp_set_state(sk, TCP_CLOSE_WAIT);
4007 inet_csk(sk)->icsk_ack.pingpong = 1;
4008 break;
4009
4010 case TCP_CLOSE_WAIT:
4011 case TCP_CLOSING:
4012
4013
4014
/* FIN seen again in these states: no state change. */
4015 break;
4016 case TCP_LAST_ACK:
4017
/* No state change in LAST_ACK either. */
4018 break;
4019
4020 case TCP_FIN_WAIT1:
4021
4022
4023
4024
/* Both sides sent a FIN: acknowledge it and enter CLOSING. */
4025 tcp_send_ack(sk);
4026 tcp_set_state(sk, TCP_CLOSING);
4027 break;
4028 case TCP_FIN_WAIT2:
4029
/* Our FIN was already acked: acknowledge the peer's FIN and go to TIME_WAIT. */
4030 tcp_send_ack(sk);
4031 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
4032 break;
4033 default:
4034
4035
4036
/* Any other state is unexpected here; just log it. */
4037 printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
4038 __func__, sk->sk_state);
4039 break;
4040 }
4041
4042
4043
4044
/* Drop everything queued out of order, reset SACK state, and return
 * the freed memory.
 */
4045 __skb_queue_purge(&tp->out_of_order_queue);
4046 if (tcp_is_sack(tp))
4047 tcp_sack_reset(&tp->rx_opt);
4048 sk_mem_reclaim(sk);
4049
4050 if (!sock_flag(sk, SOCK_DEAD)) {
4051 sk->sk_state_change(sk);
4052
4053
/* Signal hang-up when both directions are shut down or the socket is
 * closed, otherwise signal readable input.
 */
4054 if (sk->sk_shutdown == SHUTDOWN_MASK ||
4055 sk->sk_state == TCP_CLOSE)
4056 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
4057 else
4058 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
4059 }
4060 }
4061
4062static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4063 u32 end_seq)
4064{
4065 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
4066 if (before(seq, sp->start_seq))
4067 sp->start_seq = seq;
4068 if (after(end_seq, sp->end_seq))
4069 sp->end_seq = end_seq;
4070 return 1;
4071 }
4072 return 0;
4073}
4074
4075static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4076{
4077 struct tcp_sock *tp = tcp_sk(sk);
4078
4079 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4080 int mib_idx;
4081
4082 if (before(seq, tp->rcv_nxt))
4083 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
4084 else
4085 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
4086
4087 NET_INC_STATS_BH(sock_net(sk), mib_idx);
4088
4089 tp->rx_opt.dsack = 1;
4090 tp->duplicate_sack[0].start_seq = seq;
4091 tp->duplicate_sack[0].end_seq = end_seq;
4092 }
4093}
4094
4095static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4096{
4097 struct tcp_sock *tp = tcp_sk(sk);
4098
4099 if (!tp->rx_opt.dsack)
4100 tcp_dsack_set(sk, seq, end_seq);
4101 else
4102 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
4103}
4104
4105static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
4106{
4107 struct tcp_sock *tp = tcp_sk(sk);
4108
4109 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4110 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4111 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4112 tcp_enter_quickack_mode(sk);
4113
4114 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4115 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4116
4117 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
4118 end_seq = tp->rcv_nxt;
4119 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
4120 }
4121 }
4122
4123 tcp_send_ack(sk);
4124}
4125
4126
4127
4128
4129static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4130{
4131 int this_sack;
4132 struct tcp_sack_block *sp = &tp->selective_acks[0];
4133 struct tcp_sack_block *swalk = sp + 1;
4134
4135
4136
4137
4138 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
4139 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
4140 int i;
4141
4142
4143
4144
4145 tp->rx_opt.num_sacks--;
4146 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4147 sp[i] = sp[i + 1];
4148 continue;
4149 }
4150 this_sack++, swalk++;
4151 }
4152}
4153
/* Record a newly queued out-of-order range [seq, end_seq] in the SACK
 * block list.
 *
 * If the range can be merged into an existing block, that block is moved
 * to the front of the list and neighbouring blocks are coalesced.
 * Otherwise a new block is inserted at the front; when the list is already
 * full (TCP_NUM_SACKS), the last block is dropped to make room.
 */
4154 static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4155 {
4156 struct tcp_sock *tp = tcp_sk(sk);
4157 struct tcp_sack_block *sp = &tp->selective_acks[0];
4158 int cur_sacks = tp->rx_opt.num_sacks;
4159 int this_sack;
4160
/* Empty list: the new block simply becomes entry 0. */
4161 if (!cur_sacks)
4162 goto new_sack;
4163
4164 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
4165 if (tcp_sack_extend(sp, seq, end_seq)) {
4166
/* Bubble the extended block to the front with pairwise swaps. */
4167 for (; this_sack > 0; this_sack--, sp--)
4168 swap(*sp, *(sp - 1));
4169 if (cur_sacks > 1)
4170 tcp_sack_maybe_coalesce(tp);
4171 return;
4172 }
4173 }
4174
4175
4176
4177
4178
4179
4180
/* No block could be extended. Here this_sack == cur_sacks and sp points
 * one past the last used entry. If the list is full, give up the last
 * entry so the shift below stays within the array.
 */
4181 if (this_sack >= TCP_NUM_SACKS) {
4182 this_sack--;
4183 tp->rx_opt.num_sacks--;
4184 sp--;
4185 }
/* Shift every entry up by one; sp ends at entry 0. */
4186 for (; this_sack > 0; this_sack--, sp--)
4187 *sp = *(sp - 1);
4188
4189 new_sack:
4190
/* Store the new block at the front of the list. */
4191 sp->start_seq = seq;
4192 sp->end_seq = end_seq;
4193 tp->rx_opt.num_sacks++;
4194 }
4195
4196
4197
4198static void tcp_sack_remove(struct tcp_sock *tp)
4199{
4200 struct tcp_sack_block *sp = &tp->selective_acks[0];
4201 int num_sacks = tp->rx_opt.num_sacks;
4202 int this_sack;
4203
4204
4205 if (skb_queue_empty(&tp->out_of_order_queue)) {
4206 tp->rx_opt.num_sacks = 0;
4207 return;
4208 }
4209
4210 for (this_sack = 0; this_sack < num_sacks;) {
4211
4212 if (!before(tp->rcv_nxt, sp->start_seq)) {
4213 int i;
4214
4215
4216 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4217
4218
4219 for (i=this_sack+1; i < num_sacks; i++)
4220 tp->selective_acks[i-1] = tp->selective_acks[i];
4221 num_sacks--;
4222 continue;
4223 }
4224 this_sack++;
4225 sp++;
4226 }
4227 tp->rx_opt.num_sacks = num_sacks;
4228}
4229
4230
4231
4232
/* Move segments from the out-of-order queue to the receive queue now that
 * rcv_nxt has advanced.
 *
 * Segments are taken from the head of the out-of-order queue for as long
 * as they start at or before rcv_nxt. Parts that duplicate data already
 * received are reported through D-SACK; segments that are entirely old are
 * freed; the others are appended to sk_receive_queue and advance rcv_nxt.
 * A FIN on a moved segment is processed immediately.
 */
4233 static void tcp_ofo_queue(struct sock *sk)
4234 {
4235 struct tcp_sock *tp = tcp_sk(sk);
4236 __u32 dsack_high = tp->rcv_nxt;
4237 struct sk_buff *skb;
4238
4239 while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
/* The head segment leaves a gap after rcv_nxt: nothing more to move. */
4240 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
4241 break;
4242
/* The segment starts below dsack_high, so it overlaps data already
 * received: extend the D-SACK report and lower dsack_high if the segment
 * ends before it.
 */
4243 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
4244 __u32 dsack = dsack_high;
4245 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
4246 dsack_high = TCP_SKB_CB(skb)->end_seq;
4247 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
4248 }
4249
/* Entirely at or below rcv_nxt: pure duplicate, free it. */
4250 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4251 SOCK_DEBUG(sk, "ofo packet was already received \n");
4252 __skb_unlink(skb, &tp->out_of_order_queue);
4253 __kfree_skb(skb);
4254 continue;
4255 }
4256 SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
4257 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4258 TCP_SKB_CB(skb)->end_seq);
4259
/* Hand the segment to the receive queue and advance rcv_nxt past it. */
4260 __skb_unlink(skb, &tp->out_of_order_queue);
4261 __skb_queue_tail(&sk->sk_receive_queue, skb);
4262 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4263 if (tcp_hdr(skb)->fin)
4264 tcp_fin(skb, sk, tcp_hdr(skb));
4265 }
4266 }
4267
4268static int tcp_prune_ofo_queue(struct sock *sk);
4269static int tcp_prune_queue(struct sock *sk);
4270
4271static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
4272{
4273 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4274 !sk_rmem_schedule(sk, size)) {
4275
4276 if (tcp_prune_queue(sk) < 0)
4277 return -1;
4278
4279 if (!sk_rmem_schedule(sk, size)) {
4280 if (!tcp_prune_ofo_queue(sk))
4281 return -1;
4282
4283 if (!sk_rmem_schedule(sk, size))
4284 return -1;
4285 }
4286 }
4287 return 0;
4288}
4289
4290static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4291{
4292 struct tcphdr *th = tcp_hdr(skb);
4293 struct tcp_sock *tp = tcp_sk(sk);
4294 int eaten = -1;
4295
4296 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
4297 goto drop;
4298
4299 __skb_pull(skb, th->doff * 4);
4300
4301 TCP_ECN_accept_cwr(tp, skb);
4302
4303 tp->rx_opt.dsack = 0;
4304
4305
4306
4307
4308
4309 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
4310 if (tcp_receive_window(tp) == 0)
4311 goto out_of_window;
4312
4313
4314 if (tp->ucopy.task == current &&
4315 tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
4316 sock_owned_by_user(sk) && !tp->urg_data) {
4317 int chunk = min_t(unsigned int, skb->len,
4318 tp->ucopy.len);
4319
4320 __set_current_state(TASK_RUNNING);
4321
4322 local_bh_enable();
4323 if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
4324 tp->ucopy.len -= chunk;
4325 tp->copied_seq += chunk;
4326 eaten = (chunk == skb->len && !th->fin);
4327 tcp_rcv_space_adjust(sk);
4328 }
4329 local_bh_disable();
4330 }
4331
4332 if (eaten <= 0) {
4333queue_and_out:
4334 if (eaten < 0 &&
4335 tcp_try_rmem_schedule(sk, skb->truesize))
4336 goto drop;
4337
4338 skb_set_owner_r(skb, sk);
4339 __skb_queue_tail(&sk->sk_receive_queue, skb);
4340 }
4341 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4342 if (skb->len)
4343 tcp_event_data_recv(sk, skb);
4344 if (th->fin)
4345 tcp_fin(skb, sk, th);
4346
4347 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4348 tcp_ofo_queue(sk);
4349
4350
4351
4352
4353 if (skb_queue_empty(&tp->out_of_order_queue))
4354 inet_csk(sk)->icsk_ack.pingpong = 0;
4355 }
4356
4357 if (tp->rx_opt.num_sacks)
4358 tcp_sack_remove(tp);
4359
4360 tcp_fast_path_check(sk);
4361
4362 if (eaten > 0)
4363 __kfree_skb(skb);
4364 else if (!sock_flag(sk, SOCK_DEAD))
4365 sk->sk_data_ready(sk, 0);
4366 return;
4367 }
4368
4369 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4370
4371 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4372 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4373
4374out_of_window:
4375 tcp_enter_quickack_mode(sk);
4376 inet_csk_schedule_ack(sk);
4377drop:
4378 __kfree_skb(skb);
4379 return;
4380 }
4381
4382
4383 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
4384 goto out_of_window;
4385
4386 tcp_enter_quickack_mode(sk);
4387
4388 if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4389
4390 SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
4391 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4392 TCP_SKB_CB(skb)->end_seq);
4393
4394 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
4395
4396
4397
4398
4399 if (!tcp_receive_window(tp))
4400 goto out_of_window;
4401 goto queue_and_out;
4402 }
4403
4404 TCP_ECN_check_ce(tp, skb);
4405
4406 if (tcp_try_rmem_schedule(sk, skb->truesize))
4407 goto drop;
4408
4409
4410 tp->pred_flags = 0;
4411 inet_csk_schedule_ack(sk);
4412
4413 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
4414 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4415
4416 skb_set_owner_r(skb, sk);
4417
4418 if (!skb_peek(&tp->out_of_order_queue)) {
4419
4420 if (tcp_is_sack(tp)) {
4421 tp->rx_opt.num_sacks = 1;
4422 tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
4423 tp->selective_acks[0].end_seq =
4424 TCP_SKB_CB(skb)->end_seq;
4425 }
4426 __skb_queue_head(&tp->out_of_order_queue, skb);
4427 } else {
4428 struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue);
4429 u32 seq = TCP_SKB_CB(skb)->seq;
4430 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4431
4432 if (seq == TCP_SKB_CB(skb1)->end_seq) {
4433 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4434
4435 if (!tp->rx_opt.num_sacks ||
4436 tp->selective_acks[0].end_seq != seq)
4437 goto add_sack;
4438
4439
4440 tp->selective_acks[0].end_seq = end_seq;
4441 return;
4442 }
4443
4444
4445 while (1) {
4446 if (!after(TCP_SKB_CB(skb1)->seq, seq))
4447 break;
4448 if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
4449 skb1 = NULL;
4450 break;
4451 }
4452 skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
4453 }
4454
4455
4456 if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4457 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4458
4459 __kfree_skb(skb);
4460 tcp_dsack_set(sk, seq, end_seq);
4461 goto add_sack;
4462 }
4463 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
4464
4465 tcp_dsack_set(sk, seq,
4466 TCP_SKB_CB(skb1)->end_seq);
4467 } else {
4468 if (skb_queue_is_first(&tp->out_of_order_queue,
4469 skb1))
4470 skb1 = NULL;
4471 else
4472 skb1 = skb_queue_prev(
4473 &tp->out_of_order_queue,
4474 skb1);
4475 }
4476 }
4477 if (!skb1)
4478 __skb_queue_head(&tp->out_of_order_queue, skb);
4479 else
4480 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4481
4482
4483 while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
4484 skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
4485
4486 if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
4487 break;
4488 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4489 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4490 end_seq);
4491 break;
4492 }
4493 __skb_unlink(skb1, &tp->out_of_order_queue);
4494 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4495 TCP_SKB_CB(skb1)->end_seq);
4496 __kfree_skb(skb1);
4497 }
4498
4499add_sack:
4500 if (tcp_is_sack(tp))
4501 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4502 }
4503}
4504
4505static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
4506 struct sk_buff_head *list)
4507{
4508 struct sk_buff *next = NULL;
4509
4510 if (!skb_queue_is_last(list, skb))
4511 next = skb_queue_next(list, skb);
4512
4513 __skb_unlink(skb, list);
4514 __kfree_skb(skb);
4515 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
4516
4517 return next;
4518}
4519
4520
4521
4522
4523
4524
4525
4526
4527
4528static void
4529tcp_collapse(struct sock *sk, struct sk_buff_head *list,
4530 struct sk_buff *head, struct sk_buff *tail,
4531 u32 start, u32 end)
4532{
4533 struct sk_buff *skb, *n;
4534 bool end_of_skbs;
4535
4536
4537
4538 skb = head;
4539restart:
4540 end_of_skbs = true;
4541 skb_queue_walk_from_safe(list, skb, n) {
4542 if (skb == tail)
4543 break;
4544
4545 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4546 skb = tcp_collapse_one(sk, skb, list);
4547 if (!skb)
4548 break;
4549 goto restart;
4550 }
4551
4552
4553
4554
4555
4556
4557 if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
4558 (tcp_win_from_space(skb->truesize) > skb->len ||
4559 before(TCP_SKB_CB(skb)->seq, start))) {
4560 end_of_skbs = false;
4561 break;
4562 }
4563
4564 if (!skb_queue_is_last(list, skb)) {
4565 struct sk_buff *next = skb_queue_next(list, skb);
4566 if (next != tail &&
4567 TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) {
4568 end_of_skbs = false;
4569 break;
4570 }
4571 }
4572
4573
4574 start = TCP_SKB_CB(skb)->end_seq;
4575 }
4576 if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
4577 return;
4578
4579 while (before(start, end)) {
4580 struct sk_buff *nskb;
4581 unsigned int header = skb_headroom(skb);
4582 int copy = SKB_MAX_ORDER(header, 0);
4583
4584
4585 if (copy < 0)
4586 return;
4587 if (end - start < copy)
4588 copy = end - start;
4589 nskb = alloc_skb(copy + header, GFP_ATOMIC);
4590 if (!nskb)
4591 return;
4592
4593 skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
4594 skb_set_network_header(nskb, (skb_network_header(skb) -
4595 skb->head));
4596 skb_set_transport_header(nskb, (skb_transport_header(skb) -
4597 skb->head));
4598 skb_reserve(nskb, header);
4599 memcpy(nskb->head, skb->head, header);
4600 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
4601 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
4602 __skb_queue_before(list, skb, nskb);
4603 skb_set_owner_r(nskb, sk);
4604
4605
4606 while (copy > 0) {
4607 int offset = start - TCP_SKB_CB(skb)->seq;
4608 int size = TCP_SKB_CB(skb)->end_seq - start;
4609
4610 BUG_ON(offset < 0);
4611 if (size > 0) {
4612 size = min(copy, size);
4613 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
4614 BUG();
4615 TCP_SKB_CB(nskb)->end_seq += size;
4616 copy -= size;
4617 start += size;
4618 }
4619 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4620 skb = tcp_collapse_one(sk, skb, list);
4621 if (!skb ||
4622 skb == tail ||
4623 tcp_hdr(skb)->syn ||
4624 tcp_hdr(skb)->fin)
4625 return;
4626 }
4627 }
4628 }
4629}
4630
4631
4632
4633
4634static void tcp_collapse_ofo_queue(struct sock *sk)
4635{
4636 struct tcp_sock *tp = tcp_sk(sk);
4637 struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
4638 struct sk_buff *head;
4639 u32 start, end;
4640
4641 if (skb == NULL)
4642 return;
4643
4644 start = TCP_SKB_CB(skb)->seq;
4645 end = TCP_SKB_CB(skb)->end_seq;
4646 head = skb;
4647
4648 for (;;) {
4649 struct sk_buff *next = NULL;
4650
4651 if (!skb_queue_is_last(&tp->out_of_order_queue, skb))
4652 next = skb_queue_next(&tp->out_of_order_queue, skb);
4653 skb = next;
4654
4655
4656
4657 if (!skb ||
4658 after(TCP_SKB_CB(skb)->seq, end) ||
4659 before(TCP_SKB_CB(skb)->end_seq, start)) {
4660 tcp_collapse(sk, &tp->out_of_order_queue,
4661 head, skb, start, end);
4662 head = skb;
4663 if (!skb)
4664 break;
4665
4666 start = TCP_SKB_CB(skb)->seq;
4667 end = TCP_SKB_CB(skb)->end_seq;
4668 } else {
4669 if (before(TCP_SKB_CB(skb)->seq, start))
4670 start = TCP_SKB_CB(skb)->seq;
4671 if (after(TCP_SKB_CB(skb)->end_seq, end))
4672 end = TCP_SKB_CB(skb)->end_seq;
4673 }
4674 }
4675}
4676
4677
4678
4679
4680
4681static int tcp_prune_ofo_queue(struct sock *sk)
4682{
4683 struct tcp_sock *tp = tcp_sk(sk);
4684 int res = 0;
4685
4686 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4687 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
4688 __skb_queue_purge(&tp->out_of_order_queue);
4689
4690
4691
4692
4693
4694
4695 if (tp->rx_opt.sack_ok)
4696 tcp_sack_reset(&tp->rx_opt);
4697 sk_mem_reclaim(sk);
4698 res = 1;
4699 }
4700 return res;
4701}
4702
4703
4704
4705
4706
4707
4708
4709
4710static int tcp_prune_queue(struct sock *sk)
4711{
4712 struct tcp_sock *tp = tcp_sk(sk);
4713
4714 SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
4715
4716 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED);
4717
4718 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
4719 tcp_clamp_window(sk);
4720 else if (tcp_memory_pressure)
4721 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
4722
4723 tcp_collapse_ofo_queue(sk);
4724 if (!skb_queue_empty(&sk->sk_receive_queue))
4725 tcp_collapse(sk, &sk->sk_receive_queue,
4726 skb_peek(&sk->sk_receive_queue),
4727 NULL,
4728 tp->copied_seq, tp->rcv_nxt);
4729 sk_mem_reclaim(sk);
4730
4731 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4732 return 0;
4733
4734
4735
4736
4737 tcp_prune_ofo_queue(sk);
4738
4739 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4740 return 0;
4741
4742
4743
4744
4745
4746 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED);
4747
4748
4749 tp->pred_flags = 0;
4750 return -1;
4751}
4752
4753
4754
4755
4756
4757void tcp_cwnd_application_limited(struct sock *sk)
4758{
4759 struct tcp_sock *tp = tcp_sk(sk);
4760
4761 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
4762 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
4763
4764 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
4765 u32 win_used = max(tp->snd_cwnd_used, init_win);
4766 if (win_used < tp->snd_cwnd) {
4767 tp->snd_ssthresh = tcp_current_ssthresh(sk);
4768 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
4769 }
4770 tp->snd_cwnd_used = 0;
4771 }
4772 tp->snd_cwnd_stamp = tcp_time_stamp;
4773}
4774
4775static int tcp_should_expand_sndbuf(struct sock *sk)
4776{
4777 struct tcp_sock *tp = tcp_sk(sk);
4778
4779
4780
4781
4782 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
4783 return 0;
4784
4785
4786 if (tcp_memory_pressure)
4787 return 0;
4788
4789
4790 if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
4791 return 0;
4792
4793
4794 if (tp->packets_out >= tp->snd_cwnd)
4795 return 0;
4796
4797 return 1;
4798}
4799
4800
4801
4802
4803
4804
4805
4806static void tcp_new_space(struct sock *sk)
4807{
4808 struct tcp_sock *tp = tcp_sk(sk);
4809
4810 if (tcp_should_expand_sndbuf(sk)) {
4811 int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
4812 MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
4813 int demanded = max_t(unsigned int, tp->snd_cwnd,
4814 tp->reordering + 1);
4815 sndmem *= 2 * demanded;
4816 if (sndmem > sk->sk_sndbuf)
4817 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
4818 tp->snd_cwnd_stamp = tcp_time_stamp;
4819 }
4820
4821 sk->sk_write_space(sk);
4822}
4823
4824static void tcp_check_space(struct sock *sk)
4825{
4826 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
4827 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
4828 if (sk->sk_socket &&
4829 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
4830 tcp_new_space(sk);
4831 }
4832}
4833
/*
 * Sender-side housekeeping after input processing: first push any pending
 * frames, then check whether freed queue space should be reported.
 */
static inline void tcp_data_snd_check(struct sock *sk)
{
	/* Order matters: transmit first, then look at the space left. */
	tcp_push_pending_frames(sk);

	tcp_check_space(sk);
}
4839
4840
4841
4842
4843static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
4844{
4845 struct tcp_sock *tp = tcp_sk(sk);
4846
4847
4848 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss
4849
4850
4851
4852 && __tcp_select_window(sk) >= tp->rcv_wnd) ||
4853
4854 tcp_in_quickack_mode(sk) ||
4855
4856 (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
4857
4858 tcp_send_ack(sk);
4859 } else {
4860
4861 tcp_send_delayed_ack(sk);
4862 }
4863}
4864
/*
 * Run the ACK decision logic, with the out-of-order check enabled, but only
 * if an ACK has actually been scheduled on this socket.
 */
static inline void tcp_ack_snd_check(struct sock *sk)
{
	if (inet_csk_ack_scheduled(sk))
		__tcp_ack_snd_check(sk, 1);
}
4873
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
4885{
4886 struct tcp_sock *tp = tcp_sk(sk);
4887 u32 ptr = ntohs(th->urg_ptr);
4888
4889 if (ptr && !sysctl_tcp_stdurg)
4890 ptr--;
4891 ptr += ntohl(th->seq);
4892
4893
4894 if (after(tp->copied_seq, ptr))
4895 return;
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907 if (before(ptr, tp->rcv_nxt))
4908 return;
4909
4910
4911 if (tp->urg_data && !after(ptr, tp->urg_seq))
4912 return;
4913
4914
4915 sk_send_sigurg(sk);
4916
4917
4918
4919
4920
4921
4922
4923
4924
4925
4926
4927
4928
4929
4930
4931
4932 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
4933 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
4934 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
4935 tp->copied_seq++;
4936 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
4937 __skb_unlink(skb, &sk->sk_receive_queue);
4938 __kfree_skb(skb);
4939 }
4940 }
4941
4942 tp->urg_data = TCP_URG_NOTYET;
4943 tp->urg_seq = ptr;
4944
4945
4946 tp->pred_flags = 0;
4947}
4948
4949
4950static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
4951{
4952 struct tcp_sock *tp = tcp_sk(sk);
4953
4954
4955 if (th->urg)
4956 tcp_check_urg(sk, th);
4957
4958
4959 if (tp->urg_data == TCP_URG_NOTYET) {
4960 u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
4961 th->syn;
4962
4963
4964 if (ptr < skb->len) {
4965 u8 tmp;
4966 if (skb_copy_bits(skb, ptr, &tmp, 1))
4967 BUG();
4968 tp->urg_data = TCP_URG_VALID | tmp;
4969 if (!sock_flag(sk, SOCK_DEAD))
4970 sk->sk_data_ready(sk, 0);
4971 }
4972 }
4973}
4974
4975static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
4976{
4977 struct tcp_sock *tp = tcp_sk(sk);
4978 int chunk = skb->len - hlen;
4979 int err;
4980
4981 local_bh_enable();
4982 if (skb_csum_unnecessary(skb))
4983 err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
4984 else
4985 err = skb_copy_and_csum_datagram_iovec(skb, hlen,
4986 tp->ucopy.iov);
4987
4988 if (!err) {
4989 tp->ucopy.len -= chunk;
4990 tp->copied_seq += chunk;
4991 tcp_rcv_space_adjust(sk);
4992 }
4993
4994 local_bh_disable();
4995 return err;
4996}
4997
4998static __sum16 __tcp_checksum_complete_user(struct sock *sk,
4999 struct sk_buff *skb)
5000{
5001 __sum16 result;
5002
5003 if (sock_owned_by_user(sk)) {
5004 local_bh_enable();
5005 result = __tcp_checksum_complete(skb);
5006 local_bh_disable();
5007 } else {
5008 result = __tcp_checksum_complete(skb);
5009 }
5010 return result;
5011}
5012
/*
 * Returns 1 if @skb needs checksum verification and
 * __tcp_checksum_complete_user() returns non-zero for it, 0 otherwise.
 */
static inline int tcp_checksum_complete_user(struct sock *sk,
					     struct sk_buff *skb)
{
	if (skb_csum_unnecessary(skb))
		return 0;

	return __tcp_checksum_complete_user(sk, skb) != 0;
}
5019
5020#ifdef CONFIG_NET_DMA
5021static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
5022 int hlen)
5023{
5024 struct tcp_sock *tp = tcp_sk(sk);
5025 int chunk = skb->len - hlen;
5026 int dma_cookie;
5027 int copied_early = 0;
5028
5029 if (tp->ucopy.wakeup)
5030 return 0;
5031
5032 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
5033 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
5034
5035 if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
5036
5037 dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
5038 skb, hlen,
5039 tp->ucopy.iov, chunk,
5040 tp->ucopy.pinned_list);
5041
5042 if (dma_cookie < 0)
5043 goto out;
5044
5045 tp->ucopy.dma_cookie = dma_cookie;
5046 copied_early = 1;
5047
5048 tp->ucopy.len -= chunk;
5049 tp->copied_seq += chunk;
5050 tcp_rcv_space_adjust(sk);
5051
5052 if ((tp->ucopy.len == 0) ||
5053 (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) ||
5054 (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
5055 tp->ucopy.wakeup = 1;
5056 sk->sk_data_ready(sk, 0);
5057 }
5058 } else if (chunk > 0) {
5059 tp->ucopy.wakeup = 1;
5060 sk->sk_data_ready(sk, 0);
5061 }
5062out:
5063 return copied_early;
5064}
5065#endif
5066
5067
5068
5069
5070static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5071 struct tcphdr *th, int syn_inerr)
5072{
5073 struct tcp_sock *tp = tcp_sk(sk);
5074
5075
5076 if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
5077 tcp_paws_discard(sk, skb)) {
5078 if (!th->rst) {
5079 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
5080 tcp_send_dupack(sk, skb);
5081 goto discard;
5082 }
5083
5084 }
5085
5086
5087 if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
5088
5089
5090
5091
5092
5093
5094 if (!th->rst)
5095 tcp_send_dupack(sk, skb);
5096 goto discard;
5097 }
5098
5099
5100 if (th->rst) {
5101 tcp_reset(sk);
5102 goto discard;
5103 }
5104
5105
5106
5107
5108 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
5109
5110
5111
5112
5113 if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
5114 if (syn_inerr)
5115 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5116 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
5117 tcp_reset(sk);
5118 return -1;
5119 }
5120
5121 return 1;
5122
5123discard:
5124 __kfree_skb(skb);
5125 return 0;
5126}
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136
5137
5138
5139
5140
5141
5142
5143
5144
5145
5146
5147
5148
5149
5150
5151int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5152 struct tcphdr *th, unsigned len)
5153{
5154 struct tcp_sock *tp = tcp_sk(sk);
5155 int res;
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172 tp->rx_opt.saw_tstamp = 0;
5173
5174
5175
5176
5177
5178
5179
5180
5181
5182
5183 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
5184 TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
5185 !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
5186 int tcp_header_len = tp->tcp_header_len;
5187
5188
5189
5190
5191
5192
5193
5194 if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
5195
5196 if (!tcp_parse_aligned_timestamp(tp, th))
5197 goto slow_path;
5198
5199
5200 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
5201 goto slow_path;
5202
5203
5204
5205
5206
5207
5208 }
5209
5210 if (len <= tcp_header_len) {
5211
5212 if (len == tcp_header_len) {
5213
5214
5215
5216
5217 if (tcp_header_len ==
5218 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5219 tp->rcv_nxt == tp->rcv_wup)
5220 tcp_store_ts_recent(tp);
5221
5222
5223
5224
5225 tcp_ack(sk, skb, 0);
5226 __kfree_skb(skb);
5227 tcp_data_snd_check(sk);
5228 return 0;
5229 } else {
5230 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5231 goto discard;
5232 }
5233 } else {
5234 int eaten = 0;
5235 int copied_early = 0;
5236
5237 if (tp->copied_seq == tp->rcv_nxt &&
5238 len - tcp_header_len <= tp->ucopy.len) {
5239#ifdef CONFIG_NET_DMA
5240 if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
5241 copied_early = 1;
5242 eaten = 1;
5243 }
5244#endif
5245 if (tp->ucopy.task == current &&
5246 sock_owned_by_user(sk) && !copied_early) {
5247 __set_current_state(TASK_RUNNING);
5248
5249 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
5250 eaten = 1;
5251 }
5252 if (eaten) {
5253
5254
5255
5256
5257 if (tcp_header_len ==
5258 (sizeof(struct tcphdr) +
5259 TCPOLEN_TSTAMP_ALIGNED) &&
5260 tp->rcv_nxt == tp->rcv_wup)
5261 tcp_store_ts_recent(tp);
5262
5263 tcp_rcv_rtt_measure_ts(sk, skb);
5264
5265 __skb_pull(skb, tcp_header_len);
5266 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
5267 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
5268 }
5269 if (copied_early)
5270 tcp_cleanup_rbuf(sk, skb->len);
5271 }
5272 if (!eaten) {
5273 if (tcp_checksum_complete_user(sk, skb))
5274 goto csum_error;
5275
5276
5277
5278
5279
5280 if (tcp_header_len ==
5281 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5282 tp->rcv_nxt == tp->rcv_wup)
5283 tcp_store_ts_recent(tp);
5284
5285 tcp_rcv_rtt_measure_ts(sk, skb);
5286
5287 if ((int)skb->truesize > sk->sk_forward_alloc)
5288 goto step5;
5289
5290 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
5291
5292
5293 __skb_pull(skb, tcp_header_len);
5294 __skb_queue_tail(&sk->sk_receive_queue, skb);
5295 skb_set_owner_r(skb, sk);
5296 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
5297 }
5298
5299 tcp_event_data_recv(sk, skb);
5300
5301 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
5302
5303 tcp_ack(sk, skb, FLAG_DATA);
5304 tcp_data_snd_check(sk);
5305 if (!inet_csk_ack_scheduled(sk))
5306 goto no_ack;
5307 }
5308
5309 if (!copied_early || tp->rcv_nxt != tp->rcv_wup)
5310 __tcp_ack_snd_check(sk, 0);
5311no_ack:
5312#ifdef CONFIG_NET_DMA
5313 if (copied_early)
5314 __skb_queue_tail(&sk->sk_async_wait_queue, skb);
5315 else
5316#endif
5317 if (eaten)
5318 __kfree_skb(skb);
5319 else
5320 sk->sk_data_ready(sk, 0);
5321 return 0;
5322 }
5323 }
5324
5325slow_path:
5326 if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
5327 goto csum_error;
5328
5329
5330
5331
5332
5333 res = tcp_validate_incoming(sk, skb, th, 1);
5334 if (res <= 0)
5335 return -res;
5336
5337step5:
5338 if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0)
5339 goto discard;
5340
5341 tcp_rcv_rtt_measure_ts(sk, skb);
5342
5343
5344 tcp_urg(sk, skb, th);
5345
5346
5347 tcp_data_queue(sk, skb);
5348
5349 tcp_data_snd_check(sk);
5350 tcp_ack_snd_check(sk);
5351 return 0;
5352
5353csum_error:
5354 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5355
5356discard:
5357 __kfree_skb(skb);
5358 return 0;
5359}
5360
5361static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5362 struct tcphdr *th, unsigned len)
5363{
5364 struct tcp_sock *tp = tcp_sk(sk);
5365 struct inet_connection_sock *icsk = inet_csk(sk);
5366 int saved_clamp = tp->rx_opt.mss_clamp;
5367
5368 tcp_parse_options(skb, &tp->rx_opt, 0);
5369
5370 if (th->ack) {
5371
5372
5373
5374
5375