1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64#include <linux/mm.h>
65#include <linux/module.h>
66#include <linux/sysctl.h>
67#include <linux/kernel.h>
68#include <net/dst.h>
69#include <net/tcp.h>
70#include <net/inet_common.h>
71#include <linux/ipsec.h>
72#include <asm/unaligned.h>
73#include <net/netdma.h>
74
75int sysctl_tcp_timestamps __read_mostly = 1;
76int sysctl_tcp_window_scaling __read_mostly = 1;
77int sysctl_tcp_sack __read_mostly = 1;
78int sysctl_tcp_fack __read_mostly = 1;
79int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
80int sysctl_tcp_ecn __read_mostly = 2;
81int sysctl_tcp_dsack __read_mostly = 1;
82int sysctl_tcp_app_win __read_mostly = 31;
83int sysctl_tcp_adv_win_scale __read_mostly = 2;
84
85int sysctl_tcp_stdurg __read_mostly;
86int sysctl_tcp_rfc1337 __read_mostly;
87int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
88int sysctl_tcp_frto __read_mostly = 2;
89int sysctl_tcp_frto_response __read_mostly;
90int sysctl_tcp_nometrics_save __read_mostly;
91
92int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
93int sysctl_tcp_abc __read_mostly;
94
/*
 * Bit flags describing what an incoming ACK carried or caused.  Each flag
 * occupies one distinct bit so they can be OR-ed into a single int.
 */
#define FLAG_DATA			0x0001
#define FLAG_WIN_UPDATE			0x0002
#define FLAG_DATA_ACKED			0x0004
#define FLAG_RETRANS_DATA_ACKED		0x0008
#define FLAG_SYN_ACKED			0x0010
#define FLAG_DATA_SACKED		0x0020
#define FLAG_ECE			0x0040
#define FLAG_DATA_LOST			0x0080
#define FLAG_SLOWPATH			0x0100
#define FLAG_ONLY_ORIG_SACKED		0x0200
#define FLAG_SND_UNA_ADVANCED		0x0400
#define FLAG_DSACKING_ACK		0x0800
#define FLAG_NONHEAD_RETRANS_ACKED	0x1000
#define FLAG_SACK_RENEGING		0x2000

/* Composite masks built from the single-bit flags above. */
#define FLAG_ACKED		(FLAG_DATA_ACKED | FLAG_SYN_ACKED)
#define FLAG_NOT_DUP		(FLAG_DATA | FLAG_WIN_UPDATE | FLAG_ACKED)
#define FLAG_CA_ALERT		(FLAG_DATA_SACKED | FLAG_ECE)
#define FLAG_FORWARD_PROGRESS	(FLAG_ACKED | FLAG_DATA_SACKED)
#define FLAG_ANY_PROGRESS	(FLAG_FORWARD_PROGRESS | FLAG_SND_UNA_ADVANCED)

/* TCP header flag masks: FIN/URG/SYN/PSH, and everything except reserved bits and PSH. */
#define TCP_REMNANT	(TCP_FLAG_FIN | TCP_FLAG_URG | TCP_FLAG_SYN | TCP_FLAG_PSH)
#define TCP_HP_BITS	(~(TCP_RESERVED_BITS | TCP_FLAG_PSH))
118
119
120
121
122static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
123{
124 struct inet_connection_sock *icsk = inet_csk(sk);
125 const unsigned int lss = icsk->icsk_ack.last_seg_size;
126 unsigned int len;
127
128 icsk->icsk_ack.last_seg_size = 0;
129
130
131
132
133 len = skb_shinfo(skb)->gso_size ? : skb->len;
134 if (len >= icsk->icsk_ack.rcv_mss) {
135 icsk->icsk_ack.rcv_mss = len;
136 } else {
137
138
139
140
141
142 len += skb->data - skb_transport_header(skb);
143 if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
144
145
146
147
148
149 (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
150 !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
151
152
153
154
155 len -= tcp_sk(sk)->tcp_header_len;
156 icsk->icsk_ack.last_seg_size = len;
157 if (len == lss) {
158 icsk->icsk_ack.rcv_mss = len;
159 return;
160 }
161 }
162 if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
163 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
164 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
165 }
166}
167
168static void tcp_incr_quickack(struct sock *sk)
169{
170 struct inet_connection_sock *icsk = inet_csk(sk);
171 unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
172
173 if (quickacks == 0)
174 quickacks = 2;
175 if (quickacks > icsk->icsk_ack.quick)
176 icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
177}
178
179void tcp_enter_quickack_mode(struct sock *sk)
180{
181 struct inet_connection_sock *icsk = inet_csk(sk);
182 tcp_incr_quickack(sk);
183 icsk->icsk_ack.pingpong = 0;
184 icsk->icsk_ack.ato = TCP_ATO_MIN;
185}
186
187
188
189
190
191static inline int tcp_in_quickack_mode(const struct sock *sk)
192{
193 const struct inet_connection_sock *icsk = inet_csk(sk);
194 return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
195}
196
197static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
198{
199 if (tp->ecn_flags & TCP_ECN_OK)
200 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
201}
202
203static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb)
204{
205 if (tcp_hdr(skb)->cwr)
206 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
207}
208
209static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
210{
211 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
212}
213
214static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
215{
216 if (tp->ecn_flags & TCP_ECN_OK) {
217 if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))
218 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
219
220
221
222 else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags)))
223 tcp_enter_quickack_mode((struct sock *)tp);
224 }
225}
226
227static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th)
228{
229 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
230 tp->ecn_flags &= ~TCP_ECN_OK;
231}
232
233static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th)
234{
235 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
236 tp->ecn_flags &= ~TCP_ECN_OK;
237}
238
239static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
240{
241 if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
242 return 1;
243 return 0;
244}
245
246
247
248
249
250
251static void tcp_fixup_sndbuf(struct sock *sk)
252{
253 int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
254 sizeof(struct sk_buff);
255
256 if (sk->sk_sndbuf < 3 * sndmem)
257 sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]);
258}
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
287{
288 struct tcp_sock *tp = tcp_sk(sk);
289
290 int truesize = tcp_win_from_space(skb->truesize) >> 1;
291 int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
292
293 while (tp->rcv_ssthresh <= window) {
294 if (truesize <= skb->len)
295 return 2 * inet_csk(sk)->icsk_ack.rcv_mss;
296
297 truesize >>= 1;
298 window >>= 1;
299 }
300 return 0;
301}
302
303static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
304{
305 struct tcp_sock *tp = tcp_sk(sk);
306
307
308 if (tp->rcv_ssthresh < tp->window_clamp &&
309 (int)tp->rcv_ssthresh < tcp_space(sk) &&
310 !tcp_memory_pressure) {
311 int incr;
312
313
314
315
316 if (tcp_win_from_space(skb->truesize) <= skb->len)
317 incr = 2 * tp->advmss;
318 else
319 incr = __tcp_grow_window(sk, skb);
320
321 if (incr) {
322 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
323 tp->window_clamp);
324 inet_csk(sk)->icsk_ack.quick |= 1;
325 }
326 }
327}
328
329
330
331static void tcp_fixup_rcvbuf(struct sock *sk)
332{
333 struct tcp_sock *tp = tcp_sk(sk);
334 int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
335
336
337
338
339
340 while (tcp_win_from_space(rcvmem) < tp->advmss)
341 rcvmem += 128;
342 if (sk->sk_rcvbuf < 4 * rcvmem)
343 sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
344}
345
346
347
348
349static void tcp_init_buffer_space(struct sock *sk)
350{
351 struct tcp_sock *tp = tcp_sk(sk);
352 int maxwin;
353
354 if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
355 tcp_fixup_rcvbuf(sk);
356 if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
357 tcp_fixup_sndbuf(sk);
358
359 tp->rcvq_space.space = tp->rcv_wnd;
360
361 maxwin = tcp_full_space(sk);
362
363 if (tp->window_clamp >= maxwin) {
364 tp->window_clamp = maxwin;
365
366 if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
367 tp->window_clamp = max(maxwin -
368 (maxwin >> sysctl_tcp_app_win),
369 4 * tp->advmss);
370 }
371
372
373 if (sysctl_tcp_app_win &&
374 tp->window_clamp > 2 * tp->advmss &&
375 tp->window_clamp + tp->advmss > maxwin)
376 tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
377
378 tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
379 tp->snd_cwnd_stamp = tcp_time_stamp;
380}
381
382
383static void tcp_clamp_window(struct sock *sk)
384{
385 struct tcp_sock *tp = tcp_sk(sk);
386 struct inet_connection_sock *icsk = inet_csk(sk);
387
388 icsk->icsk_ack.quick = 0;
389
390 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
391 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
392 !tcp_memory_pressure &&
393 atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
394 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
395 sysctl_tcp_rmem[2]);
396 }
397 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
398 tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
399}
400
401
402
403
404
405
406
407
408void tcp_initialize_rcv_mss(struct sock *sk)
409{
410 struct tcp_sock *tp = tcp_sk(sk);
411 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
412
413 hint = min(hint, tp->rcv_wnd / 2);
414 hint = min(hint, TCP_MSS_DEFAULT);
415 hint = max(hint, TCP_MIN_MSS);
416
417 inet_csk(sk)->icsk_ack.rcv_mss = hint;
418}
419
420
421
422
423
424
425
426
427
428
429
430
431static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
432{
433 u32 new_sample = tp->rcv_rtt_est.rtt;
434 long m = sample;
435
436 if (m == 0)
437 m = 1;
438
439 if (new_sample != 0) {
440
441
442
443
444
445
446
447
448
449
450 if (!win_dep) {
451 m -= (new_sample >> 3);
452 new_sample += m;
453 } else if (m < new_sample)
454 new_sample = m << 3;
455 } else {
456
457 new_sample = m << 3;
458 }
459
460 if (tp->rcv_rtt_est.rtt != new_sample)
461 tp->rcv_rtt_est.rtt = new_sample;
462}
463
464static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
465{
466 if (tp->rcv_rtt_est.time == 0)
467 goto new_measure;
468 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
469 return;
470 tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_est.time, 1);
471
472new_measure:
473 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
474 tp->rcv_rtt_est.time = tcp_time_stamp;
475}
476
477static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
478 const struct sk_buff *skb)
479{
480 struct tcp_sock *tp = tcp_sk(sk);
481 if (tp->rx_opt.rcv_tsecr &&
482 (TCP_SKB_CB(skb)->end_seq -
483 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
484 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
485}
486
487
488
489
490
491void tcp_rcv_space_adjust(struct sock *sk)
492{
493 struct tcp_sock *tp = tcp_sk(sk);
494 int time;
495 int space;
496
497 if (tp->rcvq_space.time == 0)
498 goto new_measure;
499
500 time = tcp_time_stamp - tp->rcvq_space.time;
501 if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
502 return;
503
504 space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
505
506 space = max(tp->rcvq_space.space, space);
507
508 if (tp->rcvq_space.space != space) {
509 int rcvmem;
510
511 tp->rcvq_space.space = space;
512
513 if (sysctl_tcp_moderate_rcvbuf &&
514 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
515 int new_clamp = space;
516
517
518
519
520
521 space /= tp->advmss;
522 if (!space)
523 space = 1;
524 rcvmem = (tp->advmss + MAX_TCP_HEADER +
525 16 + sizeof(struct sk_buff));
526 while (tcp_win_from_space(rcvmem) < tp->advmss)
527 rcvmem += 128;
528 space *= rcvmem;
529 space = min(space, sysctl_tcp_rmem[2]);
530 if (space > sk->sk_rcvbuf) {
531 sk->sk_rcvbuf = space;
532
533
534 tp->window_clamp = new_clamp;
535 }
536 }
537 }
538
539new_measure:
540 tp->rcvq_space.seq = tp->copied_seq;
541 tp->rcvq_space.time = tcp_time_stamp;
542}
543
544
545
546
547
548
549
550
551
552
553
554static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
555{
556 struct tcp_sock *tp = tcp_sk(sk);
557 struct inet_connection_sock *icsk = inet_csk(sk);
558 u32 now;
559
560 inet_csk_schedule_ack(sk);
561
562 tcp_measure_rcv_mss(sk, skb);
563
564 tcp_rcv_rtt_measure(tp);
565
566 now = tcp_time_stamp;
567
568 if (!icsk->icsk_ack.ato) {
569
570
571
572 tcp_incr_quickack(sk);
573 icsk->icsk_ack.ato = TCP_ATO_MIN;
574 } else {
575 int m = now - icsk->icsk_ack.lrcvtime;
576
577 if (m <= TCP_ATO_MIN / 2) {
578
579 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
580 } else if (m < icsk->icsk_ack.ato) {
581 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
582 if (icsk->icsk_ack.ato > icsk->icsk_rto)
583 icsk->icsk_ack.ato = icsk->icsk_rto;
584 } else if (m > icsk->icsk_rto) {
585
586
587
588 tcp_incr_quickack(sk);
589 sk_mem_reclaim(sk);
590 }
591 }
592 icsk->icsk_ack.lrcvtime = now;
593
594 TCP_ECN_check_ce(tp, skb);
595
596 if (skb->len >= 128)
597 tcp_grow_window(sk, skb);
598}
599
600
601
602
603
604
605
606
607
608
609static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
610{
611 struct tcp_sock *tp = tcp_sk(sk);
612 long m = mrtt;
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630 if (m == 0)
631 m = 1;
632 if (tp->srtt != 0) {
633 m -= (tp->srtt >> 3);
634 tp->srtt += m;
635 if (m < 0) {
636 m = -m;
637 m -= (tp->mdev >> 2);
638
639
640
641
642
643
644
645
646 if (m > 0)
647 m >>= 3;
648 } else {
649 m -= (tp->mdev >> 2);
650 }
651 tp->mdev += m;
652 if (tp->mdev > tp->mdev_max) {
653 tp->mdev_max = tp->mdev;
654 if (tp->mdev_max > tp->rttvar)
655 tp->rttvar = tp->mdev_max;
656 }
657 if (after(tp->snd_una, tp->rtt_seq)) {
658 if (tp->mdev_max < tp->rttvar)
659 tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2;
660 tp->rtt_seq = tp->snd_nxt;
661 tp->mdev_max = tcp_rto_min(sk);
662 }
663 } else {
664
665 tp->srtt = m << 3;
666 tp->mdev = m << 1;
667 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
668 tp->rtt_seq = tp->snd_nxt;
669 }
670}
671
672
673
674
675static inline void tcp_set_rto(struct sock *sk)
676{
677 const struct tcp_sock *tp = tcp_sk(sk);
678
679
680
681
682
683
684
685
686
687
688 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
689
690
691
692
693
694
695
696
697
698
699 tcp_bound_rto(sk);
700}
701
702
703
704
705
706void tcp_update_metrics(struct sock *sk)
707{
708 struct tcp_sock *tp = tcp_sk(sk);
709 struct dst_entry *dst = __sk_dst_get(sk);
710
711 if (sysctl_tcp_nometrics_save)
712 return;
713
714 dst_confirm(dst);
715
716 if (dst && (dst->flags & DST_HOST)) {
717 const struct inet_connection_sock *icsk = inet_csk(sk);
718 int m;
719 unsigned long rtt;
720
721 if (icsk->icsk_backoff || !tp->srtt) {
722
723
724
725
726 if (!(dst_metric_locked(dst, RTAX_RTT)))
727 dst->metrics[RTAX_RTT - 1] = 0;
728 return;
729 }
730
731 rtt = dst_metric_rtt(dst, RTAX_RTT);
732 m = rtt - tp->srtt;
733
734
735
736
737
738 if (!(dst_metric_locked(dst, RTAX_RTT))) {
739 if (m <= 0)
740 set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt);
741 else
742 set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3));
743 }
744
745 if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
746 unsigned long var;
747 if (m < 0)
748 m = -m;
749
750
751 m >>= 1;
752 if (m < tp->mdev)
753 m = tp->mdev;
754
755 var = dst_metric_rtt(dst, RTAX_RTTVAR);
756 if (m >= var)
757 var = m;
758 else
759 var -= (var - m) >> 2;
760
761 set_dst_metric_rtt(dst, RTAX_RTTVAR, var);
762 }
763
764 if (tcp_in_initial_slowstart(tp)) {
765
766 if (dst_metric(dst, RTAX_SSTHRESH) &&
767 !dst_metric_locked(dst, RTAX_SSTHRESH) &&
768 (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH))
769 dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1;
770 if (!dst_metric_locked(dst, RTAX_CWND) &&
771 tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
772 dst->metrics[RTAX_CWND - 1] = tp->snd_cwnd;
773 } else if (tp->snd_cwnd > tp->snd_ssthresh &&
774 icsk->icsk_ca_state == TCP_CA_Open) {
775
776 if (!dst_metric_locked(dst, RTAX_SSTHRESH))
777 dst->metrics[RTAX_SSTHRESH-1] =
778 max(tp->snd_cwnd >> 1, tp->snd_ssthresh);
779 if (!dst_metric_locked(dst, RTAX_CWND))
780 dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1;
781 } else {
782
783
784
785 if (!dst_metric_locked(dst, RTAX_CWND))
786 dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1;
787 if (dst_metric(dst, RTAX_SSTHRESH) &&
788 !dst_metric_locked(dst, RTAX_SSTHRESH) &&
789 tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH))
790 dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh;
791 }
792
793 if (!dst_metric_locked(dst, RTAX_REORDERING)) {
794 if (dst_metric(dst, RTAX_REORDERING) < tp->reordering &&
795 tp->reordering != sysctl_tcp_reordering)
796 dst->metrics[RTAX_REORDERING-1] = tp->reordering;
797 }
798 }
799}
800
801
802
803
804
805
806
807
808
809
810__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
811{
812 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
813
814 if (!cwnd) {
815 if (tp->mss_cache > 1460)
816 cwnd = 2;
817 else
818 cwnd = (tp->mss_cache > 1095) ? 3 : 4;
819 }
820 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
821}
822
823
824void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
825{
826 struct tcp_sock *tp = tcp_sk(sk);
827 const struct inet_connection_sock *icsk = inet_csk(sk);
828
829 tp->prior_ssthresh = 0;
830 tp->bytes_acked = 0;
831 if (icsk->icsk_ca_state < TCP_CA_CWR) {
832 tp->undo_marker = 0;
833 if (set_ssthresh)
834 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
835 tp->snd_cwnd = min(tp->snd_cwnd,
836 tcp_packets_in_flight(tp) + 1U);
837 tp->snd_cwnd_cnt = 0;
838 tp->high_seq = tp->snd_nxt;
839 tp->snd_cwnd_stamp = tcp_time_stamp;
840 TCP_ECN_queue_cwr(tp);
841
842 tcp_set_ca_state(sk, TCP_CA_CWR);
843 }
844}
845
846
847
848
849
850static void tcp_disable_fack(struct tcp_sock *tp)
851{
852
853 if (tcp_is_fack(tp))
854 tp->lost_skb_hint = NULL;
855 tp->rx_opt.sack_ok &= ~2;
856}
857
858
859static void tcp_dsack_seen(struct tcp_sock *tp)
860{
861 tp->rx_opt.sack_ok |= 4;
862}
863
864
865
866static void tcp_init_metrics(struct sock *sk)
867{
868 struct tcp_sock *tp = tcp_sk(sk);
869 struct dst_entry *dst = __sk_dst_get(sk);
870
871 if (dst == NULL)
872 goto reset;
873
874 dst_confirm(dst);
875
876 if (dst_metric_locked(dst, RTAX_CWND))
877 tp->snd_cwnd_clamp = dst_metric(dst, RTAX_CWND);
878 if (dst_metric(dst, RTAX_SSTHRESH)) {
879 tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH);
880 if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
881 tp->snd_ssthresh = tp->snd_cwnd_clamp;
882 }
883 if (dst_metric(dst, RTAX_REORDERING) &&
884 tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
885 tcp_disable_fack(tp);
886 tp->reordering = dst_metric(dst, RTAX_REORDERING);
887 }
888
889 if (dst_metric(dst, RTAX_RTT) == 0)
890 goto reset;
891
892 if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
893 goto reset;
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909 if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) {
910 tp->srtt = dst_metric_rtt(dst, RTAX_RTT);
911 tp->rtt_seq = tp->snd_nxt;
912 }
913 if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) {
914 tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR);
915 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
916 }
917 tcp_set_rto(sk);
918 if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
919 goto reset;
920
921cwnd:
922 tp->snd_cwnd = tcp_init_cwnd(tp, dst);
923 tp->snd_cwnd_stamp = tcp_time_stamp;
924 return;
925
926reset:
927
928
929
930
931 if (!tp->rx_opt.saw_tstamp && tp->srtt) {
932 tp->srtt = 0;
933 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
934 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
935 }
936 goto cwnd;
937}
938
939static void tcp_update_reordering(struct sock *sk, const int metric,
940 const int ts)
941{
942 struct tcp_sock *tp = tcp_sk(sk);
943 if (metric > tp->reordering) {
944 int mib_idx;
945
946 tp->reordering = min(TCP_MAX_REORDERING, metric);
947
948
949 if (ts)
950 mib_idx = LINUX_MIB_TCPTSREORDER;
951 else if (tcp_is_reno(tp))
952 mib_idx = LINUX_MIB_TCPRENOREORDER;
953 else if (tcp_is_fack(tp))
954 mib_idx = LINUX_MIB_TCPFACKREORDER;
955 else
956 mib_idx = LINUX_MIB_TCPSACKREORDER;
957
958 NET_INC_STATS_BH(sock_net(sk), mib_idx);
959#if FASTRETRANS_DEBUG > 1
960 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
961 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
962 tp->reordering,
963 tp->fackets_out,
964 tp->sacked_out,
965 tp->undo_marker ? tp->undo_retrans : 0);
966#endif
967 tcp_disable_fack(tp);
968 }
969}
970
971
972static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
973{
974 if ((tp->retransmit_skb_hint == NULL) ||
975 before(TCP_SKB_CB(skb)->seq,
976 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
977 tp->retransmit_skb_hint = skb;
978
979 if (!tp->lost_out ||
980 after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high))
981 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
982}
983
984static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
985{
986 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
987 tcp_verify_retransmit_hint(tp, skb);
988
989 tp->lost_out += tcp_skb_pcount(skb);
990 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
991 }
992}
993
994static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
995 struct sk_buff *skb)
996{
997 tcp_verify_retransmit_hint(tp, skb);
998
999 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
1000 tp->lost_out += tcp_skb_pcount(skb);
1001 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1002 }
1003}
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
1102 u32 start_seq, u32 end_seq)
1103{
1104
1105 if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
1106 return 0;
1107
1108
1109 if (!before(start_seq, tp->snd_nxt))
1110 return 0;
1111
1112
1113
1114
1115 if (after(start_seq, tp->snd_una))
1116 return 1;
1117
1118 if (!is_dsack || !tp->undo_marker)
1119 return 0;
1120
1121
1122 if (!after(end_seq, tp->snd_una))
1123 return 0;
1124
1125 if (!before(start_seq, tp->undo_marker))
1126 return 1;
1127
1128
1129 if (!after(end_seq, tp->undo_marker))
1130 return 0;
1131
1132
1133
1134
1135 return !before(start_seq, end_seq - tp->max_window);
1136}
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147static void tcp_mark_lost_retrans(struct sock *sk)
1148{
1149 const struct inet_connection_sock *icsk = inet_csk(sk);
1150 struct tcp_sock *tp = tcp_sk(sk);
1151 struct sk_buff *skb;
1152 int cnt = 0;
1153 u32 new_low_seq = tp->snd_nxt;
1154 u32 received_upto = tcp_highest_sack_seq(tp);
1155
1156 if (!tcp_is_fack(tp) || !tp->retrans_out ||
1157 !after(received_upto, tp->lost_retrans_low) ||
1158 icsk->icsk_ca_state != TCP_CA_Recovery)
1159 return;
1160
1161 tcp_for_write_queue(skb, sk) {
1162 u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
1163
1164 if (skb == tcp_send_head(sk))
1165 break;
1166 if (cnt == tp->retrans_out)
1167 break;
1168 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1169 continue;
1170
1171 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
1172 continue;
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185 if (after(received_upto, ack_seq)) {
1186 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1187 tp->retrans_out -= tcp_skb_pcount(skb);
1188
1189 tcp_skb_mark_lost_uncond_verify(tp, skb);
1190 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
1191 } else {
1192 if (before(ack_seq, new_low_seq))
1193 new_low_seq = ack_seq;
1194 cnt += tcp_skb_pcount(skb);
1195 }
1196 }
1197
1198 if (tp->retrans_out)
1199 tp->lost_retrans_low = new_low_seq;
1200}
1201
1202static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
1203 struct tcp_sack_block_wire *sp, int num_sacks,
1204 u32 prior_snd_una)
1205{
1206 struct tcp_sock *tp = tcp_sk(sk);
1207 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
1208 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
1209 int dup_sack = 0;
1210
1211 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1212 dup_sack = 1;
1213 tcp_dsack_seen(tp);
1214 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
1215 } else if (num_sacks > 1) {
1216 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
1217 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
1218
1219 if (!after(end_seq_0, end_seq_1) &&
1220 !before(start_seq_0, start_seq_1)) {
1221 dup_sack = 1;
1222 tcp_dsack_seen(tp);
1223 NET_INC_STATS_BH(sock_net(sk),
1224 LINUX_MIB_TCPDSACKOFORECV);
1225 }
1226 }
1227
1228
1229 if (dup_sack &&
1230 !after(end_seq_0, prior_snd_una) &&
1231 after(end_seq_0, tp->undo_marker))
1232 tp->undo_retrans--;
1233
1234 return dup_sack;
1235}
1236
/* Scratch state threaded through the SACK tagging helpers for one ACK. */
struct tcp_sacktag_state {
	int reord;	/* lowest fack_count at which reordering was seen */
	int fack_count;	/* running count of segments walked so far */
	int flag;	/* accumulated FLAG_* bits */
};
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1252 u32 start_seq, u32 end_seq)
1253{
1254 int in_sack, err;
1255 unsigned int pkt_len;
1256 unsigned int mss;
1257
1258 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1259 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1260
1261 if (tcp_skb_pcount(skb) > 1 && !in_sack &&
1262 after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
1263 mss = tcp_skb_mss(skb);
1264 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1265
1266 if (!in_sack) {
1267 pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
1268 if (pkt_len < mss)
1269 pkt_len = mss;
1270 } else {
1271 pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
1272 if (pkt_len < mss)
1273 return -EINVAL;
1274 }
1275
1276
1277
1278
1279 if (pkt_len > mss) {
1280 unsigned int new_len = (pkt_len / mss) * mss;
1281 if (!in_sack && new_len < pkt_len) {
1282 new_len += mss;
1283 if (new_len > skb->len)
1284 return 0;
1285 }
1286 pkt_len = new_len;
1287 }
1288 err = tcp_fragment(sk, skb, pkt_len, mss);
1289 if (err < 0)
1290 return err;
1291 }
1292
1293 return in_sack;
1294}
1295
1296static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
1297 struct tcp_sacktag_state *state,
1298 int dup_sack, int pcount)
1299{
1300 struct tcp_sock *tp = tcp_sk(sk);
1301 u8 sacked = TCP_SKB_CB(skb)->sacked;
1302 int fack_count = state->fack_count;
1303
1304
1305 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1306 if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
1307 tp->undo_retrans--;
1308 if (sacked & TCPCB_SACKED_ACKED)
1309 state->reord = min(fack_count, state->reord);
1310 }
1311
1312
1313 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1314 return sacked;
1315
1316 if (!(sacked & TCPCB_SACKED_ACKED)) {
1317 if (sacked & TCPCB_SACKED_RETRANS) {
1318
1319
1320
1321
1322 if (sacked & TCPCB_LOST) {
1323 sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1324 tp->lost_out -= pcount;
1325 tp->retrans_out -= pcount;
1326 }
1327 } else {
1328 if (!(sacked & TCPCB_RETRANS)) {
1329
1330
1331
1332 if (before(TCP_SKB_CB(skb)->seq,
1333 tcp_highest_sack_seq(tp)))
1334 state->reord = min(fack_count,
1335 state->reord);
1336
1337
1338 if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
1339 state->flag |= FLAG_ONLY_ORIG_SACKED;
1340 }
1341
1342 if (sacked & TCPCB_LOST) {
1343 sacked &= ~TCPCB_LOST;
1344 tp->lost_out -= pcount;
1345 }
1346 }
1347
1348 sacked |= TCPCB_SACKED_ACKED;
1349 state->flag |= FLAG_DATA_SACKED;
1350 tp->sacked_out += pcount;
1351
1352 fack_count += pcount;
1353
1354
1355 if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
1356 before(TCP_SKB_CB(skb)->seq,
1357 TCP_SKB_CB(tp->lost_skb_hint)->seq))
1358 tp->lost_cnt_hint += pcount;
1359
1360 if (fack_count > tp->fackets_out)
1361 tp->fackets_out = fack_count;
1362 }
1363
1364
1365
1366
1367
1368 if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
1369 sacked &= ~TCPCB_SACKED_RETRANS;
1370 tp->retrans_out -= pcount;
1371 }
1372
1373 return sacked;
1374}
1375
1376static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1377 struct tcp_sacktag_state *state,
1378 unsigned int pcount, int shifted, int mss,
1379 int dup_sack)
1380{
1381 struct tcp_sock *tp = tcp_sk(sk);
1382 struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
1383
1384 BUG_ON(!pcount);
1385
1386
1387 if (!tcp_is_fack(tp) && tcp_is_sack(tp) && tp->lost_skb_hint &&
1388 !before(TCP_SKB_CB(tp->lost_skb_hint)->seq, TCP_SKB_CB(skb)->seq))
1389 tp->lost_cnt_hint += pcount;
1390
1391 TCP_SKB_CB(prev)->end_seq += shifted;
1392 TCP_SKB_CB(skb)->seq += shifted;
1393
1394 skb_shinfo(prev)->gso_segs += pcount;
1395 BUG_ON(skb_shinfo(skb)->gso_segs < pcount);
1396 skb_shinfo(skb)->gso_segs -= pcount;
1397
1398
1399
1400
1401
1402
1403 if (!skb_shinfo(prev)->gso_size) {
1404 skb_shinfo(prev)->gso_size = mss;
1405 skb_shinfo(prev)->gso_type = sk->sk_gso_type;
1406 }
1407
1408
1409 if (skb_shinfo(skb)->gso_segs <= 1) {
1410 skb_shinfo(skb)->gso_size = 0;
1411 skb_shinfo(skb)->gso_type = 0;
1412 }
1413
1414
1415 tcp_sacktag_one(skb, sk, state, dup_sack, pcount);
1416
1417
1418 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
1419
1420 if (skb->len > 0) {
1421 BUG_ON(!tcp_skb_pcount(skb));
1422 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED);
1423 return 0;
1424 }
1425
1426
1427
1428 if (skb == tp->retransmit_skb_hint)
1429 tp->retransmit_skb_hint = prev;
1430 if (skb == tp->scoreboard_skb_hint)
1431 tp->scoreboard_skb_hint = prev;
1432 if (skb == tp->lost_skb_hint) {
1433 tp->lost_skb_hint = prev;
1434 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
1435 }
1436
1437 TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags;
1438 if (skb == tcp_highest_sack(sk))
1439 tcp_advance_highest_sack(sk, skb);
1440
1441 tcp_unlink_write_queue(skb, sk);
1442 sk_wmem_free_skb(sk, skb);
1443
1444 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED);
1445
1446 return 1;
1447}
1448
1449
1450
1451
1452static int tcp_skb_seglen(struct sk_buff *skb)
1453{
1454 return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
1455}
1456
1457
/*
 * Payload can be shifted only out of skbs that have no linear data
 * (skb_headlen() is 0) and are non-linear.
 */
static int skb_can_shift(struct sk_buff *skb)
{
	if (skb_headlen(skb))
		return 0;

	return skb_is_nonlinear(skb);
}
1462
1463
1464
1465
1466static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1467 struct tcp_sacktag_state *state,
1468 u32 start_seq, u32 end_seq,
1469 int dup_sack)
1470{
1471 struct tcp_sock *tp = tcp_sk(sk);
1472 struct sk_buff *prev;
1473 int mss;
1474 int pcount = 0;
1475 int len;
1476 int in_sack;
1477
1478 if (!sk_can_gso(sk))
1479 goto fallback;
1480
1481
1482 if (!dup_sack &&
1483 (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
1484 goto fallback;
1485 if (!skb_can_shift(skb))
1486 goto fallback;
1487
1488 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1489 goto fallback;
1490
1491
1492 if (unlikely(skb == tcp_write_queue_head(sk)))
1493 goto fallback;
1494 prev = tcp_write_queue_prev(sk, skb);
1495
1496 if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
1497 goto fallback;
1498
1499 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1500 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1501
1502 if (in_sack) {
1503 len = skb->len;
1504 pcount = tcp_skb_pcount(skb);
1505 mss = tcp_skb_seglen(skb);
1506
1507
1508
1509
1510 if (mss != tcp_skb_seglen(prev))
1511 goto fallback;
1512 } else {
1513 if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
1514 goto noop;
1515
1516
1517
1518
1519 if (tcp_skb_pcount(skb) <= 1)
1520 goto noop;
1521
1522 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1523 if (!in_sack) {
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535 goto fallback;
1536 }
1537
1538 len = end_seq - TCP_SKB_CB(skb)->seq;
1539 BUG_ON(len < 0);
1540 BUG_ON(len > skb->len);
1541
1542
1543
1544
1545
1546 mss = tcp_skb_mss(skb);
1547
1548
1549
1550
1551 if (mss != tcp_skb_seglen(prev))
1552 goto fallback;
1553
1554 if (len == mss) {
1555 pcount = 1;
1556 } else if (len < mss) {
1557 goto noop;
1558 } else {
1559 pcount = len / mss;
1560 len = pcount * mss;
1561 }
1562 }
1563
1564 if (!skb_shift(prev, skb, len))
1565 goto fallback;
1566 if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
1567 goto out;
1568
1569
1570
1571
1572 if (prev == tcp_write_queue_tail(sk))
1573 goto out;
1574 skb = tcp_write_queue_next(sk, prev);
1575
1576 if (!skb_can_shift(skb) ||
1577 (skb == tcp_send_head(sk)) ||
1578 ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
1579 (mss != tcp_skb_seglen(skb)))
1580 goto out;
1581
1582 len = skb->len;
1583 if (skb_shift(prev, skb, len)) {
1584 pcount += tcp_skb_pcount(skb);
1585 tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
1586 }
1587
1588out:
1589 state->fack_count += pcount;
1590 return prev;
1591
1592noop:
1593 return skb;
1594
1595fallback:
1596 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
1597 return NULL;
1598}
1599
/*
 * Walk the write queue from @skb and apply one SACK block to it.
 *
 * @skb:         skb to start the walk from
 * @sk:          socket owning the write queue
 * @next_dup:    optional second block that is matched against each skb
 *               first; a positive match makes that skb be handled as a
 *               duplicate SACK.  May be NULL.
 * @state:       running tagging state; state->fack_count is advanced by
 *               the packet count of each skb the loop body completes on
 * @start_seq:   start of the block being applied
 * @end_seq:     end of the block; the walk stops at the first skb whose
 *               seq is not before it
 * @dup_sack_in: duplicate-SACK setting each skb starts out with
 *
 * Returns the skb at which the walk stopped.
 */
static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
					struct tcp_sack_block *next_dup,
					struct tcp_sacktag_state *state,
					u32 start_seq, u32 end_seq,
					int dup_sack_in)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *tmp;

	tcp_for_write_queue_from(skb, sk) {
		int in_sack = 0;
		int dup_sack = dup_sack_in;

		/* Never look at data that has not been sent yet. */
		if (skb == tcp_send_head(sk))
			break;

		/* This skb starts at or beyond the end of the block: done. */
		if (!before(TCP_SKB_CB(skb)->seq, end_seq))
			break;

		/* Try the extra (duplicate) block first when the skb starts
		 * before its end.
		 */
		if ((next_dup != NULL) &&
		    before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
			in_sack = tcp_match_skb_to_sack(sk, skb,
							next_dup->start_seq,
							next_dup->end_seq);
			if (in_sack > 0)
				dup_sack = 1;
		}

		/* Not matched by the duplicate block: first try to shift the
		 * data into the previous skb.  tcp_shift_skb_data() returns
		 * NULL when it falls back, @skb itself when nothing was done,
		 * or the previous skb when data was shifted into it.
		 */
		if (in_sack <= 0) {
			tmp = tcp_shift_skb_data(sk, skb, state,
						 start_seq, end_seq, dup_sack);
			if (tmp != NULL) {
				if (tmp != skb) {
					/* Data was shifted; resume the walk
					 * from the skb that was returned
					 * instead of touching @skb again.
					 */
					skb = tmp;
					continue;
				}

				/* Nothing shifted and nothing to tag. */
				in_sack = 0;
			} else {
				/* Shifting not possible: match the skb
				 * against the block directly.
				 */
				in_sack = tcp_match_skb_to_sack(sk, skb,
								start_seq,
								end_seq);
			}
		}

		/* A negative match result aborts the walk. */
		if (unlikely(in_sack < 0))
			break;

		if (in_sack) {
			TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk,
								  state,
								  dup_sack,
								  tcp_skb_pcount(skb));

			/* Move the highest-SACK marker forward when this skb
			 * does not start before the current one.
			 */
			if (!before(TCP_SKB_CB(skb)->seq,
				    tcp_highest_sack_seq(tp)))
				tcp_advance_highest_sack(sk, skb);
		}

		state->fack_count += tcp_skb_pcount(skb);
	}
	return skb;
}
1668
1669
1670
1671
1672static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
1673 struct tcp_sacktag_state *state,
1674 u32 skip_to_seq)
1675{
1676 tcp_for_write_queue_from(skb, sk) {
1677 if (skb == tcp_send_head(sk))
1678 break;
1679
1680 if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
1681 break;
1682
1683 state->fack_count += tcp_skb_pcount(skb);
1684 }
1685 return skb;
1686}
1687
1688static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1689 struct sock *sk,
1690 struct tcp_sack_block *next_dup,
1691 struct tcp_sacktag_state *state,
1692 u32 skip_to_seq)
1693{
1694 if (next_dup == NULL)
1695 return skb;
1696
1697 if (before(next_dup->start_seq, skip_to_seq)) {
1698 skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq);
1699 skb = tcp_sacktag_walk(skb, sk, NULL, state,
1700 next_dup->start_seq, next_dup->end_seq,
1701 1);
1702 }
1703
1704 return skb;
1705}
1706
1707static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache)
1708{
1709 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1710}
1711
/*
 * Process the SACK blocks carried by @ack_skb and tag the write queue
 * accordingly.
 *
 * @sk:            socket whose write queue is tagged
 * @ack_skb:       incoming segment; TCP_SKB_CB(ack_skb)->sacked is used as
 *                 the offset from the transport header to the SACK option
 * @prior_snd_una: snd_una value to validate the blocks against
 *                 (presumably the value before this ACK was processed;
 *                 confirm against the caller)
 *
 * Returns the FLAG_* bits accumulated in the tagging state, or 0 when the
 * ACK is rejected by the early ack_seq check.
 */
static int
tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
			u32 prior_snd_una)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	/* ptr[1] is read as the option length below; the blocks start two
	 * bytes in.
	 */
	unsigned char *ptr = (skb_transport_header(ack_skb) +
			      TCP_SKB_CB(ack_skb)->sacked);
	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
	struct tcp_sack_block sp[TCP_NUM_SACKS];
	struct tcp_sack_block *cache;
	struct tcp_sacktag_state state;
	struct sk_buff *skb;
	/* Each block occupies 8 bytes; never accept more than TCP_NUM_SACKS. */
	int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
	int used_sacks;
	int found_dup_sack = 0;
	int i, j;
	int first_sack_index;

	state.flag = 0;
	state.reord = tp->packets_out;

	/* Nothing is SACKed yet: fackets_out must be zero as well, and the
	 * highest-SACK marker is reset.
	 */
	if (!tp->sacked_out) {
		if (WARN_ON(tp->fackets_out))
			tp->fackets_out = 0;
		tcp_highest_sack_reset(sk);
	}

	found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
					 num_sacks, prior_snd_una);
	if (found_dup_sack)
		state.flag |= FLAG_DSACKING_ACK;

	/* Reject ACKs whose ack_seq lies more than max_window before
	 * prior_snd_una.  Note that this path returns 0, not state.flag.
	 */
	if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
		return 0;

	if (!tp->packets_out)
		goto out;

	/* Pass 1: convert the wire blocks to host order and keep only the
	 * valid ones that extend beyond prior_snd_una.  sp[used_sacks] is
	 * used as scratch space and only committed by used_sacks++.
	 */
	used_sacks = 0;
	first_sack_index = 0;
	for (i = 0; i < num_sacks; i++) {
		/* Only the first wire block can be the duplicate SACK. */
		int dup_sack = !i && found_dup_sack;

		sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
		sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);

		if (!tcp_is_sackblock_valid(tp, dup_sack,
					    sp[used_sacks].start_seq,
					    sp[used_sacks].end_seq)) {
			int mib_idx;

			if (dup_sack) {
				if (!tp->undo_marker)
					mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
				else
					mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
			} else {
				/* Blocks that end at or below snd_una are
				 * dropped silently (no counter) when this ACK
				 * does not acknowledge exactly snd_una.
				 */
				if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
				    !after(sp[used_sacks].end_seq, tp->snd_una))
					continue;
				mib_idx = LINUX_MIB_TCPSACKDISCARD;
			}

			NET_INC_STATS_BH(sock_net(sk), mib_idx);
			/* The first block was discarded, so no kept block
			 * can be the duplicate SACK any more.
			 */
			if (i == 0)
				first_sack_index = -1;
			continue;
		}

		/* Ignore blocks that lie entirely at or below prior_snd_una. */
		if (!after(sp[used_sacks].end_seq, prior_snd_una))
			continue;

		used_sacks++;
	}

	/* Pass 2: bubble-sort the kept blocks by start_seq. */
	for (i = used_sacks - 1; i > 0; i--) {
		for (j = 0; j < i; j++) {
			if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
				swap(sp[j], sp[j + 1]);

				/* Follow the first wire block as it moves. */
				if (j == first_sack_index)
					first_sack_index = j + 1;
			}
		}
	}

	skb = tcp_write_queue_head(sk);
	state.fack_count = 0;
	i = 0;

	if (!tp->sacked_out) {
		/* Nothing SACKed: point past the cache so it is never used. */
		cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
	} else {
		cache = tp->recv_sack_cache;
		/* Skip leading all-zero (empty) cache entries. */
		while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
		       !cache->end_seq)
			cache++;
	}

	/* Pass 3: apply each sorted block, using the cached blocks of the
	 * previous run to avoid re-walking ranges that were already tagged.
	 */
	while (i < used_sacks) {
		u32 start_seq = sp[i].start_seq;
		u32 end_seq = sp[i].end_seq;
		int dup_sack = (found_dup_sack && (i == first_sack_index));
		struct tcp_sack_block *next_dup = NULL;

		/* The duplicate block is the next one in sorted order: hand
		 * it to the walkers so it is handled while passing by.
		 */
		if (found_dup_sack && ((i + 1) == first_sack_index))
			next_dup = &sp[i + 1];

		/* The block reaches beyond high_seq. */
		if (after(end_seq, tp->high_seq))
			state.flag |= FLAG_DATA_LOST;

		/* Drop cache entries that end at or before this block starts. */
		while (tcp_sack_cache_ok(tp, cache) &&
		       !before(start_seq, cache->end_seq))
			cache++;

		/* The block overlaps the current cache entry (never used for
		 * the duplicate block itself).
		 */
		if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
		    after(end_seq, cache->start_seq)) {

			/* Head: the part before the cached range is new. */
			if (before(start_seq, cache->start_seq)) {
				skb = tcp_sacktag_skip(skb, sk, &state,
						       start_seq);
				skb = tcp_sacktag_walk(skb, sk, next_dup,
						       &state,
						       start_seq,
						       cache->start_seq,
						       dup_sack);
			}

			/* The block ends inside the cached range: done. */
			if (!after(end_seq, cache->end_seq))
				goto advance_sp;

			skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
						       &state,
						       cache->end_seq);

			/* The cached range ends at the highest SACKed
			 * sequence: jump straight to that skb and walk the
			 * remainder of the block from there.
			 */
			if (tcp_highest_sack_seq(tp) == cache->end_seq) {
				skb = tcp_highest_sack(sk);
				if (skb == NULL)
					break;
				state.fack_count = tp->fackets_out;
				cache++;
				goto walk;
			}

			skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
			/* Re-examine the same block against the next cache
			 * entry (i is deliberately not advanced).
			 */
			cache++;
			continue;
		}

		/* No usable cache entry.  If the block starts at or beyond
		 * the highest SACKed sequence, start from that skb.
		 */
		if (!before(start_seq, tcp_highest_sack_seq(tp))) {
			skb = tcp_highest_sack(sk);
			if (skb == NULL)
				break;
			state.fack_count = tp->fackets_out;
		}
		skb = tcp_sacktag_skip(skb, sk, &state, start_seq);

walk:
		skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
				       start_seq, end_seq, dup_sack);

advance_sp:
		/* A block reaching beyond frto_highmark clears
		 * FLAG_ONLY_ORIG_SACKED.
		 */
		if (after(end_seq, tp->frto_highmark))
			state.flag &= ~FLAG_ONLY_ORIG_SACKED;

		i++;
	}

	/* Rebuild the cache: zero the leading unused entries, then store
	 * this ACK's sorted blocks in the tail.
	 */
	for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
		tp->recv_sack_cache[i].start_seq = 0;
		tp->recv_sack_cache[i].end_seq = 0;
	}
	for (j = 0; j < used_sacks; j++)
		tp->recv_sack_cache[i++] = sp[j];

	tcp_mark_lost_retrans(sk);

	tcp_verify_left_out(tp);

	/* Report reordering when the tagging state recorded a position below
	 * fackets_out, unless in Loss state without an undo marker or while
	 * snd_una has not passed a set frto_highmark.
	 */
	if ((state.reord < tp->fackets_out) &&
	    ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
	    (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
		tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);

out:

#if FASTRETRANS_DEBUG > 0
	/* Sanity checks: none of the counters may have gone negative. */
	WARN_ON((int)tp->sacked_out < 0);
	WARN_ON((int)tp->lost_out < 0);
	WARN_ON((int)tp->retrans_out < 0);
	WARN_ON((int)tcp_packets_in_flight(tp) < 0);
#endif
	return state.flag;
}
1929
1930
1931
1932
1933static int tcp_limit_reno_sacked(struct tcp_sock *tp)
1934{
1935 u32 holes;
1936
1937 holes = max(tp->lost_out, 1U);
1938 holes = min(holes, tp->packets_out);
1939
1940 if ((tp->sacked_out + holes) > tp->packets_out) {
1941 tp->sacked_out = tp->packets_out - holes;
1942 return 1;
1943 }
1944 return 0;
1945}
1946
1947
1948
1949
1950
1951static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1952{
1953 struct tcp_sock *tp = tcp_sk(sk);
1954 if (tcp_limit_reno_sacked(tp))
1955 tcp_update_reordering(sk, tp->packets_out + addend, 0);
1956}
1957
1958
1959
1960static void tcp_add_reno_sack(struct sock *sk)
1961{
1962 struct tcp_sock *tp = tcp_sk(sk);
1963 tp->sacked_out++;
1964 tcp_check_reno_reordering(sk, 0);
1965 tcp_verify_left_out(tp);
1966}
1967
1968
1969
1970static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1971{
1972 struct tcp_sock *tp = tcp_sk(sk);
1973
1974 if (acked > 0) {
1975
1976 if (acked - 1 >= tp->sacked_out)
1977 tp->sacked_out = 0;
1978 else
1979 tp->sacked_out -= acked - 1;
1980 }
1981 tcp_check_reno_reordering(sk, acked);
1982 tcp_verify_left_out(tp);
1983}
1984
/* Forget all emulated SACKs by resetting sacked_out to zero. */
static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
{
	tp->sacked_out = 0;
}
1989
1990static int tcp_is_sackfrto(const struct tcp_sock *tp)
1991{
1992 return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp);
1993}
1994
1995
1996
1997
1998int tcp_use_frto(struct sock *sk)
1999{
2000 const struct tcp_sock *tp = tcp_sk(sk);
2001 const struct inet_connection_sock *icsk = inet_csk(sk);
2002 struct sk_buff *skb;
2003
2004 if (!sysctl_tcp_frto)
2005 return 0;
2006
2007
2008 if (icsk->icsk_mtup.probe_size)
2009 return 0;
2010
2011 if (tcp_is_sackfrto(tp))
2012 return 1;
2013
2014
2015 if (tp->retrans_out > 1)
2016 return 0;
2017
2018 skb = tcp_write_queue_head(sk);
2019 if (tcp_skb_is_last(sk, skb))
2020 return 1;
2021 skb = tcp_write_queue_next(sk, skb);
2022 tcp_for_write_queue_from(skb, sk) {
2023 if (skb == tcp_send_head(sk))
2024 break;
2025 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
2026 return 0;
2027
2028 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2029 break;
2030 }
2031 return 1;
2032}
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
/*
 * Enter the F-RTO state for @sk: optionally recompute ssthresh, set up the
 * undo state, clear the retransmitted mark on the head skb, limit cwnd,
 * choose frto_highmark, move the congestion state to Disorder and set
 * frto_counter to 1.
 */
void tcp_enter_frto(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	/* Recompute ssthresh only when not already in F-RTO with state at
	 * most Disorder, when snd_una has reached high_seq, or when in
	 * Loss/F-RTO with no retransmit timeouts counted yet.
	 */
	if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
	    tp->snd_una == tp->high_seq ||
	    ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
	     !icsk->icsk_retransmits)) {
		tp->prior_ssthresh = tcp_current_ssthresh(sk);

		if (tp->frto_counter) {
			/* Already in F-RTO: let the congestion module compute
			 * ssthresh with snd_cwnd temporarily set to 2, then
			 * restore the real cwnd.
			 */
			u32 stored_cwnd;
			stored_cwnd = tp->snd_cwnd;
			tp->snd_cwnd = 2;
			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
			tp->snd_cwnd = stored_cwnd;
		} else {
			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
		}

		tcp_ca_event(sk, CA_EVENT_FRTO);
	}

	tp->undo_marker = tp->snd_una;
	tp->undo_retrans = 0;

	/* The head skb: a previous retransmission disables undo, and its
	 * SACKED_RETRANS mark is removed from the retrans_out accounting.
	 */
	skb = tcp_write_queue_head(sk);
	if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
		tp->undo_marker = 0;
	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
		TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
		tp->retrans_out -= tcp_skb_pcount(skb);
	}
	tcp_verify_left_out(tp);

	/* Do not allow cwnd above the packets in flight plus one. */
	tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);

	/* With SACK-enhanced F-RTO, when re-entering F-RTO or coming from
	 * Recovery/Loss with high_seq still ahead of snd_una, keep the old
	 * high_seq as the highmark; otherwise use snd_nxt.
	 */
	if (tcp_is_sackfrto(tp) && (tp->frto_counter ||
	    ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
	    after(tp->high_seq, tp->snd_una)) {
		tp->frto_highmark = tp->high_seq;
	} else {
		tp->frto_highmark = tp->snd_nxt;
	}
	tcp_set_ca_state(sk, TCP_CA_Disorder);
	tp->high_seq = tp->snd_nxt;
	tp->frto_counter = 1;
}
2114
2115
2116
2117
2118
/*
 * Leave F-RTO and enter the Loss state.
 *
 * @sk:               socket
 * @allowed_segments: added to the packets in flight to form the new cwnd
 * @flag:             FLAG_* bits of the ACK being processed; used (and
 *                    modified locally) to special-case the first skb
 *
 * Rebuilds lost_out/retrans_out from scratch: every sent skb that is not
 * SACKed is marked lost.
 */
static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	tp->lost_out = 0;
	tp->retrans_out = 0;
	if (tcp_is_reno(tp))
		tcp_reset_reno_sack(tp);

	tcp_for_write_queue(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;

		TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;

		/* With frto_counter == 1 and no data acked, the first skb
		 * keeps its SACKED_RETRANS mark and is counted in
		 * retrans_out.  Setting FLAG_DATA_ACKED in the local flag
		 * copy makes every later skb take the else branch.
		 */
		if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) {
			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
				tp->retrans_out += tcp_skb_pcount(skb);

			flag |= FLAG_DATA_ACKED;
		} else {
			/* Any retransmission seen disables undo. */
			if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
				tp->undo_marker = 0;
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
		}

		/* Everything not SACKed is marked lost; retransmit_high ends
		 * up at the end of the last such skb.
		 */
		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
			tp->lost_out += tcp_skb_pcount(skb);
			tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
		}
	}
	tcp_verify_left_out(tp);

	tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
	tp->snd_cwnd_cnt = 0;
	tp->snd_cwnd_stamp = tcp_time_stamp;
	tp->frto_counter = 0;
	tp->bytes_acked = 0;

	/* Never keep a reordering estimate above the sysctl value. */
	tp->reordering = min_t(unsigned int, tp->reordering,
			       sysctl_tcp_reordering);
	tcp_set_ca_state(sk, TCP_CA_Loss);
	tp->high_seq = tp->snd_nxt;
	TCP_ECN_queue_cwr(tp);

	tcp_clear_all_retrans_hints(tp);
}
2181
2182static void tcp_clear_retrans_partial(struct tcp_sock *tp)
2183{
2184 tp->retrans_out = 0;
2185 tp->lost_out = 0;
2186
2187 tp->undo_marker = 0;
2188 tp->undo_retrans = 0;
2189}
2190
2191void tcp_clear_retrans(struct tcp_sock *tp)
2192{
2193 tcp_clear_retrans_partial(tp);
2194
2195 tp->fackets_out = 0;
2196 tp->sacked_out = 0;
2197}
2198
2199
2200
2201
2202
/*
 * Enter the Loss state.
 *
 * @sk:  socket
 * @how: when non-zero, SACK information is discarded as well: sacked_out
 *       and fackets_out are zeroed and every sent skb (SACKed or not) is
 *       marked lost.  When zero, SACKed skbs keep their SACKED_ACKED mark
 *       and undo_marker is set to snd_una.
 */
void tcp_enter_loss(struct sock *sk, int how)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	/* Reduce ssthresh only when coming from Open/Disorder, when snd_una
	 * has reached high_seq, or when already in Loss with no retransmit
	 * timeouts counted.
	 */
	if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
		tp->prior_ssthresh = tcp_current_ssthresh(sk);
		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
		tcp_ca_event(sk, CA_EVENT_LOSS);
	}
	tp->snd_cwnd	   = 1;
	tp->snd_cwnd_cnt   = 0;
	tp->snd_cwnd_stamp = tcp_time_stamp;

	tp->bytes_acked = 0;
	tcp_clear_retrans_partial(tp);

	if (tcp_is_reno(tp))
		tcp_reset_reno_sack(tp);

	if (!how) {
		/* tcp_clear_retrans_partial() zeroed undo_marker; re-arm it. */
		tp->undo_marker = tp->snd_una;
	} else {
		tp->sacked_out = 0;
		tp->fackets_out = 0;
	}
	tcp_clear_all_retrans_hints(tp);

	tcp_for_write_queue(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;

		/* Any retransmission seen disables undo. */
		if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
			tp->undo_marker = 0;
		/* Clear all tag bits except SACKED_ACKED. */
		TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
			tp->lost_out += tcp_skb_pcount(skb);
			tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
		}
	}
	tcp_verify_left_out(tp);

	/* Never keep a reordering estimate above the sysctl value. */
	tp->reordering = min_t(unsigned int, tp->reordering,
			       sysctl_tcp_reordering);
	tcp_set_ca_state(sk, TCP_CA_Loss);
	tp->high_seq = tp->snd_nxt;
	TCP_ECN_queue_cwr(tp);

	tp->frto_counter = 0;
}
2260
2261
2262
2263
2264
2265
2266
2267static int tcp_check_sack_reneging(struct sock *sk, int flag)
2268{
2269 if (flag & FLAG_SACK_RENEGING) {
2270 struct inet_connection_sock *icsk = inet_csk(sk);
2271 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
2272
2273 tcp_enter_loss(sk, 1);
2274 icsk->icsk_retransmits++;
2275 tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
2276 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2277 icsk->icsk_rto, TCP_RTO_MAX);
2278 return 1;
2279 }
2280 return 0;
2281}
2282
2283static inline int tcp_fackets_out(struct tcp_sock *tp)
2284{
2285 return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
2286}
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303static inline int tcp_dupack_heuristics(struct tcp_sock *tp)
2304{
2305 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
2306}
2307
2308static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
2309{
2310 return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
2311}
2312
2313static inline int tcp_head_timedout(struct sock *sk)
2314{
2315 struct tcp_sock *tp = tcp_sk(sk);
2316
2317 return tp->packets_out &&
2318 tcp_skb_timedout(sk, tcp_write_queue_head(sk));
2319}
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414static int tcp_time_to_recover(struct sock *sk)
2415{
2416 struct tcp_sock *tp = tcp_sk(sk);
2417 __u32 packets_out;
2418
2419
2420 if (tp->frto_counter)
2421 return 0;
2422
2423
2424 if (tp->lost_out)
2425 return 1;
2426
2427
2428 if (tcp_dupack_heuristics(tp) > tp->reordering)
2429 return 1;
2430
2431
2432
2433
2434 if (tcp_is_fack(tp) && tcp_head_timedout(sk))
2435 return 1;
2436
2437
2438
2439
2440 packets_out = tp->packets_out;
2441 if (packets_out <= tp->reordering &&
2442 tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
2443 !tcp_may_send_now(sk)) {
2444
2445
2446
2447 return 1;
2448 }
2449
2450 return 0;
2451}
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465static void tcp_timeout_skbs(struct sock *sk)
2466{
2467 struct tcp_sock *tp = tcp_sk(sk);
2468 struct sk_buff *skb;
2469
2470 if (!tcp_is_fack(tp) || !tcp_head_timedout(sk))
2471 return;
2472
2473 skb = tp->scoreboard_skb_hint;
2474 if (tp->scoreboard_skb_hint == NULL)
2475 skb = tcp_write_queue_head(sk);
2476
2477 tcp_for_write_queue_from(skb, sk) {
2478 if (skb == tcp_send_head(sk))
2479 break;
2480 if (!tcp_skb_timedout(sk, skb))
2481 break;
2482
2483 tcp_skb_mark_lost(tp, skb);
2484 }
2485
2486 tp->scoreboard_skb_hint = skb;
2487
2488 tcp_verify_left_out(tp);
2489}
2490
2491
2492
2493
/*
 * Mark skbs at the head of the write queue as lost until @packets packets
 * have been counted.
 *
 * The scan resumes from lost_skb_hint/lost_cnt_hint when a hint is stored
 * and never goes past the send head or past high_seq.  With FACK or Reno
 * every skb is counted; otherwise only SACKed skbs are counted.
 */
static void tcp_mark_head_lost(struct sock *sk, int packets)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int cnt, oldcnt;
	int err;
	unsigned int mss;

	WARN_ON(packets > tp->packets_out);
	if (tp->lost_skb_hint) {
		skb = tp->lost_skb_hint;
		cnt = tp->lost_cnt_hint;
	} else {
		skb = tcp_write_queue_head(sk);
		cnt = 0;
	}

	tcp_for_write_queue_from(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;

		/* Record the position before this skb is counted so that a
		 * later call can resume here.
		 */
		tp->lost_skb_hint = skb;
		tp->lost_cnt_hint = cnt;

		if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
			break;

		oldcnt = cnt;
		if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
		    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
			cnt += tcp_skb_pcount(skb);

		if (cnt > packets) {
			/* This skb overshoots the budget.  With SACK, or when
			 * the budget was already used up before it, stop.
			 */
			if (tcp_is_sack(tp) || (oldcnt >= packets))
				break;

			/* Otherwise split the skb so that only the packets
			 * still within the budget are marked lost.
			 */
			mss = skb_shinfo(skb)->gso_size;
			err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
			if (err < 0)
				break;
			cnt = packets;
		}

		tcp_skb_mark_lost(tp, skb);
	}
	tcp_verify_left_out(tp);
}
2542
2543
2544
2545static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2546{
2547 struct tcp_sock *tp = tcp_sk(sk);
2548
2549 if (tcp_is_reno(tp)) {
2550 tcp_mark_head_lost(sk, 1);
2551 } else if (tcp_is_fack(tp)) {
2552 int lost = tp->fackets_out - tp->reordering;
2553 if (lost <= 0)
2554 lost = 1;
2555 tcp_mark_head_lost(sk, lost);
2556 } else {
2557 int sacked_upto = tp->sacked_out - tp->reordering;
2558 if (sacked_upto < fast_rexmit)
2559 sacked_upto = fast_rexmit;
2560 tcp_mark_head_lost(sk, sacked_upto);
2561 }
2562
2563 tcp_timeout_skbs(sk);
2564}
2565
2566
2567
2568
2569static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
2570{
2571 tp->snd_cwnd = min(tp->snd_cwnd,
2572 tcp_packets_in_flight(tp) + tcp_max_burst(tp));
2573 tp->snd_cwnd_stamp = tcp_time_stamp;
2574}
2575
2576
2577
2578
2579static inline u32 tcp_cwnd_min(const struct sock *sk)
2580{
2581 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
2582
2583 return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh;
2584}
2585
2586
2587static void tcp_cwnd_down(struct sock *sk, int flag)
2588{
2589 struct tcp_sock *tp = tcp_sk(sk);
2590 int decr = tp->snd_cwnd_cnt + 1;
2591
2592 if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
2593 (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
2594 tp->snd_cwnd_cnt = decr & 1;
2595 decr >>= 1;
2596
2597 if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
2598 tp->snd_cwnd -= decr;
2599
2600 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
2601 tp->snd_cwnd_stamp = tcp_time_stamp;
2602 }
2603}
2604
2605
2606
2607
2608static inline int tcp_packet_delayed(struct tcp_sock *tp)
2609{
2610 return !tp->retrans_stamp ||
2611 (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2612 before(tp->rx_opt.rcv_tsecr, tp->retrans_stamp));
2613}
2614
2615
2616
/*
 * DBGUNDO(sk, msg): debug trace for undo events.
 *
 * With FASTRETRANS_DEBUG > 1 this is a function that prints @msg together
 * with the peer address/port, cwnd, left-out count, current and prior
 * ssthresh and packets_out.  Otherwise it is a macro that expands to an
 * empty statement.
 */
#if FASTRETRANS_DEBUG > 1
static void DBGUNDO(struct sock *sk, const char *msg)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_sock *inet = inet_sk(sk);

	if (sk->sk_family == AF_INET) {
		printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
		       msg,
		       &inet->daddr, ntohs(inet->dport),
		       tp->snd_cwnd, tcp_left_out(tp),
		       tp->snd_ssthresh, tp->prior_ssthresh,
		       tp->packets_out);
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	/* Same output for IPv6 sockets, with the IPv6 peer address. */
	else if (sk->sk_family == AF_INET6) {
		struct ipv6_pinfo *np = inet6_sk(sk);
		printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
		       msg,
		       &np->daddr, ntohs(inet->dport),
		       tp->snd_cwnd, tcp_left_out(tp),
		       tp->snd_ssthresh, tp->prior_ssthresh,
		       tp->packets_out);
	}
#endif
}
#else
#define DBGUNDO(x...) do { } while (0)
#endif
2646
2647static void tcp_undo_cwr(struct sock *sk, const int undo)
2648{
2649 struct tcp_sock *tp = tcp_sk(sk);
2650
2651 if (tp->prior_ssthresh) {
2652 const struct inet_connection_sock *icsk = inet_csk(sk);
2653
2654 if (icsk->icsk_ca_ops->undo_cwnd)
2655 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2656 else
2657 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
2658
2659 if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
2660 tp->snd_ssthresh = tp->prior_ssthresh;
2661 TCP_ECN_withdraw_cwr(tp);
2662 }
2663 } else {
2664 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
2665 }
2666 tcp_moderate_cwnd(tp);
2667 tp->snd_cwnd_stamp = tcp_time_stamp;
2668}
2669
2670static inline int tcp_may_undo(struct tcp_sock *tp)
2671{
2672 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2673}
2674
2675
2676static int tcp_try_undo_recovery(struct sock *sk)
2677{
2678 struct tcp_sock *tp = tcp_sk(sk);
2679
2680 if (tcp_may_undo(tp)) {
2681 int mib_idx;
2682
2683
2684
2685
2686 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2687 tcp_undo_cwr(sk, 1);
2688 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2689 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2690 else
2691 mib_idx = LINUX_MIB_TCPFULLUNDO;
2692
2693 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2694 tp->undo_marker = 0;
2695 }
2696 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
2697
2698
2699
2700 tcp_moderate_cwnd(tp);
2701 return 1;
2702 }
2703 tcp_set_ca_state(sk, TCP_CA_Open);
2704 return 0;
2705}
2706
2707
2708static void tcp_try_undo_dsack(struct sock *sk)
2709{
2710 struct tcp_sock *tp = tcp_sk(sk);
2711
2712 if (tp->undo_marker && !tp->undo_retrans) {
2713 DBGUNDO(sk, "D-SACK");
2714 tcp_undo_cwr(sk, 1);
2715 tp->undo_marker = 0;
2716 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2717 }
2718}
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734static int tcp_any_retrans_done(struct sock *sk)
2735{
2736 struct tcp_sock *tp = tcp_sk(sk);
2737 struct sk_buff *skb;
2738
2739 if (tp->retrans_out)
2740 return 1;
2741
2742 skb = tcp_write_queue_head(sk);
2743 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2744 return 1;
2745
2746 return 0;
2747}
2748
2749
2750
2751static int tcp_try_undo_partial(struct sock *sk, int acked)
2752{
2753 struct tcp_sock *tp = tcp_sk(sk);
2754
2755 int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
2756
2757 if (tcp_may_undo(tp)) {
2758
2759
2760
2761 if (!tcp_any_retrans_done(sk))
2762 tp->retrans_stamp = 0;
2763
2764 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
2765
2766 DBGUNDO(sk, "Hoe");
2767 tcp_undo_cwr(sk, 0);
2768 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2769
2770
2771
2772
2773
2774 failed = 0;
2775 }
2776 return failed;
2777}
2778
2779
2780static int tcp_try_undo_loss(struct sock *sk)
2781{
2782 struct tcp_sock *tp = tcp_sk(sk);
2783
2784 if (tcp_may_undo(tp)) {
2785 struct sk_buff *skb;
2786 tcp_for_write_queue(skb, sk) {
2787 if (skb == tcp_send_head(sk))
2788 break;
2789 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2790 }
2791
2792 tcp_clear_all_retrans_hints(tp);
2793
2794 DBGUNDO(sk, "partial loss");
2795 tp->lost_out = 0;
2796 tcp_undo_cwr(sk, 1);
2797 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2798 inet_csk(sk)->icsk_retransmits = 0;
2799 tp->undo_marker = 0;
2800 if (tcp_is_sack(tp))
2801 tcp_set_ca_state(sk, TCP_CA_Open);
2802 return 1;
2803 }
2804 return 0;
2805}
2806
2807static inline void tcp_complete_cwr(struct sock *sk)
2808{
2809 struct tcp_sock *tp = tcp_sk(sk);
2810 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
2811 tp->snd_cwnd_stamp = tcp_time_stamp;
2812 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2813}
2814
2815static void tcp_try_keep_open(struct sock *sk)
2816{
2817 struct tcp_sock *tp = tcp_sk(sk);
2818 int state = TCP_CA_Open;
2819
2820 if (tcp_left_out(tp) || tcp_any_retrans_done(sk) || tp->undo_marker)
2821 state = TCP_CA_Disorder;
2822
2823 if (inet_csk(sk)->icsk_ca_state != state) {
2824 tcp_set_ca_state(sk, state);
2825 tp->high_seq = tp->snd_nxt;
2826 }
2827}
2828
2829static void tcp_try_to_open(struct sock *sk, int flag)
2830{
2831 struct tcp_sock *tp = tcp_sk(sk);
2832
2833 tcp_verify_left_out(tp);
2834
2835 if (!tp->frto_counter && !tcp_any_retrans_done(sk))
2836 tp->retrans_stamp = 0;
2837
2838 if (flag & FLAG_ECE)
2839 tcp_enter_cwr(sk, 1);
2840
2841 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2842 tcp_try_keep_open(sk);
2843 tcp_moderate_cwnd(tp);
2844 } else {
2845 tcp_cwnd_down(sk, flag);
2846 }
2847}
2848
/*
 * An MTU probe failed: the upper search bound becomes one less than the
 * probed size and the probe is marked as no longer in progress.
 */
static void tcp_mtup_probe_failed(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	/* Must read probe_size before it is cleared below. */
	icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
	icsk->icsk_mtup.probe_size = 0;
}
2856
2857static void tcp_mtup_probe_success(struct sock *sk)
2858{
2859 struct tcp_sock *tp = tcp_sk(sk);
2860 struct inet_connection_sock *icsk = inet_csk(sk);
2861
2862
2863 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2864 tp->snd_cwnd = tp->snd_cwnd *
2865 tcp_mss_to_mtu(sk, tp->mss_cache) /
2866 icsk->icsk_mtup.probe_size;
2867 tp->snd_cwnd_cnt = 0;
2868 tp->snd_cwnd_stamp = tcp_time_stamp;
2869 tp->rcv_ssthresh = tcp_current_ssthresh(sk);
2870
2871 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2872 icsk->icsk_mtup.probe_size = 0;
2873 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2874}
2875
2876
2877
2878
2879
2880void tcp_simple_retransmit(struct sock *sk)
2881{
2882 const struct inet_connection_sock *icsk = inet_csk(sk);
2883 struct tcp_sock *tp = tcp_sk(sk);
2884 struct sk_buff *skb;
2885 unsigned int mss = tcp_current_mss(sk);
2886 u32 prior_lost = tp->lost_out;
2887
2888 tcp_for_write_queue(skb, sk) {
2889 if (skb == tcp_send_head(sk))
2890 break;
2891 if (tcp_skb_seglen(skb) > mss &&
2892 !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2893 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2894 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2895 tp->retrans_out -= tcp_skb_pcount(skb);
2896 }
2897 tcp_skb_mark_lost_uncond_verify(tp, skb);
2898 }
2899 }
2900
2901 tcp_clear_retrans_hints_partial(tp);
2902
2903 if (prior_lost == tp->lost_out)
2904 return;
2905
2906 if (tcp_is_reno(tp))
2907 tcp_limit_reno_sacked(tp);
2908
2909 tcp_verify_left_out(tp);
2910
2911
2912
2913
2914
2915
2916 if (icsk->icsk_ca_state != TCP_CA_Loss) {
2917 tp->high_seq = tp->snd_nxt;
2918 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2919 tp->prior_ssthresh = 0;
2920 tp->undo_marker = 0;
2921 tcp_set_ca_state(sk, TCP_CA_Loss);
2922 }
2923 tcp_xmit_retransmit_queue(sk);
2924}
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
/*
 * Drive the congestion-state machine for one incoming ACK.
 *
 * @sk:         socket
 * @pkts_acked: number of packets acknowledged by this ACK (passed on to
 *              tcp_try_undo_partial())
 * @flag:       FLAG_* bits describing the ACK
 *
 * The function runs in stages: sanity fixes, SACK reneging check, FACK
 * loss marking, leaving a state once snd_una has reached high_seq,
 * per-state processing (possibly entering Recovery), and finally
 * scoreboard update, cwnd reduction and retransmission.
 */
static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	/* A duplicate ACK neither advances snd_una nor carries data, a
	 * window update or newly acked data.
	 */
	int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
	int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
				    (tcp_fackets_out(tp) > tp->reordering));
	int fast_rexmit = 0, mib_idx;

	/* Repair inconsistent counters (with a warning). */
	if (WARN_ON(!tp->packets_out && tp->sacked_out))
		tp->sacked_out = 0;
	if (WARN_ON(!tp->sacked_out && tp->fackets_out))
		tp->fackets_out = 0;

	/* An ECE-flagged ACK invalidates the saved ssthresh, which
	 * tcp_undo_cwr() checks before restoring it.
	 */
	if (flag & FLAG_ECE)
		tp->prior_ssthresh = 0;

	/* Reneging is handled completely by the helper. */
	if (tcp_check_sack_reneging(sk, flag))
		return;

	/* FACK: data beyond high_seq was SACKed while still recovering;
	 * mark the head of the queue lost beyond the reordering window.
	 */
	if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
	    before(tp->snd_una, tp->high_seq) &&
	    icsk->icsk_ca_state != TCP_CA_Open &&
	    tp->fackets_out > tp->reordering) {
		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
	}

	tcp_verify_left_out(tp);

	/* In Open state nothing may be retransmitted.  In any other state,
	 * once snd_una has reached high_seq, try to leave that state.
	 */
	if (icsk->icsk_ca_state == TCP_CA_Open) {
		WARN_ON(tp->retrans_out != 0);
		tp->retrans_stamp = 0;
	} else if (!before(tp->snd_una, tp->high_seq)) {
		switch (icsk->icsk_ca_state) {
		case TCP_CA_Loss:
			icsk->icsk_retransmits = 0;
			if (tcp_try_undo_recovery(sk))
				return;
			break;

		case TCP_CA_CWR:
			/* Leave CWR only once snd_una is strictly beyond
			 * high_seq.
			 */
			if (tp->snd_una != tp->high_seq) {
				tcp_complete_cwr(sk);
				tcp_set_ca_state(sk, TCP_CA_Open);
			}
			break;

		case TCP_CA_Disorder:
			tcp_try_undo_dsack(sk);
			/* Stay in Disorder only with SACK, an undo marker
			 * still set, and snd_una exactly at high_seq.
			 */
			if (!tp->undo_marker ||
			    tcp_is_reno(tp) || tp->snd_una != tp->high_seq) {
				tp->undo_marker = 0;
				tcp_set_ca_state(sk, TCP_CA_Open);
			}
			break;

		case TCP_CA_Recovery:
			if (tcp_is_reno(tp))
				tcp_reset_reno_sack(tp);
			if (tcp_try_undo_recovery(sk))
				return;
			tcp_complete_cwr(sk);
			break;
		}
	}

	/* Process the ACK in the state that is current now. */
	switch (icsk->icsk_ca_state) {
	case TCP_CA_Recovery:
		if (!(flag & FLAG_SND_UNA_ADVANCED)) {
			if (tcp_is_reno(tp) && is_dupack)
				tcp_add_reno_sack(sk);
		} else
			/* Partial ACK: the undo attempt decides do_lost. */
			do_lost = tcp_try_undo_partial(sk, pkts_acked);
		break;
	case TCP_CA_Loss:
		if (flag & FLAG_DATA_ACKED)
			icsk->icsk_retransmits = 0;
		if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
			tcp_reset_reno_sack(tp);
		if (!tcp_try_undo_loss(sk)) {
			tcp_moderate_cwnd(tp);
			tcp_xmit_retransmit_queue(sk);
			return;
		}
		if (icsk->icsk_ca_state != TCP_CA_Open)
			return;
		/* Loss was undone and the state is Open: fall through to the
		 * default processing.
		 */
	default:
		if (tcp_is_reno(tp)) {
			if (flag & FLAG_SND_UNA_ADVANCED)
				tcp_reset_reno_sack(tp);
			if (is_dupack)
				tcp_add_reno_sack(sk);
		}

		if (icsk->icsk_ca_state == TCP_CA_Disorder)
			tcp_try_undo_dsack(sk);

		if (!tcp_time_to_recover(sk)) {
			tcp_try_to_open(sk, flag);
			return;
		}

		/* The segment at the start of an outstanding MTU probe is
		 * the one in trouble: treat it as a failed probe rather than
		 * as congestion.
		 */
		if (icsk->icsk_ca_state < TCP_CA_CWR &&
		    icsk->icsk_mtup.probe_size &&
		    tp->snd_una == tp->mtu_probe.probe_seq_start) {
			tcp_mtup_probe_failed(sk);
			/* Allow one extra segment for the retransmission. */
			tp->snd_cwnd++;
			tcp_simple_retransmit(sk);
			return;
		}

		/* Enter Recovery. */
		if (tcp_is_reno(tp))
			mib_idx = LINUX_MIB_TCPRENORECOVERY;
		else
			mib_idx = LINUX_MIB_TCPSACKRECOVERY;

		NET_INC_STATS_BH(sock_net(sk), mib_idx);

		tp->high_seq = tp->snd_nxt;
		tp->prior_ssthresh = 0;
		tp->undo_marker = tp->snd_una;
		tp->undo_retrans = tp->retrans_out;

		/* ssthresh is reduced only when coming from Open or
		 * Disorder; the old value is saved for undo unless the ACK
		 * carried ECE.
		 */
		if (icsk->icsk_ca_state < TCP_CA_CWR) {
			if (!(flag & FLAG_ECE))
				tp->prior_ssthresh = tcp_current_ssthresh(sk);
			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
			TCP_ECN_queue_cwr(tp);
		}

		tp->bytes_acked = 0;
		tp->snd_cwnd_cnt = 0;
		tcp_set_ca_state(sk, TCP_CA_Recovery);
		fast_rexmit = 1;
	}

	if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
		tcp_update_scoreboard(sk, fast_rexmit);
	tcp_cwnd_down(sk, flag);
	tcp_xmit_retransmit_queue(sk);
}
3096
3097static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
3098{
3099 tcp_rtt_estimator(sk, seq_rtt);
3100 tcp_set_rto(sk);
3101 inet_csk(sk)->icsk_backoff = 0;
3102}
3103
3104
3105
3106
3107static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
3108{
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124 struct tcp_sock *tp = tcp_sk(sk);
3125
3126 tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
3127}
3128
3129static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
3130{
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140 if (flag & FLAG_RETRANS_DATA_ACKED)
3141 return;
3142
3143 tcp_valid_rtt_meas(sk, seq_rtt);
3144}
3145
3146static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
3147 const s32 seq_rtt)
3148{
3149 const struct tcp_sock *tp = tcp_sk(sk);
3150
3151 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
3152 tcp_ack_saw_tstamp(sk, flag);
3153 else if (seq_rtt >= 0)
3154 tcp_ack_no_tstamp(sk, seq_rtt, flag);
3155}
3156
3157static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
3158{
3159 const struct inet_connection_sock *icsk = inet_csk(sk);
3160 icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight);
3161 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
3162}
3163
3164
3165
3166
3167static void tcp_rearm_rto(struct sock *sk)
3168{
3169 struct tcp_sock *tp = tcp_sk(sk);
3170
3171 if (!tp->packets_out) {
3172 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
3173 } else {
3174 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3175 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
3176 }
3177}
3178
3179
3180static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3181{
3182 struct tcp_sock *tp = tcp_sk(sk);
3183 u32 packets_acked;
3184
3185 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
3186
3187 packets_acked = tcp_skb_pcount(skb);
3188 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
3189 return 0;
3190 packets_acked -= tcp_skb_pcount(skb);
3191
3192 if (packets_acked) {
3193 BUG_ON(tcp_skb_pcount(skb) == 0);
3194 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
3195 }
3196
3197 return packets_acked;
3198}
3199
3200
3201
3202
3203
3204static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3205 u32 prior_snd_una)
3206{
3207 struct tcp_sock *tp = tcp_sk(sk);
3208 const struct inet_connection_sock *icsk = inet_csk(sk);
3209 struct sk_buff *skb;
3210 u32 now = tcp_time_stamp;
3211 int fully_acked = 1;
3212 int flag = 0;
3213 u32 pkts_acked = 0;
3214 u32 reord = tp->packets_out;
3215 u32 prior_sacked = tp->sacked_out;
3216 s32 seq_rtt = -1;
3217 s32 ca_seq_rtt = -1;
3218 ktime_t last_ackt = net_invalid_timestamp();
3219
3220 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
3221 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3222 u32 acked_pcount;
3223 u8 sacked = scb->sacked;
3224
3225
3226 if (after(scb->end_seq, tp->snd_una)) {
3227 if (tcp_skb_pcount(skb) == 1 ||
3228 !after(tp->snd_una, scb->seq))
3229 break;
3230
3231 acked_pcount = tcp_tso_acked(sk, skb);
3232 if (!acked_pcount)
3233 break;
3234
3235 fully_acked = 0;
3236 } else {
3237 acked_pcount = tcp_skb_pcount(skb);
3238 }
3239
3240 if (sacked & TCPCB_RETRANS) {
3241 if (sacked & TCPCB_SACKED_RETRANS)
3242 tp->retrans_out -= acked_pcount;
3243 flag |= FLAG_RETRANS_DATA_ACKED;
3244 ca_seq_rtt = -1;
3245 seq_rtt = -1;
3246 if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
3247 flag |= FLAG_NONHEAD_RETRANS_ACKED;
3248 } else {
3249 ca_seq_rtt = now - scb->when;
3250 last_ackt = skb->tstamp;
3251 if (seq_rtt < 0) {
3252 seq_rtt = ca_seq_rtt;
3253 }
3254 if (!(sacked & TCPCB_SACKED_ACKED))
3255 reord = min(pkts_acked, reord);
3256 }
3257
3258 if (sacked & TCPCB_SACKED_ACKED)
3259 tp->sacked_out -= acked_pcount;
3260 if (sacked & TCPCB_LOST)
3261 tp->lost_out -= acked_pcount;
3262
3263 tp->packets_out -= acked_pcount;
3264 pkts_acked += acked_pcount;
3265
3266
3267
3268
3269
3270
3271
3272
3273 if (!(scb->flags & TCPCB_FLAG_SYN)) {
3274 flag |= FLAG_DATA_ACKED;
3275 } else {
3276 flag |= FLAG_SYN_ACKED;
3277 tp->retrans_stamp = 0;
3278 }
3279
3280 if (!fully_acked)
3281 break;
3282
3283 tcp_unlink_write_queue(skb, sk);
3284 sk_wmem_free_skb(sk, skb);
3285 tp->scoreboard_skb_hint = NULL;
3286 if (skb == tp->retransmit_skb_hint)
3287 tp->retransmit_skb_hint = NULL;
3288 if (skb == tp->lost_skb_hint)
3289 tp->lost_skb_hint = NULL;
3290 }
3291
3292 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3293 tp->snd_up = tp->snd_una;
3294
3295 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
3296 flag |= FLAG_SACK_RENEGING;
3297
3298 if (flag & FLAG_ACKED) {
3299 const struct tcp_congestion_ops *ca_ops
3300 = inet_csk(sk)->icsk_ca_ops;
3301
3302 if (unlikely(icsk->icsk_mtup.probe_size &&
3303 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3304 tcp_mtup_probe_success(sk);
3305 }
3306
3307 tcp_ack_update_rtt(sk, flag, seq_rtt);
3308 tcp_rearm_rto(sk);
3309
3310 if (tcp_is_reno(tp)) {
3311 tcp_remove_reno_sacks(sk, pkts_acked);
3312 } else {
3313 int delta;
3314
3315
3316 if (reord < prior_fackets)
3317 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
3318
3319 delta = tcp_is_fack(tp) ? pkts_acked :
3320 prior_sacked - tp->sacked_out;
3321 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3322 }
3323
3324 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
3325
3326 if (ca_ops->pkts_acked) {
3327 s32 rtt_us = -1;
3328
3329
3330 if (!(flag & FLAG_RETRANS_DATA_ACKED)) {
3331
3332 if (ca_ops->flags & TCP_CONG_RTT_STAMP &&
3333 !ktime_equal(last_ackt,
3334 net_invalid_timestamp()))
3335 rtt_us = ktime_us_delta(ktime_get_real(),
3336 last_ackt);
3337 else if (ca_seq_rtt > 0)
3338 rtt_us = jiffies_to_usecs(ca_seq_rtt);
3339 }
3340
3341 ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
3342 }
3343 }
3344
3345#if FASTRETRANS_DEBUG > 0
3346 WARN_ON((int)tp->sacked_out < 0);
3347 WARN_ON((int)tp->lost_out < 0);
3348 WARN_ON((int)tp->retrans_out < 0);
3349 if (!tp->packets_out && tcp_is_sack(tp)) {
3350 icsk = inet_csk(sk);
3351 if (tp->lost_out) {
3352 printk(KERN_DEBUG "Leak l=%u %d\n",
3353 tp->lost_out, icsk->icsk_ca_state);
3354 tp->lost_out = 0;
3355 }
3356 if (tp->sacked_out) {
3357 printk(KERN_DEBUG "Leak s=%u %d\n",
3358 tp->sacked_out, icsk->icsk_ca_state);
3359 tp->sacked_out = 0;
3360 }
3361 if (tp->retrans_out) {
3362 printk(KERN_DEBUG "Leak r=%u %d\n",
3363 tp->retrans_out, icsk->icsk_ca_state);
3364 tp->retrans_out = 0;
3365 }
3366 }
3367#endif
3368 return flag;
3369}
3370
3371static void tcp_ack_probe(struct sock *sk)
3372{
3373 const struct tcp_sock *tp = tcp_sk(sk);
3374 struct inet_connection_sock *icsk = inet_csk(sk);
3375
3376
3377
3378 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
3379 icsk->icsk_backoff = 0;
3380 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
3381
3382
3383
3384 } else {
3385 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3386 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
3387 TCP_RTO_MAX);
3388 }
3389}
3390
3391static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag)
3392{
3393 return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3394 inet_csk(sk)->icsk_ca_state != TCP_CA_Open);
3395}
3396
3397static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3398{
3399 const struct tcp_sock *tp = tcp_sk(sk);
3400 return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
3401 !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
3402}
3403
3404
3405
3406
3407static inline int tcp_may_update_window(const struct tcp_sock *tp,
3408 const u32 ack, const u32 ack_seq,
3409 const u32 nwin)
3410{
3411 return (after(ack, tp->snd_una) ||
3412 after(ack_seq, tp->snd_wl1) ||
3413 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd));
3414}
3415
3416
3417
3418
3419
3420
3421static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
3422 u32 ack_seq)
3423{
3424 struct tcp_sock *tp = tcp_sk(sk);
3425 int flag = 0;
3426 u32 nwin = ntohs(tcp_hdr(skb)->window);
3427
3428 if (likely(!tcp_hdr(skb)->syn))
3429 nwin <<= tp->rx_opt.snd_wscale;
3430
3431 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3432 flag |= FLAG_WIN_UPDATE;
3433 tcp_update_wl(tp, ack_seq);
3434
3435 if (tp->snd_wnd != nwin) {
3436 tp->snd_wnd = nwin;
3437
3438
3439
3440
3441 tp->pred_flags = 0;
3442 tcp_fast_path_check(sk);
3443
3444 if (nwin > tp->max_window) {
3445 tp->max_window = nwin;
3446 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3447 }
3448 }
3449 }
3450
3451 tp->snd_una = ack;
3452
3453 return flag;
3454}
3455
3456
3457
3458
3459static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
3460{
3461 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
3462 tp->snd_cwnd_cnt = 0;
3463 tp->bytes_acked = 0;
3464 TCP_ECN_queue_cwr(tp);
3465 tcp_moderate_cwnd(tp);
3466}
3467
3468
3469
3470
/* Rate-halving response to a spurious timeout: enter the CWR state via
 * tcp_enter_cwr() with its second argument set to 0.
 */
static void tcp_ratehalving_spur_to_response(struct sock *sk)
{
	tcp_enter_cwr(sk, 0);
}
3475
3476static void tcp_undo_spur_to_response(struct sock *sk, int flag)
3477{
3478 if (flag & FLAG_ECE)
3479 tcp_ratehalving_spur_to_response(sk);
3480 else
3481 tcp_undo_cwr(sk, 1);
3482}
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514static int tcp_process_frto(struct sock *sk, int flag)
3515{
3516 struct tcp_sock *tp = tcp_sk(sk);
3517
3518 tcp_verify_left_out(tp);
3519
3520
3521 if (flag & FLAG_DATA_ACKED)
3522 inet_csk(sk)->icsk_retransmits = 0;
3523
3524 if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
3525 ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
3526 tp->undo_marker = 0;
3527
3528 if (!before(tp->snd_una, tp->frto_highmark)) {
3529 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
3530 return 1;
3531 }
3532
3533 if (!tcp_is_sackfrto(tp)) {
3534
3535
3536
3537
3538 if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP))
3539 return 1;
3540
3541 if (!(flag & FLAG_DATA_ACKED)) {
3542 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
3543 flag);
3544 return 1;
3545 }
3546 } else {
3547 if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
3548
3549 tp->snd_cwnd = min(tp->snd_cwnd,
3550 tcp_packets_in_flight(tp));
3551 return 1;
3552 }
3553
3554 if ((tp->frto_counter >= 2) &&
3555 (!(flag & FLAG_FORWARD_PROGRESS) ||
3556 ((flag & FLAG_DATA_SACKED) &&
3557 !(flag & FLAG_ONLY_ORIG_SACKED)))) {
3558
3559 if (!(flag & FLAG_FORWARD_PROGRESS) &&
3560 (flag & FLAG_NOT_DUP))
3561 return 1;
3562
3563 tcp_enter_frto_loss(sk, 3, flag);
3564 return 1;
3565 }
3566 }
3567
3568 if (tp->frto_counter == 1) {
3569
3570 tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
3571 tp->frto_counter = 2;
3572
3573 if (!tcp_may_send_now(sk))
3574 tcp_enter_frto_loss(sk, 2, flag);
3575
3576 return 1;
3577 } else {
3578 switch (sysctl_tcp_frto_response) {
3579 case 2:
3580 tcp_undo_spur_to_response(sk, flag);
3581 break;
3582 case 1:
3583 tcp_conservative_spur_to_response(tp);
3584 break;
3585 default:
3586 tcp_ratehalving_spur_to_response(sk);
3587 break;
3588 }
3589 tp->frto_counter = 0;
3590 tp->undo_marker = 0;
3591 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS);
3592 }
3593 return 0;
3594}
3595
3596
3597static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3598{
3599 struct inet_connection_sock *icsk = inet_csk(sk);
3600 struct tcp_sock *tp = tcp_sk(sk);
3601 u32 prior_snd_una = tp->snd_una;
3602 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3603 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3604 u32 prior_in_flight;
3605 u32 prior_fackets;
3606 int prior_packets;
3607 int frto_cwnd = 0;
3608
3609
3610
3611
3612 if (before(ack, prior_snd_una))
3613 goto old_ack;
3614
3615
3616
3617
3618 if (after(ack, tp->snd_nxt))
3619 goto invalid_ack;
3620
3621 if (after(ack, prior_snd_una))
3622 flag |= FLAG_SND_UNA_ADVANCED;
3623
3624 if (sysctl_tcp_abc) {
3625 if (icsk->icsk_ca_state < TCP_CA_CWR)
3626 tp->bytes_acked += ack - prior_snd_una;
3627 else if (icsk->icsk_ca_state == TCP_CA_Loss)
3628
3629 tp->bytes_acked += min(ack - prior_snd_una,
3630 tp->mss_cache);
3631 }
3632
3633 prior_fackets = tp->fackets_out;
3634 prior_in_flight = tcp_packets_in_flight(tp);
3635
3636 if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
3637
3638
3639
3640
3641 tcp_update_wl(tp, ack_seq);
3642 tp->snd_una = ack;
3643 flag |= FLAG_WIN_UPDATE;
3644
3645 tcp_ca_event(sk, CA_EVENT_FAST_ACK);
3646
3647 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
3648 } else {
3649 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3650 flag |= FLAG_DATA;
3651 else
3652 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3653
3654 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3655
3656 if (TCP_SKB_CB(skb)->sacked)
3657 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3658
3659 if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
3660 flag |= FLAG_ECE;
3661
3662 tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
3663 }
3664
3665
3666
3667
3668 sk->sk_err_soft = 0;
3669 icsk->icsk_probes_out = 0;
3670 tp->rcv_tstamp = tcp_time_stamp;
3671 prior_packets = tp->packets_out;
3672 if (!prior_packets)
3673 goto no_queue;
3674
3675
3676 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
3677
3678 if (tp->frto_counter)
3679 frto_cwnd = tcp_process_frto(sk, flag);
3680
3681 if (before(tp->frto_highmark, tp->snd_una))
3682 tp->frto_highmark = 0;
3683
3684 if (tcp_ack_is_dubious(sk, flag)) {
3685
3686 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
3687 tcp_may_raise_cwnd(sk, flag))
3688 tcp_cong_avoid(sk, ack, prior_in_flight);
3689 tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
3690 flag);
3691 } else {
3692 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
3693 tcp_cong_avoid(sk, ack, prior_in_flight);
3694 }
3695
3696 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3697 dst_confirm(sk->sk_dst_cache);
3698
3699 return 1;
3700
3701no_queue:
3702
3703
3704
3705
3706 if (tcp_send_head(sk))
3707 tcp_ack_probe(sk);
3708 return 1;
3709
3710invalid_ack:
3711 SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3712 return -1;
3713
3714old_ack:
3715 if (TCP_SKB_CB(skb)->sacked) {
3716 tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3717 if (icsk->icsk_ca_state == TCP_CA_Open)
3718 tcp_try_keep_open(sk);
3719 }
3720
3721 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3722 return 0;
3723}
3724
3725
3726
3727
3728
3729void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3730 u8 **hvpp, int estab)
3731{
3732 unsigned char *ptr;
3733 struct tcphdr *th = tcp_hdr(skb);
3734 int length = (th->doff * 4) - sizeof(struct tcphdr);
3735
3736 ptr = (unsigned char *)(th + 1);
3737 opt_rx->saw_tstamp = 0;
3738
3739 while (length > 0) {
3740 int opcode = *ptr++;
3741 int opsize;
3742
3743 switch (opcode) {
3744 case TCPOPT_EOL:
3745 return;
3746 case TCPOPT_NOP:
3747 length--;
3748 continue;
3749 default:
3750 opsize = *ptr++;
3751 if (opsize < 2)
3752 return;
3753 if (opsize > length)
3754 return;
3755 switch (opcode) {
3756 case TCPOPT_MSS:
3757 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
3758 u16 in_mss = get_unaligned_be16(ptr);
3759 if (in_mss) {
3760 if (opt_rx->user_mss &&
3761 opt_rx->user_mss < in_mss)
3762 in_mss = opt_rx->user_mss;
3763 opt_rx->mss_clamp = in_mss;
3764 }
3765 }
3766 break;
3767 case TCPOPT_WINDOW:
3768 if (opsize == TCPOLEN_WINDOW && th->syn &&
3769 !estab && sysctl_tcp_window_scaling) {
3770 __u8 snd_wscale = *(__u8 *)ptr;
3771 opt_rx->wscale_ok = 1;
3772 if (snd_wscale > 14) {
3773 if (net_ratelimit())
3774 printk(KERN_INFO "tcp_parse_options: Illegal window "
3775 "scaling value %d >14 received.\n",
3776 snd_wscale);
3777 snd_wscale = 14;
3778 }
3779 opt_rx->snd_wscale = snd_wscale;
3780 }
3781 break;
3782 case TCPOPT_TIMESTAMP:
3783 if ((opsize == TCPOLEN_TIMESTAMP) &&
3784 ((estab && opt_rx->tstamp_ok) ||
3785 (!estab && sysctl_tcp_timestamps))) {
3786 opt_rx->saw_tstamp = 1;
3787 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3788 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
3789 }
3790 break;
3791 case TCPOPT_SACK_PERM:
3792 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3793 !estab && sysctl_tcp_sack) {
3794 opt_rx->sack_ok = 1;
3795 tcp_sack_reset(opt_rx);
3796 }
3797 break;
3798
3799 case TCPOPT_SACK:
3800 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3801 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3802 opt_rx->sack_ok) {
3803 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3804 }
3805 break;
3806#ifdef CONFIG_TCP_MD5SIG
3807 case TCPOPT_MD5SIG:
3808
3809
3810
3811
3812 break;
3813#endif
3814 case TCPOPT_COOKIE:
3815
3816
3817 switch (opsize) {
3818 case TCPOLEN_COOKIE_BASE:
3819
3820 break;
3821 case TCPOLEN_COOKIE_PAIR:
3822
3823 break;
3824 case TCPOLEN_COOKIE_MIN+0:
3825 case TCPOLEN_COOKIE_MIN+2:
3826 case TCPOLEN_COOKIE_MIN+4:
3827 case TCPOLEN_COOKIE_MIN+6:
3828 case TCPOLEN_COOKIE_MAX:
3829
3830 opt_rx->cookie_plus = opsize;
3831 *hvpp = ptr;
3832 default:
3833
3834 break;
3835 };
3836 break;
3837 };
3838
3839 ptr += opsize-2;
3840 length -= opsize;
3841 }
3842 }
3843}
3844
3845static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
3846{
3847 __be32 *ptr = (__be32 *)(th + 1);
3848
3849 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
3850 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
3851 tp->rx_opt.saw_tstamp = 1;
3852 ++ptr;
3853 tp->rx_opt.rcv_tsval = ntohl(*ptr);
3854 ++ptr;
3855 tp->rx_opt.rcv_tsecr = ntohl(*ptr);
3856 return 1;
3857 }
3858 return 0;
3859}
3860
3861
3862
3863
3864static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
3865 struct tcp_sock *tp, u8 **hvpp)
3866{
3867
3868
3869
3870 if (th->doff == (sizeof(*th) / 4)) {
3871 tp->rx_opt.saw_tstamp = 0;
3872 return 0;
3873 } else if (tp->rx_opt.tstamp_ok &&
3874 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
3875 if (tcp_parse_aligned_timestamp(tp, th))
3876 return 1;
3877 }
3878 tcp_parse_options(skb, &tp->rx_opt, hvpp, 1);
3879 return 1;
3880}
3881
3882#ifdef CONFIG_TCP_MD5SIG
3883
3884
3885
3886u8 *tcp_parse_md5sig_option(struct tcphdr *th)
3887{
3888 int length = (th->doff << 2) - sizeof (*th);
3889 u8 *ptr = (u8*)(th + 1);
3890
3891
3892 if (length < TCPOLEN_MD5SIG)
3893 return NULL;
3894
3895 while (length > 0) {
3896 int opcode = *ptr++;
3897 int opsize;
3898
3899 switch(opcode) {
3900 case TCPOPT_EOL:
3901 return NULL;
3902 case TCPOPT_NOP:
3903 length--;
3904 continue;
3905 default:
3906 opsize = *ptr++;
3907 if (opsize < 2 || opsize > length)
3908 return NULL;
3909 if (opcode == TCPOPT_MD5SIG)
3910 return ptr;
3911 }
3912 ptr += opsize - 2;
3913 length -= opsize;
3914 }
3915 return NULL;
3916}
3917#endif
3918
3919static inline void tcp_store_ts_recent(struct tcp_sock *tp)
3920{
3921 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3922 tp->rx_opt.ts_recent_stamp = get_seconds();
3923}
3924
3925static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3926{
3927 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
3928
3929
3930
3931
3932
3933
3934
3935 if (tcp_paws_check(&tp->rx_opt, 0))
3936 tcp_store_ts_recent(tp);
3937 }
3938}
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
3964{
3965 struct tcp_sock *tp = tcp_sk(sk);
3966 struct tcphdr *th = tcp_hdr(skb);
3967 u32 seq = TCP_SKB_CB(skb)->seq;
3968 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3969
3970 return (
3971 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
3972
3973
3974 ack == tp->snd_una &&
3975
3976
3977 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
3978
3979
3980 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
3981}
3982
3983static inline int tcp_paws_discard(const struct sock *sk,
3984 const struct sk_buff *skb)
3985{
3986 const struct tcp_sock *tp = tcp_sk(sk);
3987
3988 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
3989 !tcp_disordered_ack(sk, skb);
3990}
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005static inline int tcp_sequence(struct tcp_sock *tp, u32 seq, u32 end_seq)
4006{
4007 return !before(end_seq, tp->rcv_wup) &&
4008 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
4009}
4010
4011
/* Handle a received RST: set sk_err according to the current state,
 * report the error unless the socket is dead, and finish the connection
 * with tcp_done(). A socket already in TCP_CLOSE is left untouched.
 */
static void tcp_reset(struct sock *sk)
{
	int err;

	if (sk->sk_state == TCP_CLOSE)
		return;

	if (sk->sk_state == TCP_SYN_SENT)
		err = ECONNREFUSED;
	else if (sk->sk_state == TCP_CLOSE_WAIT)
		err = EPIPE;
	else
		err = ECONNRESET;
	sk->sk_err = err;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_error_report(sk);

	tcp_done(sk);
}
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
4049{
4050 struct tcp_sock *tp = tcp_sk(sk);
4051
4052 inet_csk_schedule_ack(sk);
4053
4054 sk->sk_shutdown |= RCV_SHUTDOWN;
4055 sock_set_flag(sk, SOCK_DONE);
4056
4057 switch (sk->sk_state) {
4058 case TCP_SYN_RECV:
4059 case TCP_ESTABLISHED:
4060
4061 tcp_set_state(sk, TCP_CLOSE_WAIT);
4062 inet_csk(sk)->icsk_ack.pingpong = 1;
4063 break;
4064
4065 case TCP_CLOSE_WAIT:
4066 case TCP_CLOSING:
4067
4068
4069
4070 break;
4071 case TCP_LAST_ACK:
4072
4073 break;
4074
4075 case TCP_FIN_WAIT1:
4076
4077
4078
4079
4080 tcp_send_ack(sk);
4081 tcp_set_state(sk, TCP_CLOSING);
4082 break;
4083 case TCP_FIN_WAIT2:
4084
4085 tcp_send_ack(sk);
4086 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
4087 break;
4088 default:
4089
4090
4091
4092 printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
4093 __func__, sk->sk_state);
4094 break;
4095 }
4096
4097
4098
4099
4100 __skb_queue_purge(&tp->out_of_order_queue);
4101 if (tcp_is_sack(tp))
4102 tcp_sack_reset(&tp->rx_opt);
4103 sk_mem_reclaim(sk);
4104
4105 if (!sock_flag(sk, SOCK_DEAD)) {
4106 sk->sk_state_change(sk);
4107
4108
4109 if (sk->sk_shutdown == SHUTDOWN_MASK ||
4110 sk->sk_state == TCP_CLOSE)
4111 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
4112 else
4113 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
4114 }
4115}
4116
4117static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4118 u32 end_seq)
4119{
4120 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
4121 if (before(seq, sp->start_seq))
4122 sp->start_seq = seq;
4123 if (after(end_seq, sp->end_seq))
4124 sp->end_seq = end_seq;
4125 return 1;
4126 }
4127 return 0;
4128}
4129
4130static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4131{
4132 struct tcp_sock *tp = tcp_sk(sk);
4133
4134 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4135 int mib_idx;
4136
4137 if (before(seq, tp->rcv_nxt))
4138 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
4139 else
4140 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
4141
4142 NET_INC_STATS_BH(sock_net(sk), mib_idx);
4143
4144 tp->rx_opt.dsack = 1;
4145 tp->duplicate_sack[0].start_seq = seq;
4146 tp->duplicate_sack[0].end_seq = end_seq;
4147 }
4148}
4149
4150static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4151{
4152 struct tcp_sock *tp = tcp_sk(sk);
4153
4154 if (!tp->rx_opt.dsack)
4155 tcp_dsack_set(sk, seq, end_seq);
4156 else
4157 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
4158}
4159
4160static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
4161{
4162 struct tcp_sock *tp = tcp_sk(sk);
4163
4164 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4165 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4166 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4167 tcp_enter_quickack_mode(sk);
4168
4169 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4170 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4171
4172 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
4173 end_seq = tp->rcv_nxt;
4174 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
4175 }
4176 }
4177
4178 tcp_send_ack(sk);
4179}
4180
4181
4182
4183
4184static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4185{
4186 int this_sack;
4187 struct tcp_sack_block *sp = &tp->selective_acks[0];
4188 struct tcp_sack_block *swalk = sp + 1;
4189
4190
4191
4192
4193 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
4194 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
4195 int i;
4196
4197
4198
4199
4200 tp->rx_opt.num_sacks--;
4201 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4202 sp[i] = sp[i + 1];
4203 continue;
4204 }
4205 this_sack++, swalk++;
4206 }
4207}
4208
4209static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4210{
4211 struct tcp_sock *tp = tcp_sk(sk);
4212 struct tcp_sack_block *sp = &tp->selective_acks[0];
4213 int cur_sacks = tp->rx_opt.num_sacks;
4214 int this_sack;
4215
4216 if (!cur_sacks)
4217 goto new_sack;
4218
4219 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
4220 if (tcp_sack_extend(sp, seq, end_seq)) {
4221
4222 for (; this_sack > 0; this_sack--, sp--)
4223 swap(*sp, *(sp - 1));
4224 if (cur_sacks > 1)
4225 tcp_sack_maybe_coalesce(tp);
4226 return;
4227 }
4228 }
4229
4230
4231
4232
4233
4234
4235
4236 if (this_sack >= TCP_NUM_SACKS) {
4237 this_sack--;
4238 tp->rx_opt.num_sacks--;
4239 sp--;
4240 }
4241 for (; this_sack > 0; this_sack--, sp--)
4242 *sp = *(sp - 1);
4243
4244new_sack:
4245
4246 sp->start_seq = seq;
4247 sp->end_seq = end_seq;
4248 tp->rx_opt.num_sacks++;
4249}
4250
4251
4252
4253static void tcp_sack_remove(struct tcp_sock *tp)
4254{
4255 struct tcp_sack_block *sp = &tp->selective_acks[0];
4256 int num_sacks = tp->rx_opt.num_sacks;
4257 int this_sack;
4258
4259
4260 if (skb_queue_empty(&tp->out_of_order_queue)) {
4261 tp->rx_opt.num_sacks = 0;
4262 return;
4263 }
4264
4265 for (this_sack = 0; this_sack < num_sacks;) {
4266
4267 if (!before(tp->rcv_nxt, sp->start_seq)) {
4268 int i;
4269
4270
4271 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4272
4273
4274 for (i=this_sack+1; i < num_sacks; i++)
4275 tp->selective_acks[i-1] = tp->selective_acks[i];
4276 num_sacks--;
4277 continue;
4278 }
4279 this_sack++;
4280 sp++;
4281 }
4282 tp->rx_opt.num_sacks = num_sacks;
4283}
4284
4285
4286
4287
4288static void tcp_ofo_queue(struct sock *sk)
4289{
4290 struct tcp_sock *tp = tcp_sk(sk);
4291 __u32 dsack_high = tp->rcv_nxt;
4292 struct sk_buff *skb;
4293
4294 while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
4295 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
4296 break;
4297
4298 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
4299 __u32 dsack = dsack_high;
4300 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
4301 dsack_high = TCP_SKB_CB(skb)->end_seq;
4302 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
4303 }
4304
4305 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4306 SOCK_DEBUG(sk, "ofo packet was already received \n");
4307 __skb_unlink(skb, &tp->out_of_order_queue);
4308 __kfree_skb(skb);
4309 continue;
4310 }
4311 SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
4312 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4313 TCP_SKB_CB(skb)->end_seq);
4314
4315 __skb_unlink(skb, &tp->out_of_order_queue);
4316 __skb_queue_tail(&sk->sk_receive_queue, skb);
4317 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4318 if (tcp_hdr(skb)->fin)
4319 tcp_fin(skb, sk, tcp_hdr(skb));
4320 }
4321}
4322
4323static int tcp_prune_ofo_queue(struct sock *sk);
4324static int tcp_prune_queue(struct sock *sk);
4325
4326static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
4327{
4328 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4329 !sk_rmem_schedule(sk, size)) {
4330
4331 if (tcp_prune_queue(sk) < 0)
4332 return -1;
4333
4334 if (!sk_rmem_schedule(sk, size)) {
4335 if (!tcp_prune_ofo_queue(sk))
4336 return -1;
4337
4338 if (!sk_rmem_schedule(sk, size))
4339 return -1;
4340 }
4341 }
4342 return 0;
4343}
4344
4345static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4346{
4347 struct tcphdr *th = tcp_hdr(skb);
4348 struct tcp_sock *tp = tcp_sk(sk);
4349 int eaten = -1;
4350
4351 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
4352 goto drop;
4353
4354 __skb_pull(skb, th->doff * 4);
4355
4356 TCP_ECN_accept_cwr(tp, skb);
4357
4358 tp->rx_opt.dsack = 0;
4359
4360
4361
4362
4363
4364 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
4365 if (tcp_receive_window(tp) == 0)
4366 goto out_of_window;
4367
4368
4369 if (tp->ucopy.task == current &&
4370 tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
4371 sock_owned_by_user(sk) && !tp->urg_data) {
4372 int chunk = min_t(unsigned int, skb->len,
4373 tp->ucopy.len);
4374
4375 __set_current_state(TASK_RUNNING);
4376
4377 local_bh_enable();
4378 if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
4379 tp->ucopy.len -= chunk;
4380 tp->copied_seq += chunk;
4381 eaten = (chunk == skb->len && !th->fin);
4382 tcp_rcv_space_adjust(sk);
4383 }
4384 local_bh_disable();
4385 }
4386
4387 if (eaten <= 0) {
4388queue_and_out:
4389 if (eaten < 0 &&
4390 tcp_try_rmem_schedule(sk, skb->truesize))
4391 goto drop;
4392
4393 skb_set_owner_r(skb, sk);
4394 __skb_queue_tail(&sk->sk_receive_queue, skb);
4395 }
4396 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4397 if (skb->len)
4398 tcp_event_data_recv(sk, skb);
4399 if (th->fin)
4400 tcp_fin(skb, sk, th);
4401
4402 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4403 tcp_ofo_queue(sk);
4404
4405
4406
4407
4408 if (skb_queue_empty(&tp->out_of_order_queue))
4409 inet_csk(sk)->icsk_ack.pingpong = 0;
4410 }
4411
4412 if (tp->rx_opt.num_sacks)
4413 tcp_sack_remove(tp);
4414
4415 tcp_fast_path_check(sk);
4416
4417 if (eaten > 0)
4418 __kfree_skb(skb);
4419 else if (!sock_flag(sk, SOCK_DEAD))
4420 sk->sk_data_ready(sk, 0);
4421 return;
4422 }
4423
4424 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4425
4426 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4427 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4428
4429out_of_window:
4430 tcp_enter_quickack_mode(sk);
4431 inet_csk_schedule_ack(sk);
4432drop:
4433 __kfree_skb(skb);
4434 return;
4435 }
4436
4437
4438 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
4439 goto out_of_window;
4440
4441 tcp_enter_quickack_mode(sk);
4442
4443 if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4444
4445 SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
4446 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4447 TCP_SKB_CB(skb)->end_seq);
4448
4449 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
4450
4451
4452
4453
4454 if (!tcp_receive_window(tp))
4455 goto out_of_window;
4456 goto queue_and_out;
4457 }
4458
4459 TCP_ECN_check_ce(tp, skb);
4460
4461 if (tcp_try_rmem_schedule(sk, skb->truesize))
4462 goto drop;
4463
4464
4465 tp->pred_flags = 0;
4466 inet_csk_schedule_ack(sk);
4467
4468 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
4469 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4470
4471 skb_set_owner_r(skb, sk);
4472
4473 if (!skb_peek(&tp->out_of_order_queue)) {
4474
4475 if (tcp_is_sack(tp)) {
4476 tp->rx_opt.num_sacks = 1;
4477 tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
4478 tp->selective_acks[0].end_seq =
4479 TCP_SKB_CB(skb)->end_seq;
4480 }
4481 __skb_queue_head(&tp->out_of_order_queue, skb);
4482 } else {
4483 struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue);
4484 u32 seq = TCP_SKB_CB(skb)->seq;
4485 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4486
4487 if (seq == TCP_SKB_CB(skb1)->end_seq) {
4488 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4489
4490 if (!tp->rx_opt.num_sacks ||
4491 tp->selective_acks[0].end_seq != seq)
4492 goto add_sack;
4493
4494
4495 tp->selective_acks[0].end_seq = end_seq;
4496 return;
4497 }
4498
4499
4500 while (1) {
4501 if (!after(TCP_SKB_CB(skb1)->seq, seq))
4502 break;
4503 if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
4504 skb1 = NULL;
4505 break;
4506 }
4507 skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
4508 }
4509
4510
4511 if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4512 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4513
4514 __kfree_skb(skb);
4515 tcp_dsack_set(sk, seq, end_seq);
4516 goto add_sack;
4517 }
4518 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
4519
4520 tcp_dsack_set(sk, seq,
4521 TCP_SKB_CB(skb1)->end_seq);
4522 } else {
4523 if (skb_queue_is_first(&tp->out_of_order_queue,
4524 skb1))
4525 skb1 = NULL;
4526 else
4527 skb1 = skb_queue_prev(
4528 &tp->out_of_order_queue,
4529 skb1);
4530 }
4531 }
4532 if (!skb1)
4533 __skb_queue_head(&tp->out_of_order_queue, skb);
4534 else
4535 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4536
4537
4538 while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
4539 skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
4540
4541 if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
4542 break;
4543 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4544 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4545 end_seq);
4546 break;
4547 }
4548 __skb_unlink(skb1, &tp->out_of_order_queue);
4549 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4550 TCP_SKB_CB(skb1)->end_seq);
4551 __kfree_skb(skb1);
4552 }
4553
4554add_sack:
4555 if (tcp_is_sack(tp))
4556 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4557 }
4558}
4559
4560static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
4561 struct sk_buff_head *list)
4562{
4563 struct sk_buff *next = NULL;
4564
4565 if (!skb_queue_is_last(list, skb))
4566 next = skb_queue_next(list, skb);
4567
4568 __skb_unlink(skb, list);
4569 __kfree_skb(skb);
4570 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
4571
4572 return next;
4573}
4574
4575
4576
4577
4578
4579
4580
4581
4582
/*
 * Collapse the buffers of @list that lie between @head (inclusive) and
 * @tail (exclusive) and cover the sequence range [@start, @end).
 *
 * Phase 1 walks forward from @head, frees buffers that end at or before
 * @start, and stops at the first buffer that is worth repacking.
 * Phase 2 copies the remaining payload into freshly allocated buffers that
 * are inserted in front of the old ones; each old buffer is freed as soon
 * as its payload has been consumed.
 *
 * The function returns silently when there is nothing to do, when it
 * reaches a SYN/FIN segment, or when a GFP_ATOMIC allocation fails.
 */
4583static void
4584tcp_collapse(struct sock *sk, struct sk_buff_head *list,
4585 struct sk_buff *head, struct sk_buff *tail,
4586 u32 start, u32 end)
4587{
4588 struct sk_buff *skb, *n;
4589 bool end_of_skbs;
4590
4591
4592
 /* Phase 1: find the first buffer that should be collapsed. */
4593 skb = head;
4594restart:
4595 end_of_skbs = true;
4596 skb_queue_walk_from_safe(list, skb, n) {
4597 if (skb == tail)
4598 break;
4599
 /* Buffer ends at or before @start: free it and restart the walk. */
4600 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4601 skb = tcp_collapse_one(sk, skb, list);
4602 if (!skb)
4603 break;
4604 goto restart;
4605 }
4606
4607
4608
4609
4610
4611
 /*
  * Stop here if this is a plain data segment (no SYN, no FIN) that
  * either has tcp_win_from_space(truesize) larger than its length
  * or begins before @start.
  */
4612 if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
4613 (tcp_win_from_space(skb->truesize) > skb->len ||
4614 before(TCP_SKB_CB(skb)->seq, start))) {
4615 end_of_skbs = false;
4616 break;
4617 }
4618
 /* Also stop if the next buffer (when not @tail) is not contiguous. */
4619 if (!skb_queue_is_last(list, skb)) {
4620 struct sk_buff *next = skb_queue_next(list, skb);
4621 if (next != tail &&
4622 TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) {
4623 end_of_skbs = false;
4624 break;
4625 }
4626 }
4627
4628
 /* This buffer is kept as is; continue after its last byte. */
4629 start = TCP_SKB_CB(skb)->end_seq;
4630 }
 /* Nothing to repack, or the candidate carries SYN/FIN: done. */
4631 if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
4632 return;
4633
 /* Phase 2: repack [start, end) into new buffers. */
4634 while (before(start, end)) {
4635 struct sk_buff *nskb;
4636 unsigned int header = skb_headroom(skb);
4637 int copy = SKB_MAX_ORDER(header, 0);
4638
4639
4640 if (copy < 0)
4641 return;
 /* Never copy more than what is left of the requested range. */
4642 if (end - start < copy)
4643 copy = end - start;
4644 nskb = alloc_skb(copy + header, GFP_ATOMIC);
4645 if (!nskb)
4646 return;
4647
 /*
  * Give the new buffer the same header offsets, headroom contents
  * and control block as the buffer it replaces; it starts out empty
  * at sequence number @start.
  */
4648 skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
4649 skb_set_network_header(nskb, (skb_network_header(skb) -
4650 skb->head));
4651 skb_set_transport_header(nskb, (skb_transport_header(skb) -
4652 skb->head));
4653 skb_reserve(nskb, header);
4654 memcpy(nskb->head, skb->head, header);
4655 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
4656 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
4657 __skb_queue_before(list, skb, nskb);
4658 skb_set_owner_r(nskb, sk);
4659
4660
 /* Fill the new buffer from one or more old buffers. */
4661 while (copy > 0) {
4662 int offset = start - TCP_SKB_CB(skb)->seq;
4663 int size = TCP_SKB_CB(skb)->end_seq - start;
4664
4665 BUG_ON(offset < 0);
4666 if (size > 0) {
4667 size = min(copy, size);
4668 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
4669 BUG();
4670 TCP_SKB_CB(nskb)->end_seq += size;
4671 copy -= size;
4672 start += size;
4673 }
 /*
  * Old buffer fully consumed: free it and move to the next one.
  * Stop at the end of the list, at @tail, or at a SYN/FIN.
  */
4674 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4675 skb = tcp_collapse_one(sk, skb, list);
4676 if (!skb ||
4677 skb == tail ||
4678 tcp_hdr(skb)->syn ||
4679 tcp_hdr(skb)->fin)
4680 return;
4681 }
4682 }
4683 }
4684}
4685
4686
4687
4688
4689static void tcp_collapse_ofo_queue(struct sock *sk)
4690{
4691 struct tcp_sock *tp = tcp_sk(sk);
4692 struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
4693 struct sk_buff *head;
4694 u32 start, end;
4695
4696 if (skb == NULL)
4697 return;
4698
4699 start = TCP_SKB_CB(skb)->seq;
4700 end = TCP_SKB_CB(skb)->end_seq;
4701 head = skb;
4702
4703 for (;;) {
4704 struct sk_buff *next = NULL;
4705
4706 if (!skb_queue_is_last(&tp->out_of_order_queue, skb))
4707 next = skb_queue_next(&tp->out_of_order_queue, skb);
4708 skb = next;
4709
4710
4711
4712 if (!skb ||
4713 after(TCP_SKB_CB(skb)->seq, end) ||
4714 before(TCP_SKB_CB(skb)->end_seq, start)) {
4715 tcp_collapse(sk, &tp->out_of_order_queue,
4716 head, skb, start, end);
4717 head = skb;
4718 if (!skb)
4719 break;
4720
4721 start = TCP_SKB_CB(skb)->seq;
4722 end = TCP_SKB_CB(skb)->end_seq;
4723 } else {
4724 if (before(TCP_SKB_CB(skb)->seq, start))
4725 start = TCP_SKB_CB(skb)->seq;
4726 if (after(TCP_SKB_CB(skb)->end_seq, end))
4727 end = TCP_SKB_CB(skb)->end_seq;
4728 }
4729 }
4730}
4731
4732
4733
4734
4735
4736static int tcp_prune_ofo_queue(struct sock *sk)
4737{
4738 struct tcp_sock *tp = tcp_sk(sk);
4739 int res = 0;
4740
4741 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4742 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
4743 __skb_queue_purge(&tp->out_of_order_queue);
4744
4745
4746
4747
4748
4749
4750 if (tp->rx_opt.sack_ok)
4751 tcp_sack_reset(&tp->rx_opt);
4752 sk_mem_reclaim(sk);
4753 res = 1;
4754 }
4755 return res;
4756}
4757
4758
4759
4760
4761
4762
4763
4764
4765static int tcp_prune_queue(struct sock *sk)
4766{
4767 struct tcp_sock *tp = tcp_sk(sk);
4768
4769 SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
4770
4771 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED);
4772
4773 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
4774 tcp_clamp_window(sk);
4775 else if (tcp_memory_pressure)
4776 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
4777
4778 tcp_collapse_ofo_queue(sk);
4779 if (!skb_queue_empty(&sk->sk_receive_queue))
4780 tcp_collapse(sk, &sk->sk_receive_queue,
4781 skb_peek(&sk->sk_receive_queue),
4782 NULL,
4783 tp->copied_seq, tp->rcv_nxt);
4784 sk_mem_reclaim(sk);
4785
4786 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4787 return 0;
4788
4789
4790
4791
4792 tcp_prune_ofo_queue(sk);
4793
4794 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4795 return 0;
4796
4797
4798
4799
4800
4801 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED);
4802
4803
4804 tp->pred_flags = 0;
4805 return -1;
4806}
4807
4808
4809
4810
4811
4812void tcp_cwnd_application_limited(struct sock *sk)
4813{
4814 struct tcp_sock *tp = tcp_sk(sk);
4815
4816 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
4817 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
4818
4819 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
4820 u32 win_used = max(tp->snd_cwnd_used, init_win);
4821 if (win_used < tp->snd_cwnd) {
4822 tp->snd_ssthresh = tcp_current_ssthresh(sk);
4823 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
4824 }
4825 tp->snd_cwnd_used = 0;
4826 }
4827 tp->snd_cwnd_stamp = tcp_time_stamp;
4828}
4829
4830static int tcp_should_expand_sndbuf(struct sock *sk)
4831{
4832 struct tcp_sock *tp = tcp_sk(sk);
4833
4834
4835
4836
4837 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
4838 return 0;
4839
4840
4841 if (tcp_memory_pressure)
4842 return 0;
4843
4844
4845 if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
4846 return 0;
4847
4848
4849 if (tp->packets_out >= tp->snd_cwnd)
4850 return 0;
4851
4852 return 1;
4853}
4854
4855
4856
4857
4858
4859
4860
4861static void tcp_new_space(struct sock *sk)
4862{
4863 struct tcp_sock *tp = tcp_sk(sk);
4864
4865 if (tcp_should_expand_sndbuf(sk)) {
4866 int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
4867 MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
4868 int demanded = max_t(unsigned int, tp->snd_cwnd,
4869 tp->reordering + 1);
4870 sndmem *= 2 * demanded;
4871 if (sndmem > sk->sk_sndbuf)
4872 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
4873 tp->snd_cwnd_stamp = tcp_time_stamp;
4874 }
4875
4876 sk->sk_write_space(sk);
4877}
4878
4879static void tcp_check_space(struct sock *sk)
4880{
4881 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
4882 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
4883 if (sk->sk_socket &&
4884 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
4885 tcp_new_space(sk);
4886 }
4887}
4888
/*
 * Send-side follow-up after an incoming segment has been handled:
 * calls tcp_push_pending_frames() and then tcp_check_space() on @sk.
 */
static inline void tcp_data_snd_check(struct sock *sk)
{
	tcp_push_pending_frames(sk);

	/* Must come second: it looks at state the push may have changed. */
	/* NOTE(review): ordering rationale is assumed - confirm. */
	tcp_check_space(sk);
}
4894
4895
4896
4897
4898static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
4899{
4900 struct tcp_sock *tp = tcp_sk(sk);
4901
4902
4903 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
4904
4905
4906
4907 __tcp_select_window(sk) >= tp->rcv_wnd) ||
4908
4909 tcp_in_quickack_mode(sk) ||
4910
4911 (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
4912
4913 tcp_send_ack(sk);
4914 } else {
4915
4916 tcp_send_delayed_ack(sk);
4917 }
4918}
4919
/*
 * Run the ACK decision for @sk, but only if an ACK has been scheduled.
 * The out-of-order queue is taken into account (ofo_possible = 1).
 */
static inline void tcp_ack_snd_check(struct sock *sk)
{
	if (inet_csk_ack_scheduled(sk))
		__tcp_ack_snd_check(sk, 1);
}
4928
4929
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
4940{
4941 struct tcp_sock *tp = tcp_sk(sk);
4942 u32 ptr = ntohs(th->urg_ptr);
4943
4944 if (ptr && !sysctl_tcp_stdurg)
4945 ptr--;
4946 ptr += ntohl(th->seq);
4947
4948
4949 if (after(tp->copied_seq, ptr))
4950 return;
4951
4952
4953
4954
4955
4956
4957
4958
4959
4960
4961
4962 if (before(ptr, tp->rcv_nxt))
4963 return;
4964
4965
4966 if (tp->urg_data && !after(ptr, tp->urg_seq))
4967 return;
4968
4969
4970 sk_send_sigurg(sk);
4971
4972
4973
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
4988 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
4989 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
4990 tp->copied_seq++;
4991 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
4992 __skb_unlink(skb, &sk->sk_receive_queue);
4993 __kfree_skb(skb);
4994 }
4995 }
4996
4997 tp->urg_data = TCP_URG_NOTYET;
4998 tp->urg_seq = ptr;
4999
5000
5001 tp->pred_flags = 0;
5002}
5003
5004
5005static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
5006{
5007 struct tcp_sock *tp = tcp_sk(sk);
5008
5009
5010 if (th->urg)
5011 tcp_check_urg(sk, th);
5012
5013
5014 if (tp->urg_data == TCP_URG_NOTYET) {
5015 u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
5016 th->syn;
5017
5018
5019 if (ptr < skb->len) {
5020 u8 tmp;
5021 if (skb_copy_bits(skb, ptr, &tmp, 1))
5022 BUG();
5023 tp->urg_data = TCP_URG_VALID | tmp;
5024 if (!sock_flag(sk, SOCK_DEAD))
5025 sk->sk_data_ready(sk, 0);
5026 }
5027 }
5028}
5029
5030static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
5031{
5032 struct tcp_sock *tp = tcp_sk(sk);
5033 int chunk = skb->len - hlen;
5034 int err;
5035
5036 local_bh_enable();
5037 if (skb_csum_unnecessary(skb))
5038 err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
5039 else
5040 err = skb_copy_and_csum_datagram_iovec(skb, hlen,
5041 tp->ucopy.iov);
5042
5043 if (!err) {
5044 tp->ucopy.len -= chunk;
5045 tp->copied_seq += chunk;
5046 tcp_rcv_space_adjust(sk);
5047 }
5048
5049 local_bh_disable();
5050 return err;
5051}
5052
5053static __sum16 __tcp_checksum_complete_user(struct sock *sk,
5054 struct sk_buff *skb)
5055{
5056 __sum16 result;
5057
5058 if (sock_owned_by_user(sk)) {
5059 local_bh_enable();
5060 result = __tcp_checksum_complete(skb);
5061 local_bh_disable();
5062 } else {
5063 result = __tcp_checksum_complete(skb);
5064 }
5065 return result;
5066}
5067
/*
 * Verify the checksum of @skb if that is still necessary.
 *
 * Returns 0 when no verification is needed or the verification yields
 * zero, and 1 when __tcp_checksum_complete_user() returns non-zero.
 */
static inline int tcp_checksum_complete_user(struct sock *sk,
					     struct sk_buff *skb)
{
	if (skb_csum_unnecessary(skb))
		return 0;

	return __tcp_checksum_complete_user(sk, skb) != 0;
}
5074
5075#ifdef CONFIG_NET_DMA
5076static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
5077 int hlen)
5078{
5079 struct tcp_sock *tp = tcp_sk(sk);
5080 int chunk = skb->len - hlen;
5081 int dma_cookie;
5082 int copied_early = 0;
5083
5084 if (tp->ucopy.wakeup)
5085 return 0;
5086
5087 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
5088 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
5089
5090 if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
5091
5092 dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
5093 skb, hlen,
5094 tp->ucopy.iov, chunk,
5095 tp->ucopy.pinned_list);
5096
5097 if (dma_cookie < 0)
5098 goto out;
5099
5100 tp->ucopy.dma_cookie = dma_cookie;
5101 copied_early = 1;
5102
5103 tp->ucopy.len -= chunk;
5104 tp->copied_seq += chunk;
5105 tcp_rcv_space_adjust(sk);
5106
5107 if ((tp->ucopy.len == 0) ||
5108 (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) ||
5109 (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
5110 tp->ucopy.wakeup = 1;
5111 sk->sk_data_ready(sk, 0);
5112 }
5113 } else if (chunk > 0) {
5114 tp->ucopy.wakeup = 1;
5115 sk->sk_data_ready(sk, 0);
5116 }
5117out:
5118 return copied_early;
5119}
5120#endif
5121
5122
5123
5124
5125static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5126 struct tcphdr *th, int syn_inerr)
5127{
5128 u8 *hash_location;
5129 struct tcp_sock *tp = tcp_sk(sk);
5130
5131
5132 if (tcp_fast_parse_options(skb, th, tp, &hash_location) &&
5133 tp->rx_opt.saw_tstamp &&
5134 tcp_paws_discard(sk, skb)) {
5135 if (!th->rst) {
5136 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
5137 tcp_send_dupack(sk, skb);
5138 goto discard;
5139 }
5140
5141 }
5142
5143
5144 if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
5145
5146
5147
5148
5149
5150
5151 if (!th->rst)
5152 tcp_send_dupack(sk, skb);
5153 goto discard;
5154 }
5155
5156
5157 if (th->rst) {
5158 tcp_reset(sk);
5159 goto discard;
5160 }
5161
5162
5163
5164
5165 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
5166
5167
5168
5169
5170 if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
5171 if (syn_inerr)
5172 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5173 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
5174 tcp_reset(sk);
5175 return -1;
5176 }
5177
5178 return 1;
5179
5180discard:
5181 __kfree_skb(skb);
5182 return 0;
5183}
5184
5185
5186
5187
5188
5189
5190
5191
5192
5193
5194
5195
5196
5197
5198
5199
5200
5201
5202
5203
5204
5205
5206
5207
/*
 * Receive a segment @skb (TCP header @th, total length @len) on a socket
 * that is handled by the established-state receive path.
 *
 * A fast path is tried first.  It is taken when the header flag word masked
 * with TCP_HP_BITS equals tp->pred_flags, the segment starts exactly at
 * rcv_nxt and its ACK number is not beyond snd_nxt.  Everything else goes
 * through "slow_path": length/checksum check, tcp_validate_incoming(),
 * then ACK, urgent-data and data-queue processing.
 *
 * Returns 0 on every path visible here except when
 * tcp_validate_incoming() returns a value <= 0, in which case the negated
 * value (0 or 1) is returned.
 */
5208int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5209 struct tcphdr *th, unsigned len)
5210{
5211 struct tcp_sock *tp = tcp_sk(sk);
5212 int res;
5213
5214
5215
5216
5217
5218
5219
5220
5221
5222
5223
5224
5225
5226
5227
5228
5229 tp->rx_opt.saw_tstamp = 0;
5230
5231
5232
5233
5234
5235
5236
5237
5238
5239
 /* Fast path test: predicted flags, in-order data, sane ACK number. */
5240 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
5241 TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
5242 !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
5243 int tcp_header_len = tp->tcp_header_len;
5244
5245
5246
5247
5248
5249
5250
 /* Header length says the only option present is an aligned timestamp. */
5251 if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
5252
5253 if (!tcp_parse_aligned_timestamp(tp, th))
5254 goto slow_path;
5255
5256
 /* A timestamp older than ts_recent is left to the slow path. */
5257 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
5258 goto slow_path;
5259
5260
5261
5262
5263
5264
5265 }
5266
5267 if (len <= tcp_header_len) {
5268
 /* No payload: this is a pure ACK. */
5269 if (len == tcp_header_len) {
5270
5271
5272
5273
5274 if (tcp_header_len ==
5275 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5276 tp->rcv_nxt == tp->rcv_wup)
5277 tcp_store_ts_recent(tp);
5278
5279
5280
5281
5282 tcp_ack(sk, skb, 0);
5283 __kfree_skb(skb);
5284 tcp_data_snd_check(sk);
5285 return 0;
5286 } else {
 /* Segment is shorter than the expected header: count and drop. */
5287 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5288 goto discard;
5289 }
5290 } else {
 /* Segment carries payload. */
5291 int eaten = 0;
5292 int copied_early = 0;
5293
 /*
  * Reader has consumed everything so far and the payload fits into
  * the pending user request: try to copy straight to user memory.
  */
5294 if (tp->copied_seq == tp->rcv_nxt &&
5295 len - tcp_header_len <= tp->ucopy.len) {
5296#ifdef CONFIG_NET_DMA
5297 if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
5298 copied_early = 1;
5299 eaten = 1;
5300 }
5301#endif
 /* Direct copy only when running in the context of the reading task. */
5302 if (tp->ucopy.task == current &&
5303 sock_owned_by_user(sk) && !copied_early) {
5304 __set_current_state(TASK_RUNNING);
5305
5306 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
5307 eaten = 1;
5308 }
5309 if (eaten) {
5310
5311
5312
5313
5314 if (tcp_header_len ==
5315 (sizeof(struct tcphdr) +
5316 TCPOLEN_TSTAMP_ALIGNED) &&
5317 tp->rcv_nxt == tp->rcv_wup)
5318 tcp_store_ts_recent(tp);
5319
5320 tcp_rcv_rtt_measure_ts(sk, skb);
5321
5322 __skb_pull(skb, tcp_header_len);
5323 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
5324 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
5325 }
5326 if (copied_early)
5327 tcp_cleanup_rbuf(sk, skb->len);
5328 }
 /* Not copied to user memory: verify and queue on sk_receive_queue. */
5329 if (!eaten) {
5330 if (tcp_checksum_complete_user(sk, skb))
5331 goto csum_error;
5332
5333
5334
5335
5336
5337 if (tcp_header_len ==
5338 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5339 tp->rcv_nxt == tp->rcv_wup)
5340 tcp_store_ts_recent(tp);
5341
5342 tcp_rcv_rtt_measure_ts(sk, skb);
5343
 /* Not enough forward-allocated memory: continue on the slow path. */
5344 if ((int)skb->truesize > sk->sk_forward_alloc)
5345 goto step5;
5346
5347 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
5348
5349
5350 __skb_pull(skb, tcp_header_len);
5351 __skb_queue_tail(&sk->sk_receive_queue, skb);
5352 skb_set_owner_r(skb, sk);
5353 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
5354 }
5355
5356 tcp_event_data_recv(sk, skb);
5357
 /* Segment acknowledges something new: process the ACK. */
5358 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
5359
5360 tcp_ack(sk, skb, FLAG_DATA);
5361 tcp_data_snd_check(sk);
5362 if (!inet_csk_ack_scheduled(sk))
5363 goto no_ack;
5364 }
5365
5366 if (!copied_early || tp->rcv_nxt != tp->rcv_wup)
5367 __tcp_ack_snd_check(sk, 0);
5368no_ack:
 /*
  * Dispose of the buffer: park it on sk_async_wait_queue when a DMA copy
  * was issued, free it when copied to user memory, otherwise notify the
  * reader that data is queued.
  */
5369#ifdef CONFIG_NET_DMA
5370 if (copied_early)
5371 __skb_queue_tail(&sk->sk_async_wait_queue, skb);
5372 else
5373#endif
5374 if (eaten)
5375 __kfree_skb(skb);
5376 else
5377 sk->sk_data_ready(sk, 0);
5378 return 0;
5379 }
5380 }
5381
5382slow_path:
 /* Reject segments shorter than their data offset or with a bad checksum. */
5383 if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
5384 goto csum_error;
5385
5386
5387
5388
5389
 /* res: 1 = acceptable, 0 = dropped and freed, -1 = reset (see callee). */
5390 res = tcp_validate_incoming(sk, skb, th, 1);
5391 if (res <= 0)
5392 return -res;
5393
5394step5:
 /* tcp_ack() returning a negative value makes the segment be discarded. */
5395 if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0)
5396 goto discard;
5397
5398 tcp_rcv_rtt_measure_ts(sk, skb);
5399
5400
5401 tcp_urg(sk, skb, th);
5402
5403
 /* Hands the buffer over to tcp_data_queue(); it is not freed here. */
5404 tcp_data_queue(sk, skb);
5405
5406 tcp_data_snd_check(sk);
5407 tcp_ack_snd_check(sk);
5408 return 0;
5409
5410csum_error:
5411 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5412
5413discard:
5414 __kfree_skb(skb);
5415 return 0;
5416}
5417
/*
 * Process a segment @skb (TCP header @th) received while @sk is in
 * SYN-SENT state.
 *
 * Return values as used by tcp_rcv_state_process():
 *    0  segment consumed and freed here;
 *   -1  connection established and an ACK was sent immediately; the
 *       caller still has to finish processing and free @skb;
 *    1  unacceptable ACK; the parsed options are undone and @skb is not
 *       freed here.
 *
 * Note that the "discard" label lives inside the delayed-ACK branch near
 * the end of the th->ack block and is jumped to from several other places.
 */
5418static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5419 struct tcphdr *th, unsigned len)
5420{
5421 u8 *hash_location;
5422 struct inet_connection_sock *icsk = inet_csk(sk);
5423 struct tcp_sock *tp = tcp_sk(sk);
5424 struct tcp_cookie_values *cvp = tp->cookie_values;
 /* Saved so that the undo paths can restore it after option parsing. */
5425 int saved_clamp = tp->rx_opt.mss_clamp;
5426
5427 tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0);
5428
5429 if (th->ack) {
5430
5431
5432
5433
5434
5435
5436
5437
5438
5439
5440
 /* The ACK must acknowledge exactly what was sent so far. */
5441 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
5442 goto reset_and_undo;
5443
 /* An echoed timestamp must lie between retrans_stamp and now. */
5444 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
5445 !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
5446 tcp_time_stamp)) {
5447 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED);
5448 goto reset_and_undo;
5449 }
5450
5451
5452
5453
5454
5455
5456
5457
5458
 /* Acceptable ACK carrying RST: reset the connection. */
5459 if (th->rst) {
5460 tcp_reset(sk);
5461 goto discard;
5462 }
5463
5464
5465
5466
5467
5468
5469
5470
 /* An ACK without SYN is dropped and the parsed options are undone. */
5471 if (!th->syn)
5472 goto discard_and_undo;
5473
5474
5475
5476
5477
5478
5479
5480
 /* SYN+ACK accepted: set up the connection state from this segment. */
5481 TCP_ECN_rcv_synack(tp, th);
5482
5483 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
5484 tcp_ack(sk, skb, FLAG_SLOWPATH);
5485
5486
5487
5488
 /* The SYN occupies one sequence number. */
5489 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5490 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5491
5492
5493
5494
 /* The window of this segment is taken unscaled. */
5495 tp->snd_wnd = ntohs(th->window);
5496 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5497
 /* Window scaling not agreed on: no scaling, clamp window to 16 bits. */
5498 if (!tp->rx_opt.wscale_ok) {
5499 tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
5500 tp->window_clamp = min(tp->window_clamp, 65535U);
5501 }
5502
 /* Timestamps in use: account for the option in header length and MSS. */
5503 if (tp->rx_opt.saw_tstamp) {
5504 tp->rx_opt.tstamp_ok = 1;
5505 tp->tcp_header_len =
5506 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
5507 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5508 tcp_store_ts_recent(tp);
5509 } else {
5510 tp->tcp_header_len = sizeof(struct tcphdr);
5511 }
5512
5513 if (tcp_is_sack(tp) && sysctl_tcp_fack)
5514 tcp_enable_fack(tp);
5515
5516 tcp_mtup_init(sk);
5517 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
5518 tcp_initialize_rcv_mss(sk);
5519
5520
5521
5522
5523 tp->copied_seq = tp->rcv_nxt;
5524
 /*
  * Cookie option received: append the peer's cookie after our own
  * cookie_desired bytes in cookie_pair, provided the pair fits.
  */
5525 if (cvp != NULL &&
5526 cvp->cookie_pair_size > 0 &&
5527 tp->rx_opt.cookie_plus > 0) {
5528 int cookie_size = tp->rx_opt.cookie_plus
5529 - TCPOLEN_COOKIE_BASE;
5530 int cookie_pair_size = cookie_size
5531 + cvp->cookie_desired;
5532
5533
5534
5535
5536
5537
5538
5539
5540
5541
5542 if (sizeof(cvp->cookie_pair) >= cookie_pair_size) {
5543 memcpy(&cvp->cookie_pair[cvp->cookie_desired],
5544 hash_location, cookie_size);
5545 cvp->cookie_pair_size = cookie_pair_size;
5546 }
5547 }
5548
 /* Barrier before the state change becomes visible. */
5549 smp_mb();
5550 tcp_set_state(sk, TCP_ESTABLISHED);
5551
5552 security_inet_conn_established(sk, skb);
5553
5554
5555 icsk->icsk_af_ops->rebuild_header(sk);
5556
5557 tcp_init_metrics(sk);
5558
5559 tcp_init_congestion_control(sk);
5560
5561
5562
5563
5564 tp->lsndtime = tcp_time_stamp;
5565
5566 tcp_init_buffer_space(sk);
5567
5568 if (sock_flag(sk, SOCK_KEEPOPEN))
5569 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
5570
 /* The fast path is only switched on when no send window scaling is used. */
5571 if (!tp->rx_opt.snd_wscale)
5572 __tcp_fast_path_on(tp, tp->snd_wnd);
5573 else
5574 tp->pred_flags = 0;
5575
5576 if (!sock_flag(sk, SOCK_DEAD)) {
5577 sk->sk_state_change(sk);
5578 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
5579 }
5580
 /*
  * Delay the final ACK when a write is pending, accept is deferred or
  * the socket is in pingpong mode; otherwise send the ACK right away.
  */
5581 if (sk->sk_write_pending ||
5582 icsk->icsk_accept_queue.rskq_defer_accept ||
5583 icsk->icsk_ack.pingpong) {
5584
5585
5586
5587
5588
5589
5590
5591 inet_csk_schedule_ack(sk);
5592 icsk->icsk_ack.lrcvtime = tcp_time_stamp;
5593 icsk->icsk_ack.ato = TCP_ATO_MIN;
5594 tcp_incr_quickack(sk);
5595 tcp_enter_quickack_mode(sk);
5596 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
5597 TCP_DELACK_MAX, TCP_RTO_MAX);
5598
 /* Shared exit: free the segment and report it as consumed. */
5599discard:
5600 __kfree_skb(skb);
5601 return 0;
5602 } else {
5603 tcp_send_ack(sk);
5604 }
 /* -1: the caller continues processing and frees the segment. */
5605 return -1;
5606 }
5607
5608
5609
 /* No ACK bit from here on. A bare RST is dropped. */
5610 if (th->rst) {
5611
5612
5613
5614
5615
5616
5617 goto discard_and_undo;
5618 }
5619
5620
 /* PAWS check against a previously recorded timestamp. */
5621 if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
5622 tcp_paws_reject(&tp->rx_opt, 0))
5623 goto discard_and_undo;
5624
 /* SYN without ACK: move to SYN-RECV and answer with a SYN-ACK. */
5625 if (th->syn) {
5626
5627
5628
5629
5630 tcp_set_state(sk, TCP_SYN_RECV);
5631
5632 if (tp->rx_opt.saw_tstamp) {
5633 tp->rx_opt.tstamp_ok = 1;
5634 tcp_store_ts_recent(tp);
5635 tp->tcp_header_len =
5636 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
5637 } else {
5638 tp->tcp_header_len = sizeof(struct tcphdr);
5639 }
5640
5641 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5642 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5643
5644
5645
5646
5647 tp->snd_wnd = ntohs(th->window);
5648 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
5649 tp->max_window = tp->snd_wnd;
5650
5651 TCP_ECN_rcv_syn(tp, th);
5652
5653 tcp_mtup_init(sk);
5654 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
5655 tcp_initialize_rcv_mss(sk);
5656
5657 tcp_send_synack(sk);
 /* The "#if 0" alternative (return -1) is compiled out; discard is used. */
5658#if 0
5659
5660
5661
5662
5663
5664
5665
5666
5667
5668 return -1;
5669#else
5670 goto discard;
5671#endif
5672 }
5673
5674
5675
5676
 /* Drop the segment and forget the options parsed from it. */
5677discard_and_undo:
5678 tcp_clear_options(&tp->rx_opt);
5679 tp->rx_opt.mss_clamp = saved_clamp;
5680 goto discard;
5681
 /* Forget the parsed options and return 1 without freeing the segment. */
5682reset_and_undo:
5683 tcp_clear_options(&tp->rx_opt);
5684 tp->rx_opt.mss_clamp = saved_clamp;
5685 return 1;
5686}
5687
5688
5689
5690
5691
5692
5693
5694
/*
 * Per-state processing of an incoming segment @skb (TCP header @th, length
 * @len) for socket @sk.  CLOSE, LISTEN and SYN-SENT are handled in the
 * first switch; all other states share the validation, ACK, urgent-data
 * and data-queue steps that follow.
 *
 * Returns 0 when the segment has been consumed (freed or queued).
 * Returns 1 on the error paths visible below (ACK in LISTEN, failed
 * conn_request, unacceptable ACK in SYN-RECV, data after close);
 * NOTE(review): presumably this asks the caller to send a reset - confirm
 * against the callers.
 */
5695int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5696 struct tcphdr *th, unsigned len)
5697{
5698 struct tcp_sock *tp = tcp_sk(sk);
5699 struct inet_connection_sock *icsk = inet_csk(sk);
 /* Set to 1 once the segment has been handed to tcp_data_queue(). */
5700 int queued = 0;
5701 int res;
5702
5703 tp->rx_opt.saw_tstamp = 0;
5704
5705 switch (sk->sk_state) {
5706 case TCP_CLOSE:
5707 goto discard;
5708
5709 case TCP_LISTEN:
5710 if (th->ack)
5711 return 1;
5712
5713 if (th->rst)
5714 goto discard;
5715
 /* SYN on a listening socket: hand over to the address family code. */
5716 if (th->syn) {
5717 if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
5718 return 1;
5719
5720
5721
5722
5723
5724
5725
5726
5727
5728
5729
5730
5731
5732
5733
5734
5735
5736
 /* Note: this path uses kfree_skb(), the others use __kfree_skb(). */
5737 kfree_skb(skb);
5738 return 0;
5739 }
5740 goto discard;
5741
5742 case TCP_SYN_SENT:
5743 queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
5744 if (queued >= 0)
5745 return queued;
5746
5747
 /* Negative result: finish processing here and free the segment. */
5748 tcp_urg(sk, skb, th);
5749 __kfree_skb(skb);
5750 tcp_data_snd_check(sk);
5751 return 0;
5752 }
5753
 /* res: 1 = acceptable, 0 = dropped and freed, -1 = reset (see callee). */
5754 res = tcp_validate_incoming(sk, skb, th, 0);
5755 if (res <= 0)
5756 return -res;
5757
5758
 /* Segments without the ACK bit are discarded (else branch below). */
5759 if (th->ack) {
5760 int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0;
5761
5762 switch (sk->sk_state) {
5763 case TCP_SYN_RECV:
 /* Acceptable ACK completes the handshake; otherwise return 1. */
5764 if (acceptable) {
5765 tp->copied_seq = tp->rcv_nxt;
5766 smp_mb();
5767 tcp_set_state(sk, TCP_ESTABLISHED);
5768 sk->sk_state_change(sk);
5769
5770
5771
5772
5773
5774
5775 if (sk->sk_socket)
5776 sk_wake_async(sk,
5777 SOCK_WAKE_IO, POLL_OUT);
5778
5779 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
 /* Here the advertised window is scaled by snd_wscale. */
5780 tp->snd_wnd = ntohs(th->window) <<
5781 tp->rx_opt.snd_wscale;
5782 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5783
5784
5785
5786
5787
5788 tcp_ack_update_rtt(sk, 0, 0);
5789
5790 if (tp->rx_opt.tstamp_ok)
5791 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5792
5793
5794
5795
5796 icsk->icsk_af_ops->rebuild_header(sk);
5797
5798 tcp_init_metrics(sk);
5799
5800 tcp_init_congestion_control(sk);
5801
5802
5803
5804
5805 tp->lsndtime = tcp_time_stamp;
5806
5807 tcp_mtup_init(sk);
5808 tcp_initialize_rcv_mss(sk);
5809 tcp_init_buffer_space(sk);
5810 tcp_fast_path_on(tp);
5811 } else {
5812 return 1;
5813 }
5814 break;
5815
5816 case TCP_FIN_WAIT1:
 /* Everything sent has been acknowledged: move to FIN-WAIT-2. */
5817 if (tp->snd_una == tp->write_seq) {
5818 tcp_set_state(sk, TCP_FIN_WAIT2);
5819 sk->sk_shutdown |= SEND_SHUTDOWN;
5820 dst_confirm(sk->sk_dst_cache);
5821
5822 if (!sock_flag(sk, SOCK_DEAD))
5823
5824 sk->sk_state_change(sk);
5825 else {
 /* Socket is dead (SOCK_DEAD): decide how to wind it down. */
5826 int tmo;
5827
 /* Negative linger2, or new data on a dead socket: abort now. */
5828 if (tp->linger2 < 0 ||
5829 (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
5830 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
5831 tcp_done(sk);
5832 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
5833 return 1;
5834 }
5835
 /*
  * Long timeouts and segments that still need processing (FIN set or
  * socket owned by the user) use the keepalive timer; otherwise the
  * socket enters time-wait handling for FIN-WAIT-2 right away.
  */
5836 tmo = tcp_fin_time(sk);
5837 if (tmo > TCP_TIMEWAIT_LEN) {
5838 inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
5839 } else if (th->fin || sock_owned_by_user(sk)) {
5840
5841
5842
5843
5844
5845
5846 inet_csk_reset_keepalive_timer(sk, tmo);
5847 } else {
5848 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
5849 goto discard;
5850 }
5851 }
5852 }
5853 break;
5854
5855 case TCP_CLOSING:
 /* Our FIN has been acknowledged: enter TIME-WAIT. */
5856 if (tp->snd_una == tp->write_seq) {
5857 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
5858 goto discard;
5859 }
5860 break;
5861
5862 case TCP_LAST_ACK:
 /* Our FIN has been acknowledged: the connection is finished. */
5863 if (tp->snd_una == tp->write_seq) {
5864 tcp_update_metrics(sk);
5865 tcp_done(sk);
5866 goto discard;
5867 }
5868 break;
5869 }
5870 } else
5871 goto discard;
5872
5873
5874 tcp_urg(sk, skb, th);
5875
5876
 /* Segment text processing; note the intentional fall-throughs. */
5877 switch (sk->sk_state) {
5878 case TCP_CLOSE_WAIT:
5879 case TCP_CLOSING:
5880 case TCP_LAST_ACK:
 /* Segments starting at or after rcv_nxt are not queued in these states. */
5881 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
5882 break;
 /* fall through */
5883 case TCP_FIN_WAIT1:
5884 case TCP_FIN_WAIT2:
5885
5886
5887
5888
 /* New data after the receive side was shut down: reset. */
5889 if (sk->sk_shutdown & RCV_SHUTDOWN) {
5890 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
5891 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
5892 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
5893 tcp_reset(sk);
5894 return 1;
5895 }
5896 }
5897
 /* fall through */
5898 case TCP_ESTABLISHED:
5899 tcp_data_queue(sk, skb);
5900 queued = 1;
5901 break;
5902 }
5903
5904
 /* The state may have changed to CLOSE while processing the segment. */
5905 if (sk->sk_state != TCP_CLOSE) {
5906 tcp_data_snd_check(sk);
5907 tcp_ack_snd_check(sk);
5908 }
5909
 /* The "discard" label sits inside this if and is jumped to from above. */
5910 if (!queued) {
5911discard:
5912 __kfree_skb(skb);
5913 }
5914 return 0;
5915}
5916
/*
 * Symbols of this file exported with EXPORT_SYMBOL(): three sysctl
 * variables and the option-parsing / receive entry points.  The MD5
 * option parser is only exported when CONFIG_TCP_MD5SIG is enabled.
 */
5917EXPORT_SYMBOL(sysctl_tcp_ecn);
5918EXPORT_SYMBOL(sysctl_tcp_reordering);
5919EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
5920EXPORT_SYMBOL(tcp_parse_options);
5921#ifdef CONFIG_TCP_MD5SIG
5922EXPORT_SYMBOL(tcp_parse_md5sig_option);
5923#endif
5924EXPORT_SYMBOL(tcp_rcv_established);
5925EXPORT_SYMBOL(tcp_rcv_state_process);
5926EXPORT_SYMBOL(tcp_initialize_rcv_mss);
5927