1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64#define pr_fmt(fmt) "TCP: " fmt
65
66#include <linux/mm.h>
67#include <linux/slab.h>
68#include <linux/module.h>
69#include <linux/sysctl.h>
70#include <linux/kernel.h>
71#include <net/dst.h>
72#include <net/tcp.h>
73#include <net/inet_common.h>
74#include <linux/ipsec.h>
75#include <asm/unaligned.h>
76#include <net/netdma.h>
77
/*
 * Global TCP tunables.  The initializers are the compiled-in defaults.
 * NOTE(review): the sysctl_ prefix suggests these are adjustable at run time
 * through sysctl tables defined elsewhere; that wiring is not visible here.
 */
int sysctl_tcp_timestamps __read_mostly = 1;
int sysctl_tcp_window_scaling __read_mostly = 1;
int sysctl_tcp_sack __read_mostly = 1;
int sysctl_tcp_fack __read_mostly = 1;
int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
EXPORT_SYMBOL(sysctl_tcp_reordering);
int sysctl_tcp_ecn __read_mostly = 2;
EXPORT_SYMBOL(sysctl_tcp_ecn);
int sysctl_tcp_dsack __read_mostly = 1;
/* Used as a shift count: tcp_init_buffer_space() holds back maxwin >> app_win. */
int sysctl_tcp_app_win __read_mostly = 31;
int sysctl_tcp_adv_win_scale __read_mostly = 1;
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);

int sysctl_tcp_challenge_ack_limit = 100;

int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
int sysctl_tcp_frto __read_mostly = 2;
int sysctl_tcp_frto_response __read_mostly;

int sysctl_tcp_thin_dupack __read_mostly;

/* Non-zero lets tcp_rcv_space_adjust() grow sk_rcvbuf and window_clamp. */
int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
int sysctl_tcp_abc __read_mostly;
int sysctl_tcp_early_retrans __read_mostly = 2;
105
/*
 * Bit flags accumulated while processing an incoming ACK.
 * Only the flags set in the visible part of this file are described with
 * certainty; the remaining descriptions are inferred from the names and
 * should be confirmed against their users.
 */
#define FLAG_DATA 0x01
#define FLAG_WIN_UPDATE 0x02
#define FLAG_DATA_ACKED 0x04
#define FLAG_RETRANS_DATA_ACKED 0x08
#define FLAG_SYN_ACKED 0x10
#define FLAG_DATA_SACKED 0x20 /* set by tcp_sacktag_one() when a segment becomes newly SACKed */
#define FLAG_ECE 0x40
#define FLAG_SLOWPATH 0x100
#define FLAG_ONLY_ORIG_SACKED 0x200 /* set by tcp_sacktag_one() for never-retransmitted data at or below frto_highmark */
#define FLAG_SND_UNA_ADVANCED 0x400
#define FLAG_DSACKING_ACK 0x800 /* set by tcp_sacktag_write_queue() when tcp_check_dsack() reports a D-SACK */
#define FLAG_NONHEAD_RETRANS_ACKED 0x1000
#define FLAG_SACK_RENEGING 0x2000

/* Convenience unions of the single-bit flags above. */
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)
#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
#define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)

/* Masks applied to the TCP header flag word (see tcp_measure_rcv_mss() for TCP_REMNANT). */
#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
128
129
130
131
132static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
133{
134 struct inet_connection_sock *icsk = inet_csk(sk);
135 const unsigned int lss = icsk->icsk_ack.last_seg_size;
136 unsigned int len;
137
138 icsk->icsk_ack.last_seg_size = 0;
139
140
141
142
143 len = skb_shinfo(skb)->gso_size ? : skb->len;
144 if (len >= icsk->icsk_ack.rcv_mss) {
145 icsk->icsk_ack.rcv_mss = len;
146 } else {
147
148
149
150
151
152 len += skb->data - skb_transport_header(skb);
153 if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
154
155
156
157
158
159 (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
160 !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
161
162
163
164
165 len -= tcp_sk(sk)->tcp_header_len;
166 icsk->icsk_ack.last_seg_size = len;
167 if (len == lss) {
168 icsk->icsk_ack.rcv_mss = len;
169 return;
170 }
171 }
172 if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
173 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
174 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
175 }
176}
177
178static void tcp_incr_quickack(struct sock *sk)
179{
180 struct inet_connection_sock *icsk = inet_csk(sk);
181 unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
182
183 if (quickacks == 0)
184 quickacks = 2;
185 if (quickacks > icsk->icsk_ack.quick)
186 icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
187}
188
189static void tcp_enter_quickack_mode(struct sock *sk)
190{
191 struct inet_connection_sock *icsk = inet_csk(sk);
192 tcp_incr_quickack(sk);
193 icsk->icsk_ack.pingpong = 0;
194 icsk->icsk_ack.ato = TCP_ATO_MIN;
195}
196
197
198
199
200
201static inline bool tcp_in_quickack_mode(const struct sock *sk)
202{
203 const struct inet_connection_sock *icsk = inet_csk(sk);
204
205 return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
206}
207
208static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
209{
210 if (tp->ecn_flags & TCP_ECN_OK)
211 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
212}
213
214static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
215{
216 if (tcp_hdr(skb)->cwr)
217 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
218}
219
220static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
221{
222 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
223}
224
225static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
226{
227 if (!(tp->ecn_flags & TCP_ECN_OK))
228 return;
229
230 switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
231 case INET_ECN_NOT_ECT:
232
233
234
235
236 if (tp->ecn_flags & TCP_ECN_SEEN)
237 tcp_enter_quickack_mode((struct sock *)tp);
238 break;
239 case INET_ECN_CE:
240 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
241
242 default:
243 tp->ecn_flags |= TCP_ECN_SEEN;
244 }
245}
246
247static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
248{
249 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
250 tp->ecn_flags &= ~TCP_ECN_OK;
251}
252
253static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
254{
255 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
256 tp->ecn_flags &= ~TCP_ECN_OK;
257}
258
259static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
260{
261 if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
262 return true;
263 return false;
264}
265
266
267
268
269
270
271static void tcp_fixup_sndbuf(struct sock *sk)
272{
273 int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
274
275 sndmem *= TCP_INIT_CWND;
276 if (sk->sk_sndbuf < sndmem)
277 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
278}
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
307{
308 struct tcp_sock *tp = tcp_sk(sk);
309
310 int truesize = tcp_win_from_space(skb->truesize) >> 1;
311 int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
312
313 while (tp->rcv_ssthresh <= window) {
314 if (truesize <= skb->len)
315 return 2 * inet_csk(sk)->icsk_ack.rcv_mss;
316
317 truesize >>= 1;
318 window >>= 1;
319 }
320 return 0;
321}
322
323static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
324{
325 struct tcp_sock *tp = tcp_sk(sk);
326
327
328 if (tp->rcv_ssthresh < tp->window_clamp &&
329 (int)tp->rcv_ssthresh < tcp_space(sk) &&
330 !sk_under_memory_pressure(sk)) {
331 int incr;
332
333
334
335
336 if (tcp_win_from_space(skb->truesize) <= skb->len)
337 incr = 2 * tp->advmss;
338 else
339 incr = __tcp_grow_window(sk, skb);
340
341 if (incr) {
342 incr = max_t(int, incr, 2 * skb->len);
343 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
344 tp->window_clamp);
345 inet_csk(sk)->icsk_ack.quick |= 1;
346 }
347 }
348}
349
350
351
352static void tcp_fixup_rcvbuf(struct sock *sk)
353{
354 u32 mss = tcp_sk(sk)->advmss;
355 u32 icwnd = TCP_DEFAULT_INIT_RCVWND;
356 int rcvmem;
357
358
359
360
361 if (mss > 1460)
362 icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
363
364 rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER);
365 while (tcp_win_from_space(rcvmem) < mss)
366 rcvmem += 128;
367
368 rcvmem *= icwnd;
369
370 if (sk->sk_rcvbuf < rcvmem)
371 sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
372}
373
374
375
376
377static void tcp_init_buffer_space(struct sock *sk)
378{
379 struct tcp_sock *tp = tcp_sk(sk);
380 int maxwin;
381
382 if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
383 tcp_fixup_rcvbuf(sk);
384 if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
385 tcp_fixup_sndbuf(sk);
386
387 tp->rcvq_space.space = tp->rcv_wnd;
388
389 maxwin = tcp_full_space(sk);
390
391 if (tp->window_clamp >= maxwin) {
392 tp->window_clamp = maxwin;
393
394 if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
395 tp->window_clamp = max(maxwin -
396 (maxwin >> sysctl_tcp_app_win),
397 4 * tp->advmss);
398 }
399
400
401 if (sysctl_tcp_app_win &&
402 tp->window_clamp > 2 * tp->advmss &&
403 tp->window_clamp + tp->advmss > maxwin)
404 tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
405
406 tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
407 tp->snd_cwnd_stamp = tcp_time_stamp;
408}
409
410
411static void tcp_clamp_window(struct sock *sk)
412{
413 struct tcp_sock *tp = tcp_sk(sk);
414 struct inet_connection_sock *icsk = inet_csk(sk);
415
416 icsk->icsk_ack.quick = 0;
417
418 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
419 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
420 !sk_under_memory_pressure(sk) &&
421 sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
422 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
423 sysctl_tcp_rmem[2]);
424 }
425 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
426 tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
427}
428
429
430
431
432
433
434
435
436void tcp_initialize_rcv_mss(struct sock *sk)
437{
438 const struct tcp_sock *tp = tcp_sk(sk);
439 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
440
441 hint = min(hint, tp->rcv_wnd / 2);
442 hint = min(hint, TCP_MSS_DEFAULT);
443 hint = max(hint, TCP_MIN_MSS);
444
445 inet_csk(sk)->icsk_ack.rcv_mss = hint;
446}
447EXPORT_SYMBOL(tcp_initialize_rcv_mss);
448
449
450
451
452
453
454
455
456
457
458
459
460static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
461{
462 u32 new_sample = tp->rcv_rtt_est.rtt;
463 long m = sample;
464
465 if (m == 0)
466 m = 1;
467
468 if (new_sample != 0) {
469
470
471
472
473
474
475
476
477
478
479 if (!win_dep) {
480 m -= (new_sample >> 3);
481 new_sample += m;
482 } else {
483 m <<= 3;
484 if (m < new_sample)
485 new_sample = m;
486 }
487 } else {
488
489 new_sample = m << 3;
490 }
491
492 if (tp->rcv_rtt_est.rtt != new_sample)
493 tp->rcv_rtt_est.rtt = new_sample;
494}
495
496static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
497{
498 if (tp->rcv_rtt_est.time == 0)
499 goto new_measure;
500 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
501 return;
502 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1);
503
504new_measure:
505 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
506 tp->rcv_rtt_est.time = tcp_time_stamp;
507}
508
509static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
510 const struct sk_buff *skb)
511{
512 struct tcp_sock *tp = tcp_sk(sk);
513 if (tp->rx_opt.rcv_tsecr &&
514 (TCP_SKB_CB(skb)->end_seq -
515 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
516 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
517}
518
519
520
521
522
523void tcp_rcv_space_adjust(struct sock *sk)
524{
525 struct tcp_sock *tp = tcp_sk(sk);
526 int time;
527 int space;
528
529 if (tp->rcvq_space.time == 0)
530 goto new_measure;
531
532 time = tcp_time_stamp - tp->rcvq_space.time;
533 if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
534 return;
535
536 space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
537
538 space = max(tp->rcvq_space.space, space);
539
540 if (tp->rcvq_space.space != space) {
541 int rcvmem;
542
543 tp->rcvq_space.space = space;
544
545 if (sysctl_tcp_moderate_rcvbuf &&
546 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
547 int new_clamp = space;
548
549
550
551
552
553 space /= tp->advmss;
554 if (!space)
555 space = 1;
556 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
557 while (tcp_win_from_space(rcvmem) < tp->advmss)
558 rcvmem += 128;
559 space *= rcvmem;
560 space = min(space, sysctl_tcp_rmem[2]);
561 if (space > sk->sk_rcvbuf) {
562 sk->sk_rcvbuf = space;
563
564
565 tp->window_clamp = new_clamp;
566 }
567 }
568 }
569
570new_measure:
571 tp->rcvq_space.seq = tp->copied_seq;
572 tp->rcvq_space.time = tcp_time_stamp;
573}
574
575
576
577
578
579
580
581
582
583
584
585static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
586{
587 struct tcp_sock *tp = tcp_sk(sk);
588 struct inet_connection_sock *icsk = inet_csk(sk);
589 u32 now;
590
591 inet_csk_schedule_ack(sk);
592
593 tcp_measure_rcv_mss(sk, skb);
594
595 tcp_rcv_rtt_measure(tp);
596
597 now = tcp_time_stamp;
598
599 if (!icsk->icsk_ack.ato) {
600
601
602
603 tcp_incr_quickack(sk);
604 icsk->icsk_ack.ato = TCP_ATO_MIN;
605 } else {
606 int m = now - icsk->icsk_ack.lrcvtime;
607
608 if (m <= TCP_ATO_MIN / 2) {
609
610 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
611 } else if (m < icsk->icsk_ack.ato) {
612 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
613 if (icsk->icsk_ack.ato > icsk->icsk_rto)
614 icsk->icsk_ack.ato = icsk->icsk_rto;
615 } else if (m > icsk->icsk_rto) {
616
617
618
619 tcp_incr_quickack(sk);
620 sk_mem_reclaim(sk);
621 }
622 }
623 icsk->icsk_ack.lrcvtime = now;
624
625 TCP_ECN_check_ce(tp, skb);
626
627 if (skb->len >= 128)
628 tcp_grow_window(sk, skb);
629}
630
631
632
633
634
635
636
637
638
639
/*
 * Fold one round-trip measurement @mrtt into the sender-side RTT state of
 * @sk: tp->srtt (kept scaled by 8), tp->mdev (kept scaled by 4),
 * tp->mdev_max, tp->rttvar and tp->rtt_seq.  A measurement of 0 is treated
 * as 1.  Does not compute the RTO itself.
 */
static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
{
	struct tcp_sock *tp = tcp_sk(sk);
	long m = mrtt;

	if (m == 0)
		m = 1;
	if (tp->srtt != 0) {
		m -= (tp->srtt >> 3);	/* m is now the error against the smoothed RTT */
		tp->srtt += m;		/* srtt = 7/8 srtt + 1/8 new sample */
		if (m < 0) {
			m = -m;		/* m is now abs(error) */
			m -= (tp->mdev >> 2);	/* m is now the deviation error */
			/*
			 * The sample was below srtt: if the deviation would
			 * still increase, apply only an eighth of the
			 * increase so a shrinking RTT does not inflate mdev.
			 */
			if (m > 0)
				m >>= 3;
		} else {
			m -= (tp->mdev >> 2);	/* m is now the deviation error */
		}
		tp->mdev += m;		/* mdev = 3/4 mdev + 1/4 new deviation */
		/* Track the peak deviation and let rttvar follow it upwards at once. */
		if (tp->mdev > tp->mdev_max) {
			tp->mdev_max = tp->mdev;
			if (tp->mdev_max > tp->rttvar)
				tp->rttvar = tp->mdev_max;
		}
		/*
		 * snd_una passed the recorded rtt_seq: decay rttvar a quarter
		 * of the way towards mdev_max, then start a new tracking
		 * period with mdev_max reset to the minimum RTO.
		 */
		if (after(tp->snd_una, tp->rtt_seq)) {
			if (tp->mdev_max < tp->rttvar)
				tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2;
			tp->rtt_seq = tp->snd_nxt;
			tp->mdev_max = tcp_rto_min(sk);
		}
	} else {
		/* First measurement: seed the estimator directly from it. */
		tp->srtt = m << 3;	/* scaled by 8 */
		tp->mdev = m << 1;	/* scaled by 4, i.e. a deviation of m/2 */
		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
		tp->rtt_seq = tp->snd_nxt;
	}
}
702
703
704
705
706void tcp_set_rto(struct sock *sk)
707{
708 const struct tcp_sock *tp = tcp_sk(sk);
709
710
711
712
713
714
715
716
717
718
719 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
720
721
722
723
724
725
726
727
728
729
730 tcp_bound_rto(sk);
731}
732
733__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
734{
735 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
736
737 if (!cwnd)
738 cwnd = TCP_INIT_CWND;
739 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
740}
741
742
743void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
744{
745 struct tcp_sock *tp = tcp_sk(sk);
746 const struct inet_connection_sock *icsk = inet_csk(sk);
747
748 tp->prior_ssthresh = 0;
749 tp->bytes_acked = 0;
750 if (icsk->icsk_ca_state < TCP_CA_CWR) {
751 tp->undo_marker = 0;
752 if (set_ssthresh)
753 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
754 tp->snd_cwnd = min(tp->snd_cwnd,
755 tcp_packets_in_flight(tp) + 1U);
756 tp->snd_cwnd_cnt = 0;
757 tp->high_seq = tp->snd_nxt;
758 tp->snd_cwnd_stamp = tcp_time_stamp;
759 TCP_ECN_queue_cwr(tp);
760
761 tcp_set_ca_state(sk, TCP_CA_CWR);
762 }
763}
764
765
766
767
768
769void tcp_disable_fack(struct tcp_sock *tp)
770{
771
772 if (tcp_is_fack(tp))
773 tp->lost_skb_hint = NULL;
774 tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
775}
776
777
778static void tcp_dsack_seen(struct tcp_sock *tp)
779{
780 tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
781}
782
783static void tcp_update_reordering(struct sock *sk, const int metric,
784 const int ts)
785{
786 struct tcp_sock *tp = tcp_sk(sk);
787 if (metric > tp->reordering) {
788 int mib_idx;
789
790 tp->reordering = min(TCP_MAX_REORDERING, metric);
791
792
793 if (ts)
794 mib_idx = LINUX_MIB_TCPTSREORDER;
795 else if (tcp_is_reno(tp))
796 mib_idx = LINUX_MIB_TCPRENOREORDER;
797 else if (tcp_is_fack(tp))
798 mib_idx = LINUX_MIB_TCPFACKREORDER;
799 else
800 mib_idx = LINUX_MIB_TCPSACKREORDER;
801
802 NET_INC_STATS_BH(sock_net(sk), mib_idx);
803#if FASTRETRANS_DEBUG > 1
804 pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
805 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
806 tp->reordering,
807 tp->fackets_out,
808 tp->sacked_out,
809 tp->undo_marker ? tp->undo_retrans : 0);
810#endif
811 tcp_disable_fack(tp);
812 }
813
814 if (metric > 0)
815 tcp_disable_early_retrans(tp);
816}
817
818
819static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
820{
821 if ((tp->retransmit_skb_hint == NULL) ||
822 before(TCP_SKB_CB(skb)->seq,
823 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
824 tp->retransmit_skb_hint = skb;
825
826 if (!tp->lost_out ||
827 after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high))
828 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
829}
830
831static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
832{
833 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
834 tcp_verify_retransmit_hint(tp, skb);
835
836 tp->lost_out += tcp_skb_pcount(skb);
837 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
838 }
839}
840
841static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
842 struct sk_buff *skb)
843{
844 tcp_verify_retransmit_hint(tp, skb);
845
846 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
847 tp->lost_out += tcp_skb_pcount(skb);
848 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
849 }
850}
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
947 u32 start_seq, u32 end_seq)
948{
949
950 if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
951 return false;
952
953
954 if (!before(start_seq, tp->snd_nxt))
955 return false;
956
957
958
959
960 if (after(start_seq, tp->snd_una))
961 return true;
962
963 if (!is_dsack || !tp->undo_marker)
964 return false;
965
966
967 if (after(end_seq, tp->snd_una))
968 return false;
969
970 if (!before(start_seq, tp->undo_marker))
971 return true;
972
973
974 if (!after(end_seq, tp->undo_marker))
975 return false;
976
977
978
979
980 return !before(start_seq, end_seq - tp->max_window);
981}
982
983
984
985
986
987
988
989
990
991
992static void tcp_mark_lost_retrans(struct sock *sk)
993{
994 const struct inet_connection_sock *icsk = inet_csk(sk);
995 struct tcp_sock *tp = tcp_sk(sk);
996 struct sk_buff *skb;
997 int cnt = 0;
998 u32 new_low_seq = tp->snd_nxt;
999 u32 received_upto = tcp_highest_sack_seq(tp);
1000
1001 if (!tcp_is_fack(tp) || !tp->retrans_out ||
1002 !after(received_upto, tp->lost_retrans_low) ||
1003 icsk->icsk_ca_state != TCP_CA_Recovery)
1004 return;
1005
1006 tcp_for_write_queue(skb, sk) {
1007 u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
1008
1009 if (skb == tcp_send_head(sk))
1010 break;
1011 if (cnt == tp->retrans_out)
1012 break;
1013 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1014 continue;
1015
1016 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
1017 continue;
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030 if (after(received_upto, ack_seq)) {
1031 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1032 tp->retrans_out -= tcp_skb_pcount(skb);
1033
1034 tcp_skb_mark_lost_uncond_verify(tp, skb);
1035 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
1036 } else {
1037 if (before(ack_seq, new_low_seq))
1038 new_low_seq = ack_seq;
1039 cnt += tcp_skb_pcount(skb);
1040 }
1041 }
1042
1043 if (tp->retrans_out)
1044 tp->lost_retrans_low = new_low_seq;
1045}
1046
1047static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1048 struct tcp_sack_block_wire *sp, int num_sacks,
1049 u32 prior_snd_una)
1050{
1051 struct tcp_sock *tp = tcp_sk(sk);
1052 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
1053 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
1054 bool dup_sack = false;
1055
1056 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1057 dup_sack = true;
1058 tcp_dsack_seen(tp);
1059 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
1060 } else if (num_sacks > 1) {
1061 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
1062 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
1063
1064 if (!after(end_seq_0, end_seq_1) &&
1065 !before(start_seq_0, start_seq_1)) {
1066 dup_sack = true;
1067 tcp_dsack_seen(tp);
1068 NET_INC_STATS_BH(sock_net(sk),
1069 LINUX_MIB_TCPDSACKOFORECV);
1070 }
1071 }
1072
1073
1074 if (dup_sack && tp->undo_marker && tp->undo_retrans &&
1075 !after(end_seq_0, prior_snd_una) &&
1076 after(end_seq_0, tp->undo_marker))
1077 tp->undo_retrans--;
1078
1079 return dup_sack;
1080}
1081
/* Scratch state threaded through the SACK tagging helpers for one ACK. */
struct tcp_sacktag_state {
	int reord;	/* lowest fack_count at which reordering was noticed; starts at packets_out */
	int fack_count;	/* running count of packets walked so far in the write queue */
	int flag;	/* FLAG_* bits collected while tagging */
};
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1097 u32 start_seq, u32 end_seq)
1098{
1099 int err;
1100 bool in_sack;
1101 unsigned int pkt_len;
1102 unsigned int mss;
1103
1104 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1105 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1106
1107 if (tcp_skb_pcount(skb) > 1 && !in_sack &&
1108 after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
1109 mss = tcp_skb_mss(skb);
1110 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1111
1112 if (!in_sack) {
1113 pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
1114 if (pkt_len < mss)
1115 pkt_len = mss;
1116 } else {
1117 pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
1118 if (pkt_len < mss)
1119 return -EINVAL;
1120 }
1121
1122
1123
1124
1125 if (pkt_len > mss) {
1126 unsigned int new_len = (pkt_len / mss) * mss;
1127 if (!in_sack && new_len < pkt_len) {
1128 new_len += mss;
1129 if (new_len > skb->len)
1130 return 0;
1131 }
1132 pkt_len = new_len;
1133 }
1134 err = tcp_fragment(sk, skb, pkt_len, mss);
1135 if (err < 0)
1136 return err;
1137 }
1138
1139 return in_sack;
1140}
1141
1142
/*
 * Apply a SACK (or, with @dup_sack, D-SACK) for the sequence range
 * [@start_seq, @end_seq) covering @pcount packets whose current scoreboard
 * bits are @sacked.  Updates the socket counters (lost_out, retrans_out,
 * sacked_out, fackets_out, lost_cnt_hint, undo_retrans) and @state, and
 * returns the new scoreboard bits; storing them is up to the caller.
 */
static u8 tcp_sacktag_one(struct sock *sk,
			  struct tcp_sacktag_state *state, u8 sacked,
			  u32 start_seq, u32 end_seq,
			  bool dup_sack, int pcount)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int fack_count = state->fack_count;

	/* D-SACK for data that was retransmitted at some point. */
	if (dup_sack && (sacked & TCPCB_RETRANS)) {
		if (tp->undo_marker && tp->undo_retrans &&
		    after(end_seq, tp->undo_marker))
			tp->undo_retrans--;
		if (sacked & TCPCB_SACKED_ACKED)
			state->reord = min(fack_count, state->reord);
	}

	/* Range already cumulatively acked: nothing further to do. */
	if (!after(end_seq, tp->snd_una))
		return sacked;

	if (!(sacked & TCPCB_SACKED_ACKED)) {
		if (sacked & TCPCB_SACKED_RETRANS) {
			/*
			 * A retransmission is outstanding.  Only when the
			 * data is also marked lost are both marks cleared
			 * and the counters adjusted.
			 */
			if (sacked & TCPCB_LOST) {
				sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
				tp->lost_out -= pcount;
				tp->retrans_out -= pcount;
			}
		} else {
			if (!(sacked & TCPCB_RETRANS)) {
				/*
				 * Never-retransmitted data SACKed below the
				 * highest SACK seen so far: record the
				 * reordering position.
				 */
				if (before(start_seq,
					   tcp_highest_sack_seq(tp)))
					state->reord = min(fack_count,
							   state->reord);

				/* Original data at or below frto_highmark. */
				if (!after(end_seq, tp->frto_highmark))
					state->flag |= FLAG_ONLY_ORIG_SACKED;
			}

			if (sacked & TCPCB_LOST) {
				sacked &= ~TCPCB_LOST;
				tp->lost_out -= pcount;
			}
		}

		sacked |= TCPCB_SACKED_ACKED;
		state->flag |= FLAG_DATA_SACKED;
		tp->sacked_out += pcount;

		fack_count += pcount;

		/* Without FACK, lost_cnt_hint counts SACKed packets before the hint. */
		if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
		    before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
			tp->lost_cnt_hint += pcount;

		if (fack_count > tp->fackets_out)
			tp->fackets_out = fack_count;
	}

	/*
	 * A D-SACK for data with a retransmission still accounted as
	 * outstanding: drop the retransmit mark and its count.
	 */
	if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
		sacked &= ~TCPCB_SACKED_RETRANS;
		tp->retrans_out -= pcount;
	}

	return sacked;
}
1222
1223
1224
1225
1226static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1227 struct tcp_sacktag_state *state,
1228 unsigned int pcount, int shifted, int mss,
1229 bool dup_sack)
1230{
1231 struct tcp_sock *tp = tcp_sk(sk);
1232 struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
1233 u32 start_seq = TCP_SKB_CB(skb)->seq;
1234 u32 end_seq = start_seq + shifted;
1235
1236 BUG_ON(!pcount);
1237
1238
1239
1240
1241
1242
1243
1244 tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
1245 start_seq, end_seq, dup_sack, pcount);
1246
1247 if (skb == tp->lost_skb_hint)
1248 tp->lost_cnt_hint += pcount;
1249
1250 TCP_SKB_CB(prev)->end_seq += shifted;
1251 TCP_SKB_CB(skb)->seq += shifted;
1252
1253 skb_shinfo(prev)->gso_segs += pcount;
1254 BUG_ON(skb_shinfo(skb)->gso_segs < pcount);
1255 skb_shinfo(skb)->gso_segs -= pcount;
1256
1257
1258
1259
1260
1261
1262 if (!skb_shinfo(prev)->gso_size) {
1263 skb_shinfo(prev)->gso_size = mss;
1264 skb_shinfo(prev)->gso_type = sk->sk_gso_type;
1265 }
1266
1267
1268 if (skb_shinfo(skb)->gso_segs <= 1) {
1269 skb_shinfo(skb)->gso_size = 0;
1270 skb_shinfo(skb)->gso_type = 0;
1271 }
1272
1273
1274 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
1275
1276 if (skb->len > 0) {
1277 BUG_ON(!tcp_skb_pcount(skb));
1278 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED);
1279 return false;
1280 }
1281
1282
1283
1284 if (skb == tp->retransmit_skb_hint)
1285 tp->retransmit_skb_hint = prev;
1286 if (skb == tp->scoreboard_skb_hint)
1287 tp->scoreboard_skb_hint = prev;
1288 if (skb == tp->lost_skb_hint) {
1289 tp->lost_skb_hint = prev;
1290 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
1291 }
1292
1293 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags;
1294 if (skb == tcp_highest_sack(sk))
1295 tcp_advance_highest_sack(sk, skb);
1296
1297 tcp_unlink_write_queue(skb, sk);
1298 sk_wmem_free_skb(sk, skb);
1299
1300 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED);
1301
1302 return true;
1303}
1304
1305
1306
1307
1308static int tcp_skb_seglen(const struct sk_buff *skb)
1309{
1310 return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
1311}
1312
1313
/*
 * An skb qualifies for shifting only when all of its data is non-linear,
 * i.e. the linear head area is empty.  Returns 1 or 0.
 */
static int skb_can_shift(const struct sk_buff *skb)
{
	if (skb_headlen(skb))
		return 0;

	return skb_is_nonlinear(skb) ? 1 : 0;
}
1318
1319
1320
1321
/*
 * Try to merge the part of @skb covered by the SACK block [@start_seq,
 * @end_seq) into the previous skb of the write queue instead of tagging
 * (and possibly fragmenting) @skb itself.
 *
 * Returns:
 *  - the previous skb when data was shifted into it (the caller continues
 *    its walk from there),
 *  - @skb itself when nothing needs to be done for it ("noop"),
 *  - NULL when shifting is not possible and the caller has to fall back to
 *    the regular match-and-tag path.
 */
static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
					  struct tcp_sacktag_state *state,
					  u32 start_seq, u32 end_seq,
					  bool dup_sack)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *prev;
	int mss;
	int pcount = 0;
	int len;
	int in_sack;

	if (!sk_can_gso(sk))
		goto fallback;

	/* Outstanding retransmission not marked lost: leave to the normal path. */
	if (!dup_sack &&
	    (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
		goto fallback;
	if (!skb_can_shift(skb))
		goto fallback;
	/* This skb is already fully below snd_una. */
	if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
		goto fallback;

	/* The queue head has no predecessor to merge into. */
	if (unlikely(skb == tcp_write_queue_head(sk)))
		goto fallback;
	prev = tcp_write_queue_prev(sk, skb);

	/* prev must be SACKed with no other tag bits set. */
	if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
		goto fallback;

	in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
		  !before(end_seq, TCP_SKB_CB(skb)->end_seq);

	if (in_sack) {
		/* The whole skb is covered: shift all of it. */
		len = skb->len;
		pcount = tcp_skb_pcount(skb);
		mss = tcp_skb_seglen(skb);

		/* Segment sizes must match for the merged skb to stay consistent. */
		if (mss != tcp_skb_seglen(prev))
			goto fallback;
	} else {
		/* The block does not reach into this skb at all. */
		if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
			goto noop;

		/* A partly covered single-packet skb cannot be split here. */
		if (tcp_skb_pcount(skb) <= 1)
			goto noop;

		in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
		if (!in_sack) {
			/*
			 * The block covers the tail of the skb rather than
			 * its head; only head shifts are handled here.
			 */
			goto fallback;
		}

		len = end_seq - TCP_SKB_CB(skb)->seq;
		BUG_ON(len < 0);
		BUG_ON(len > skb->len);

		mss = tcp_skb_mss(skb);

		/* Segment sizes must match for the merged skb to stay consistent. */
		if (mss != tcp_skb_seglen(prev))
			goto fallback;

		/* Only whole segments are shifted; round len down to a multiple of mss. */
		if (len == mss) {
			pcount = 1;
		} else if (len < mss) {
			goto noop;
		} else {
			pcount = len / mss;
			len = pcount * mss;
		}
	}

	/* The range to be shifted must reach beyond snd_una. */
	if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
		goto fallback;

	if (!skb_shift(prev, skb, len))
		goto fallback;
	/* skb still holds data: done with this skb. */
	if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
		goto out;

	/*
	 * skb was fully merged and freed.  Check whether the skb that now
	 * follows prev can be collapsed into prev as well.
	 */
	if (prev == tcp_write_queue_tail(sk))
		goto out;
	skb = tcp_write_queue_next(sk, prev);

	if (!skb_can_shift(skb) ||
	    (skb == tcp_send_head(sk)) ||
	    ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
	    (mss != tcp_skb_seglen(skb)))
		goto out;

	len = skb->len;
	if (skb_shift(prev, skb, len)) {
		pcount += tcp_skb_pcount(skb);
		tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
	}

out:
	/* Account for every packet that ended up in prev. */
	state->fack_count += pcount;
	return prev;

noop:
	return skb;

fallback:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
	return NULL;
}
1459
/*
 * Walk the write queue starting at @skb and tag every skb covered by the
 * SACK block [@start_seq, @end_seq).  @next_dup, when non-NULL, is a D-SACK
 * block that is matched against each skb starting below its end first.
 * @dup_sack_in states whether the block being walked is itself a D-SACK.
 *
 * Stops at the send head, at the first skb starting at or beyond @end_seq,
 * or on a matching error.  Returns the skb at which the walk stopped.
 */
static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
					struct tcp_sack_block *next_dup,
					struct tcp_sacktag_state *state,
					u32 start_seq, u32 end_seq,
					bool dup_sack_in)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *tmp;

	tcp_for_write_queue_from(skb, sk) {
		int in_sack = 0;
		bool dup_sack = dup_sack_in;

		if (skb == tcp_send_head(sk))
			break;

		/* The queue is in sequence order: past the block means done. */
		if (!before(TCP_SKB_CB(skb)->seq, end_seq))
			break;

		if ((next_dup != NULL) &&
		    before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
			in_sack = tcp_match_skb_to_sack(sk, skb,
							next_dup->start_seq,
							next_dup->end_seq);
			if (in_sack > 0)
				dup_sack = true;
		}

		/*
		 * Not matched by the D-SACK block (or matching failed): try
		 * to merge the covered data into the previous skb first, and
		 * only fall back to matching against the block itself when
		 * shifting is not possible.
		 */
		if (in_sack <= 0) {
			tmp = tcp_shift_skb_data(sk, skb, state,
						 start_seq, end_seq, dup_sack);
			if (tmp != NULL) {
				/* Data was shifted: continue the walk from the returned skb. */
				if (tmp != skb) {
					skb = tmp;
					continue;
				}

				/* Nothing to do for this skb. */
				in_sack = 0;
			} else {
				in_sack = tcp_match_skb_to_sack(sk, skb,
								start_seq,
								end_seq);
			}
		}

		/* Matching reported an error: abandon the walk. */
		if (unlikely(in_sack < 0))
			break;

		if (in_sack) {
			TCP_SKB_CB(skb)->sacked =
				tcp_sacktag_one(sk,
						state,
						TCP_SKB_CB(skb)->sacked,
						TCP_SKB_CB(skb)->seq,
						TCP_SKB_CB(skb)->end_seq,
						dup_sack,
						tcp_skb_pcount(skb));

			if (!before(TCP_SKB_CB(skb)->seq,
				    tcp_highest_sack_seq(tp)))
				tcp_advance_highest_sack(sk, skb);
		}

		state->fack_count += tcp_skb_pcount(skb);
	}
	return skb;
}
1532
1533
1534
1535
1536static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
1537 struct tcp_sacktag_state *state,
1538 u32 skip_to_seq)
1539{
1540 tcp_for_write_queue_from(skb, sk) {
1541 if (skb == tcp_send_head(sk))
1542 break;
1543
1544 if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
1545 break;
1546
1547 state->fack_count += tcp_skb_pcount(skb);
1548 }
1549 return skb;
1550}
1551
1552static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1553 struct sock *sk,
1554 struct tcp_sack_block *next_dup,
1555 struct tcp_sacktag_state *state,
1556 u32 skip_to_seq)
1557{
1558 if (next_dup == NULL)
1559 return skb;
1560
1561 if (before(next_dup->start_seq, skip_to_seq)) {
1562 skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq);
1563 skb = tcp_sacktag_walk(skb, sk, NULL, state,
1564 next_dup->start_seq, next_dup->end_seq,
1565 1);
1566 }
1567
1568 return skb;
1569}
1570
1571static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
1572{
1573 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1574}
1575
1576static int
1577tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1578 u32 prior_snd_una)
1579{
1580 const struct inet_connection_sock *icsk = inet_csk(sk);
1581 struct tcp_sock *tp = tcp_sk(sk);
1582 const unsigned char *ptr = (skb_transport_header(ack_skb) +
1583 TCP_SKB_CB(ack_skb)->sacked);
1584 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
1585 struct tcp_sack_block sp[TCP_NUM_SACKS];
1586 struct tcp_sack_block *cache;
1587 struct tcp_sacktag_state state;
1588 struct sk_buff *skb;
1589 int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
1590 int used_sacks;
1591 bool found_dup_sack = false;
1592 int i, j;
1593 int first_sack_index;
1594
1595 state.flag = 0;
1596 state.reord = tp->packets_out;
1597
1598 if (!tp->sacked_out) {
1599 if (WARN_ON(tp->fackets_out))
1600 tp->fackets_out = 0;
1601 tcp_highest_sack_reset(sk);
1602 }
1603
1604 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
1605 num_sacks, prior_snd_una);
1606 if (found_dup_sack)
1607 state.flag |= FLAG_DSACKING_ACK;
1608
1609
1610
1611
1612
1613 if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
1614 return 0;
1615
1616 if (!tp->packets_out)
1617 goto out;
1618
1619 used_sacks = 0;
1620 first_sack_index = 0;
1621 for (i = 0; i < num_sacks; i++) {
1622 bool dup_sack = !i && found_dup_sack;
1623
1624 sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
1625 sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);
1626
1627 if (!tcp_is_sackblock_valid(tp, dup_sack,
1628 sp[used_sacks].start_seq,
1629 sp[used_sacks].end_seq)) {
1630 int mib_idx;
1631
1632 if (dup_sack) {
1633 if (!tp->undo_marker)
1634 mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
1635 else
1636 mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
1637 } else {
1638
1639 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
1640 !after(sp[used_sacks].end_seq, tp->snd_una))
1641 continue;
1642 mib_idx = LINUX_MIB_TCPSACKDISCARD;
1643 }
1644
1645 NET_INC_STATS_BH(sock_net(sk), mib_idx);
1646 if (i == 0)
1647 first_sack_index = -1;
1648 continue;
1649 }
1650
1651
1652 if (!after(sp[used_sacks].end_seq, prior_snd_una))
1653 continue;
1654
1655 used_sacks++;
1656 }
1657
1658
1659 for (i = used_sacks - 1; i > 0; i--) {
1660 for (j = 0; j < i; j++) {
1661 if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
1662 swap(sp[j], sp[j + 1]);
1663
1664
1665 if (j == first_sack_index)
1666 first_sack_index = j + 1;
1667 }
1668 }
1669 }
1670
1671 skb = tcp_write_queue_head(sk);
1672 state.fack_count = 0;
1673 i = 0;
1674
1675 if (!tp->sacked_out) {
1676
1677 cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1678 } else {
1679 cache = tp->recv_sack_cache;
1680
1681 while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
1682 !cache->end_seq)
1683 cache++;
1684 }
1685
1686 while (i < used_sacks) {
1687 u32 start_seq = sp[i].start_seq;
1688 u32 end_seq = sp[i].end_seq;
1689 bool dup_sack = (found_dup_sack && (i == first_sack_index));
1690 struct tcp_sack_block *next_dup = NULL;
1691
1692 if (found_dup_sack && ((i + 1) == first_sack_index))
1693 next_dup = &sp[i + 1];
1694
1695
1696 while (tcp_sack_cache_ok(tp, cache) &&
1697 !before(start_seq, cache->end_seq))
1698 cache++;
1699
1700
1701 if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
1702 after(end_seq, cache->start_seq)) {
1703
1704
1705 if (before(start_seq, cache->start_seq)) {
1706 skb = tcp_sacktag_skip(skb, sk, &state,
1707 start_seq);
1708 skb = tcp_sacktag_walk(skb, sk, next_dup,
1709 &state,
1710 start_seq,
1711 cache->start_seq,
1712 dup_sack);
1713 }
1714
1715
1716 if (!after(end_seq, cache->end_seq))
1717 goto advance_sp;
1718
1719 skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
1720 &state,
1721 cache->end_seq);
1722
1723
1724 if (tcp_highest_sack_seq(tp) == cache->end_seq) {
1725
1726 skb = tcp_highest_sack(sk);
1727 if (skb == NULL)
1728 break;
1729 state.fack_count = tp->fackets_out;
1730 cache++;
1731 goto walk;
1732 }
1733
1734 skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
1735
1736 cache++;
1737 continue;
1738 }
1739
1740 if (!before(start_seq, tcp_highest_sack_seq(tp))) {
1741 skb = tcp_highest_sack(sk);
1742 if (skb == NULL)
1743 break;
1744 state.fack_count = tp->fackets_out;
1745 }
1746 skb = tcp_sacktag_skip(skb, sk, &state, start_seq);
1747
1748walk:
1749 skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
1750 start_seq, end_seq, dup_sack);
1751
1752advance_sp:
1753
1754
1755
1756 if (after(end_seq, tp->frto_highmark))
1757 state.flag &= ~FLAG_ONLY_ORIG_SACKED;
1758
1759 i++;
1760 }
1761
1762
1763 for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
1764 tp->recv_sack_cache[i].start_seq = 0;
1765 tp->recv_sack_cache[i].end_seq = 0;
1766 }
1767 for (j = 0; j < used_sacks; j++)
1768 tp->recv_sack_cache[i++] = sp[j];
1769
1770 tcp_mark_lost_retrans(sk);
1771
1772 tcp_verify_left_out(tp);
1773
1774 if ((state.reord < tp->fackets_out) &&
1775 ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
1776 (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
1777 tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
1778
1779out:
1780
1781#if FASTRETRANS_DEBUG > 0
1782 WARN_ON((int)tp->sacked_out < 0);
1783 WARN_ON((int)tp->lost_out < 0);
1784 WARN_ON((int)tp->retrans_out < 0);
1785 WARN_ON((int)tcp_packets_in_flight(tp) < 0);
1786#endif
1787 return state.flag;
1788}
1789
1790
1791
1792
1793static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
1794{
1795 u32 holes;
1796
1797 holes = max(tp->lost_out, 1U);
1798 holes = min(holes, tp->packets_out);
1799
1800 if ((tp->sacked_out + holes) > tp->packets_out) {
1801 tp->sacked_out = tp->packets_out - holes;
1802 return true;
1803 }
1804 return false;
1805}
1806
1807
1808
1809
1810
1811static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1812{
1813 struct tcp_sock *tp = tcp_sk(sk);
1814 if (tcp_limit_reno_sacked(tp))
1815 tcp_update_reordering(sk, tp->packets_out + addend, 0);
1816}
1817
1818
1819
1820static void tcp_add_reno_sack(struct sock *sk)
1821{
1822 struct tcp_sock *tp = tcp_sk(sk);
1823 tp->sacked_out++;
1824 tcp_check_reno_reordering(sk, 0);
1825 tcp_verify_left_out(tp);
1826}
1827
1828
1829
1830static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1831{
1832 struct tcp_sock *tp = tcp_sk(sk);
1833
1834 if (acked > 0) {
1835
1836 if (acked - 1 >= tp->sacked_out)
1837 tp->sacked_out = 0;
1838 else
1839 tp->sacked_out -= acked - 1;
1840 }
1841 tcp_check_reno_reordering(sk, acked);
1842 tcp_verify_left_out(tp);
1843}
1844
1845static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
1846{
1847 tp->sacked_out = 0;
1848}
1849
1850static int tcp_is_sackfrto(const struct tcp_sock *tp)
1851{
1852 return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp);
1853}
1854
1855
1856
1857
1858bool tcp_use_frto(struct sock *sk)
1859{
1860 const struct tcp_sock *tp = tcp_sk(sk);
1861 const struct inet_connection_sock *icsk = inet_csk(sk);
1862 struct sk_buff *skb;
1863
1864 if (!sysctl_tcp_frto)
1865 return false;
1866
1867
1868 if (icsk->icsk_mtup.probe_size)
1869 return false;
1870
1871 if (tcp_is_sackfrto(tp))
1872 return true;
1873
1874
1875 if (tp->retrans_out > 1)
1876 return false;
1877
1878 skb = tcp_write_queue_head(sk);
1879 if (tcp_skb_is_last(sk, skb))
1880 return true;
1881 skb = tcp_write_queue_next(sk, skb);
1882 tcp_for_write_queue_from(skb, sk) {
1883 if (skb == tcp_send_head(sk))
1884 break;
1885 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
1886 return false;
1887
1888 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
1889 break;
1890 }
1891 return true;
1892}
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906void tcp_enter_frto(struct sock *sk)
1907{
1908 const struct inet_connection_sock *icsk = inet_csk(sk);
1909 struct tcp_sock *tp = tcp_sk(sk);
1910 struct sk_buff *skb;
1911
1912 if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
1913 tp->snd_una == tp->high_seq ||
1914 ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
1915 !icsk->icsk_retransmits)) {
1916 tp->prior_ssthresh = tcp_current_ssthresh(sk);
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926 if (tp->frto_counter) {
1927 u32 stored_cwnd;
1928 stored_cwnd = tp->snd_cwnd;
1929 tp->snd_cwnd = 2;
1930 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1931 tp->snd_cwnd = stored_cwnd;
1932 } else {
1933 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1934 }
1935
1936
1937
1938
1939
1940
1941
1942 tcp_ca_event(sk, CA_EVENT_FRTO);
1943 }
1944
1945 tp->undo_marker = tp->snd_una;
1946 tp->undo_retrans = 0;
1947
1948 skb = tcp_write_queue_head(sk);
1949 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
1950 tp->undo_marker = 0;
1951 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
1952 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1953 tp->retrans_out -= tcp_skb_pcount(skb);
1954 }
1955 tcp_verify_left_out(tp);
1956
1957
1958 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
1959
1960
1961
1962
1963 if (tcp_is_sackfrto(tp) && (tp->frto_counter ||
1964 ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
1965 after(tp->high_seq, tp->snd_una)) {
1966 tp->frto_highmark = tp->high_seq;
1967 } else {
1968 tp->frto_highmark = tp->snd_nxt;
1969 }
1970 tcp_set_ca_state(sk, TCP_CA_Disorder);
1971 tp->high_seq = tp->snd_nxt;
1972 tp->frto_counter = 1;
1973}
1974
1975
1976
1977
1978
1979static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
1980{
1981 struct tcp_sock *tp = tcp_sk(sk);
1982 struct sk_buff *skb;
1983
1984 tp->lost_out = 0;
1985 tp->retrans_out = 0;
1986 if (tcp_is_reno(tp))
1987 tcp_reset_reno_sack(tp);
1988
1989 tcp_for_write_queue(skb, sk) {
1990 if (skb == tcp_send_head(sk))
1991 break;
1992
1993 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1994
1995
1996
1997
1998 if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) {
1999
2000 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
2001 tp->retrans_out += tcp_skb_pcount(skb);
2002
2003 flag |= FLAG_DATA_ACKED;
2004 } else {
2005 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
2006 tp->undo_marker = 0;
2007 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2008 }
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2020 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
2021 tp->lost_out += tcp_skb_pcount(skb);
2022 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
2023 }
2024 }
2025 tcp_verify_left_out(tp);
2026
2027 tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
2028 tp->snd_cwnd_cnt = 0;
2029 tp->snd_cwnd_stamp = tcp_time_stamp;
2030 tp->frto_counter = 0;
2031 tp->bytes_acked = 0;
2032
2033 tp->reordering = min_t(unsigned int, tp->reordering,
2034 sysctl_tcp_reordering);
2035 tcp_set_ca_state(sk, TCP_CA_Loss);
2036 tp->high_seq = tp->snd_nxt;
2037 TCP_ECN_queue_cwr(tp);
2038
2039 tcp_clear_all_retrans_hints(tp);
2040}
2041
2042static void tcp_clear_retrans_partial(struct tcp_sock *tp)
2043{
2044 tp->retrans_out = 0;
2045 tp->lost_out = 0;
2046
2047 tp->undo_marker = 0;
2048 tp->undo_retrans = 0;
2049}
2050
2051void tcp_clear_retrans(struct tcp_sock *tp)
2052{
2053 tcp_clear_retrans_partial(tp);
2054
2055 tp->fackets_out = 0;
2056 tp->sacked_out = 0;
2057}
2058
2059
2060
2061
2062
2063void tcp_enter_loss(struct sock *sk, int how)
2064{
2065 const struct inet_connection_sock *icsk = inet_csk(sk);
2066 struct tcp_sock *tp = tcp_sk(sk);
2067 struct sk_buff *skb;
2068
2069
2070 if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
2071 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
2072 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2073 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2074 tcp_ca_event(sk, CA_EVENT_LOSS);
2075 }
2076 tp->snd_cwnd = 1;
2077 tp->snd_cwnd_cnt = 0;
2078 tp->snd_cwnd_stamp = tcp_time_stamp;
2079
2080 tp->bytes_acked = 0;
2081 tcp_clear_retrans_partial(tp);
2082
2083 if (tcp_is_reno(tp))
2084 tcp_reset_reno_sack(tp);
2085
2086 if (!how) {
2087
2088
2089 tp->undo_marker = tp->snd_una;
2090 } else {
2091 tp->sacked_out = 0;
2092 tp->fackets_out = 0;
2093 }
2094 tcp_clear_all_retrans_hints(tp);
2095
2096 tcp_for_write_queue(skb, sk) {
2097 if (skb == tcp_send_head(sk))
2098 break;
2099
2100 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
2101 tp->undo_marker = 0;
2102 TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
2103 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
2104 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
2105 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
2106 tp->lost_out += tcp_skb_pcount(skb);
2107 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
2108 }
2109 }
2110 tcp_verify_left_out(tp);
2111
2112 tp->reordering = min_t(unsigned int, tp->reordering,
2113 sysctl_tcp_reordering);
2114 tcp_set_ca_state(sk, TCP_CA_Loss);
2115 tp->high_seq = tp->snd_nxt;
2116 TCP_ECN_queue_cwr(tp);
2117
2118 tp->frto_counter = 0;
2119}
2120
2121
2122
2123
2124
2125
2126
2127static bool tcp_check_sack_reneging(struct sock *sk, int flag)
2128{
2129 if (flag & FLAG_SACK_RENEGING) {
2130 struct inet_connection_sock *icsk = inet_csk(sk);
2131 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
2132
2133 tcp_enter_loss(sk, 1);
2134 icsk->icsk_retransmits++;
2135 tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
2136 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2137 icsk->icsk_rto, TCP_RTO_MAX);
2138 return true;
2139 }
2140 return false;
2141}
2142
2143static inline int tcp_fackets_out(const struct tcp_sock *tp)
2144{
2145 return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
2146}
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
2164{
2165 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
2166}
2167
2168static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
2169{
2170 struct tcp_sock *tp = tcp_sk(sk);
2171 unsigned long delay;
2172
2173
2174
2175
2176
2177 if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt)
2178 return false;
2179
2180 delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2));
2181 if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
2182 return false;
2183
2184 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX);
2185 tp->early_retrans_delayed = 1;
2186 return true;
2187}
2188
2189static inline int tcp_skb_timedout(const struct sock *sk,
2190 const struct sk_buff *skb)
2191{
2192 return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
2193}
2194
2195static inline int tcp_head_timedout(const struct sock *sk)
2196{
2197 const struct tcp_sock *tp = tcp_sk(sk);
2198
2199 return tp->packets_out &&
2200 tcp_skb_timedout(sk, tcp_write_queue_head(sk));
2201}
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296static bool tcp_time_to_recover(struct sock *sk, int flag)
2297{
2298 struct tcp_sock *tp = tcp_sk(sk);
2299 __u32 packets_out;
2300
2301
2302 if (tp->frto_counter)
2303 return false;
2304
2305
2306 if (tp->lost_out)
2307 return true;
2308
2309
2310 if (tcp_dupack_heuristics(tp) > tp->reordering)
2311 return true;
2312
2313
2314
2315
2316 if (tcp_is_fack(tp) && tcp_head_timedout(sk))
2317 return true;
2318
2319
2320
2321
2322 packets_out = tp->packets_out;
2323 if (packets_out <= tp->reordering &&
2324 tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
2325 !tcp_may_send_now(sk)) {
2326
2327
2328
2329 return true;
2330 }
2331
2332
2333
2334
2335
2336
2337 if ((tp->thin_dupack || sysctl_tcp_thin_dupack) &&
2338 tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 &&
2339 tcp_is_sack(tp) && !tcp_send_head(sk))
2340 return true;
2341
2342
2343
2344
2345
2346
2347 if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
2348 (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) &&
2349 !tcp_may_send_now(sk))
2350 return !tcp_pause_early_retransmit(sk, flag);
2351
2352 return false;
2353}
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367static void tcp_timeout_skbs(struct sock *sk)
2368{
2369 struct tcp_sock *tp = tcp_sk(sk);
2370 struct sk_buff *skb;
2371
2372 if (!tcp_is_fack(tp) || !tcp_head_timedout(sk))
2373 return;
2374
2375 skb = tp->scoreboard_skb_hint;
2376 if (tp->scoreboard_skb_hint == NULL)
2377 skb = tcp_write_queue_head(sk);
2378
2379 tcp_for_write_queue_from(skb, sk) {
2380 if (skb == tcp_send_head(sk))
2381 break;
2382 if (!tcp_skb_timedout(sk, skb))
2383 break;
2384
2385 tcp_skb_mark_lost(tp, skb);
2386 }
2387
2388 tp->scoreboard_skb_hint = skb;
2389
2390 tcp_verify_left_out(tp);
2391}
2392
2393
2394
2395
2396
2397
2398
2399static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2400{
2401 struct tcp_sock *tp = tcp_sk(sk);
2402 struct sk_buff *skb;
2403 int cnt, oldcnt;
2404 int err;
2405 unsigned int mss;
2406
2407 const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
2408
2409 WARN_ON(packets > tp->packets_out);
2410 if (tp->lost_skb_hint) {
2411 skb = tp->lost_skb_hint;
2412 cnt = tp->lost_cnt_hint;
2413
2414 if (mark_head && skb != tcp_write_queue_head(sk))
2415 return;
2416 } else {
2417 skb = tcp_write_queue_head(sk);
2418 cnt = 0;
2419 }
2420
2421 tcp_for_write_queue_from(skb, sk) {
2422 if (skb == tcp_send_head(sk))
2423 break;
2424
2425
2426 tp->lost_skb_hint = skb;
2427 tp->lost_cnt_hint = cnt;
2428
2429 if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
2430 break;
2431
2432 oldcnt = cnt;
2433 if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
2434 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2435 cnt += tcp_skb_pcount(skb);
2436
2437 if (cnt > packets) {
2438 if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
2439 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
2440 (oldcnt >= packets))
2441 break;
2442
2443 mss = skb_shinfo(skb)->gso_size;
2444 err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
2445 if (err < 0)
2446 break;
2447 cnt = packets;
2448 }
2449
2450 tcp_skb_mark_lost(tp, skb);
2451
2452 if (mark_head)
2453 break;
2454 }
2455 tcp_verify_left_out(tp);
2456}
2457
2458
2459
2460static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2461{
2462 struct tcp_sock *tp = tcp_sk(sk);
2463
2464 if (tcp_is_reno(tp)) {
2465 tcp_mark_head_lost(sk, 1, 1);
2466 } else if (tcp_is_fack(tp)) {
2467 int lost = tp->fackets_out - tp->reordering;
2468 if (lost <= 0)
2469 lost = 1;
2470 tcp_mark_head_lost(sk, lost, 0);
2471 } else {
2472 int sacked_upto = tp->sacked_out - tp->reordering;
2473 if (sacked_upto >= 0)
2474 tcp_mark_head_lost(sk, sacked_upto, 0);
2475 else if (fast_rexmit)
2476 tcp_mark_head_lost(sk, 1, 1);
2477 }
2478
2479 tcp_timeout_skbs(sk);
2480}
2481
2482
2483
2484
2485static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
2486{
2487 tp->snd_cwnd = min(tp->snd_cwnd,
2488 tcp_packets_in_flight(tp) + tcp_max_burst(tp));
2489 tp->snd_cwnd_stamp = tcp_time_stamp;
2490}
2491
2492
2493
2494
2495static inline u32 tcp_cwnd_min(const struct sock *sk)
2496{
2497 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
2498
2499 return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh;
2500}
2501
2502
2503static void tcp_cwnd_down(struct sock *sk, int flag)
2504{
2505 struct tcp_sock *tp = tcp_sk(sk);
2506 int decr = tp->snd_cwnd_cnt + 1;
2507
2508 if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
2509 (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
2510 tp->snd_cwnd_cnt = decr & 1;
2511 decr >>= 1;
2512
2513 if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
2514 tp->snd_cwnd -= decr;
2515
2516 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
2517 tp->snd_cwnd_stamp = tcp_time_stamp;
2518 }
2519}
2520
2521
2522
2523
2524static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
2525{
2526 return !tp->retrans_stamp ||
2527 (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2528 before(tp->rx_opt.rcv_tsecr, tp->retrans_stamp));
2529}
2530
2531
2532
#if FASTRETRANS_DEBUG > 1
/*
 * Debug aid: print the undo kind (@msg) together with the peer address and
 * port, cwnd, left_out, ssthresh/prior_ssthresh and packets_out.  Compiled
 * to a no-op unless FASTRETRANS_DEBUG > 1.
 */
static void DBGUNDO(struct sock *sk, const char *msg)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_sock *inet = inet_sk(sk);

	if (sk->sk_family == AF_INET) {
		pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
			 msg,
			 &inet->inet_daddr, ntohs(inet->inet_dport),
			 tp->snd_cwnd, tcp_left_out(tp),
			 tp->snd_ssthresh, tp->prior_ssthresh,
			 tp->packets_out);
		return;
	}
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6) {
		struct ipv6_pinfo *np = inet6_sk(sk);

		pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
			 msg,
			 &np->daddr, ntohs(inet->inet_dport),
			 tp->snd_cwnd, tcp_left_out(tp),
			 tp->snd_ssthresh, tp->prior_ssthresh,
			 tp->packets_out);
	}
#endif
}
#else
#define DBGUNDO(x...) do { } while (0)
#endif
2562
2563static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
2564{
2565 struct tcp_sock *tp = tcp_sk(sk);
2566
2567 if (tp->prior_ssthresh) {
2568 const struct inet_connection_sock *icsk = inet_csk(sk);
2569
2570 if (icsk->icsk_ca_ops->undo_cwnd)
2571 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2572 else
2573 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
2574
2575 if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) {
2576 tp->snd_ssthresh = tp->prior_ssthresh;
2577 TCP_ECN_withdraw_cwr(tp);
2578 }
2579 } else {
2580 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
2581 }
2582 tp->snd_cwnd_stamp = tcp_time_stamp;
2583}
2584
2585static inline bool tcp_may_undo(const struct tcp_sock *tp)
2586{
2587 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2588}
2589
2590
2591static bool tcp_try_undo_recovery(struct sock *sk)
2592{
2593 struct tcp_sock *tp = tcp_sk(sk);
2594
2595 if (tcp_may_undo(tp)) {
2596 int mib_idx;
2597
2598
2599
2600
2601 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2602 tcp_undo_cwr(sk, true);
2603 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2604 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2605 else
2606 mib_idx = LINUX_MIB_TCPFULLUNDO;
2607
2608 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2609 tp->undo_marker = 0;
2610 }
2611 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
2612
2613
2614
2615 tcp_moderate_cwnd(tp);
2616 return true;
2617 }
2618 tcp_set_ca_state(sk, TCP_CA_Open);
2619 return false;
2620}
2621
2622
2623static void tcp_try_undo_dsack(struct sock *sk)
2624{
2625 struct tcp_sock *tp = tcp_sk(sk);
2626
2627 if (tp->undo_marker && !tp->undo_retrans) {
2628 DBGUNDO(sk, "D-SACK");
2629 tcp_undo_cwr(sk, true);
2630 tp->undo_marker = 0;
2631 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2632 }
2633}
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649static bool tcp_any_retrans_done(const struct sock *sk)
2650{
2651 const struct tcp_sock *tp = tcp_sk(sk);
2652 struct sk_buff *skb;
2653
2654 if (tp->retrans_out)
2655 return true;
2656
2657 skb = tcp_write_queue_head(sk);
2658 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2659 return true;
2660
2661 return false;
2662}
2663
2664
2665
2666static int tcp_try_undo_partial(struct sock *sk, int acked)
2667{
2668 struct tcp_sock *tp = tcp_sk(sk);
2669
2670 int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
2671
2672 if (tcp_may_undo(tp)) {
2673
2674
2675
2676 if (!tcp_any_retrans_done(sk))
2677 tp->retrans_stamp = 0;
2678
2679 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
2680
2681 DBGUNDO(sk, "Hoe");
2682 tcp_undo_cwr(sk, false);
2683 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2684
2685
2686
2687
2688
2689 failed = 0;
2690 }
2691 return failed;
2692}
2693
2694
2695static bool tcp_try_undo_loss(struct sock *sk)
2696{
2697 struct tcp_sock *tp = tcp_sk(sk);
2698
2699 if (tcp_may_undo(tp)) {
2700 struct sk_buff *skb;
2701 tcp_for_write_queue(skb, sk) {
2702 if (skb == tcp_send_head(sk))
2703 break;
2704 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2705 }
2706
2707 tcp_clear_all_retrans_hints(tp);
2708
2709 DBGUNDO(sk, "partial loss");
2710 tp->lost_out = 0;
2711 tcp_undo_cwr(sk, true);
2712 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2713 inet_csk(sk)->icsk_retransmits = 0;
2714 tp->undo_marker = 0;
2715 if (tcp_is_sack(tp))
2716 tcp_set_ca_state(sk, TCP_CA_Open);
2717 return true;
2718 }
2719 return false;
2720}
2721
2722static inline void tcp_complete_cwr(struct sock *sk)
2723{
2724 struct tcp_sock *tp = tcp_sk(sk);
2725
2726
2727 if (tp->undo_marker) {
2728 if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) {
2729 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
2730 tp->snd_cwnd_stamp = tcp_time_stamp;
2731 } else if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH) {
2732
2733 tp->snd_cwnd = tp->snd_ssthresh;
2734 tp->snd_cwnd_stamp = tcp_time_stamp;
2735 }
2736 }
2737 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2738}
2739
2740static void tcp_try_keep_open(struct sock *sk)
2741{
2742 struct tcp_sock *tp = tcp_sk(sk);
2743 int state = TCP_CA_Open;
2744
2745 if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
2746 state = TCP_CA_Disorder;
2747
2748 if (inet_csk(sk)->icsk_ca_state != state) {
2749 tcp_set_ca_state(sk, state);
2750 tp->high_seq = tp->snd_nxt;
2751 }
2752}
2753
2754static void tcp_try_to_open(struct sock *sk, int flag)
2755{
2756 struct tcp_sock *tp = tcp_sk(sk);
2757
2758 tcp_verify_left_out(tp);
2759
2760 if (!tp->frto_counter && !tcp_any_retrans_done(sk))
2761 tp->retrans_stamp = 0;
2762
2763 if (flag & FLAG_ECE)
2764 tcp_enter_cwr(sk, 1);
2765
2766 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2767 tcp_try_keep_open(sk);
2768 if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
2769 tcp_moderate_cwnd(tp);
2770 } else {
2771 tcp_cwnd_down(sk, flag);
2772 }
2773}
2774
2775static void tcp_mtup_probe_failed(struct sock *sk)
2776{
2777 struct inet_connection_sock *icsk = inet_csk(sk);
2778
2779 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
2780 icsk->icsk_mtup.probe_size = 0;
2781}
2782
2783static void tcp_mtup_probe_success(struct sock *sk)
2784{
2785 struct tcp_sock *tp = tcp_sk(sk);
2786 struct inet_connection_sock *icsk = inet_csk(sk);
2787
2788
2789 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2790 tp->snd_cwnd = tp->snd_cwnd *
2791 tcp_mss_to_mtu(sk, tp->mss_cache) /
2792 icsk->icsk_mtup.probe_size;
2793 tp->snd_cwnd_cnt = 0;
2794 tp->snd_cwnd_stamp = tcp_time_stamp;
2795 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2796
2797 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2798 icsk->icsk_mtup.probe_size = 0;
2799 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2800}
2801
2802
2803
2804
2805
2806void tcp_simple_retransmit(struct sock *sk)
2807{
2808 const struct inet_connection_sock *icsk = inet_csk(sk);
2809 struct tcp_sock *tp = tcp_sk(sk);
2810 struct sk_buff *skb;
2811 unsigned int mss = tcp_current_mss(sk);
2812 u32 prior_lost = tp->lost_out;
2813
2814 tcp_for_write_queue(skb, sk) {
2815 if (skb == tcp_send_head(sk))
2816 break;
2817 if (tcp_skb_seglen(skb) > mss &&
2818 !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2819 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2820 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2821 tp->retrans_out -= tcp_skb_pcount(skb);
2822 }
2823 tcp_skb_mark_lost_uncond_verify(tp, skb);
2824 }
2825 }
2826
2827 tcp_clear_retrans_hints_partial(tp);
2828
2829 if (prior_lost == tp->lost_out)
2830 return;
2831
2832 if (tcp_is_reno(tp))
2833 tcp_limit_reno_sacked(tp);
2834
2835 tcp_verify_left_out(tp);
2836
2837
2838
2839
2840
2841
2842 if (icsk->icsk_ca_state != TCP_CA_Loss) {
2843 tp->high_seq = tp->snd_nxt;
2844 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2845 tp->prior_ssthresh = 0;
2846 tp->undo_marker = 0;
2847 tcp_set_ca_state(sk, TCP_CA_Loss);
2848 }
2849 tcp_xmit_retransmit_queue(sk);
2850}
2851EXPORT_SYMBOL(tcp_simple_retransmit);
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
2865 int fast_rexmit, int flag)
2866{
2867 struct tcp_sock *tp = tcp_sk(sk);
2868 int sndcnt = 0;
2869 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
2870
2871 if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
2872 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
2873 tp->prior_cwnd - 1;
2874 sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
2875 } else {
2876 sndcnt = min_t(int, delta,
2877 max_t(int, tp->prr_delivered - tp->prr_out,
2878 newly_acked_sacked) + 1);
2879 }
2880
2881 sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
2882 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
2883}
2884
2885static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2886{
2887 struct tcp_sock *tp = tcp_sk(sk);
2888 int mib_idx;
2889
2890 if (tcp_is_reno(tp))
2891 mib_idx = LINUX_MIB_TCPRENORECOVERY;
2892 else
2893 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
2894
2895 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2896
2897 tp->high_seq = tp->snd_nxt;
2898 tp->prior_ssthresh = 0;
2899 tp->undo_marker = tp->snd_una;
2900 tp->undo_retrans = tp->retrans_out;
2901
2902 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2903 if (!ece_ack)
2904 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2905 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
2906 TCP_ECN_queue_cwr(tp);
2907 }
2908
2909 tp->bytes_acked = 0;
2910 tp->snd_cwnd_cnt = 0;
2911 tp->prior_cwnd = tp->snd_cwnd;
2912 tp->prr_delivered = 0;
2913 tp->prr_out = 0;
2914 tcp_set_ca_state(sk, TCP_CA_Recovery);
2915}
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
2929 int prior_sacked, bool is_dupack,
2930 int flag)
2931{
2932 struct inet_connection_sock *icsk = inet_csk(sk);
2933 struct tcp_sock *tp = tcp_sk(sk);
2934 int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
2935 (tcp_fackets_out(tp) > tp->reordering));
2936 int newly_acked_sacked = 0;
2937 int fast_rexmit = 0;
2938
2939 if (WARN_ON(!tp->packets_out && tp->sacked_out))
2940 tp->sacked_out = 0;
2941 if (WARN_ON(!tp->sacked_out && tp->fackets_out))
2942 tp->fackets_out = 0;
2943
2944
2945
2946 if (flag & FLAG_ECE)
2947 tp->prior_ssthresh = 0;
2948
2949
2950 if (tcp_check_sack_reneging(sk, flag))
2951 return;
2952
2953
2954 tcp_verify_left_out(tp);
2955
2956
2957
2958 if (icsk->icsk_ca_state == TCP_CA_Open) {
2959 WARN_ON(tp->retrans_out != 0);
2960 tp->retrans_stamp = 0;
2961 } else if (!before(tp->snd_una, tp->high_seq)) {
2962 switch (icsk->icsk_ca_state) {
2963 case TCP_CA_Loss:
2964 icsk->icsk_retransmits = 0;
2965 if (tcp_try_undo_recovery(sk))
2966 return;
2967 break;
2968
2969 case TCP_CA_CWR:
2970
2971
2972 if (tp->snd_una != tp->high_seq) {
2973 tcp_complete_cwr(sk);
2974 tcp_set_ca_state(sk, TCP_CA_Open);
2975 }
2976 break;
2977
2978 case TCP_CA_Recovery:
2979 if (tcp_is_reno(tp))
2980 tcp_reset_reno_sack(tp);
2981 if (tcp_try_undo_recovery(sk))
2982 return;
2983 tcp_complete_cwr(sk);
2984 break;
2985 }
2986 }
2987
2988
2989 switch (icsk->icsk_ca_state) {
2990 case TCP_CA_Recovery:
2991 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
2992 if (tcp_is_reno(tp) && is_dupack)
2993 tcp_add_reno_sack(sk);
2994 } else
2995 do_lost = tcp_try_undo_partial(sk, pkts_acked);
2996 newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
2997 break;
2998 case TCP_CA_Loss:
2999 if (flag & FLAG_DATA_ACKED)
3000 icsk->icsk_retransmits = 0;
3001 if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
3002 tcp_reset_reno_sack(tp);
3003 if (!tcp_try_undo_loss(sk)) {
3004 tcp_moderate_cwnd(tp);
3005 tcp_xmit_retransmit_queue(sk);
3006 return;
3007 }
3008 if (icsk->icsk_ca_state != TCP_CA_Open)
3009 return;
3010
3011 default:
3012 if (tcp_is_reno(tp)) {
3013 if (flag & FLAG_SND_UNA_ADVANCED)
3014 tcp_reset_reno_sack(tp);
3015 if (is_dupack)
3016 tcp_add_reno_sack(sk);
3017 }
3018 newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
3019
3020 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
3021 tcp_try_undo_dsack(sk);
3022
3023 if (!tcp_time_to_recover(sk, flag)) {
3024 tcp_try_to_open(sk, flag);
3025 return;
3026 }
3027
3028
3029 if (icsk->icsk_ca_state < TCP_CA_CWR &&
3030 icsk->icsk_mtup.probe_size &&
3031 tp->snd_una == tp->mtu_probe.probe_seq_start) {
3032 tcp_mtup_probe_failed(sk);
3033
3034 tp->snd_cwnd++;
3035 tcp_simple_retransmit(sk);
3036 return;
3037 }
3038
3039
3040 tcp_enter_recovery(sk, (flag & FLAG_ECE));
3041 fast_rexmit = 1;
3042 }
3043
3044 if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
3045 tcp_update_scoreboard(sk, fast_rexmit);
3046 tp->prr_delivered += newly_acked_sacked;
3047 tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
3048 tcp_xmit_retransmit_queue(sk);
3049}
3050
3051void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
3052{
3053 tcp_rtt_estimator(sk, seq_rtt);
3054 tcp_set_rto(sk);
3055 inet_csk(sk)->icsk_backoff = 0;
3056}
3057EXPORT_SYMBOL(tcp_valid_rtt_meas);
3058
3059
3060
3061
3062static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
3063{
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079 struct tcp_sock *tp = tcp_sk(sk);
3080
3081 tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
3082}
3083
3084static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
3085{
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095 if (flag & FLAG_RETRANS_DATA_ACKED)
3096 return;
3097
3098 tcp_valid_rtt_meas(sk, seq_rtt);
3099}
3100
3101static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
3102 const s32 seq_rtt)
3103{
3104 const struct tcp_sock *tp = tcp_sk(sk);
3105
3106 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
3107 tcp_ack_saw_tstamp(sk, flag);
3108 else if (seq_rtt >= 0)
3109 tcp_ack_no_tstamp(sk, seq_rtt, flag);
3110}
3111
3112static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
3113{
3114 const struct inet_connection_sock *icsk = inet_csk(sk);
3115 icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight);
3116 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
3117}
3118
3119
3120
3121
3122void tcp_rearm_rto(struct sock *sk)
3123{
3124 struct tcp_sock *tp = tcp_sk(sk);
3125
3126 if (!tp->packets_out) {
3127 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
3128 } else {
3129 u32 rto = inet_csk(sk)->icsk_rto;
3130
3131 if (tp->early_retrans_delayed) {
3132 struct sk_buff *skb = tcp_write_queue_head(sk);
3133 const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto;
3134 s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
3135
3136
3137
3138 if (delta > 0)
3139 rto = delta;
3140 }
3141 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
3142 TCP_RTO_MAX);
3143 }
3144 tp->early_retrans_delayed = 0;
3145}
3146
3147
3148
3149
3150void tcp_resume_early_retransmit(struct sock *sk)
3151{
3152 struct tcp_sock *tp = tcp_sk(sk);
3153
3154 tcp_rearm_rto(sk);
3155
3156
3157 if (!tp->do_early_retrans)
3158 return;
3159
3160 tcp_enter_recovery(sk, false);
3161 tcp_update_scoreboard(sk, 1);
3162 tcp_xmit_retransmit_queue(sk);
3163}
3164
3165
3166static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3167{
3168 struct tcp_sock *tp = tcp_sk(sk);
3169 u32 packets_acked;
3170
3171 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
3172
3173 packets_acked = tcp_skb_pcount(skb);
3174 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
3175 return 0;
3176 packets_acked -= tcp_skb_pcount(skb);
3177
3178 if (packets_acked) {
3179 BUG_ON(tcp_skb_pcount(skb) == 0);
3180 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
3181 }
3182
3183 return packets_acked;
3184}
3185
3186
3187
3188
3189
3190static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3191 u32 prior_snd_una)
3192{
3193 struct tcp_sock *tp = tcp_sk(sk);
3194 const struct inet_connection_sock *icsk = inet_csk(sk);
3195 struct sk_buff *skb;
3196 u32 now = tcp_time_stamp;
3197 int fully_acked = true;
3198 int flag = 0;
3199 u32 pkts_acked = 0;
3200 u32 reord = tp->packets_out;
3201 u32 prior_sacked = tp->sacked_out;
3202 s32 seq_rtt = -1;
3203 s32 ca_seq_rtt = -1;
3204 ktime_t last_ackt = net_invalid_timestamp();
3205
3206 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
3207 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3208 u32 acked_pcount;
3209 u8 sacked = scb->sacked;
3210
3211
3212 if (after(scb->end_seq, tp->snd_una)) {
3213 if (tcp_skb_pcount(skb) == 1 ||
3214 !after(tp->snd_una, scb->seq))
3215 break;
3216
3217 acked_pcount = tcp_tso_acked(sk, skb);
3218 if (!acked_pcount)
3219 break;
3220
3221 fully_acked = false;
3222 } else {
3223 acked_pcount = tcp_skb_pcount(skb);
3224 }
3225
3226 if (sacked & TCPCB_RETRANS) {
3227 if (sacked & TCPCB_SACKED_RETRANS)
3228 tp->retrans_out -= acked_pcount;
3229 flag |= FLAG_RETRANS_DATA_ACKED;
3230 ca_seq_rtt = -1;
3231 seq_rtt = -1;
3232 if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
3233 flag |= FLAG_NONHEAD_RETRANS_ACKED;
3234 } else {
3235 ca_seq_rtt = now - scb->when;
3236 last_ackt = skb->tstamp;
3237 if (seq_rtt < 0) {
3238 seq_rtt = ca_seq_rtt;
3239 }
3240 if (!(sacked & TCPCB_SACKED_ACKED))
3241 reord = min(pkts_acked, reord);
3242 }
3243
3244 if (sacked & TCPCB_SACKED_ACKED)
3245 tp->sacked_out -= acked_pcount;
3246 if (sacked & TCPCB_LOST)
3247 tp->lost_out -= acked_pcount;
3248
3249 tp->packets_out -= acked_pcount;
3250 pkts_acked += acked_pcount;
3251
3252
3253
3254
3255
3256
3257
3258
3259 if (!(scb->tcp_flags & TCPHDR_SYN)) {
3260 flag |= FLAG_DATA_ACKED;
3261 } else {
3262 flag |= FLAG_SYN_ACKED;
3263 tp->retrans_stamp = 0;
3264 }
3265
3266 if (!fully_acked)
3267 break;
3268
3269 tcp_unlink_write_queue(skb, sk);
3270 sk_wmem_free_skb(sk, skb);
3271 tp->scoreboard_skb_hint = NULL;
3272 if (skb == tp->retransmit_skb_hint)
3273 tp->retransmit_skb_hint = NULL;
3274 if (skb == tp->lost_skb_hint)
3275 tp->lost_skb_hint = NULL;
3276 }
3277
3278 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3279 tp->snd_up = tp->snd_una;
3280
3281 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
3282 flag |= FLAG_SACK_RENEGING;
3283
3284 if (flag & FLAG_ACKED) {
3285 const struct tcp_congestion_ops *ca_ops
3286 = inet_csk(sk)->icsk_ca_ops;
3287
3288 if (unlikely(icsk->icsk_mtup.probe_size &&
3289 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3290 tcp_mtup_probe_success(sk);
3291 }
3292
3293 tcp_ack_update_rtt(sk, flag, seq_rtt);
3294 tcp_rearm_rto(sk);
3295
3296 if (tcp_is_reno(tp)) {
3297 tcp_remove_reno_sacks(sk, pkts_acked);
3298 } else {
3299 int delta;
3300
3301
3302 if (reord < prior_fackets)
3303 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
3304
3305 delta = tcp_is_fack(tp) ? pkts_acked :
3306 prior_sacked - tp->sacked_out;
3307 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3308 }
3309
3310 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
3311
3312 if (ca_ops->pkts_acked) {
3313 s32 rtt_us = -1;
3314
3315
3316 if (!(flag & FLAG_RETRANS_DATA_ACKED)) {
3317
3318 if (ca_ops->flags & TCP_CONG_RTT_STAMP &&
3319 !ktime_equal(last_ackt,
3320 net_invalid_timestamp()))
3321 rtt_us = ktime_us_delta(ktime_get_real(),
3322 last_ackt);
3323 else if (ca_seq_rtt >= 0)
3324 rtt_us = jiffies_to_usecs(ca_seq_rtt);
3325 }
3326
3327 ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
3328 }
3329 }
3330
3331#if FASTRETRANS_DEBUG > 0
3332 WARN_ON((int)tp->sacked_out < 0);
3333 WARN_ON((int)tp->lost_out < 0);
3334 WARN_ON((int)tp->retrans_out < 0);
3335 if (!tp->packets_out && tcp_is_sack(tp)) {
3336 icsk = inet_csk(sk);
3337 if (tp->lost_out) {
3338 pr_debug("Leak l=%u %d\n",
3339 tp->lost_out, icsk->icsk_ca_state);
3340 tp->lost_out = 0;
3341 }
3342 if (tp->sacked_out) {
3343 pr_debug("Leak s=%u %d\n",
3344 tp->sacked_out, icsk->icsk_ca_state);
3345 tp->sacked_out = 0;
3346 }
3347 if (tp->retrans_out) {
3348 pr_debug("Leak r=%u %d\n",
3349 tp->retrans_out, icsk->icsk_ca_state);
3350 tp->retrans_out = 0;
3351 }
3352 }
3353#endif
3354 return flag;
3355}
3356
3357static void tcp_ack_probe(struct sock *sk)
3358{
3359 const struct tcp_sock *tp = tcp_sk(sk);
3360 struct inet_connection_sock *icsk = inet_csk(sk);
3361
3362
3363
3364 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
3365 icsk->icsk_backoff = 0;
3366 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
3367
3368
3369
3370 } else {
3371 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3372 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
3373 TCP_RTO_MAX);
3374 }
3375}
3376
3377static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
3378{
3379 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3380 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3381}
3382
3383static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3384{
3385 const struct tcp_sock *tp = tcp_sk(sk);
3386 return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
3387 !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
3388}
3389
3390
3391
3392
3393static inline bool tcp_may_update_window(const struct tcp_sock *tp,
3394 const u32 ack, const u32 ack_seq,
3395 const u32 nwin)
3396{
3397 return after(ack, tp->snd_una) ||
3398 after(ack_seq, tp->snd_wl1) ||
3399 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
3400}
3401
3402
3403
3404
3405
3406
3407static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
3408 u32 ack_seq)
3409{
3410 struct tcp_sock *tp = tcp_sk(sk);
3411 int flag = 0;
3412 u32 nwin = ntohs(tcp_hdr(skb)->window);
3413
3414 if (likely(!tcp_hdr(skb)->syn))
3415 nwin <<= tp->rx_opt.snd_wscale;
3416
3417 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3418 flag |= FLAG_WIN_UPDATE;
3419 tcp_update_wl(tp, ack_seq);
3420
3421 if (tp->snd_wnd != nwin) {
3422 tp->snd_wnd = nwin;
3423
3424
3425
3426
3427 tp->pred_flags = 0;
3428 tcp_fast_path_check(sk);
3429
3430 if (nwin > tp->max_window) {
3431 tp->max_window = nwin;
3432 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3433 }
3434 }
3435 }
3436
3437 tp->snd_una = ack;
3438
3439 return flag;
3440}
3441
3442
3443
3444
3445static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
3446{
3447 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
3448 tp->snd_cwnd_cnt = 0;
3449 tp->bytes_acked = 0;
3450 TCP_ECN_queue_cwr(tp);
3451 tcp_moderate_cwnd(tp);
3452}
3453
3454
3455
3456
/* Rate-halving response to a spurious RTO: simply enter the CWR state
 * (second argument of tcp_enter_cwr() is 0).
 */
static void tcp_ratehalving_spur_to_response(struct sock *sk)
{
	tcp_enter_cwr(sk, 0);
}
3461
3462static void tcp_undo_spur_to_response(struct sock *sk, int flag)
3463{
3464 if (flag & FLAG_ECE)
3465 tcp_ratehalving_spur_to_response(sk);
3466 else
3467 tcp_undo_cwr(sk, true);
3468}
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500static bool tcp_process_frto(struct sock *sk, int flag)
3501{
3502 struct tcp_sock *tp = tcp_sk(sk);
3503
3504 tcp_verify_left_out(tp);
3505
3506
3507 if (flag & FLAG_DATA_ACKED)
3508 inet_csk(sk)->icsk_retransmits = 0;
3509
3510 if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
3511 ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
3512 tp->undo_marker = 0;
3513
3514 if (!before(tp->snd_una, tp->frto_highmark)) {
3515 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
3516 return true;
3517 }
3518
3519 if (!tcp_is_sackfrto(tp)) {
3520
3521
3522
3523
3524 if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP))
3525 return true;
3526
3527 if (!(flag & FLAG_DATA_ACKED)) {
3528 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
3529 flag);
3530 return true;
3531 }
3532 } else {
3533 if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
3534
3535 tp->snd_cwnd = min(tp->snd_cwnd,
3536 tcp_packets_in_flight(tp));
3537 return true;
3538 }
3539
3540 if ((tp->frto_counter >= 2) &&
3541 (!(flag & FLAG_FORWARD_PROGRESS) ||
3542 ((flag & FLAG_DATA_SACKED) &&
3543 !(flag & FLAG_ONLY_ORIG_SACKED)))) {
3544
3545 if (!(flag & FLAG_FORWARD_PROGRESS) &&
3546 (flag & FLAG_NOT_DUP))
3547 return true;
3548
3549 tcp_enter_frto_loss(sk, 3, flag);
3550 return true;
3551 }
3552 }
3553
3554 if (tp->frto_counter == 1) {
3555
3556 tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
3557 tp->frto_counter = 2;
3558
3559 if (!tcp_may_send_now(sk))
3560 tcp_enter_frto_loss(sk, 2, flag);
3561
3562 return true;
3563 } else {
3564 switch (sysctl_tcp_frto_response) {
3565 case 2:
3566 tcp_undo_spur_to_response(sk, flag);
3567 break;
3568 case 1:
3569 tcp_conservative_spur_to_response(tp);
3570 break;
3571 default:
3572 tcp_ratehalving_spur_to_response(sk);
3573 break;
3574 }
3575 tp->frto_counter = 0;
3576 tp->undo_marker = 0;
3577 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS);
3578 }
3579 return false;
3580}
3581
3582
3583static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3584{
3585 struct inet_connection_sock *icsk = inet_csk(sk);
3586 struct tcp_sock *tp = tcp_sk(sk);
3587 u32 prior_snd_una = tp->snd_una;
3588 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3589 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3590 bool is_dupack = false;
3591 u32 prior_in_flight;
3592 u32 prior_fackets;
3593 int prior_packets;
3594 int prior_sacked = tp->sacked_out;
3595 int pkts_acked = 0;
3596 bool frto_cwnd = false;
3597
3598
3599
3600
3601 if (before(ack, prior_snd_una))
3602 goto old_ack;
3603
3604
3605
3606
3607 if (after(ack, tp->snd_nxt))
3608 goto invalid_ack;
3609
3610 if (tp->early_retrans_delayed)
3611 tcp_rearm_rto(sk);
3612
3613 if (after(ack, prior_snd_una))
3614 flag |= FLAG_SND_UNA_ADVANCED;
3615
3616 if (sysctl_tcp_abc) {
3617 if (icsk->icsk_ca_state < TCP_CA_CWR)
3618 tp->bytes_acked += ack - prior_snd_una;
3619 else if (icsk->icsk_ca_state == TCP_CA_Loss)
3620
3621 tp->bytes_acked += min(ack - prior_snd_una,
3622 tp->mss_cache);
3623 }
3624
3625 prior_fackets = tp->fackets_out;
3626 prior_in_flight = tcp_packets_in_flight(tp);
3627
3628 if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
3629
3630
3631
3632
3633 tcp_update_wl(tp, ack_seq);
3634 tp->snd_una = ack;
3635 flag |= FLAG_WIN_UPDATE;
3636
3637 tcp_ca_event(sk, CA_EVENT_FAST_ACK);
3638
3639 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
3640 } else {
3641 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3642 flag |= FLAG_DATA;
3643 else
3644 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3645
3646 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3647
3648 if (TCP_SKB_CB(skb)->sacked)
3649 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3650
3651 if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
3652 flag |= FLAG_ECE;
3653
3654 tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
3655 }
3656
3657
3658
3659
3660 sk->sk_err_soft = 0;
3661 icsk->icsk_probes_out = 0;
3662 tp->rcv_tstamp = tcp_time_stamp;
3663 prior_packets = tp->packets_out;
3664 if (!prior_packets)
3665 goto no_queue;
3666
3667
3668 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
3669
3670 pkts_acked = prior_packets - tp->packets_out;
3671
3672 if (tp->frto_counter)
3673 frto_cwnd = tcp_process_frto(sk, flag);
3674
3675 if (before(tp->frto_highmark, tp->snd_una))
3676 tp->frto_highmark = 0;
3677
3678 if (tcp_ack_is_dubious(sk, flag)) {
3679
3680 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
3681 tcp_may_raise_cwnd(sk, flag))
3682 tcp_cong_avoid(sk, ack, prior_in_flight);
3683 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3684 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
3685 is_dupack, flag);
3686 } else {
3687 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
3688 tcp_cong_avoid(sk, ack, prior_in_flight);
3689 }
3690
3691 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
3692 struct dst_entry *dst = __sk_dst_get(sk);
3693 if (dst)
3694 dst_confirm(dst);
3695 }
3696 return 1;
3697
3698no_queue:
3699
3700 if (flag & FLAG_DSACKING_ACK)
3701 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
3702 is_dupack, flag);
3703
3704
3705
3706
3707 if (tcp_send_head(sk))
3708 tcp_ack_probe(sk);
3709 return 1;
3710
3711invalid_ack:
3712 SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3713 return -1;
3714
3715old_ack:
3716
3717
3718
3719 if (TCP_SKB_CB(skb)->sacked) {
3720 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3721 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
3722 is_dupack, flag);
3723 }
3724
3725 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3726 return 0;
3727}
3728
3729
3730
3731
3732
3733void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx,
3734 const u8 **hvpp, int estab,
3735 struct tcp_fastopen_cookie *foc)
3736{
3737 const unsigned char *ptr;
3738 const struct tcphdr *th = tcp_hdr(skb);
3739 int length = (th->doff * 4) - sizeof(struct tcphdr);
3740
3741 ptr = (const unsigned char *)(th + 1);
3742 opt_rx->saw_tstamp = 0;
3743
3744 while (length > 0) {
3745 int opcode = *ptr++;
3746 int opsize;
3747
3748 switch (opcode) {
3749 case TCPOPT_EOL:
3750 return;
3751 case TCPOPT_NOP:
3752 length--;
3753 continue;
3754 default:
3755 opsize = *ptr++;
3756 if (opsize < 2)
3757 return;
3758 if (opsize > length)
3759 return;
3760 switch (opcode) {
3761 case TCPOPT_MSS:
3762 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
3763 u16 in_mss = get_unaligned_be16(ptr);
3764 if (in_mss) {
3765 if (opt_rx->user_mss &&
3766 opt_rx->user_mss < in_mss)
3767 in_mss = opt_rx->user_mss;
3768 opt_rx->mss_clamp = in_mss;
3769 }
3770 }
3771 break;
3772 case TCPOPT_WINDOW:
3773 if (opsize == TCPOLEN_WINDOW && th->syn &&
3774 !estab && sysctl_tcp_window_scaling) {
3775 __u8 snd_wscale = *(__u8 *)ptr;
3776 opt_rx->wscale_ok = 1;
3777 if (snd_wscale > 14) {
3778 net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n",
3779 __func__,
3780 snd_wscale);
3781 snd_wscale = 14;
3782 }
3783 opt_rx->snd_wscale = snd_wscale;
3784 }
3785 break;
3786 case TCPOPT_TIMESTAMP:
3787 if ((opsize == TCPOLEN_TIMESTAMP) &&
3788 ((estab && opt_rx->tstamp_ok) ||
3789 (!estab && sysctl_tcp_timestamps))) {
3790 opt_rx->saw_tstamp = 1;
3791 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3792 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
3793 }
3794 break;
3795 case TCPOPT_SACK_PERM:
3796 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3797 !estab && sysctl_tcp_sack) {
3798 opt_rx->sack_ok = TCP_SACK_SEEN;
3799 tcp_sack_reset(opt_rx);
3800 }
3801 break;
3802
3803 case TCPOPT_SACK:
3804 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3805 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3806 opt_rx->sack_ok) {
3807 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3808 }
3809 break;
3810#ifdef CONFIG_TCP_MD5SIG
3811 case TCPOPT_MD5SIG:
3812
3813
3814
3815
3816 break;
3817#endif
3818 case TCPOPT_COOKIE:
3819
3820
3821 switch (opsize) {
3822 case TCPOLEN_COOKIE_BASE:
3823
3824 break;
3825 case TCPOLEN_COOKIE_PAIR:
3826
3827 break;
3828 case TCPOLEN_COOKIE_MIN+0:
3829 case TCPOLEN_COOKIE_MIN+2:
3830 case TCPOLEN_COOKIE_MIN+4:
3831 case TCPOLEN_COOKIE_MIN+6:
3832 case TCPOLEN_COOKIE_MAX:
3833
3834 opt_rx->cookie_plus = opsize;
3835 *hvpp = ptr;
3836 break;
3837 default:
3838
3839 break;
3840 }
3841 break;
3842
3843 case TCPOPT_EXP:
3844
3845
3846
3847
3848 if (opsize < TCPOLEN_EXP_FASTOPEN_BASE ||
3849 get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC ||
3850 foc == NULL || !th->syn || (opsize & 1))
3851 break;
3852 foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE;
3853 if (foc->len >= TCP_FASTOPEN_COOKIE_MIN &&
3854 foc->len <= TCP_FASTOPEN_COOKIE_MAX)
3855 memcpy(foc->val, ptr + 2, foc->len);
3856 else if (foc->len != 0)
3857 foc->len = -1;
3858 break;
3859
3860 }
3861 ptr += opsize-2;
3862 length -= opsize;
3863 }
3864 }
3865}
3866EXPORT_SYMBOL(tcp_parse_options);
3867
3868static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
3869{
3870 const __be32 *ptr = (const __be32 *)(th + 1);
3871
3872 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
3873 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
3874 tp->rx_opt.saw_tstamp = 1;
3875 ++ptr;
3876 tp->rx_opt.rcv_tsval = ntohl(*ptr);
3877 ++ptr;
3878 tp->rx_opt.rcv_tsecr = ntohl(*ptr);
3879 return true;
3880 }
3881 return false;
3882}
3883
3884
3885
3886
3887static bool tcp_fast_parse_options(const struct sk_buff *skb,
3888 const struct tcphdr *th,
3889 struct tcp_sock *tp, const u8 **hvpp)
3890{
3891
3892
3893
3894 if (th->doff == (sizeof(*th) / 4)) {
3895 tp->rx_opt.saw_tstamp = 0;
3896 return false;
3897 } else if (tp->rx_opt.tstamp_ok &&
3898 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
3899 if (tcp_parse_aligned_timestamp(tp, th))
3900 return true;
3901 }
3902 tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL);
3903 return true;
3904}
3905
3906#ifdef CONFIG_TCP_MD5SIG
3907
3908
3909
3910const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
3911{
3912 int length = (th->doff << 2) - sizeof(*th);
3913 const u8 *ptr = (const u8 *)(th + 1);
3914
3915
3916 if (length < TCPOLEN_MD5SIG)
3917 return NULL;
3918
3919 while (length > 0) {
3920 int opcode = *ptr++;
3921 int opsize;
3922
3923 switch(opcode) {
3924 case TCPOPT_EOL:
3925 return NULL;
3926 case TCPOPT_NOP:
3927 length--;
3928 continue;
3929 default:
3930 opsize = *ptr++;
3931 if (opsize < 2 || opsize > length)
3932 return NULL;
3933 if (opcode == TCPOPT_MD5SIG)
3934 return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
3935 }
3936 ptr += opsize - 2;
3937 length -= opsize;
3938 }
3939 return NULL;
3940}
3941EXPORT_SYMBOL(tcp_parse_md5sig_option);
3942#endif
3943
3944static inline void tcp_store_ts_recent(struct tcp_sock *tp)
3945{
3946 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3947 tp->rx_opt.ts_recent_stamp = get_seconds();
3948}
3949
3950static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3951{
3952 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
3953
3954
3955
3956
3957
3958
3959
3960 if (tcp_paws_check(&tp->rx_opt, 0))
3961 tcp_store_ts_recent(tp);
3962 }
3963}
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
3989{
3990 const struct tcp_sock *tp = tcp_sk(sk);
3991 const struct tcphdr *th = tcp_hdr(skb);
3992 u32 seq = TCP_SKB_CB(skb)->seq;
3993 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3994
3995 return (
3996 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
3997
3998
3999 ack == tp->snd_una &&
4000
4001
4002 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
4003
4004
4005 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
4006}
4007
4008static inline bool tcp_paws_discard(const struct sock *sk,
4009 const struct sk_buff *skb)
4010{
4011 const struct tcp_sock *tp = tcp_sk(sk);
4012
4013 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
4014 !tcp_disordered_ack(sk, skb);
4015}
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
4031{
4032 return !before(end_seq, tp->rcv_wup) &&
4033 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
4034}
4035
4036
/* Handle a received reset on @sk.
 *
 * The socket error is chosen from the current state (nothing at all is
 * done for a socket already in TCP_CLOSE), the error is reported unless
 * the socket is dead, and the connection is torn down via tcp_done().
 */
static void tcp_reset(struct sock *sk)
{
	int err;

	switch (sk->sk_state) {
	case TCP_CLOSE:
		return;
	case TCP_SYN_SENT:
		err = ECONNREFUSED;
		break;
	case TCP_CLOSE_WAIT:
		err = EPIPE;
		break;
	default:
		err = ECONNRESET;
		break;
	}
	sk->sk_err = err;

	/* Write barrier between storing sk_err and what follows. */
	smp_wmb();

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_error_report(sk);

	tcp_done(sk);
}
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075static void tcp_fin(struct sock *sk)
4076{
4077 struct tcp_sock *tp = tcp_sk(sk);
4078
4079 inet_csk_schedule_ack(sk);
4080
4081 sk->sk_shutdown |= RCV_SHUTDOWN;
4082 sock_set_flag(sk, SOCK_DONE);
4083
4084 switch (sk->sk_state) {
4085 case TCP_SYN_RECV:
4086 case TCP_ESTABLISHED:
4087
4088 tcp_set_state(sk, TCP_CLOSE_WAIT);
4089 inet_csk(sk)->icsk_ack.pingpong = 1;
4090 break;
4091
4092 case TCP_CLOSE_WAIT:
4093 case TCP_CLOSING:
4094
4095
4096
4097 break;
4098 case TCP_LAST_ACK:
4099
4100 break;
4101
4102 case TCP_FIN_WAIT1:
4103
4104
4105
4106
4107 tcp_send_ack(sk);
4108 tcp_set_state(sk, TCP_CLOSING);
4109 break;
4110 case TCP_FIN_WAIT2:
4111
4112 tcp_send_ack(sk);
4113 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
4114 break;
4115 default:
4116
4117
4118
4119 pr_err("%s: Impossible, sk->sk_state=%d\n",
4120 __func__, sk->sk_state);
4121 break;
4122 }
4123
4124
4125
4126
4127 __skb_queue_purge(&tp->out_of_order_queue);
4128 if (tcp_is_sack(tp))
4129 tcp_sack_reset(&tp->rx_opt);
4130 sk_mem_reclaim(sk);
4131
4132 if (!sock_flag(sk, SOCK_DEAD)) {
4133 sk->sk_state_change(sk);
4134
4135
4136 if (sk->sk_shutdown == SHUTDOWN_MASK ||
4137 sk->sk_state == TCP_CLOSE)
4138 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
4139 else
4140 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
4141 }
4142}
4143
4144static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4145 u32 end_seq)
4146{
4147 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
4148 if (before(seq, sp->start_seq))
4149 sp->start_seq = seq;
4150 if (after(end_seq, sp->end_seq))
4151 sp->end_seq = end_seq;
4152 return true;
4153 }
4154 return false;
4155}
4156
4157static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4158{
4159 struct tcp_sock *tp = tcp_sk(sk);
4160
4161 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4162 int mib_idx;
4163
4164 if (before(seq, tp->rcv_nxt))
4165 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
4166 else
4167 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
4168
4169 NET_INC_STATS_BH(sock_net(sk), mib_idx);
4170
4171 tp->rx_opt.dsack = 1;
4172 tp->duplicate_sack[0].start_seq = seq;
4173 tp->duplicate_sack[0].end_seq = end_seq;
4174 }
4175}
4176
4177static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4178{
4179 struct tcp_sock *tp = tcp_sk(sk);
4180
4181 if (!tp->rx_opt.dsack)
4182 tcp_dsack_set(sk, seq, end_seq);
4183 else
4184 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
4185}
4186
4187static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
4188{
4189 struct tcp_sock *tp = tcp_sk(sk);
4190
4191 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4192 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4193 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4194 tcp_enter_quickack_mode(sk);
4195
4196 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4197 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4198
4199 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
4200 end_seq = tp->rcv_nxt;
4201 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
4202 }
4203 }
4204
4205 tcp_send_ack(sk);
4206}
4207
4208
4209
4210
4211static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4212{
4213 int this_sack;
4214 struct tcp_sack_block *sp = &tp->selective_acks[0];
4215 struct tcp_sack_block *swalk = sp + 1;
4216
4217
4218
4219
4220 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
4221 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
4222 int i;
4223
4224
4225
4226
4227 tp->rx_opt.num_sacks--;
4228 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4229 sp[i] = sp[i + 1];
4230 continue;
4231 }
4232 this_sack++, swalk++;
4233 }
4234}
4235
4236static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4237{
4238 struct tcp_sock *tp = tcp_sk(sk);
4239 struct tcp_sack_block *sp = &tp->selective_acks[0];
4240 int cur_sacks = tp->rx_opt.num_sacks;
4241 int this_sack;
4242
4243 if (!cur_sacks)
4244 goto new_sack;
4245
4246 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
4247 if (tcp_sack_extend(sp, seq, end_seq)) {
4248
4249 for (; this_sack > 0; this_sack--, sp--)
4250 swap(*sp, *(sp - 1));
4251 if (cur_sacks > 1)
4252 tcp_sack_maybe_coalesce(tp);
4253 return;
4254 }
4255 }
4256
4257
4258
4259
4260
4261
4262
4263 if (this_sack >= TCP_NUM_SACKS) {
4264 this_sack--;
4265 tp->rx_opt.num_sacks--;
4266 sp--;
4267 }
4268 for (; this_sack > 0; this_sack--, sp--)
4269 *sp = *(sp - 1);
4270
4271new_sack:
4272
4273 sp->start_seq = seq;
4274 sp->end_seq = end_seq;
4275 tp->rx_opt.num_sacks++;
4276}
4277
4278
4279
4280static void tcp_sack_remove(struct tcp_sock *tp)
4281{
4282 struct tcp_sack_block *sp = &tp->selective_acks[0];
4283 int num_sacks = tp->rx_opt.num_sacks;
4284 int this_sack;
4285
4286
4287 if (skb_queue_empty(&tp->out_of_order_queue)) {
4288 tp->rx_opt.num_sacks = 0;
4289 return;
4290 }
4291
4292 for (this_sack = 0; this_sack < num_sacks;) {
4293
4294 if (!before(tp->rcv_nxt, sp->start_seq)) {
4295 int i;
4296
4297
4298 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4299
4300
4301 for (i=this_sack+1; i < num_sacks; i++)
4302 tp->selective_acks[i-1] = tp->selective_acks[i];
4303 num_sacks--;
4304 continue;
4305 }
4306 this_sack++;
4307 sp++;
4308 }
4309 tp->rx_opt.num_sacks = num_sacks;
4310}
4311
4312
4313
4314
4315static void tcp_ofo_queue(struct sock *sk)
4316{
4317 struct tcp_sock *tp = tcp_sk(sk);
4318 __u32 dsack_high = tp->rcv_nxt;
4319 struct sk_buff *skb;
4320
4321 while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
4322 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
4323 break;
4324
4325 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
4326 __u32 dsack = dsack_high;
4327 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
4328 dsack_high = TCP_SKB_CB(skb)->end_seq;
4329 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
4330 }
4331
4332 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4333 SOCK_DEBUG(sk, "ofo packet was already received\n");
4334 __skb_unlink(skb, &tp->out_of_order_queue);
4335 __kfree_skb(skb);
4336 continue;
4337 }
4338 SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
4339 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4340 TCP_SKB_CB(skb)->end_seq);
4341
4342 __skb_unlink(skb, &tp->out_of_order_queue);
4343 __skb_queue_tail(&sk->sk_receive_queue, skb);
4344 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4345 if (tcp_hdr(skb)->fin)
4346 tcp_fin(sk);
4347 }
4348}
4349
4350static bool tcp_prune_ofo_queue(struct sock *sk);
4351static int tcp_prune_queue(struct sock *sk);
4352
4353static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
4354 unsigned int size)
4355{
4356 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4357 !sk_rmem_schedule(sk, skb, size)) {
4358
4359 if (tcp_prune_queue(sk) < 0)
4360 return -1;
4361
4362 if (!sk_rmem_schedule(sk, skb, size)) {
4363 if (!tcp_prune_ofo_queue(sk))
4364 return -1;
4365
4366 if (!sk_rmem_schedule(sk, skb, size))
4367 return -1;
4368 }
4369 }
4370 return 0;
4371}
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386static bool tcp_try_coalesce(struct sock *sk,
4387 struct sk_buff *to,
4388 struct sk_buff *from,
4389 bool *fragstolen)
4390{
4391 int delta;
4392
4393 *fragstolen = false;
4394
4395 if (tcp_hdr(from)->fin)
4396 return false;
4397
4398
4399 if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
4400 return false;
4401
4402 if (!skb_try_coalesce(to, from, fragstolen, &delta))
4403 return false;
4404
4405 atomic_add(delta, &sk->sk_rmem_alloc);
4406 sk_mem_charge(sk, delta);
4407 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
4408 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
4409 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
4410 return true;
4411}
4412
/* Put a segment on the out-of-order queue of @sk.
 *
 * The skb is dropped (and counted as TCPOFODROP) when receive memory cannot
 * be scheduled for it. Otherwise it is inserted into
 * tp->out_of_order_queue in sequence order, or merged into the current
 * queue tail when it starts exactly where the tail ends. Queued segments
 * that the new one completely covers are freed, and the SACK / D-SACK
 * state is updated along the way.
 */
static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb1;
	u32 seq, end_seq;

	TCP_ECN_check_ce(tp, skb);

	/* No memory for this segment: count the drop and free it. */
	if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP);
		__kfree_skb(skb);
		return;
	}

	/* pred_flags is cleared and an ACK is scheduled -- presumably to keep
	 * the fast path off while out-of-order data is pending (confirm).
	 */
	tp->pred_flags = 0;
	inet_csk_schedule_ack(sk);

	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
		   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);

	skb1 = skb_peek_tail(&tp->out_of_order_queue);
	if (!skb1) {
		/* Queue was empty: this segment becomes the only SACK block. */
		if (tcp_is_sack(tp)) {
			tp->rx_opt.num_sacks = 1;
			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
			tp->selective_acks[0].end_seq =
						TCP_SKB_CB(skb)->end_seq;
		}
		__skb_queue_head(&tp->out_of_order_queue, skb);
		goto end;
	}

	seq = TCP_SKB_CB(skb)->seq;
	end_seq = TCP_SKB_CB(skb)->end_seq;

	/* Segment starts exactly where the queue tail ends. */
	if (seq == TCP_SKB_CB(skb1)->end_seq) {
		bool fragstolen;

		if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
		} else {
			/* Data now lives in skb1; skb itself is released and
			 * must not be touched (or owned) any more.
			 */
			kfree_skb_partial(skb, fragstolen);
			skb = NULL;
		}

		if (!tp->rx_opt.num_sacks ||
		    tp->selective_acks[0].end_seq != seq)
			goto add_sack;

		/* First SACK block ended at seq: simply extend it. */
		tp->selective_acks[0].end_seq = end_seq;
		goto end;
	}

	/* Walk backwards from the tail to the last queued skb whose start is
	 * not after seq; skb1 becomes NULL if there is none.
	 */
	while (1) {
		if (!after(TCP_SKB_CB(skb1)->seq, seq))
			break;
		if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
			skb1 = NULL;
			break;
		}
		skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
	}

	/* Does the new segment overlap the one found above? */
	if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
		if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
			/* Entirely contained in skb1: nothing new, free it
			 * and report the range as a duplicate.
			 */
			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
			__kfree_skb(skb);
			skb = NULL;
			tcp_dsack_set(sk, seq, end_seq);
			goto add_sack;
		}
		if (after(seq, TCP_SKB_CB(skb1)->seq)) {
			/* Partial overlap: report the overlapping part. */
			tcp_dsack_set(sk, seq,
				      TCP_SKB_CB(skb1)->end_seq);
		} else {
			/* skb1 starts at seq and is shorter than the new
			 * segment: step back so the new skb is inserted in
			 * front of it (the loop below then removes skb1).
			 */
			if (skb_queue_is_first(&tp->out_of_order_queue,
					       skb1))
				skb1 = NULL;
			else
				skb1 = skb_queue_prev(
						&tp->out_of_order_queue,
						skb1);
		}
	}
	if (!skb1)
		__skb_queue_head(&tp->out_of_order_queue, skb);
	else
		__skb_queue_after(&tp->out_of_order_queue, skb1, skb);

	/* Remove following segments that the new one overlaps. */
	while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
		skb1 = skb_queue_next(&tp->out_of_order_queue, skb);

		/* Next segment starts at or after our end: no overlap. */
		if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
			break;
		/* Next segment reaches past our end: only partly covered,
		 * keep it and report the overlapping range.
		 */
		if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
			tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
					 end_seq);
			break;
		}
		/* Next segment is fully covered: unlink and free it. */
		__skb_unlink(skb1, &tp->out_of_order_queue);
		tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
				 TCP_SKB_CB(skb1)->end_seq);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
		__kfree_skb(skb1);
	}

add_sack:
	if (tcp_is_sack(tp))
		tcp_sack_new_ofo_skb(sk, seq, end_seq);
end:
	/* skb is NULL when it was merged or freed above. */
	if (skb)
		skb_set_owner_r(skb, sk);
}
4535
4536static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
4537 bool *fragstolen)
4538{
4539 int eaten;
4540 struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
4541
4542 __skb_pull(skb, hdrlen);
4543 eaten = (tail &&
4544 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
4545 tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4546 if (!eaten) {
4547 __skb_queue_tail(&sk->sk_receive_queue, skb);
4548 skb_set_owner_r(skb, sk);
4549 }
4550 return eaten;
4551}
4552
4553int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
4554{
4555 struct sk_buff *skb = NULL;
4556 struct tcphdr *th;
4557 bool fragstolen;
4558
4559 skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
4560 if (!skb)
4561 goto err;
4562
4563 if (tcp_try_rmem_schedule(sk, skb, size + sizeof(*th)))
4564 goto err_free;
4565
4566 th = (struct tcphdr *)skb_put(skb, sizeof(*th));
4567 skb_reset_transport_header(skb);
4568 memset(th, 0, sizeof(*th));
4569
4570 if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size))
4571 goto err_free;
4572
4573 TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
4574 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
4575 TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
4576
4577 if (tcp_queue_rcv(sk, skb, sizeof(*th), &fragstolen)) {
4578 WARN_ON_ONCE(fragstolen);
4579 __kfree_skb(skb);
4580 }
4581 return size;
4582
4583err_free:
4584 kfree_skb(skb);
4585err:
4586 return -ENOMEM;
4587}
4588
/* Dispose of the payload of an incoming segment.
 *
 * Depending on where the segment lies relative to tp->rcv_nxt and the
 * receive window it is:
 *  - delivered in order (copied straight to the waiting reader and/or put
 *    on sk_receive_queue),
 *  - dropped as an old duplicate or as out-of-window data,
 *  - trimmed logically (partial overlap with already received data) and
 *    then queued in order, or
 *  - handed to tcp_data_queue_ofo().
 *
 * 'eaten' tracks what happened to the skb: -1 nothing attempted yet,
 * 0 not (fully) consumed and therefore queued, > 0 fully consumed so the
 * skb can be released at the end of the in-order path.
 */
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcp_sock *tp = tcp_sk(sk);
	int eaten = -1;
	bool fragstolen = false;

	/* Segment occupies no sequence space: nothing to queue. */
	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
		goto drop;

	skb_dst_drop(skb);
	/* Strip the TCP header (doff is in 32-bit words). */
	__skb_pull(skb, th->doff * 4);

	TCP_ECN_accept_cwr(tp, skb);

	tp->rx_opt.dsack = 0;

	/* In-sequence segment: it starts exactly at rcv_nxt. */
	if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
		if (tcp_receive_window(tp) == 0)
			goto out_of_window;

		/* The current task is the reader, owns the socket, has
		 * consumed everything up to rcv_nxt and still wants data:
		 * copy directly into its iovec.
		 */
		if (tp->ucopy.task == current &&
		    tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
		    sock_owned_by_user(sk) && !tp->urg_data) {
			int chunk = min_t(unsigned int, skb->len,
					  tp->ucopy.len);

			__set_current_state(TASK_RUNNING);

			/* Bottom halves are re-enabled around the copy. */
			local_bh_enable();
			if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
				tp->ucopy.len -= chunk;
				tp->copied_seq += chunk;
				/* 1 only if the whole skb was copied. */
				eaten = (chunk == skb->len);
				tcp_rcv_space_adjust(sk);
			}
			local_bh_disable();
		}

		if (eaten <= 0) {
queue_and_out:
			/* Memory is scheduled only if no direct copy
			 * succeeded (eaten still -1).
			 */
			if (eaten < 0 &&
			    tcp_try_rmem_schedule(sk, skb, skb->truesize))
				goto drop;

			eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
		}
		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		if (skb->len)
			tcp_event_data_recv(sk, skb);
		if (th->fin)
			tcp_fin(sk);

		/* The new data may have closed a gap: let tcp_ofo_queue()
		 * process the out-of-order queue.
		 */
		if (!skb_queue_empty(&tp->out_of_order_queue)) {
			tcp_ofo_queue(sk);

			/* Queue fully drained: leave pingpong mode. */
			if (skb_queue_empty(&tp->out_of_order_queue))
				inet_csk(sk)->icsk_ack.pingpong = 0;
		}

		if (tp->rx_opt.num_sacks)
			tcp_sack_remove(tp);

		tcp_fast_path_check(sk);

		/* Data was copied out or merged elsewhere: release the skb. */
		if (eaten > 0)
			kfree_skb_partial(skb, fragstolen);
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_data_ready(sk, 0);
		return;
	}

	/* Segment ends at or before rcv_nxt: a pure retransmission. */
	if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);

out_of_window:
		tcp_enter_quickack_mode(sk);
		inet_csk_schedule_ack(sk);
drop:
		__kfree_skb(skb);
		return;
	}

	/* Segment starts at or beyond the right edge of the receive window. */
	if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
		goto out_of_window;

	tcp_enter_quickack_mode(sk);

	if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
		/* Partial packet: starts before rcv_nxt, ends after it. */
		SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
			   tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
			   TCP_SKB_CB(skb)->end_seq);

		/* The already-received head is reported as a duplicate. */
		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);

		/* With a zero window the new part cannot be accepted. */
		if (!tcp_receive_window(tp))
			goto out_of_window;
		goto queue_and_out;
	}

	/* Starts after rcv_nxt, inside the window: out of order. */
	tcp_data_queue_ofo(sk, skb);
}
4706
4707static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
4708 struct sk_buff_head *list)
4709{
4710 struct sk_buff *next = NULL;
4711
4712 if (!skb_queue_is_last(list, skb))
4713 next = skb_queue_next(list, skb);
4714
4715 __skb_unlink(skb, list);
4716 __kfree_skb(skb);
4717 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
4718
4719 return next;
4720}
4721
4722
4723
4724
4725
4726
4727
4728
4729
/* Collapse the run of skbs on @list that begins at @head and stops before
 * @tail (NULL means "up to the end of the list"), covering the sequence
 * range @start..@end.
 *
 * Phase 1 walks the run, freeing skbs that lie entirely before @start,
 * until it finds an skb worth rewriting. Phase 2 copies the remaining
 * payload into freshly allocated skbs and frees the originals as they are
 * consumed. Segments carrying SYN or FIN are never rewritten; reaching
 * one ends the operation.
 */
static void
tcp_collapse(struct sock *sk, struct sk_buff_head *list,
	     struct sk_buff *head, struct sk_buff *tail,
	     u32 start, u32 end)
{
	struct sk_buff *skb, *n;
	bool end_of_skbs;

	/* Phase 1: find the point where collapsing should begin. */
	skb = head;
restart:
	end_of_skbs = true;
	skb_queue_walk_from_safe(list, skb, n) {
		if (skb == tail)
			break;
		/* Entirely before 'start': nothing useful, free it and
		 * restart the walk from its successor.
		 */
		if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
			skb = tcp_collapse_one(sk, skb, list);
			if (!skb)
				break;
			goto restart;
		}

		/* A non-SYN/FIN skb is a collapse candidate when the window
		 * credit for its truesize exceeds its payload length, or
		 * when it begins before 'start'.
		 */
		if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
		    (tcp_win_from_space(skb->truesize) > skb->len ||
		     before(TCP_SKB_CB(skb)->seq, start))) {
			end_of_skbs = false;
			break;
		}

		/* Also stop when the next skb of the run is not contiguous
		 * with this one.
		 */
		if (!skb_queue_is_last(list, skb)) {
			struct sk_buff *next = skb_queue_next(list, skb);
			if (next != tail &&
			    TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) {
				end_of_skbs = false;
				break;
			}
		}

		/* This skb stays as it is; continue after it. */
		start = TCP_SKB_CB(skb)->end_seq;
	}
	/* end_of_skbs is checked first: when it is set, skb may be NULL or
	 * may not point at a real list element.
	 */
	if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
		return;

	/* Phase 2: repack the data from 'start' up to 'end'. */
	while (before(start, end)) {
		struct sk_buff *nskb;
		unsigned int header = skb_headroom(skb);
		int copy = SKB_MAX_ORDER(header, 0);

		/* Headroom too large to leave any room for data. */
		if (copy < 0)
			return;
		if (end - start < copy)
			copy = end - start;
		nskb = alloc_skb(copy + header, GFP_ATOMIC);
		if (!nskb)
			return;

		/* Replicate header offsets, header bytes and control block
		 * of the skb being replaced.
		 */
		skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
		skb_set_network_header(nskb, (skb_network_header(skb) -
					      skb->head));
		skb_set_transport_header(nskb, (skb_transport_header(skb) -
						skb->head));
		skb_reserve(nskb, header);
		memcpy(nskb->head, skb->head, header);
		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
		/* The new skb starts out empty at 'start'. */
		TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
		__skb_queue_before(list, skb, nskb);
		skb_set_owner_r(nskb, sk);

		/* Fill nskb from as many old skbs as it takes. */
		while (copy > 0) {
			int offset = start - TCP_SKB_CB(skb)->seq;
			int size = TCP_SKB_CB(skb)->end_seq - start;

			BUG_ON(offset < 0);
			if (size > 0) {
				size = min(copy, size);
				if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
					BUG();
				TCP_SKB_CB(nskb)->end_seq += size;
				copy -= size;
				start += size;
			}
			/* Old skb fully consumed: free it and move on; stop
			 * entirely at the end of the run or at a SYN/FIN.
			 */
			if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
				skb = tcp_collapse_one(sk, skb, list);
				if (!skb ||
				    skb == tail ||
				    tcp_hdr(skb)->syn ||
				    tcp_hdr(skb)->fin)
					return;
			}
		}
	}
}
4832
4833
4834
4835
4836static void tcp_collapse_ofo_queue(struct sock *sk)
4837{
4838 struct tcp_sock *tp = tcp_sk(sk);
4839 struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
4840 struct sk_buff *head;
4841 u32 start, end;
4842
4843 if (skb == NULL)
4844 return;
4845
4846 start = TCP_SKB_CB(skb)->seq;
4847 end = TCP_SKB_CB(skb)->end_seq;
4848 head = skb;
4849
4850 for (;;) {
4851 struct sk_buff *next = NULL;
4852
4853 if (!skb_queue_is_last(&tp->out_of_order_queue, skb))
4854 next = skb_queue_next(&tp->out_of_order_queue, skb);
4855 skb = next;
4856
4857
4858
4859 if (!skb ||
4860 after(TCP_SKB_CB(skb)->seq, end) ||
4861 before(TCP_SKB_CB(skb)->end_seq, start)) {
4862 tcp_collapse(sk, &tp->out_of_order_queue,
4863 head, skb, start, end);
4864 head = skb;
4865 if (!skb)
4866 break;
4867
4868 start = TCP_SKB_CB(skb)->seq;
4869 end = TCP_SKB_CB(skb)->end_seq;
4870 } else {
4871 if (before(TCP_SKB_CB(skb)->seq, start))
4872 start = TCP_SKB_CB(skb)->seq;
4873 if (after(TCP_SKB_CB(skb)->end_seq, end))
4874 end = TCP_SKB_CB(skb)->end_seq;
4875 }
4876 }
4877}
4878
4879
4880
4881
4882
4883static bool tcp_prune_ofo_queue(struct sock *sk)
4884{
4885 struct tcp_sock *tp = tcp_sk(sk);
4886 bool res = false;
4887
4888 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4889 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
4890 __skb_queue_purge(&tp->out_of_order_queue);
4891
4892
4893
4894
4895
4896
4897 if (tp->rx_opt.sack_ok)
4898 tcp_sack_reset(&tp->rx_opt);
4899 sk_mem_reclaim(sk);
4900 res = true;
4901 }
4902 return res;
4903}
4904
4905
4906
4907
4908
4909
4910
4911
4912static int tcp_prune_queue(struct sock *sk)
4913{
4914 struct tcp_sock *tp = tcp_sk(sk);
4915
4916 SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
4917
4918 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED);
4919
4920 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
4921 tcp_clamp_window(sk);
4922 else if (sk_under_memory_pressure(sk))
4923 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
4924
4925 tcp_collapse_ofo_queue(sk);
4926 if (!skb_queue_empty(&sk->sk_receive_queue))
4927 tcp_collapse(sk, &sk->sk_receive_queue,
4928 skb_peek(&sk->sk_receive_queue),
4929 NULL,
4930 tp->copied_seq, tp->rcv_nxt);
4931 sk_mem_reclaim(sk);
4932
4933 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4934 return 0;
4935
4936
4937
4938
4939 tcp_prune_ofo_queue(sk);
4940
4941 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4942 return 0;
4943
4944
4945
4946
4947
4948 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED);
4949
4950
4951 tp->pred_flags = 0;
4952 return -1;
4953}
4954
4955
4956
4957
4958
4959void tcp_cwnd_application_limited(struct sock *sk)
4960{
4961 struct tcp_sock *tp = tcp_sk(sk);
4962
4963 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
4964 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
4965
4966 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
4967 u32 win_used = max(tp->snd_cwnd_used, init_win);
4968 if (win_used < tp->snd_cwnd) {
4969 tp->snd_ssthresh = tcp_current_ssthresh(sk);
4970 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
4971 }
4972 tp->snd_cwnd_used = 0;
4973 }
4974 tp->snd_cwnd_stamp = tcp_time_stamp;
4975}
4976
4977static bool tcp_should_expand_sndbuf(const struct sock *sk)
4978{
4979 const struct tcp_sock *tp = tcp_sk(sk);
4980
4981
4982
4983
4984 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
4985 return false;
4986
4987
4988 if (sk_under_memory_pressure(sk))
4989 return false;
4990
4991
4992 if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
4993 return false;
4994
4995
4996 if (tp->packets_out >= tp->snd_cwnd)
4997 return false;
4998
4999 return true;
5000}
5001
5002
5003
5004
5005
5006
5007
5008static void tcp_new_space(struct sock *sk)
5009{
5010 struct tcp_sock *tp = tcp_sk(sk);
5011
5012 if (tcp_should_expand_sndbuf(sk)) {
5013 int sndmem = SKB_TRUESIZE(max_t(u32,
5014 tp->rx_opt.mss_clamp,
5015 tp->mss_cache) +
5016 MAX_TCP_HEADER);
5017 int demanded = max_t(unsigned int, tp->snd_cwnd,
5018 tp->reordering + 1);
5019 sndmem *= 2 * demanded;
5020 if (sndmem > sk->sk_sndbuf)
5021 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
5022 tp->snd_cwnd_stamp = tcp_time_stamp;
5023 }
5024
5025 sk->sk_write_space(sk);
5026}
5027
5028static void tcp_check_space(struct sock *sk)
5029{
5030 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
5031 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
5032 if (sk->sk_socket &&
5033 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
5034 tcp_new_space(sk);
5035 }
5036}
5037
/* Send-side follow-up: push any pending frames of @sk, then run
 * tcp_check_space() on it.
 */
static inline void tcp_data_snd_check(struct sock *sk)
{
	tcp_push_pending_frames(sk);
	tcp_check_space(sk);
}
5043
5044
5045
5046
5047static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
5048{
5049 struct tcp_sock *tp = tcp_sk(sk);
5050
5051
5052 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
5053
5054
5055
5056 __tcp_select_window(sk) >= tp->rcv_wnd) ||
5057
5058 tcp_in_quickack_mode(sk) ||
5059
5060 (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
5061
5062 tcp_send_ack(sk);
5063 } else {
5064
5065 tcp_send_delayed_ack(sk);
5066 }
5067}
5068
/* Run the ACK decision logic for @sk, but only if an ACK has been
 * scheduled; otherwise there is nothing to do.
 */
static inline void tcp_ack_snd_check(struct sock *sk)
{
	if (inet_csk_ack_scheduled(sk))
		__tcp_ack_snd_check(sk, 1);
}
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087
5088static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
5089{
5090 struct tcp_sock *tp = tcp_sk(sk);
5091 u32 ptr = ntohs(th->urg_ptr);
5092
5093 if (ptr && !sysctl_tcp_stdurg)
5094 ptr--;
5095 ptr += ntohl(th->seq);
5096
5097
5098 if (after(tp->copied_seq, ptr))
5099 return;
5100
5101
5102
5103
5104
5105
5106
5107
5108
5109
5110
5111 if (before(ptr, tp->rcv_nxt))
5112 return;
5113
5114
5115 if (tp->urg_data && !after(ptr, tp->urg_seq))
5116 return;
5117
5118
5119 sk_send_sigurg(sk);
5120
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
5137 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
5138 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
5139 tp->copied_seq++;
5140 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
5141 __skb_unlink(skb, &sk->sk_receive_queue);
5142 __kfree_skb(skb);
5143 }
5144 }
5145
5146 tp->urg_data = TCP_URG_NOTYET;
5147 tp->urg_seq = ptr;
5148
5149
5150 tp->pred_flags = 0;
5151}
5152
5153
5154static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
5155{
5156 struct tcp_sock *tp = tcp_sk(sk);
5157
5158
5159 if (th->urg)
5160 tcp_check_urg(sk, th);
5161
5162
5163 if (tp->urg_data == TCP_URG_NOTYET) {
5164 u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
5165 th->syn;
5166
5167
5168 if (ptr < skb->len) {
5169 u8 tmp;
5170 if (skb_copy_bits(skb, ptr, &tmp, 1))
5171 BUG();
5172 tp->urg_data = TCP_URG_VALID | tmp;
5173 if (!sock_flag(sk, SOCK_DEAD))
5174 sk->sk_data_ready(sk, 0);
5175 }
5176 }
5177}
5178
5179static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
5180{
5181 struct tcp_sock *tp = tcp_sk(sk);
5182 int chunk = skb->len - hlen;
5183 int err;
5184
5185 local_bh_enable();
5186 if (skb_csum_unnecessary(skb))
5187 err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
5188 else
5189 err = skb_copy_and_csum_datagram_iovec(skb, hlen,
5190 tp->ucopy.iov);
5191
5192 if (!err) {
5193 tp->ucopy.len -= chunk;
5194 tp->copied_seq += chunk;
5195 tcp_rcv_space_adjust(sk);
5196 }
5197
5198 local_bh_disable();
5199 return err;
5200}
5201
5202static __sum16 __tcp_checksum_complete_user(struct sock *sk,
5203 struct sk_buff *skb)
5204{
5205 __sum16 result;
5206
5207 if (sock_owned_by_user(sk)) {
5208 local_bh_enable();
5209 result = __tcp_checksum_complete(skb);
5210 local_bh_disable();
5211 } else {
5212 result = __tcp_checksum_complete(skb);
5213 }
5214 return result;
5215}
5216
5217static inline bool tcp_checksum_complete_user(struct sock *sk,
5218 struct sk_buff *skb)
5219{
5220 return !skb_csum_unnecessary(skb) &&
5221 __tcp_checksum_complete_user(sk, skb);
5222}
5223
5224#ifdef CONFIG_NET_DMA
5225static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
5226 int hlen)
5227{
52