1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65#define pr_fmt(fmt) "TCP: " fmt
66
67#include <linux/mm.h>
68#include <linux/slab.h>
69#include <linux/module.h>
70#include <linux/sysctl.h>
71#include <linux/kernel.h>
72#include <linux/prefetch.h>
73#include <net/dst.h>
74#include <net/tcp.h>
75#include <net/inet_common.h>
76#include <linux/ipsec.h>
77#include <asm/unaligned.h>
78#include <linux/errqueue.h>
79#include <trace/events/tcp.h>
80#include <linux/jump_label_ratelimit.h>
81#include <net/busy_poll.h>
82#include <net/mptcp.h>
83
84int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
85
86#define FLAG_DATA 0x01
87#define FLAG_WIN_UPDATE 0x02
88#define FLAG_DATA_ACKED 0x04
89#define FLAG_RETRANS_DATA_ACKED 0x08
90#define FLAG_SYN_ACKED 0x10
91#define FLAG_DATA_SACKED 0x20
92#define FLAG_ECE 0x40
93#define FLAG_LOST_RETRANS 0x80
94#define FLAG_SLOWPATH 0x100
95#define FLAG_ORIG_SACK_ACKED 0x200
96#define FLAG_SND_UNA_ADVANCED 0x400
97#define FLAG_DSACKING_ACK 0x800
98#define FLAG_SET_XMIT_TIMER 0x1000
99#define FLAG_SACK_RENEGING 0x2000
100#define FLAG_UPDATE_TS_RECENT 0x4000
101#define FLAG_NO_CHALLENGE_ACK 0x8000
102#define FLAG_ACK_MAYBE_DELAYED 0x10000
103
104#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
105#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
106#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
107#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
108
109#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
110#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
111
112#define REXMIT_NONE 0
113#define REXMIT_LOST 1
114#define REXMIT_NEW 2
115
116#if IS_ENABLED(CONFIG_TLS_DEVICE)
117static DEFINE_STATIC_KEY_DEFERRED_FALSE(clean_acked_data_enabled, HZ);
118
119void clean_acked_data_enable(struct inet_connection_sock *icsk,
120 void (*cad)(struct sock *sk, u32 ack_seq))
121{
122 icsk->icsk_clean_acked = cad;
123 static_branch_deferred_inc(&clean_acked_data_enabled);
124}
125EXPORT_SYMBOL_GPL(clean_acked_data_enable);
126
127void clean_acked_data_disable(struct inet_connection_sock *icsk)
128{
129 static_branch_slow_dec_deferred(&clean_acked_data_enabled);
130 icsk->icsk_clean_acked = NULL;
131}
132EXPORT_SYMBOL_GPL(clean_acked_data_disable);
133
134void clean_acked_data_flush(void)
135{
136 static_key_deferred_flush(&clean_acked_data_enabled);
137}
138EXPORT_SYMBOL_GPL(clean_acked_data_flush);
139#endif
140
141#ifdef CONFIG_CGROUP_BPF
142static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
143{
144 bool unknown_opt = tcp_sk(sk)->rx_opt.saw_unknown &&
145 BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
146 BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG);
147 bool parse_all_opt = BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
148 BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
149 struct bpf_sock_ops_kern sock_ops;
150
151 if (likely(!unknown_opt && !parse_all_opt))
152 return;
153
154
155
156
157
158 switch (sk->sk_state) {
159 case TCP_SYN_RECV:
160 case TCP_SYN_SENT:
161 case TCP_LISTEN:
162 return;
163 }
164
165 sock_owned_by_me(sk);
166
167 memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
168 sock_ops.op = BPF_SOCK_OPS_PARSE_HDR_OPT_CB;
169 sock_ops.is_fullsock = 1;
170 sock_ops.sk = sk;
171 bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));
172
173 BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
174}
175
176static void bpf_skops_established(struct sock *sk, int bpf_op,
177 struct sk_buff *skb)
178{
179 struct bpf_sock_ops_kern sock_ops;
180
181 sock_owned_by_me(sk);
182
183 memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
184 sock_ops.op = bpf_op;
185 sock_ops.is_fullsock = 1;
186 sock_ops.sk = sk;
187
188 if (skb)
189 bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));
190
191 BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
192}
193#else
194static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
195{
196}
197
198static void bpf_skops_established(struct sock *sk, int bpf_op,
199 struct sk_buff *skb)
200{
201}
202#endif
203
204static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
205 unsigned int len)
206{
207 static bool __once __read_mostly;
208
209 if (!__once) {
210 struct net_device *dev;
211
212 __once = true;
213
214 rcu_read_lock();
215 dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
216 if (!dev || len >= dev->mtu)
217 pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
218 dev ? dev->name : "Unknown driver");
219 rcu_read_unlock();
220 }
221}
222
223
224
225
226static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
227{
228 struct inet_connection_sock *icsk = inet_csk(sk);
229 const unsigned int lss = icsk->icsk_ack.last_seg_size;
230 unsigned int len;
231
232 icsk->icsk_ack.last_seg_size = 0;
233
234
235
236
237 len = skb_shinfo(skb)->gso_size ? : skb->len;
238 if (len >= icsk->icsk_ack.rcv_mss) {
239 icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
240 tcp_sk(sk)->advmss);
241
242 if (unlikely(len > icsk->icsk_ack.rcv_mss +
243 MAX_TCP_OPTION_SPACE))
244 tcp_gro_dev_warn(sk, skb, len);
245 } else {
246
247
248
249
250
251 len += skb->data - skb_transport_header(skb);
252 if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
253
254
255
256
257
258 (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
259 !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
260
261
262
263
264 len -= tcp_sk(sk)->tcp_header_len;
265 icsk->icsk_ack.last_seg_size = len;
266 if (len == lss) {
267 icsk->icsk_ack.rcv_mss = len;
268 return;
269 }
270 }
271 if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
272 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
273 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
274 }
275}
276
277static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
278{
279 struct inet_connection_sock *icsk = inet_csk(sk);
280 unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
281
282 if (quickacks == 0)
283 quickacks = 2;
284 quickacks = min(quickacks, max_quickacks);
285 if (quickacks > icsk->icsk_ack.quick)
286 icsk->icsk_ack.quick = quickacks;
287}
288
289void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
290{
291 struct inet_connection_sock *icsk = inet_csk(sk);
292
293 tcp_incr_quickack(sk, max_quickacks);
294 inet_csk_exit_pingpong_mode(sk);
295 icsk->icsk_ack.ato = TCP_ATO_MIN;
296}
297EXPORT_SYMBOL(tcp_enter_quickack_mode);
298
299
300
301
302
303static bool tcp_in_quickack_mode(struct sock *sk)
304{
305 const struct inet_connection_sock *icsk = inet_csk(sk);
306 const struct dst_entry *dst = __sk_dst_get(sk);
307
308 return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
309 (icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk));
310}
311
312static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
313{
314 if (tp->ecn_flags & TCP_ECN_OK)
315 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
316}
317
318static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb)
319{
320 if (tcp_hdr(skb)->cwr) {
321 tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
322
323
324
325
326
327 if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
328 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
329 }
330}
331
332static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
333{
334 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
335}
336
337static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
338{
339 struct tcp_sock *tp = tcp_sk(sk);
340
341 switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
342 case INET_ECN_NOT_ECT:
343
344
345
346
347 if (tp->ecn_flags & TCP_ECN_SEEN)
348 tcp_enter_quickack_mode(sk, 2);
349 break;
350 case INET_ECN_CE:
351 if (tcp_ca_needs_ecn(sk))
352 tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
353
354 if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
355
356 tcp_enter_quickack_mode(sk, 2);
357 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
358 }
359 tp->ecn_flags |= TCP_ECN_SEEN;
360 break;
361 default:
362 if (tcp_ca_needs_ecn(sk))
363 tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
364 tp->ecn_flags |= TCP_ECN_SEEN;
365 break;
366 }
367}
368
369static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
370{
371 if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK)
372 __tcp_ecn_check_ce(sk, skb);
373}
374
375static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
376{
377 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
378 tp->ecn_flags &= ~TCP_ECN_OK;
379}
380
381static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
382{
383 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
384 tp->ecn_flags &= ~TCP_ECN_OK;
385}
386
387static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
388{
389 if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
390 return true;
391 return false;
392}
393
394
395
396
397
398
399static void tcp_sndbuf_expand(struct sock *sk)
400{
401 const struct tcp_sock *tp = tcp_sk(sk);
402 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
403 int sndmem, per_mss;
404 u32 nr_segs;
405
406
407
408
409 per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
410 MAX_TCP_HEADER +
411 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
412
413 per_mss = roundup_pow_of_two(per_mss) +
414 SKB_DATA_ALIGN(sizeof(struct sk_buff));
415
416 nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
417 nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
418
419
420
421
422
423 sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2;
424 sndmem *= nr_segs * per_mss;
425
426 if (sk->sk_sndbuf < sndmem)
427 WRITE_ONCE(sk->sk_sndbuf,
428 min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
429}
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
458{
459 struct tcp_sock *tp = tcp_sk(sk);
460
461 int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
462 int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
463
464 while (tp->rcv_ssthresh <= window) {
465 if (truesize <= skb->len)
466 return 2 * inet_csk(sk)->icsk_ack.rcv_mss;
467
468 truesize >>= 1;
469 window >>= 1;
470 }
471 return 0;
472}
473
474static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
475{
476 struct tcp_sock *tp = tcp_sk(sk);
477 int room;
478
479 room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh;
480
481
482 if (room > 0 && !tcp_under_memory_pressure(sk)) {
483 int incr;
484
485
486
487
488 if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
489 incr = 2 * tp->advmss;
490 else
491 incr = __tcp_grow_window(sk, skb);
492
493 if (incr) {
494 incr = max_t(int, incr, 2 * skb->len);
495 tp->rcv_ssthresh += min(room, incr);
496 inet_csk(sk)->icsk_ack.quick |= 1;
497 }
498 }
499}
500
501
502
503
504static void tcp_init_buffer_space(struct sock *sk)
505{
506 int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
507 struct tcp_sock *tp = tcp_sk(sk);
508 int maxwin;
509
510 if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
511 tcp_sndbuf_expand(sk);
512
513 tcp_mstamp_refresh(tp);
514 tp->rcvq_space.time = tp->tcp_mstamp;
515 tp->rcvq_space.seq = tp->copied_seq;
516
517 maxwin = tcp_full_space(sk);
518
519 if (tp->window_clamp >= maxwin) {
520 tp->window_clamp = maxwin;
521
522 if (tcp_app_win && maxwin > 4 * tp->advmss)
523 tp->window_clamp = max(maxwin -
524 (maxwin >> tcp_app_win),
525 4 * tp->advmss);
526 }
527
528
529 if (tcp_app_win &&
530 tp->window_clamp > 2 * tp->advmss &&
531 tp->window_clamp + tp->advmss > maxwin)
532 tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
533
534 tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
535 tp->snd_cwnd_stamp = tcp_jiffies32;
536 tp->rcvq_space.space = min3(tp->rcv_ssthresh, tp->rcv_wnd,
537 (u32)TCP_INIT_CWND * tp->advmss);
538}
539
540
541static void tcp_clamp_window(struct sock *sk)
542{
543 struct tcp_sock *tp = tcp_sk(sk);
544 struct inet_connection_sock *icsk = inet_csk(sk);
545 struct net *net = sock_net(sk);
546
547 icsk->icsk_ack.quick = 0;
548
549 if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
550 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
551 !tcp_under_memory_pressure(sk) &&
552 sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
553 WRITE_ONCE(sk->sk_rcvbuf,
554 min(atomic_read(&sk->sk_rmem_alloc),
555 net->ipv4.sysctl_tcp_rmem[2]));
556 }
557 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
558 tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
559}
560
561
562
563
564
565
566
567
568void tcp_initialize_rcv_mss(struct sock *sk)
569{
570 const struct tcp_sock *tp = tcp_sk(sk);
571 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
572
573 hint = min(hint, tp->rcv_wnd / 2);
574 hint = min(hint, TCP_MSS_DEFAULT);
575 hint = max(hint, TCP_MIN_MSS);
576
577 inet_csk(sk)->icsk_ack.rcv_mss = hint;
578}
579EXPORT_SYMBOL(tcp_initialize_rcv_mss);
580
581
582
583
584
585
586
587
588
589
590
591
592static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
593{
594 u32 new_sample = tp->rcv_rtt_est.rtt_us;
595 long m = sample;
596
597 if (new_sample != 0) {
598
599
600
601
602
603
604
605
606
607
608 if (!win_dep) {
609 m -= (new_sample >> 3);
610 new_sample += m;
611 } else {
612 m <<= 3;
613 if (m < new_sample)
614 new_sample = m;
615 }
616 } else {
617
618 new_sample = m << 3;
619 }
620
621 tp->rcv_rtt_est.rtt_us = new_sample;
622}
623
624static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
625{
626 u32 delta_us;
627
628 if (tp->rcv_rtt_est.time == 0)
629 goto new_measure;
630 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
631 return;
632 delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
633 if (!delta_us)
634 delta_us = 1;
635 tcp_rcv_rtt_update(tp, delta_us, 1);
636
637new_measure:
638 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
639 tp->rcv_rtt_est.time = tp->tcp_mstamp;
640}
641
642static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
643 const struct sk_buff *skb)
644{
645 struct tcp_sock *tp = tcp_sk(sk);
646
647 if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr)
648 return;
649 tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
650
651 if (TCP_SKB_CB(skb)->end_seq -
652 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
653 u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
654 u32 delta_us;
655
656 if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
657 if (!delta)
658 delta = 1;
659 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
660 tcp_rcv_rtt_update(tp, delta_us, 0);
661 }
662 }
663}
664
665
666
667
668
669void tcp_rcv_space_adjust(struct sock *sk)
670{
671 struct tcp_sock *tp = tcp_sk(sk);
672 u32 copied;
673 int time;
674
675 trace_tcp_rcv_space_adjust(sk);
676
677 tcp_mstamp_refresh(tp);
678 time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
679 if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
680 return;
681
682
683 copied = tp->copied_seq - tp->rcvq_space.seq;
684 if (copied <= tp->rcvq_space.space)
685 goto new_measure;
686
687
688
689
690
691
692
693
694
695
696 if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
697 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
698 int rcvmem, rcvbuf;
699 u64 rcvwin, grow;
700
701
702
703
704 rcvwin = ((u64)copied << 1) + 16 * tp->advmss;
705
706
707 grow = rcvwin * (copied - tp->rcvq_space.space);
708 do_div(grow, tp->rcvq_space.space);
709 rcvwin += (grow << 1);
710
711 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
712 while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
713 rcvmem += 128;
714
715 do_div(rcvwin, tp->advmss);
716 rcvbuf = min_t(u64, rcvwin * rcvmem,
717 sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
718 if (rcvbuf > sk->sk_rcvbuf) {
719 WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
720
721
722 tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
723 }
724 }
725 tp->rcvq_space.space = copied;
726
727new_measure:
728 tp->rcvq_space.seq = tp->copied_seq;
729 tp->rcvq_space.time = tp->tcp_mstamp;
730}
731
732
733
734
735
736
737
738
739
740
741
742static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
743{
744 struct tcp_sock *tp = tcp_sk(sk);
745 struct inet_connection_sock *icsk = inet_csk(sk);
746 u32 now;
747
748 inet_csk_schedule_ack(sk);
749
750 tcp_measure_rcv_mss(sk, skb);
751
752 tcp_rcv_rtt_measure(tp);
753
754 now = tcp_jiffies32;
755
756 if (!icsk->icsk_ack.ato) {
757
758
759
760 tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
761 icsk->icsk_ack.ato = TCP_ATO_MIN;
762 } else {
763 int m = now - icsk->icsk_ack.lrcvtime;
764
765 if (m <= TCP_ATO_MIN / 2) {
766
767 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
768 } else if (m < icsk->icsk_ack.ato) {
769 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
770 if (icsk->icsk_ack.ato > icsk->icsk_rto)
771 icsk->icsk_ack.ato = icsk->icsk_rto;
772 } else if (m > icsk->icsk_rto) {
773
774
775
776 tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
777 sk_mem_reclaim(sk);
778 }
779 }
780 icsk->icsk_ack.lrcvtime = now;
781
782 tcp_ecn_check_ce(sk, skb);
783
784 if (skb->len >= 128)
785 tcp_grow_window(sk, skb);
786}
787
788
789
790
791
792
793
794
795
796
797static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
798{
799 struct tcp_sock *tp = tcp_sk(sk);
800 long m = mrtt_us;
801 u32 srtt = tp->srtt_us;
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819 if (srtt != 0) {
820 m -= (srtt >> 3);
821 srtt += m;
822 if (m < 0) {
823 m = -m;
824 m -= (tp->mdev_us >> 2);
825
826
827
828
829
830
831
832
833 if (m > 0)
834 m >>= 3;
835 } else {
836 m -= (tp->mdev_us >> 2);
837 }
838 tp->mdev_us += m;
839 if (tp->mdev_us > tp->mdev_max_us) {
840 tp->mdev_max_us = tp->mdev_us;
841 if (tp->mdev_max_us > tp->rttvar_us)
842 tp->rttvar_us = tp->mdev_max_us;
843 }
844 if (after(tp->snd_una, tp->rtt_seq)) {
845 if (tp->mdev_max_us < tp->rttvar_us)
846 tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
847 tp->rtt_seq = tp->snd_nxt;
848 tp->mdev_max_us = tcp_rto_min_us(sk);
849
850 tcp_bpf_rtt(sk);
851 }
852 } else {
853
854 srtt = m << 3;
855 tp->mdev_us = m << 1;
856 tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
857 tp->mdev_max_us = tp->rttvar_us;
858 tp->rtt_seq = tp->snd_nxt;
859
860 tcp_bpf_rtt(sk);
861 }
862 tp->srtt_us = max(1U, srtt);
863}
864
865static void tcp_update_pacing_rate(struct sock *sk)
866{
867 const struct tcp_sock *tp = tcp_sk(sk);
868 u64 rate;
869
870
871 rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);
872
873
874
875
876
877
878
879
880
881 if (tp->snd_cwnd < tp->snd_ssthresh / 2)
882 rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
883 else
884 rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
885
886 rate *= max(tp->snd_cwnd, tp->packets_out);
887
888 if (likely(tp->srtt_us))
889 do_div(rate, tp->srtt_us);
890
891
892
893
894
895 WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate,
896 sk->sk_max_pacing_rate));
897}
898
899
900
901
902static void tcp_set_rto(struct sock *sk)
903{
904 const struct tcp_sock *tp = tcp_sk(sk);
905
906
907
908
909
910
911
912
913
914
915 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
916
917
918
919
920
921
922
923
924
925
926 tcp_bound_rto(sk);
927}
928
929__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
930{
931 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
932
933 if (!cwnd)
934 cwnd = TCP_INIT_CWND;
935 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
936}
937
938struct tcp_sacktag_state {
939
940
941
942
943 u64 first_sackt;
944 u64 last_sackt;
945 u32 reord;
946 u32 sack_delivered;
947 int flag;
948 unsigned int mss_now;
949 struct rate_sample *rate;
950};
951
952
953
954
955
956
957
958static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
959 u32 end_seq, struct tcp_sacktag_state *state)
960{
961 u32 seq_len, dup_segs = 1;
962
963 if (!before(start_seq, end_seq))
964 return 0;
965
966 seq_len = end_seq - start_seq;
967
968 if (seq_len > tp->max_window)
969 return 0;
970 if (seq_len > tp->mss_cache)
971 dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache);
972
973 tp->dsack_dups += dup_segs;
974
975 if (tp->dsack_dups > tp->total_retrans)
976 return 0;
977
978 tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
979 tp->rack.dsack_seen = 1;
980
981 state->flag |= FLAG_DSACKING_ACK;
982
983 state->sack_delivered += dup_segs;
984
985 return dup_segs;
986}
987
988
989
990
991
992static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
993 const int ts)
994{
995 struct tcp_sock *tp = tcp_sk(sk);
996 const u32 mss = tp->mss_cache;
997 u32 fack, metric;
998
999 fack = tcp_highest_sack_seq(tp);
1000 if (!before(low_seq, fack))
1001 return;
1002
1003 metric = fack - low_seq;
1004 if ((metric > tp->reordering * mss) && mss) {
1005#if FASTRETRANS_DEBUG > 1
1006 pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
1007 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
1008 tp->reordering,
1009 0,
1010 tp->sacked_out,
1011 tp->undo_marker ? tp->undo_retrans : 0);
1012#endif
1013 tp->reordering = min_t(u32, (metric + mss - 1) / mss,
1014 sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
1015 }
1016
1017
1018 tp->reord_seen++;
1019 NET_INC_STATS(sock_net(sk),
1020 ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
1021}
1022
1023
1024
1025
1026
1027
1028static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
1029{
1030 if ((!tp->retransmit_skb_hint && tp->retrans_out >= tp->lost_out) ||
1031 (tp->retransmit_skb_hint &&
1032 before(TCP_SKB_CB(skb)->seq,
1033 TCP_SKB_CB(tp->retransmit_skb_hint)->seq)))
1034 tp->retransmit_skb_hint = skb;
1035}
1036
1037
1038
1039
1040static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb)
1041{
1042 tp->lost += tcp_skb_pcount(skb);
1043}
1044
1045void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
1046{
1047 __u8 sacked = TCP_SKB_CB(skb)->sacked;
1048 struct tcp_sock *tp = tcp_sk(sk);
1049
1050 if (sacked & TCPCB_SACKED_ACKED)
1051 return;
1052
1053 tcp_verify_retransmit_hint(tp, skb);
1054 if (sacked & TCPCB_LOST) {
1055 if (sacked & TCPCB_SACKED_RETRANS) {
1056
1057 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1058 tp->retrans_out -= tcp_skb_pcount(skb);
1059 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT,
1060 tcp_skb_pcount(skb));
1061 tcp_notify_skb_loss_event(tp, skb);
1062 }
1063 } else {
1064 tp->lost_out += tcp_skb_pcount(skb);
1065 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1066 tcp_notify_skb_loss_event(tp, skb);
1067 }
1068}
1069
1070
1071static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
1072 bool ece_ack)
1073{
1074 tp->delivered += delivered;
1075 if (ece_ack)
1076 tp->delivered_ce += delivered;
1077}
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
1173 u32 start_seq, u32 end_seq)
1174{
1175
1176 if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
1177 return false;
1178
1179
1180 if (!before(start_seq, tp->snd_nxt))
1181 return false;
1182
1183
1184
1185
1186 if (after(start_seq, tp->snd_una))
1187 return true;
1188
1189 if (!is_dsack || !tp->undo_marker)
1190 return false;
1191
1192
1193 if (after(end_seq, tp->snd_una))
1194 return false;
1195
1196 if (!before(start_seq, tp->undo_marker))
1197 return true;
1198
1199
1200 if (!after(end_seq, tp->undo_marker))
1201 return false;
1202
1203
1204
1205
1206 return !before(start_seq, end_seq - tp->max_window);
1207}
1208
1209static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1210 struct tcp_sack_block_wire *sp, int num_sacks,
1211 u32 prior_snd_una, struct tcp_sacktag_state *state)
1212{
1213 struct tcp_sock *tp = tcp_sk(sk);
1214 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
1215 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
1216 u32 dup_segs;
1217
1218 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1219 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
1220 } else if (num_sacks > 1) {
1221 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
1222 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
1223
1224 if (after(end_seq_0, end_seq_1) || before(start_seq_0, start_seq_1))
1225 return false;
1226 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKOFORECV);
1227 } else {
1228 return false;
1229 }
1230
1231 dup_segs = tcp_dsack_seen(tp, start_seq_0, end_seq_0, state);
1232 if (!dup_segs) {
1233 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKIGNOREDDUBIOUS);
1234 return false;
1235 }
1236
1237 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECVSEGS, dup_segs);
1238
1239
1240 if (tp->undo_marker && tp->undo_retrans > 0 &&
1241 !after(end_seq_0, prior_snd_una) &&
1242 after(end_seq_0, tp->undo_marker))
1243 tp->undo_retrans = max_t(int, 0, tp->undo_retrans - dup_segs);
1244
1245 return true;
1246}
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1257 u32 start_seq, u32 end_seq)
1258{
1259 int err;
1260 bool in_sack;
1261 unsigned int pkt_len;
1262 unsigned int mss;
1263
1264 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1265 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1266
1267 if (tcp_skb_pcount(skb) > 1 && !in_sack &&
1268 after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
1269 mss = tcp_skb_mss(skb);
1270 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1271
1272 if (!in_sack) {
1273 pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
1274 if (pkt_len < mss)
1275 pkt_len = mss;
1276 } else {
1277 pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
1278 if (pkt_len < mss)
1279 return -EINVAL;
1280 }
1281
1282
1283
1284
1285 if (pkt_len > mss) {
1286 unsigned int new_len = (pkt_len / mss) * mss;
1287 if (!in_sack && new_len < pkt_len)
1288 new_len += mss;
1289 pkt_len = new_len;
1290 }
1291
1292 if (pkt_len >= skb->len && !in_sack)
1293 return 0;
1294
1295 err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
1296 pkt_len, mss, GFP_ATOMIC);
1297 if (err < 0)
1298 return err;
1299 }
1300
1301 return in_sack;
1302}
1303
1304
1305static u8 tcp_sacktag_one(struct sock *sk,
1306 struct tcp_sacktag_state *state, u8 sacked,
1307 u32 start_seq, u32 end_seq,
1308 int dup_sack, int pcount,
1309 u64 xmit_time)
1310{
1311 struct tcp_sock *tp = tcp_sk(sk);
1312
1313
1314 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1315 if (tp->undo_marker && tp->undo_retrans > 0 &&
1316 after(end_seq, tp->undo_marker))
1317 tp->undo_retrans--;
1318 if ((sacked & TCPCB_SACKED_ACKED) &&
1319 before(start_seq, state->reord))
1320 state->reord = start_seq;
1321 }
1322
1323
1324 if (!after(end_seq, tp->snd_una))
1325 return sacked;
1326
1327 if (!(sacked & TCPCB_SACKED_ACKED)) {
1328 tcp_rack_advance(tp, sacked, end_seq, xmit_time);
1329
1330 if (sacked & TCPCB_SACKED_RETRANS) {
1331
1332
1333
1334
1335 if (sacked & TCPCB_LOST) {
1336 sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1337 tp->lost_out -= pcount;
1338 tp->retrans_out -= pcount;
1339 }
1340 } else {
1341 if (!(sacked & TCPCB_RETRANS)) {
1342
1343
1344
1345 if (before(start_seq,
1346 tcp_highest_sack_seq(tp)) &&
1347 before(start_seq, state->reord))
1348 state->reord = start_seq;
1349
1350 if (!after(end_seq, tp->high_seq))
1351 state->flag |= FLAG_ORIG_SACK_ACKED;
1352 if (state->first_sackt == 0)
1353 state->first_sackt = xmit_time;
1354 state->last_sackt = xmit_time;
1355 }
1356
1357 if (sacked & TCPCB_LOST) {
1358 sacked &= ~TCPCB_LOST;
1359 tp->lost_out -= pcount;
1360 }
1361 }
1362
1363 sacked |= TCPCB_SACKED_ACKED;
1364 state->flag |= FLAG_DATA_SACKED;
1365 tp->sacked_out += pcount;
1366
1367 state->sack_delivered += pcount;
1368
1369
1370 if (tp->lost_skb_hint &&
1371 before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
1372 tp->lost_cnt_hint += pcount;
1373 }
1374
1375
1376
1377
1378
1379 if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
1380 sacked &= ~TCPCB_SACKED_RETRANS;
1381 tp->retrans_out -= pcount;
1382 }
1383
1384 return sacked;
1385}
1386
1387
1388
1389
1390static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
1391 struct sk_buff *skb,
1392 struct tcp_sacktag_state *state,
1393 unsigned int pcount, int shifted, int mss,
1394 bool dup_sack)
1395{
1396 struct tcp_sock *tp = tcp_sk(sk);
1397 u32 start_seq = TCP_SKB_CB(skb)->seq;
1398 u32 end_seq = start_seq + shifted;
1399
1400 BUG_ON(!pcount);
1401
1402
1403
1404
1405
1406
1407
1408 tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
1409 start_seq, end_seq, dup_sack, pcount,
1410 tcp_skb_timestamp_us(skb));
1411 tcp_rate_skb_delivered(sk, skb, state->rate);
1412
1413 if (skb == tp->lost_skb_hint)
1414 tp->lost_cnt_hint += pcount;
1415
1416 TCP_SKB_CB(prev)->end_seq += shifted;
1417 TCP_SKB_CB(skb)->seq += shifted;
1418
1419 tcp_skb_pcount_add(prev, pcount);
1420 WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
1421 tcp_skb_pcount_add(skb, -pcount);
1422
1423
1424
1425
1426
1427
1428 if (!TCP_SKB_CB(prev)->tcp_gso_size)
1429 TCP_SKB_CB(prev)->tcp_gso_size = mss;
1430
1431
1432 if (tcp_skb_pcount(skb) <= 1)
1433 TCP_SKB_CB(skb)->tcp_gso_size = 0;
1434
1435
1436 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
1437
1438 if (skb->len > 0) {
1439 BUG_ON(!tcp_skb_pcount(skb));
1440 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED);
1441 return false;
1442 }
1443
1444
1445
1446 if (skb == tp->retransmit_skb_hint)
1447 tp->retransmit_skb_hint = prev;
1448 if (skb == tp->lost_skb_hint) {
1449 tp->lost_skb_hint = prev;
1450 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
1451 }
1452
1453 TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1454 TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor;
1455 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1456 TCP_SKB_CB(prev)->end_seq++;
1457
1458 if (skb == tcp_highest_sack(sk))
1459 tcp_advance_highest_sack(sk, skb);
1460
1461 tcp_skb_collapse_tstamp(prev, skb);
1462 if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
1463 TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
1464
1465 tcp_rtx_queue_unlink_and_free(skb, sk);
1466
1467 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
1468
1469 return true;
1470}
1471
1472
1473
1474
1475static int tcp_skb_seglen(const struct sk_buff *skb)
1476{
1477 return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
1478}
1479
1480
1481static int skb_can_shift(const struct sk_buff *skb)
1482{
1483 return !skb_headlen(skb) && skb_is_nonlinear(skb);
1484}
1485
1486int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
1487 int pcount, int shiftlen)
1488{
1489
1490
1491
1492
1493
1494 if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
1495 return 0;
1496 if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
1497 return 0;
1498 return skb_shift(to, from, shiftlen);
1499}
1500
1501
1502
1503
1504static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1505 struct tcp_sacktag_state *state,
1506 u32 start_seq, u32 end_seq,
1507 bool dup_sack)
1508{
1509 struct tcp_sock *tp = tcp_sk(sk);
1510 struct sk_buff *prev;
1511 int mss;
1512 int pcount = 0;
1513 int len;
1514 int in_sack;
1515
1516
1517 if (!dup_sack &&
1518 (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
1519 goto fallback;
1520 if (!skb_can_shift(skb))
1521 goto fallback;
1522
1523 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1524 goto fallback;
1525
1526
1527 prev = skb_rb_prev(skb);
1528 if (!prev)
1529 goto fallback;
1530
1531 if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
1532 goto fallback;
1533
1534 if (!tcp_skb_can_collapse(prev, skb))
1535 goto fallback;
1536
1537 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1538 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1539
1540 if (in_sack) {
1541 len = skb->len;
1542 pcount = tcp_skb_pcount(skb);
1543 mss = tcp_skb_seglen(skb);
1544
1545
1546
1547
1548 if (mss != tcp_skb_seglen(prev))
1549 goto fallback;
1550 } else {
1551 if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
1552 goto noop;
1553
1554
1555
1556
1557 if (tcp_skb_pcount(skb) <= 1)
1558 goto noop;
1559
1560 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1561 if (!in_sack) {
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573 goto fallback;
1574 }
1575
1576 len = end_seq - TCP_SKB_CB(skb)->seq;
1577 BUG_ON(len < 0);
1578 BUG_ON(len > skb->len);
1579
1580
1581
1582
1583
1584 mss = tcp_skb_mss(skb);
1585
1586
1587
1588
1589 if (mss != tcp_skb_seglen(prev))
1590 goto fallback;
1591
1592 if (len == mss) {
1593 pcount = 1;
1594 } else if (len < mss) {
1595 goto noop;
1596 } else {
1597 pcount = len / mss;
1598 len = pcount * mss;
1599 }
1600 }
1601
1602
1603 if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
1604 goto fallback;
1605
1606 if (!tcp_skb_shift(prev, skb, pcount, len))
1607 goto fallback;
1608 if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
1609 goto out;
1610
1611
1612
1613
1614 skb = skb_rb_next(prev);
1615 if (!skb)
1616 goto out;
1617
1618 if (!skb_can_shift(skb) ||
1619 ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
1620 (mss != tcp_skb_seglen(skb)))
1621 goto out;
1622
1623 len = skb->len;
1624 pcount = tcp_skb_pcount(skb);
1625 if (tcp_skb_shift(prev, skb, pcount, len))
1626 tcp_shifted_skb(sk, prev, skb, state, pcount,
1627 len, mss, 0);
1628
1629out:
1630 return prev;
1631
1632noop:
1633 return skb;
1634
1635fallback:
1636 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
1637 return NULL;
1638}
1639
1640static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1641 struct tcp_sack_block *next_dup,
1642 struct tcp_sacktag_state *state,
1643 u32 start_seq, u32 end_seq,
1644 bool dup_sack_in)
1645{
1646 struct tcp_sock *tp = tcp_sk(sk);
1647 struct sk_buff *tmp;
1648
1649 skb_rbtree_walk_from(skb) {
1650 int in_sack = 0;
1651 bool dup_sack = dup_sack_in;
1652
1653
1654 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
1655 break;
1656
1657 if (next_dup &&
1658 before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
1659 in_sack = tcp_match_skb_to_sack(sk, skb,
1660 next_dup->start_seq,
1661 next_dup->end_seq);
1662 if (in_sack > 0)
1663 dup_sack = true;
1664 }
1665
1666
1667
1668
1669
1670 if (in_sack <= 0) {
1671 tmp = tcp_shift_skb_data(sk, skb, state,
1672 start_seq, end_seq, dup_sack);
1673 if (tmp) {
1674 if (tmp != skb) {
1675 skb = tmp;
1676 continue;
1677 }
1678
1679 in_sack = 0;
1680 } else {
1681 in_sack = tcp_match_skb_to_sack(sk, skb,
1682 start_seq,
1683 end_seq);
1684 }
1685 }
1686
1687 if (unlikely(in_sack < 0))
1688 break;
1689
1690 if (in_sack) {
1691 TCP_SKB_CB(skb)->sacked =
1692 tcp_sacktag_one(sk,
1693 state,
1694 TCP_SKB_CB(skb)->sacked,
1695 TCP_SKB_CB(skb)->seq,
1696 TCP_SKB_CB(skb)->end_seq,
1697 dup_sack,
1698 tcp_skb_pcount(skb),
1699 tcp_skb_timestamp_us(skb));
1700 tcp_rate_skb_delivered(sk, skb, state->rate);
1701 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1702 list_del_init(&skb->tcp_tsorted_anchor);
1703
1704 if (!before(TCP_SKB_CB(skb)->seq,
1705 tcp_highest_sack_seq(tp)))
1706 tcp_advance_highest_sack(sk, skb);
1707 }
1708 }
1709 return skb;
1710}
1711
1712static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, u32 seq)
1713{
1714 struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
1715 struct sk_buff *skb;
1716
1717 while (*p) {
1718 parent = *p;
1719 skb = rb_to_skb(parent);
1720 if (before(seq, TCP_SKB_CB(skb)->seq)) {
1721 p = &parent->rb_left;
1722 continue;
1723 }
1724 if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
1725 p = &parent->rb_right;
1726 continue;
1727 }
1728 return skb;
1729 }
1730 return NULL;
1731}
1732
1733static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
1734 u32 skip_to_seq)
1735{
1736 if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
1737 return skb;
1738
1739 return tcp_sacktag_bsearch(sk, skip_to_seq);
1740}
1741
1742static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1743 struct sock *sk,
1744 struct tcp_sack_block *next_dup,
1745 struct tcp_sacktag_state *state,
1746 u32 skip_to_seq)
1747{
1748 if (!next_dup)
1749 return skb;
1750
1751 if (before(next_dup->start_seq, skip_to_seq)) {
1752 skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
1753 skb = tcp_sacktag_walk(skb, sk, NULL, state,
1754 next_dup->start_seq, next_dup->end_seq,
1755 1);
1756 }
1757
1758 return skb;
1759}
1760
1761static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
1762{
1763 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1764}
1765
1766static int
1767tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1768 u32 prior_snd_una, struct tcp_sacktag_state *state)
1769{
1770 struct tcp_sock *tp = tcp_sk(sk);
1771 const unsigned char *ptr = (skb_transport_header(ack_skb) +
1772 TCP_SKB_CB(ack_skb)->sacked);
1773 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
1774 struct tcp_sack_block sp[TCP_NUM_SACKS];
1775 struct tcp_sack_block *cache;
1776 struct sk_buff *skb;
1777 int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
1778 int used_sacks;
1779 bool found_dup_sack = false;
1780 int i, j;
1781 int first_sack_index;
1782
1783 state->flag = 0;
1784 state->reord = tp->snd_nxt;
1785
1786 if (!tp->sacked_out)
1787 tcp_highest_sack_reset(sk);
1788
1789 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
1790 num_sacks, prior_snd_una, state);
1791
1792
1793
1794
1795
1796 if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
1797 return 0;
1798
1799 if (!tp->packets_out)
1800 goto out;
1801
1802 used_sacks = 0;
1803 first_sack_index = 0;
1804 for (i = 0; i < num_sacks; i++) {
1805 bool dup_sack = !i && found_dup_sack;
1806
1807 sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
1808 sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);
1809
1810 if (!tcp_is_sackblock_valid(tp, dup_sack,
1811 sp[used_sacks].start_seq,
1812 sp[used_sacks].end_seq)) {
1813 int mib_idx;
1814
1815 if (dup_sack) {
1816 if (!tp->undo_marker)
1817 mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
1818 else
1819 mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
1820 } else {
1821
1822 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
1823 !after(sp[used_sacks].end_seq, tp->snd_una))
1824 continue;
1825 mib_idx = LINUX_MIB_TCPSACKDISCARD;
1826 }
1827
1828 NET_INC_STATS(sock_net(sk), mib_idx);
1829 if (i == 0)
1830 first_sack_index = -1;
1831 continue;
1832 }
1833
1834
1835 if (!after(sp[used_sacks].end_seq, prior_snd_una)) {
1836 if (i == 0)
1837 first_sack_index = -1;
1838 continue;
1839 }
1840
1841 used_sacks++;
1842 }
1843
1844
1845 for (i = used_sacks - 1; i > 0; i--) {
1846 for (j = 0; j < i; j++) {
1847 if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
1848 swap(sp[j], sp[j + 1]);
1849
1850
1851 if (j == first_sack_index)
1852 first_sack_index = j + 1;
1853 }
1854 }
1855 }
1856
1857 state->mss_now = tcp_current_mss(sk);
1858 skb = NULL;
1859 i = 0;
1860
1861 if (!tp->sacked_out) {
1862
1863 cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1864 } else {
1865 cache = tp->recv_sack_cache;
1866
1867 while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
1868 !cache->end_seq)
1869 cache++;
1870 }
1871
1872 while (i < used_sacks) {
1873 u32 start_seq = sp[i].start_seq;
1874 u32 end_seq = sp[i].end_seq;
1875 bool dup_sack = (found_dup_sack && (i == first_sack_index));
1876 struct tcp_sack_block *next_dup = NULL;
1877
1878 if (found_dup_sack && ((i + 1) == first_sack_index))
1879 next_dup = &sp[i + 1];
1880
1881
1882 while (tcp_sack_cache_ok(tp, cache) &&
1883 !before(start_seq, cache->end_seq))
1884 cache++;
1885
1886
1887 if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
1888 after(end_seq, cache->start_seq)) {
1889
1890
1891 if (before(start_seq, cache->start_seq)) {
1892 skb = tcp_sacktag_skip(skb, sk, start_seq);
1893 skb = tcp_sacktag_walk(skb, sk, next_dup,
1894 state,
1895 start_seq,
1896 cache->start_seq,
1897 dup_sack);
1898 }
1899
1900
1901 if (!after(end_seq, cache->end_seq))
1902 goto advance_sp;
1903
1904 skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
1905 state,
1906 cache->end_seq);
1907
1908
1909 if (tcp_highest_sack_seq(tp) == cache->end_seq) {
1910
1911 skb = tcp_highest_sack(sk);
1912 if (!skb)
1913 break;
1914 cache++;
1915 goto walk;
1916 }
1917
1918 skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
1919
1920 cache++;
1921 continue;
1922 }
1923
1924 if (!before(start_seq, tcp_highest_sack_seq(tp))) {
1925 skb = tcp_highest_sack(sk);
1926 if (!skb)
1927 break;
1928 }
1929 skb = tcp_sacktag_skip(skb, sk, start_seq);
1930
1931walk:
1932 skb = tcp_sacktag_walk(skb, sk, next_dup, state,
1933 start_seq, end_seq, dup_sack);
1934
1935advance_sp:
1936 i++;
1937 }
1938
1939
1940 for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
1941 tp->recv_sack_cache[i].start_seq = 0;
1942 tp->recv_sack_cache[i].end_seq = 0;
1943 }
1944 for (j = 0; j < used_sacks; j++)
1945 tp->recv_sack_cache[i++] = sp[j];
1946
1947 if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
1948 tcp_check_sack_reordering(sk, state->reord, 0);
1949
1950 tcp_verify_left_out(tp);
1951out:
1952
1953#if FASTRETRANS_DEBUG > 0
1954 WARN_ON((int)tp->sacked_out < 0);
1955 WARN_ON((int)tp->lost_out < 0);
1956 WARN_ON((int)tp->retrans_out < 0);
1957 WARN_ON((int)tcp_packets_in_flight(tp) < 0);
1958#endif
1959 return state->flag;
1960}
1961
1962
1963
1964
1965static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
1966{
1967 u32 holes;
1968
1969 holes = max(tp->lost_out, 1U);
1970 holes = min(holes, tp->packets_out);
1971
1972 if ((tp->sacked_out + holes) > tp->packets_out) {
1973 tp->sacked_out = tp->packets_out - holes;
1974 return true;
1975 }
1976 return false;
1977}
1978
1979
1980
1981
1982
1983static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1984{
1985 struct tcp_sock *tp = tcp_sk(sk);
1986
1987 if (!tcp_limit_reno_sacked(tp))
1988 return;
1989
1990 tp->reordering = min_t(u32, tp->packets_out + addend,
1991 sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
1992 tp->reord_seen++;
1993 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
1994}
1995
1996
1997
1998static void tcp_add_reno_sack(struct sock *sk, int num_dupack, bool ece_ack)
1999{
2000 if (num_dupack) {
2001 struct tcp_sock *tp = tcp_sk(sk);
2002 u32 prior_sacked = tp->sacked_out;
2003 s32 delivered;
2004
2005 tp->sacked_out += num_dupack;
2006 tcp_check_reno_reordering(sk, 0);
2007 delivered = tp->sacked_out - prior_sacked;
2008 if (delivered > 0)
2009 tcp_count_delivered(tp, delivered, ece_ack);
2010 tcp_verify_left_out(tp);
2011 }
2012}
2013
2014
2015
2016static void tcp_remove_reno_sacks(struct sock *sk, int acked, bool ece_ack)
2017{
2018 struct tcp_sock *tp = tcp_sk(sk);
2019
2020 if (acked > 0) {
2021
2022 tcp_count_delivered(tp, max_t(int, acked - tp->sacked_out, 1),
2023 ece_ack);
2024 if (acked - 1 >= tp->sacked_out)
2025 tp->sacked_out = 0;
2026 else
2027 tp->sacked_out -= acked - 1;
2028 }
2029 tcp_check_reno_reordering(sk, acked);
2030 tcp_verify_left_out(tp);
2031}
2032
2033static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
2034{
2035 tp->sacked_out = 0;
2036}
2037
2038void tcp_clear_retrans(struct tcp_sock *tp)
2039{
2040 tp->retrans_out = 0;
2041 tp->lost_out = 0;
2042 tp->undo_marker = 0;
2043 tp->undo_retrans = -1;
2044 tp->sacked_out = 0;
2045}
2046
2047static inline void tcp_init_undo(struct tcp_sock *tp)
2048{
2049 tp->undo_marker = tp->snd_una;
2050
2051 tp->undo_retrans = tp->retrans_out ? : -1;
2052}
2053
2054static bool tcp_is_rack(const struct sock *sk)
2055{
2056 return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
2057}
2058
2059
2060
2061
2062
2063static void tcp_timeout_mark_lost(struct sock *sk)
2064{
2065 struct tcp_sock *tp = tcp_sk(sk);
2066 struct sk_buff *skb, *head;
2067 bool is_reneg;
2068
2069 head = tcp_rtx_queue_head(sk);
2070 is_reneg = head && (TCP_SKB_CB(head)->sacked & TCPCB_SACKED_ACKED);
2071 if (is_reneg) {
2072 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
2073 tp->sacked_out = 0;
2074
2075 tp->is_sack_reneg = 1;
2076 } else if (tcp_is_reno(tp)) {
2077 tcp_reset_reno_sack(tp);
2078 }
2079
2080 skb = head;
2081 skb_rbtree_walk_from(skb) {
2082 if (is_reneg)
2083 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
2084 else if (tcp_is_rack(sk) && skb != head &&
2085 tcp_rack_skb_timeout(tp, skb, 0) > 0)
2086 continue;
2087 tcp_mark_skb_lost(sk, skb);
2088 }
2089 tcp_verify_left_out(tp);
2090 tcp_clear_all_retrans_hints(tp);
2091}
2092
2093
2094void tcp_enter_loss(struct sock *sk)
2095{
2096 const struct inet_connection_sock *icsk = inet_csk(sk);
2097 struct tcp_sock *tp = tcp_sk(sk);
2098 struct net *net = sock_net(sk);
2099 bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
2100
2101 tcp_timeout_mark_lost(sk);
2102
2103
2104 if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
2105 !after(tp->high_seq, tp->snd_una) ||
2106 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
2107 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2108 tp->prior_cwnd = tp->snd_cwnd;
2109 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2110 tcp_ca_event(sk, CA_EVENT_LOSS);
2111 tcp_init_undo(tp);
2112 }
2113 tp->snd_cwnd = tcp_packets_in_flight(tp) + 1;
2114 tp->snd_cwnd_cnt = 0;
2115 tp->snd_cwnd_stamp = tcp_jiffies32;
2116
2117
2118
2119
2120 if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
2121 tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
2122 tp->reordering = min_t(unsigned int, tp->reordering,
2123 net->ipv4.sysctl_tcp_reordering);
2124 tcp_set_ca_state(sk, TCP_CA_Loss);
2125 tp->high_seq = tp->snd_nxt;
2126 tcp_ecn_queue_cwr(tp);
2127
2128
2129
2130
2131
2132 tp->frto = net->ipv4.sysctl_tcp_frto &&
2133 (new_recovery || icsk->icsk_retransmits) &&
2134 !inet_csk(sk)->icsk_mtup.probe_size;
2135}
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147static bool tcp_check_sack_reneging(struct sock *sk, int flag)
2148{
2149 if (flag & FLAG_SACK_RENEGING) {
2150 struct tcp_sock *tp = tcp_sk(sk);
2151 unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
2152 msecs_to_jiffies(10));
2153
2154 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2155 delay, TCP_RTO_MAX);
2156 return true;
2157 }
2158 return false;
2159}
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
2173{
2174 return tp->sacked_out + 1;
2175}
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274static bool tcp_time_to_recover(struct sock *sk, int flag)
2275{
2276 struct tcp_sock *tp = tcp_sk(sk);
2277
2278
2279 if (tp->lost_out)
2280 return true;
2281
2282
2283 if (!tcp_is_rack(sk) && tcp_dupack_heuristics(tp) > tp->reordering)
2284 return true;
2285
2286 return false;
2287}
2288
2289
2290
2291
2292
2293
2294static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2295{
2296 struct tcp_sock *tp = tcp_sk(sk);
2297 struct sk_buff *skb;
2298 int cnt;
2299
2300 const u32 loss_high = tp->snd_nxt;
2301
2302 WARN_ON(packets > tp->packets_out);
2303 skb = tp->lost_skb_hint;
2304 if (skb) {
2305
2306 if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
2307 return;
2308 cnt = tp->lost_cnt_hint;
2309 } else {
2310 skb = tcp_rtx_queue_head(sk);
2311 cnt = 0;
2312 }
2313
2314 skb_rbtree_walk_from(skb) {
2315
2316
2317 tp->lost_skb_hint = skb;
2318 tp->lost_cnt_hint = cnt;
2319
2320 if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
2321 break;
2322
2323 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2324 cnt += tcp_skb_pcount(skb);
2325
2326 if (cnt > packets)
2327 break;
2328
2329 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST))
2330 tcp_mark_skb_lost(sk, skb);
2331
2332 if (mark_head)
2333 break;
2334 }
2335 tcp_verify_left_out(tp);
2336}
2337
2338
2339
2340static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2341{
2342 struct tcp_sock *tp = tcp_sk(sk);
2343
2344 if (tcp_is_sack(tp)) {
2345 int sacked_upto = tp->sacked_out - tp->reordering;
2346 if (sacked_upto >= 0)
2347 tcp_mark_head_lost(sk, sacked_upto, 0);
2348 else if (fast_rexmit)
2349 tcp_mark_head_lost(sk, 1, 1);
2350 }
2351}
2352
2353static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when)
2354{
2355 return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2356 before(tp->rx_opt.rcv_tsecr, when);
2357}
2358
2359
2360
2361
2362static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
2363 const struct sk_buff *skb)
2364{
2365 return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) &&
2366 tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb));
2367}
2368
2369
2370
2371
2372static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
2373{
2374 return tp->retrans_stamp &&
2375 tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
2376}
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394static bool tcp_any_retrans_done(const struct sock *sk)
2395{
2396 const struct tcp_sock *tp = tcp_sk(sk);
2397 struct sk_buff *skb;
2398
2399 if (tp->retrans_out)
2400 return true;
2401
2402 skb = tcp_rtx_queue_head(sk);
2403 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2404 return true;
2405
2406 return false;
2407}
2408
2409static void DBGUNDO(struct sock *sk, const char *msg)
2410{
2411#if FASTRETRANS_DEBUG > 1
2412 struct tcp_sock *tp = tcp_sk(sk);
2413 struct inet_sock *inet = inet_sk(sk);
2414
2415 if (sk->sk_family == AF_INET) {
2416 pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
2417 msg,
2418 &inet->inet_daddr, ntohs(inet->inet_dport),
2419 tp->snd_cwnd, tcp_left_out(tp),
2420 tp->snd_ssthresh, tp->prior_ssthresh,
2421 tp->packets_out);
2422 }
2423#if IS_ENABLED(CONFIG_IPV6)
2424 else if (sk->sk_family == AF_INET6) {
2425 pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
2426 msg,
2427 &sk->sk_v6_daddr, ntohs(inet->inet_dport),
2428 tp->snd_cwnd, tcp_left_out(tp),
2429 tp->snd_ssthresh, tp->prior_ssthresh,
2430 tp->packets_out);
2431 }
2432#endif
2433#endif
2434}
2435
2436static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
2437{
2438 struct tcp_sock *tp = tcp_sk(sk);
2439
2440 if (unmark_loss) {
2441 struct sk_buff *skb;
2442
2443 skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
2444 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2445 }
2446 tp->lost_out = 0;
2447 tcp_clear_all_retrans_hints(tp);
2448 }
2449
2450 if (tp->prior_ssthresh) {
2451 const struct inet_connection_sock *icsk = inet_csk(sk);
2452
2453 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2454
2455 if (tp->prior_ssthresh > tp->snd_ssthresh) {
2456 tp->snd_ssthresh = tp->prior_ssthresh;
2457 tcp_ecn_withdraw_cwr(tp);
2458 }
2459 }
2460 tp->snd_cwnd_stamp = tcp_jiffies32;
2461 tp->undo_marker = 0;
2462 tp->rack.advanced = 1;
2463}
2464
2465static inline bool tcp_may_undo(const struct tcp_sock *tp)
2466{
2467 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2468}
2469
2470
2471static bool tcp_try_undo_recovery(struct sock *sk)
2472{
2473 struct tcp_sock *tp = tcp_sk(sk);
2474
2475 if (tcp_may_undo(tp)) {
2476 int mib_idx;
2477
2478
2479
2480
2481 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2482 tcp_undo_cwnd_reduction(sk, false);
2483 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2484 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2485 else
2486 mib_idx = LINUX_MIB_TCPFULLUNDO;
2487
2488 NET_INC_STATS(sock_net(sk), mib_idx);
2489 } else if (tp->rack.reo_wnd_persist) {
2490 tp->rack.reo_wnd_persist--;
2491 }
2492 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
2493
2494
2495
2496 if (!tcp_any_retrans_done(sk))
2497 tp->retrans_stamp = 0;
2498 return true;
2499 }
2500 tcp_set_ca_state(sk, TCP_CA_Open);
2501 tp->is_sack_reneg = 0;
2502 return false;
2503}
2504
2505
2506static bool tcp_try_undo_dsack(struct sock *sk)
2507{
2508 struct tcp_sock *tp = tcp_sk(sk);
2509
2510 if (tp->undo_marker && !tp->undo_retrans) {
2511 tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
2512 tp->rack.reo_wnd_persist + 1);
2513 DBGUNDO(sk, "D-SACK");
2514 tcp_undo_cwnd_reduction(sk, false);
2515 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2516 return true;
2517 }
2518 return false;
2519}
2520
2521
2522static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
2523{
2524 struct tcp_sock *tp = tcp_sk(sk);
2525
2526 if (frto_undo || tcp_may_undo(tp)) {
2527 tcp_undo_cwnd_reduction(sk, true);
2528
2529 DBGUNDO(sk, "partial loss");
2530 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2531 if (frto_undo)
2532 NET_INC_STATS(sock_net(sk),
2533 LINUX_MIB_TCPSPURIOUSRTOS);
2534 inet_csk(sk)->icsk_retransmits = 0;
2535 if (frto_undo || tcp_is_sack(tp)) {
2536 tcp_set_ca_state(sk, TCP_CA_Open);
2537 tp->is_sack_reneg = 0;
2538 }
2539 return true;
2540 }
2541 return false;
2542}
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553static void tcp_init_cwnd_reduction(struct sock *sk)
2554{
2555 struct tcp_sock *tp = tcp_sk(sk);
2556
2557 tp->high_seq = tp->snd_nxt;
2558 tp->tlp_high_seq = 0;
2559 tp->snd_cwnd_cnt = 0;
2560 tp->prior_cwnd = tp->snd_cwnd;
2561 tp->prr_delivered = 0;
2562 tp->prr_out = 0;
2563 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
2564 tcp_ecn_queue_cwr(tp);
2565}
2566
2567void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag)
2568{
2569 struct tcp_sock *tp = tcp_sk(sk);
2570 int sndcnt = 0;
2571 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
2572
2573 if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
2574 return;
2575
2576 tp->prr_delivered += newly_acked_sacked;
2577 if (delta < 0) {
2578 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
2579 tp->prior_cwnd - 1;
2580 sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
2581 } else if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost) {
2582 sndcnt = min_t(int, delta,
2583 max_t(int, tp->prr_delivered - tp->prr_out,
2584 newly_acked_sacked) + 1);
2585 } else {
2586 sndcnt = min(delta, newly_acked_sacked);
2587 }
2588
2589 sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
2590 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
2591}
2592
2593static inline void tcp_end_cwnd_reduction(struct sock *sk)
2594{
2595 struct tcp_sock *tp = tcp_sk(sk);
2596
2597 if (inet_csk(sk)->icsk_ca_ops->cong_control)
2598 return;
2599
2600
2601 if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
2602 (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
2603 tp->snd_cwnd = tp->snd_ssthresh;
2604 tp->snd_cwnd_stamp = tcp_jiffies32;
2605 }
2606 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2607}
2608
2609
2610void tcp_enter_cwr(struct sock *sk)
2611{
2612 struct tcp_sock *tp = tcp_sk(sk);
2613
2614 tp->prior_ssthresh = 0;
2615 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2616 tp->undo_marker = 0;
2617 tcp_init_cwnd_reduction(sk);
2618 tcp_set_ca_state(sk, TCP_CA_CWR);
2619 }
2620}
2621EXPORT_SYMBOL(tcp_enter_cwr);
2622
2623static void tcp_try_keep_open(struct sock *sk)
2624{
2625 struct tcp_sock *tp = tcp_sk(sk);
2626 int state = TCP_CA_Open;
2627
2628 if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
2629 state = TCP_CA_Disorder;
2630
2631 if (inet_csk(sk)->icsk_ca_state != state) {
2632 tcp_set_ca_state(sk, state);
2633 tp->high_seq = tp->snd_nxt;
2634 }
2635}
2636
2637static void tcp_try_to_open(struct sock *sk, int flag)
2638{
2639 struct tcp_sock *tp = tcp_sk(sk);
2640
2641 tcp_verify_left_out(tp);
2642
2643 if (!tcp_any_retrans_done(sk))
2644 tp->retrans_stamp = 0;
2645
2646 if (flag & FLAG_ECE)
2647 tcp_enter_cwr(sk);
2648
2649 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2650 tcp_try_keep_open(sk);
2651 }
2652}
2653
2654static void tcp_mtup_probe_failed(struct sock *sk)
2655{
2656 struct inet_connection_sock *icsk = inet_csk(sk);
2657
2658 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
2659 icsk->icsk_mtup.probe_size = 0;
2660 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPFAIL);
2661}
2662
2663static void tcp_mtup_probe_success(struct sock *sk)
2664{
2665 struct tcp_sock *tp = tcp_sk(sk);
2666 struct inet_connection_sock *icsk = inet_csk(sk);
2667
2668
2669 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2670 tp->snd_cwnd = tp->snd_cwnd *
2671 tcp_mss_to_mtu(sk, tp->mss_cache) /
2672 icsk->icsk_mtup.probe_size;
2673 tp->snd_cwnd_cnt = 0;
2674 tp->snd_cwnd_stamp = tcp_jiffies32;
2675 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2676
2677 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2678 icsk->icsk_mtup.probe_size = 0;
2679 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2680 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
2681}
2682
/* Do a simple retransmit without using the backoff mechanisms in
 * tcp_timer. This is used for path mtu discovery.
 * The socket is already locked here.
 */
2687void tcp_simple_retransmit(struct sock *sk)
2688{
2689 const struct inet_connection_sock *icsk = inet_csk(sk);
2690 struct tcp_sock *tp = tcp_sk(sk);
2691 struct sk_buff *skb;
2692 int mss;
2693
/* A fastopen SYN request is stored as two separate packets within
 * the retransmit queue, this is done by tcp_send_syn_data().
 * As a result simply checking the MSS of the frames in the queue
 * will not work for the SYN packet.
 *
 * Us being here is an indication of a path MTU issue, so we can
 * assume that the fastopen SYN was lost and just mark all the
 * frames in the retransmit queue as lost. We will use an MSS of
 * -1 to mark all frames as lost, otherwise compute the current MSS.
 */
2704 if (tp->syn_data && sk->sk_state == TCP_SYN_SENT)
2705 mss = -1;
2706 else
2707 mss = tcp_current_mss(sk);
2708
2709 skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
2710 if (tcp_skb_seglen(skb) > mss)
2711 tcp_mark_skb_lost(sk, skb);
2712 }
2713
2714 tcp_clear_retrans_hints_partial(tp);
2715
2716 if (!tp->lost_out)
2717 return;
2718
2719 if (tcp_is_reno(tp))
2720 tcp_limit_reno_sacked(tp);
2721
2722 tcp_verify_left_out(tp);
2723
/* Don't muck with the congestion window here.  We are not increasing
 * the amount of data in the network; only the unit (MSS) changed, so
 * cwnd and ssthresh are effectively reduced already.
 */
2729 if (icsk->icsk_ca_state != TCP_CA_Loss) {
2730 tp->high_seq = tp->snd_nxt;
2731 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2732 tp->prior_ssthresh = 0;
2733 tp->undo_marker = 0;
2734 tcp_set_ca_state(sk, TCP_CA_Loss);
2735 }
2736 tcp_xmit_retransmit_queue(sk);
2737}
2738EXPORT_SYMBOL(tcp_simple_retransmit);
2739
2740void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2741{
2742 struct tcp_sock *tp = tcp_sk(sk);
2743 int mib_idx;
2744
2745 if (tcp_is_reno(tp))
2746 mib_idx = LINUX_MIB_TCPRENORECOVERY;
2747 else
2748 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
2749
2750 NET_INC_STATS(sock_net(sk), mib_idx);
2751
2752 tp->prior_ssthresh = 0;
2753 tcp_init_undo(tp);
2754
2755 if (!tcp_in_cwnd_reduction(sk)) {
2756 if (!ece_ack)
2757 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2758 tcp_init_cwnd_reduction(sk);
2759 }
2760 tcp_set_ca_state(sk, TCP_CA_Recovery);
2761}
2762
/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
 * recovered or spurious. Otherwise retransmits more on partial ACKs.
 */
2766static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
2767 int *rexmit)
2768{
2769 struct tcp_sock *tp = tcp_sk(sk);
2770 bool recovered = !before(tp->snd_una, tp->high_seq);
2771
2772 if ((flag & FLAG_SND_UNA_ADVANCED || rcu_access_pointer(tp->fastopen_rsk)) &&
2773 tcp_try_undo_loss(sk, false))
2774 return;
2775
2776 if (tp->frto) {
/* F-RTO (RFC 5682) step 3.b: the timeout was spurious if data sent
 * before the RTO, and never retransmitted, is acknowledged now.
 */
2780 if ((flag & FLAG_ORIG_SACK_ACKED) &&
2781 tcp_try_undo_loss(sk, true))
2782 return;
2783
2784 if (after(tp->snd_nxt, tp->high_seq)) {
2785 if (flag & FLAG_DATA_SACKED || num_dupack)
2786 tp->frto = 0;
2787 } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
2788 tp->high_seq = tp->snd_nxt;
/* Step 2.b. Try send new data (but deferred until cwnd
 * is updated in tcp_ack()). Otherwise fall back to
 * the conventional recovery.
 */
2793 if (!tcp_write_queue_empty(sk) &&
2794 after(tcp_wnd_end(tp), tp->snd_nxt)) {
2795 *rexmit = REXMIT_NEW;
2796 return;
2797 }
2798 tp->frto = 0;
2799 }
2800 }
2801
2802 if (recovered) {
/* F-RTO RFC 5682 sec 3.1 step 2.a and first part of step 3.a */
2804 tcp_try_undo_recovery(sk);
2805 return;
2806 }
2807 if (tcp_is_reno(tp)) {
/* A Reno DUPACK means new data in F-RTO step 2.b above are
 * delivered. Lower inflight to clock out (re)transmissions.
 */
2811 if (after(tp->snd_nxt, tp->high_seq) && num_dupack)
2812 tcp_add_reno_sack(sk, num_dupack, flag & FLAG_ECE);
2813 else if (flag & FLAG_SND_UNA_ADVANCED)
2814 tcp_reset_reno_sack(tp);
2815 }
2816 *rexmit = REXMIT_LOST;
2817}
2818
2819static bool tcp_force_fast_retransmit(struct sock *sk)
2820{
2821 struct tcp_sock *tp = tcp_sk(sk);
2822
2823 return after(tcp_highest_sack_seq(tp),
2824 tp->snd_una + tp->reordering * tp->mss_cache);
2825}
2826
/* Undo during fast recovery after partial ACK. */
2828static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una,
2829 bool *do_lost)
2830{
2831 struct tcp_sock *tp = tcp_sk(sk);
2832
2833 if (tp->undo_marker && tcp_packet_delayed(tp)) {
/* Plain luck! The hole was filled by a delayed packet rather
 * than by a retransmit. Check for reordering.
 */
2837 tcp_check_sack_reordering(sk, prior_snd_una, 1);
2838
/* We are getting evidence that the reordering degree is higher
 * than we realized. If there are no retransmits out then we
 * can undo. Otherwise we clock out new packets but do not
 * mark more packets lost or retransmit more.
 */
2844 if (tp->retrans_out)
2845 return true;
2846
2847 if (!tcp_any_retrans_done(sk))
2848 tp->retrans_stamp = 0;
2849
2850 DBGUNDO(sk, "partial recovery");
2851 tcp_undo_cwnd_reduction(sk, true);
2852 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2853 tcp_try_keep_open(sk);
2854 } else {
/* Partial ACK arrived. Force fast retransmit. */
2856 *do_lost = tcp_force_fast_retransmit(sk);
2857 }
2858 return false;
2859}
2860
2861static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
2862{
2863 struct tcp_sock *tp = tcp_sk(sk);
2864
2865 if (tcp_rtx_queue_empty(sk))
2866 return;
2867
2868 if (unlikely(tcp_is_reno(tp))) {
2869 tcp_newreno_mark_lost(sk, *ack_flag & FLAG_SND_UNA_ADVANCED);
2870 } else if (tcp_is_rack(sk)) {
2871 u32 prior_retrans = tp->retrans_out;
2872
2873 if (tcp_rack_mark_lost(sk))
2874 *ack_flag &= ~FLAG_SET_XMIT_TIMER;
2875 if (prior_retrans > tp->retrans_out)
2876 *ack_flag |= FLAG_LOST_RETRANS;
2877 }
2878}
2879
/* Process an event which can update packets-in-flight not trivially.
 * The main goal of this function is to calculate a new estimate for
 * left_out, taking into account both packets sitting in the receiver's
 * buffer and packets which are "out of network" and have to be
 * retransmitted with the next ACK.
 *
 * Besides that it updates the congestion state when packet loss or ECN
 * is detected. But it does not reduce the cwnd; that is done by the
 * congestion control later.
 *
 * It does _not_ decide what to send; that is done in
 * tcp_xmit_retransmit_queue().
 */
2892static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
2893 int num_dupack, int *ack_flag, int *rexmit)
2894{
2895 struct inet_connection_sock *icsk = inet_csk(sk);
2896 struct tcp_sock *tp = tcp_sk(sk);
2897 int fast_rexmit = 0, flag = *ack_flag;
2898 bool ece_ack = flag & FLAG_ECE;
2899 bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
2900 tcp_force_fast_retransmit(sk));
2901
2902 if (!tp->packets_out && tp->sacked_out)
2903 tp->sacked_out = 0;
2904
/* Now the state machine starts.
 * A. ECE, hence prohibit cwnd undoing: the reduction is required. */
2907 if (ece_ack)
2908 tp->prior_ssthresh = 0;
2909
/* B. In all the states check for reneging SACKs. */
2911 if (tcp_check_sack_reneging(sk, flag))
2912 return;
2913
/* C. Check consistency of the current state. */
2915 tcp_verify_left_out(tp);
2916
/* D. Check state exit conditions. State can be terminated
 *    when high_seq is ACKed. */
2919 if (icsk->icsk_ca_state == TCP_CA_Open) {
2920 WARN_ON(tp->retrans_out != 0 && !tp->syn_data);
2921 tp->retrans_stamp = 0;
2922 } else if (!before(tp->snd_una, tp->high_seq)) {
2923 switch (icsk->icsk_ca_state) {
2924 case TCP_CA_CWR:
/* CWR is held until something *above* high_seq is ACKed,
 * so that the CWR bit is guaranteed to reach the receiver. */
2927 if (tp->snd_una != tp->high_seq) {
2928 tcp_end_cwnd_reduction(sk);
2929 tcp_set_ca_state(sk, TCP_CA_Open);
2930 }
2931 break;
2932
2933 case TCP_CA_Recovery:
2934 if (tcp_is_reno(tp))
2935 tcp_reset_reno_sack(tp);
2936 if (tcp_try_undo_recovery(sk))
2937 return;
2938 tcp_end_cwnd_reduction(sk);
2939 break;
2940 }
2941 }
2942
/* E. Process state. */
2944 switch (icsk->icsk_ca_state) {
2945 case TCP_CA_Recovery:
2946 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
2947 if (tcp_is_reno(tp))
2948 tcp_add_reno_sack(sk, num_dupack, ece_ack);
2949 } else if (tcp_try_undo_partial(sk, prior_snd_una, &do_lost))
2950 return;
2951
2952 if (tcp_try_undo_dsack(sk))
2953 tcp_try_keep_open(sk);
2954
2955 tcp_identify_packet_loss(sk, ack_flag);
2956 if (icsk->icsk_ca_state != TCP_CA_Recovery) {
2957 if (!tcp_time_to_recover(sk, flag))
2958 return;
/* An undo moved us out of Recovery, but losses were detected
 * again: step back into Recovery to retransmit them.
 */
2962 tcp_enter_recovery(sk, ece_ack);
2963 }
2964 break;
2965 case TCP_CA_Loss:
2966 tcp_process_loss(sk, flag, num_dupack, rexmit);
2967 tcp_identify_packet_loss(sk, ack_flag);
2968 if (!(icsk->icsk_ca_state == TCP_CA_Open ||
2969 (*ack_flag & FLAG_LOST_RETRANS)))
2970 return;
2971
2972 fallthrough;
2973 default:
2974 if (tcp_is_reno(tp)) {
2975 if (flag & FLAG_SND_UNA_ADVANCED)
2976 tcp_reset_reno_sack(tp);
2977 tcp_add_reno_sack(sk, num_dupack, ece_ack);
2978 }
2979
2980 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
2981 tcp_try_undo_dsack(sk);
2982
2983 tcp_identify_packet_loss(sk, ack_flag);
2984 if (!tcp_time_to_recover(sk, flag)) {
2985 tcp_try_to_open(sk, flag);
2986 return;
2987 }
2988
/* MTU probe failure: don't reduce cwnd */
2990 if (icsk->icsk_ca_state < TCP_CA_CWR &&
2991 icsk->icsk_mtup.probe_size &&
2992 tp->snd_una == tp->mtu_probe.probe_seq_start) {
2993 tcp_mtup_probe_failed(sk);
/* Restores the reduction we did in tcp_mtup_probe() */
2995 tp->snd_cwnd++;
2996 tcp_simple_retransmit(sk);
2997 return;
2998 }
2999
/* Otherwise enter Recovery state */
3001 tcp_enter_recovery(sk, ece_ack);
3002 fast_rexmit = 1;
3003 }
3004
3005 if (!tcp_is_rack(sk) && do_lost)
3006 tcp_update_scoreboard(sk, fast_rexmit);
3007 *rexmit = REXMIT_LOST;
3008}
3009
3010static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
3011{
3012 u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
3013 struct tcp_sock *tp = tcp_sk(sk);
3014
3015 if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
/* A possibly delayed ACK inflates the RTT sample; don't feed it
 * into the windowed min-RTT filter when it exceeds the current
 * minimum, since the filter would take a long time to recover.
 */
3020 return;
3021 }
3022 minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
3023 rtt_us ? : jiffies_to_usecs(1));
3024}
3025
3026static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
3027 long seq_rtt_us, long sack_rtt_us,
3028 long ca_rtt_us, struct rate_sample *rs)
3029{
3030 const struct tcp_sock *tp = tcp_sk(sk);
3031
/* Prefer RTT measured from ACK's timing to TS-ECR. This is because
 * broken middle-boxes or peers may corrupt TS-ECR fields. But
 * Karn's algorithm forbids taking RTT if some retransmitted data
 * is acked (RFC 6298).
 */
3037 if (seq_rtt_us < 0)
3038 seq_rtt_us = sack_rtt_us;
3039
/* RTTM Rule: A TSecr value received in a segment is used to
 * update the averaged RTT measurement only if the segment
 * acknowledges some new data, i.e., only if it advances the
 * left edge of the send window.
 * See draft-ietf-tcplw-high-performance-00, section 3.3.
 */
3046 if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
3047 flag & FLAG_ACKED) {
3048 u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
3049
3050 if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
3051 if (!delta)
3052 delta = 1;
3053 seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
3054 ca_rtt_us = seq_rtt_us;
3055 }
3056 }
3057 rs->rtt_us = ca_rtt_us;
3058 if (seq_rtt_us < 0)
3059 return false;
3060
/* ca_rtt_us >= 0 relies on the invariant that ca_rtt_us is
 * always taken together with ACK, SACK, or TS-opts. Any negative
 * values will be skipped by the seq_rtt_us < 0 check above.
 */
3065 tcp_update_rtt_min(sk, ca_rtt_us, flag);
3066 tcp_rtt_estimator(sk, seq_rtt_us);
3067 tcp_set_rto(sk);
3068
/* RFC 6298: only reset backoff on a valid RTT measurement. */
3070 inet_csk(sk)->icsk_backoff = 0;
3071 return true;
3072}
3073
/* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
3075void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
3076{
3077 struct rate_sample rs;
3078 long rtt_us = -1L;
3079
3080 if (req && !req->num_retrans && tcp_rsk(req)->snt_synack)
3081 rtt_us = tcp_stamp_us_delta(tcp_clock_us(), tcp_rsk(req)->snt_synack);
3082
3083 tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us, &rs);
3084}
3085
3086
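/* Let the congestion control module grow cwnd in response to newly
 * acked packets (CC modules providing cong_control take a different
 * path in tcp_cong_control()).
 */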
3087static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
3088{
3089 const struct inet_connection_sock *icsk = inet_csk(sk);
3090
3091 icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
3092 tcp_sk(sk)->snd_cwnd_stamp = tcp_jiffies32;
3093}
3094
/* Restart timer after forward progress on connection.
 * RFC 2988 recommends restarting the timer to now + RTO.
 */
3098void tcp_rearm_rto(struct sock *sk)
3099{
3100 const struct inet_connection_sock *icsk = inet_csk(sk);
3101 struct tcp_sock *tp = tcp_sk(sk);
3102
/* If the retrans timer is currently being used by Fast Open
 * for SYN-ACK retransmit purposes, stay put.
 */
3106 if (rcu_access_pointer(tp->fastopen_rsk))
3107 return;
3108
3109 if (!tp->packets_out) {
3110 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
3111 } else {
3112 u32 rto = inet_csk(sk)->icsk_rto;
3113
3114 if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
3115 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
3116 s64 delta_us = tcp_rto_delta_us(sk);
3117
/* delta_us may not be positive if the socket was locked
 * when the retrans timer fired and was rescheduled.
 */
3120 rto = usecs_to_jiffies(max_t(int, delta_us, 1));
3121 }
3122 tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
3123 TCP_RTO_MAX);
3124 }
3125}
3126
/* Try to schedule a loss probe; if that is not possible, fall back to an RTO. */
3128static void tcp_set_xmit_timer(struct sock *sk)
3129{
3130 if (!tcp_schedule_loss_probe(sk, true))
3131 tcp_rearm_rto(sk);
3132}
3133
/* If we get here, the whole TSO packet has not been acked. */
3135static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3136{
3137 struct tcp_sock *tp = tcp_sk(sk);
3138 u32 packets_acked;
3139
3140 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
3141
3142 packets_acked = tcp_skb_pcount(skb);
3143 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
3144 return 0;
3145 packets_acked -= tcp_skb_pcount(skb);
3146
3147 if (packets_acked) {
3148 BUG_ON(tcp_skb_pcount(skb) == 0);
3149 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
3150 }
3151
3152 return packets_acked;
3153}
3154
3155static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
3156 const struct sk_buff *ack_skb, u32 prior_snd_una)
3157{
3158 const struct skb_shared_info *shinfo;
3159
/* Avoid touching skb_shinfo() cache lines unless a TX timestamp was requested */
3161 if (likely(!TCP_SKB_CB(skb)->txstamp_ack))
3162 return;
3163
3164 shinfo = skb_shinfo(skb);
3165 if (!before(shinfo->tskey, prior_snd_una) &&
3166 before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
3167 tcp_skb_tsorted_save(skb) {
3168 __skb_tstamp_tx(skb, ack_skb, NULL, sk, SCM_TSTAMP_ACK);
3169 } tcp_skb_tsorted_restore(skb);
3170 }
3171}
3172
/* Remove acknowledged frames from the retransmission queue. If our packet
 * is before the ack sequence we can discard it as it's confirmed to have
 * arrived at the other end.
 */
3177static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
3178 u32 prior_fack, u32 prior_snd_una,
3179 struct tcp_sacktag_state *sack, bool ece_ack)
3180{
3181 const struct inet_connection_sock *icsk = inet_csk(sk);
3182 u64 first_ackt, last_ackt;
3183 struct tcp_sock *tp = tcp_sk(sk);
3184 u32 prior_sacked = tp->sacked_out;
3185 u32 reord = tp->snd_nxt;
3186 struct sk_buff *skb, *next;
3187 bool fully_acked = true;
3188 long sack_rtt_us = -1L;
3189 long seq_rtt_us = -1L;
3190 long ca_rtt_us = -1L;
3191 u32 pkts_acked = 0;
3192 u32 last_in_flight = 0;
3193 bool rtt_update;
3194 int flag = 0;
3195
3196 first_ackt = 0;
3197
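/* Walk the rtx queue in sequence order, releasing skbs that are now
 * fully acked and collecting RTT / delivery accounting as we go.
 */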
3198 for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
3199 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3200 const u32 start_seq = scb->seq;
3201 u8 sacked = scb->sacked;
3202 u32 acked_pcount;
3203
/* Determine how many packets and what bytes were acked, tso and else */
3205 if (after(scb->end_seq, tp->snd_una)) {
3206 if (tcp_skb_pcount(skb) == 1 ||
3207 !after(tp->snd_una, scb->seq))
3208 break;
3209
3210 acked_pcount = tcp_tso_acked(sk, skb);
3211 if (!acked_pcount)
3212 break;
3213 fully_acked = false;
3214 } else {
3215 acked_pcount = tcp_skb_pcount(skb);
3216 }
3217
3218 if (unlikely(sacked & TCPCB_RETRANS)) {
3219 if (sacked & TCPCB_SACKED_RETRANS)
3220 tp->retrans_out -= acked_pcount;
3221 flag |= FLAG_RETRANS_DATA_ACKED;
3222 } else if (!(sacked & TCPCB_SACKED_ACKED)) {
3223 last_ackt = tcp_skb_timestamp_us(skb);
3224 WARN_ON_ONCE(last_ackt == 0);
3225 if (!first_ackt)
3226 first_ackt = last_ackt;
3227
3228 last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
3229 if (before(start_seq, reord))
3230 reord = start_seq;
3231 if (!after(scb->end_seq, tp->high_seq))
3232 flag |= FLAG_ORIG_SACK_ACKED;
3233 }
3234
3235 if (sacked & TCPCB_SACKED_ACKED) {
3236 tp->sacked_out -= acked_pcount;
3237 } else if (tcp_is_sack(tp)) {
3238 tcp_count_delivered(tp, acked_pcount, ece_ack);
3239 if (!tcp_skb_spurious_retrans(tp, skb))
3240 tcp_rack_advance(tp, sacked, scb->end_seq,
3241 tcp_skb_timestamp_us(skb));
3242 }
3243 if (sacked & TCPCB_LOST)
3244 tp->lost_out -= acked_pcount;
3245
3246 tp->packets_out -= acked_pcount;
3247 pkts_acked += acked_pcount;
3248 tcp_rate_skb_delivered(sk, skb, sack->rate);
3249
/* Initial outgoing SYNs get put onto the write queue
 * just like anything else we transmit.  It is not
 * true data, and if we misinform our callers that
 * this ACK acks real data, we will erroneously exit
 * connection startup slow start one packet too
 * quickly.  This is severely frowned upon behavior.
 */
3257 if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
3258 flag |= FLAG_DATA_ACKED;
3259 } else {
3260 flag |= FLAG_SYN_ACKED;
3261 tp->retrans_stamp = 0;
3262 }
3263
3264 if (!fully_acked)
3265 break;
3266
3267 tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una);
3268
3269 next = skb_rb_next(skb);
3270 if (unlikely(skb == tp->retransmit_skb_hint))
3271 tp->retransmit_skb_hint = NULL;
3272 if (unlikely(skb == tp->lost_skb_hint))
3273 tp->lost_skb_hint = NULL;
3274 tcp_highest_sack_replace(sk, skb, next);
3275 tcp_rtx_queue_unlink_and_free(skb, sk);
3276 }
3277
3278 if (!skb)
3279 tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
3280
3281 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3282 tp->snd_up = tp->snd_una;
3283
3284 if (skb) {
3285 tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una);
3286 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
3287 flag |= FLAG_SACK_RENEGING;
3288 }
3289
3290 if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
3291 seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
3292 ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
3293
3294 if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
3295 last_in_flight && !prior_sacked && fully_acked &&
3296 sack->rate->prior_delivered + 1 == tp->delivered &&
3297 !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
/* Conservatively mark a delayed ACK. It's typically
 * from a lone runt packet over the round trip to
 * a receiver w/o out-of-order or CE events.
 */
3302 flag |= FLAG_ACK_MAYBE_DELAYED;
3303 }
3304 }
3305 if (sack->first_sackt) {
3306 sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt);
3307 ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->last_sackt);
3308 }
3309 rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
3310 ca_rtt_us, sack->rate);
3311
3312 if (flag & FLAG_ACKED) {
3313 flag |= FLAG_SET_XMIT_TIMER;
3314 if (unlikely(icsk->icsk_mtup.probe_size &&
3315 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3316 tcp_mtup_probe_success(sk);
3317 }
3318
3319 if (tcp_is_reno(tp)) {
3320 tcp_remove_reno_sacks(sk, pkts_acked, ece_ack);
/* If any of the cumulatively ACKed segments was
 * retransmitted, the non-SACK case cannot confirm that
 * progress was due to original transmission, due to
 * lack of TCPCB_SACKED_ACKED bits, even if some of
 * the packets may have never been retransmitted.
 */
3328 if (flag & FLAG_RETRANS_DATA_ACKED)
3329 flag &= ~FLAG_ORIG_SACK_ACKED;
3330 } else {
3331 int delta;
3332
/* Non-retransmitted hole got filled? That's reordering */
3334 if (before(reord, prior_fack))
3335 tcp_check_sack_reordering(sk, reord, 0);
3336
3337 delta = prior_sacked - tp->sacked_out;
3338 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3339 }
3340 } else if (skb && rtt_update && sack_rtt_us >= 0 &&
3341 sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp,
3342 tcp_skb_timestamp_us(skb))) {
/* Do not re-arm the RTO if the sack RTT is measured from data sent
 * after the head was last (re)transmitted. Otherwise the
 * timeout may keep being pushed out during loss recovery.
 */
3347 flag |= FLAG_SET_XMIT_TIMER;
3348 }
3349
3350 if (icsk->icsk_ca_ops->pkts_acked) {
3351 struct ack_sample sample = { .pkts_acked = pkts_acked,
3352 .rtt_us = sack->rate->rtt_us,
3353 .in_flight = last_in_flight };
3354
3355 icsk->icsk_ca_ops->pkts_acked(sk, &sample);
3356 }
3357
3358#if FASTRETRANS_DEBUG > 0
3359 WARN_ON((int)tp->sacked_out < 0);
3360 WARN_ON((int)tp->lost_out < 0);
3361 WARN_ON((int)tp->retrans_out < 0);
3362 if (!tp->packets_out && tcp_is_sack(tp)) {
3363 icsk = inet_csk(sk);
3364 if (tp->lost_out) {
3365 pr_debug("Leak l=%u %d\n",
3366 tp->lost_out, icsk->icsk_ca_state);
3367 tp->lost_out = 0;
3368 }
3369 if (tp->sacked_out) {
3370 pr_debug("Leak s=%u %d\n",
3371 tp->sacked_out, icsk->icsk_ca_state);
3372 tp->sacked_out = 0;
3373 }
3374 if (tp->retrans_out) {
3375 pr_debug("Leak r=%u %d\n",
3376 tp->retrans_out, icsk->icsk_ca_state);
3377 tp->retrans_out = 0;
3378 }
3379 }
3380#endif
3381 return flag;
3382}
3383
3384static void tcp_ack_probe(struct sock *sk)
3385{
3386 struct inet_connection_sock *icsk = inet_csk(sk);
3387 struct sk_buff *head = tcp_send_head(sk);
3388 const struct tcp_sock *tp = tcp_sk(sk);
3389
/* Was it a usable window open? */
3391 if (!head)
3392 return;
3393 if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
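/* The window has opened enough for the head to fit: stop probing. */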
3394 icsk->icsk_backoff = 0;
3395 icsk->icsk_probes_tstamp = 0;
3396 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
/* The socket must be woken up by a subsequent tcp_data_snd_check().
 * This function is not meant for arbitrary use!
 */
3400 } else {
3401 unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
3402
3403 when = tcp_clamp_probe0_to_user_timeout(sk, when);
3404 tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX);
3405 }
3406}
3407
3408static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
3409{
3410 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3411 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3412}
3413
/* Decide whether to run the increase function of congestion control. */
3415static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3416{
/* If reordering is high then always grow cwnd whenever data is
 * delivered regardless of its ordering. Otherwise stay conservative
 * and only grow cwnd on in-order delivery (RFC 5681). A stretched ACK
 * with new SACK or ECE mark may first advance cwnd here and later reduce
 * cwnd in tcp_fastretrans_alert() based on more states.
 */
3423 if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
3424 return flag & FLAG_FORWARD_PROGRESS;
3425
3426 return flag & FLAG_DATA_ACKED;
3427}
3428
/* The "ultimate" congestion control function that aims to replace the rigid
 * cwnd increase and decrease control (tcp_cong_avoid, tcp_cwnd_reduction).
 * It's called toward the end of processing an ACK with precise rate
 * information. All transmission or retransmission are delayed afterwards.
 */
3434static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
3435 int flag, const struct rate_sample *rs)
3436{
3437 const struct inet_connection_sock *icsk = inet_csk(sk);
3438
3439 if (icsk->icsk_ca_ops->cong_control) {
3440 icsk->icsk_ca_ops->cong_control(sk, rs);
3441 return;
3442 }
3443
3444 if (tcp_in_cwnd_reduction(sk)) {
/* Reduce cwnd if state mandates */
3446 tcp_cwnd_reduction(sk, acked_sacked, rs->losses, flag);
3447 } else if (tcp_may_raise_cwnd(sk, flag)) {
/* Advance cwnd if state allows */
3449 tcp_cong_avoid(sk, ack, acked_sacked);
3450 }
3451 tcp_update_pacing_rate(sk);
3452}
3453
/* Check that the window update is acceptable.
 * The function assumes that snd_una <= ack <= snd_nxt.
 */
3457static inline bool tcp_may_update_window(const struct tcp_sock *tp,
3458 const u32 ack, const u32 ack_seq,
3459 const u32 nwin)
3460{
3461 return after(ack, tp->snd_una) ||
3462 after(ack_seq, tp->snd_wl1) ||
3463 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
3464}
3465
/* If we update tp->snd_una, also update tp->bytes_acked */
3467static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
3468{
3469 u32 delta = ack - tp->snd_una;
3470
3471 sock_owned_by_me((struct sock *)tp);
3472 tp->bytes_acked += delta;
3473 tp->snd_una = ack;
3474}
3475
/* If we update tp->rcv_nxt, also update tp->bytes_received */
3477static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
3478{
3479 u32 delta = seq - tp->rcv_nxt;
3480
3481 sock_owned_by_me((struct sock *)tp);
3482 tp->bytes_received += delta;
3483 WRITE_ONCE(tp->rcv_nxt, seq);
3484}
3485
/* Update our send window.
 *
 * The window update algorithm described in RFC 793/RFC 1122 (and used in
 * linux-2.2 and FreeBSD) is wrong.
 */
3491static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
3492 u32 ack_seq)
3493{
3494 struct tcp_sock *tp = tcp_sk(sk);
3495 int flag = 0;
3496 u32 nwin = ntohs(tcp_hdr(skb)->window);
3497
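/* The window field carried in a SYN segment is never scaled (RFC 7323). */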
3498 if (likely(!tcp_hdr(skb)->syn))
3499 nwin <<= tp->rx_opt.snd_wscale;
3500
3501 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3502 flag |= FLAG_WIN_UPDATE;
3503 tcp_update_wl(tp, ack_seq);
3504
3505 if (tp->snd_wnd != nwin) {
3506 tp->snd_wnd = nwin;
/* Note: this is the only place where the fast path
 * is re-enabled for the sending side.
 */
3511 tp->pred_flags = 0;
3512 tcp_fast_path_check(sk);
3513
3514 if (!tcp_write_queue_empty(sk))
3515 tcp_slow_start_after_idle_check(sk);
3516
3517 if (nwin > tp->max_window) {
3518 tp->max_window = nwin;
3519 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3520 }
3521 }
3522 }
3523
3524 tcp_snd_una_update(tp, ack);
3525
3526 return flag;
3527}
3528
3529static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
3530 u32 *last_oow_ack_time)
3531{
3532 if (*last_oow_ack_time) {
3533 s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
3534
3535 if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
3536 NET_INC_STATS(net, mib_idx);
3537 return true;
3538 }
3539 }
3540
3541 *last_oow_ack_time = tcp_jiffies32;
3542
3543 return false;
3544}
3545
/* Return true if we're currently rate-limiting out-of-window ACKs and
 * thus shouldn't send a dupack right now. We rate-limit dupacks in
 * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
 * attacks that send repeated SYNs or ACKs for the same connection. To
 * do this, we do not send a duplicate SYNACK or ACK if the remote
 * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
 */
3553bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
3554 int mib_idx, u32 *last_oow_ack_time)
3555{
/* Data packets without SYNs are not likely part of an ACK loop. */
3557 if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
3558 !tcp_hdr(skb)->syn)
3559 return false;
3560
3561 return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
3562}
3563
/* RFC 5961 7 [ACK Throttling] */
3565static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
3566{
/* Unprotected statics; we don't care about racy overwrites. */
3568 static u32 challenge_timestamp;
3569 static unsigned int challenge_count;
3570 struct tcp_sock *tp = tcp_sk(sk);
3571 struct net *net = sock_net(sk);
3572 u32 count, now;
3573
/* First check our per-socket dupack rate limit. */
3575 if (__tcp_oow_rate_limited(net,
3576 LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
3577 &tp->last_oow_ack_time))
3578 return;
3579
/* Then check the host-wide RFC 5961 rate limit. */
3581 now = jiffies / HZ;
3582 if (now != challenge_timestamp) {
3583 u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
3584 u32 half = (ack_limit + 1) >> 1;
3585
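/* Refill the per-second budget with a randomized count so an attacker
 * cannot predict exactly how many challenge ACKs will be sent.
 */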
3586 challenge_timestamp = now;
3587 WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
3588 }
3589 count = READ_ONCE(challenge_count);
3590 if (count > 0) {
3591 WRITE_ONCE(challenge_count, count - 1);
3592 NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
3593 tcp_send_ack(sk);
3594 }
3595}
3596
3597static void tcp_store_ts_recent(struct tcp_sock *tp)
3598{
3599 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3600 tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
3601}
3602
3603static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3604{
3605 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
/* PAWS bug workaround wrt. ACK frames: the PAWS discard
 * check elsewhere makes sure this can only happen
 * for pure ACK frames.
 *
 * It also occurs for expired timestamps.
 */
3613 if (tcp_paws_check(&tp->rx_opt, 0))
3614 tcp_store_ts_recent(tp);
3615 }
3616}
3617
/* This routine deals with acks during a TLP episode and ends an episode by
 * resetting tlp_high_seq. Ref: the TLP algorithm in draft-ietf-tcpm-rack.
 */
3621static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
3622{
3623 struct tcp_sock *tp = tcp_sk(sk);
3624
3625 if (before(ack, tp->tlp_high_seq))
3626 return;
3627
3628 if (!tp->tlp_retrans) {
/* TLP of new data has been acknowledged */
3630 tp->tlp_high_seq = 0;
3631 } else if (flag & FLAG_DSACKING_ACK) {
/* This DSACK means the original and the TLP probe both arrived; no loss */
3633 tp->tlp_high_seq = 0;
3634 } else if (after(ack, tp->tlp_high_seq)) {
/* ACK advances: there was a loss, so reduce cwnd. Reset
 * tlp_high_seq in tcp_init_cwnd_reduction().
 */
3638 tcp_init_cwnd_reduction(sk);
3639 tcp_set_ca_state(sk, TCP_CA_CWR);
3640 tcp_end_cwnd_reduction(sk);
3641 tcp_try_keep_open(sk);
3642 NET_INC_STATS(sock_net(sk),
3643 LINUX_MIB_TCPLOSSPROBERECOVERY);
3644 } else if (!(flag & (FLAG_SND_UNA_ADVANCED |
3645 FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
/* Pure dupack: the original and the TLP probe both arrived; no loss */
3647 tp->tlp_high_seq = 0;
3648 }
3649}
3650
3651static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
3652{
3653 const struct inet_connection_sock *icsk = inet_csk(sk);
3654
3655 if (icsk->icsk_ca_ops->in_ack_event)
3656 icsk->icsk_ca_ops->in_ack_event(sk, flags);
3657}
3658
/* Congestion control has updated the cwnd already. So if we're in
 * loss recovery then now we do any new sends (for F-RTO) or
 * retransmits (for CA_Loss or CA_Recovery) that make sense.
 */
3663static void tcp_xmit_recovery(struct sock *sk, int rexmit)
3664{
3665 struct tcp_sock *tp = tcp_sk(sk);
3666
3667 if (rexmit == REXMIT_NONE || sk->sk_state == TCP_SYN_SENT)
3668 return;
3669
3670 if (unlikely(rexmit == REXMIT_NEW)) {
3671 __tcp_push_pending_frames(sk, tcp_current_mss(sk),
3672 TCP_NAGLE_OFF);
3673 if (after(tp->snd_nxt, tp->high_seq))
3674 return;
3675 tp->frto = 0;
3676 }
3677 tcp_xmit_retransmit_queue(sk);
3678}
3679
/* Returns the number of packets newly acked or sacked by the current ACK */
3681static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
3682{
3683 const struct net *net = sock_net(sk);
3684 struct tcp_sock *tp = tcp_sk(sk);
3685 u32 delivered;
3686
3687 delivered = tp->delivered - prior_delivered;
3688 NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered);
3689 if (flag & FLAG_ECE)
3690 NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered);
3691
3692 return delivered;
3693}
3694
/* This routine deals with incoming acks, but not outgoing ones. */
3696static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3697{
3698 struct inet_connection_sock *icsk = inet_csk(sk);
3699 struct tcp_sock *tp = tcp_sk(sk);
3700 struct tcp_sacktag_state sack_state;
3701 struct rate_sample rs = { .prior_delivered = 0 };
3702 u32 prior_snd_una = tp->snd_una;
3703 bool is_sack_reneg = tp->is_sack_reneg;
3704 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3705 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3706 int num_dupack = 0;
3707 int prior_packets = tp->packets_out;
3708 u32 delivered = tp->delivered;
3709 u32 lost = tp->lost;
3710 int rexmit = REXMIT_NONE;
3711 u32 prior_fack;
3712
3713 sack_state.first_sackt = 0;
3714 sack_state.rate = &rs;
3715 sack_state.sack_delivered = 0;
3716
/* We very likely will need to access the rtx queue. */
3718 prefetch(sk->tcp_rtx_queue.rb_node);
3719
/* If the ack is older than previous acks
 * then we can probably ignore it.
 */
3723 if (before(ack, prior_snd_una)) {
/* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
3725 if (before(ack, prior_snd_una - tp->max_window)) {
3726 if (!(flag & FLAG_NO_CHALLENGE_ACK))
3727 tcp_send_challenge_ack(sk, skb);
3728 return -1;
3729 }
3730 goto old_ack;
3731 }
3732
/* If the ack includes data we haven't sent yet, discard
 * this segment (RFC 793 Section 3.9).
 */
3736 if (after(ack, tp->snd_nxt))
3737 return -1;
3738
3739 if (after(ack, prior_snd_una)) {
3740 flag |= FLAG_SND_UNA_ADVANCED;
3741 icsk->icsk_retransmits = 0;
3742
3743#if IS_ENABLED(CONFIG_TLS_DEVICE)
3744 if (static_branch_unlikely(&clean_acked_data_enabled.key))
3745 if (icsk->icsk_clean_acked)
3746 icsk->icsk_clean_acked(sk, ack);
3747#endif
3748 }
3749
3750 prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
3751 rs.prior_in_flight = tcp_packets_in_flight(tp);
3752
/* The ts_recent update must be made after we are sure that the packet
 * is in window.
 */
3756 if (flag & FLAG_UPDATE_TS_RECENT)
3757 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
3758
3759 if ((flag & (FLAG_SLOWPATH | FLAG_SND_UNA_ADVANCED)) ==
3760 FLAG_SND_UNA_ADVANCED) {
/* Window is constant, pure forward advance.
 * No more checks are required.
 * Note, we use the fact that SND.UNA >= SND.WL2.
 */
3765 tcp_update_wl(tp, ack_seq);
3766 tcp_snd_una_update(tp, ack);
3767 flag |= FLAG_WIN_UPDATE;
3768
3769 tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
3770
3771 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
3772 } else {
3773 u32 ack_ev_flags = CA_ACK_SLOWPATH;
3774
3775 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3776 flag |= FLAG_DATA;
3777 else
3778 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3779
3780 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3781
3782 if (TCP_SKB_CB(skb)->sacked)
3783 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3784 &sack_state);
3785
3786 if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
3787 flag |= FLAG_ECE;
3788 ack_ev_flags |= CA_ACK_ECE;
3789 }
3790
3791 if (sack_state.sack_delivered)
3792 tcp_count_delivered(tp, sack_state.sack_delivered,
3793 flag & FLAG_ECE);
3794
3795 if (flag & FLAG_WIN_UPDATE)
3796 ack_ev_flags |= CA_ACK_WIN_UPDATE;
3797
3798 tcp_in_ack_event(sk, ack_ev_flags);
3799 }
3800
/* This is a deviation from RFC 3168, which states that:
 * "When the TCP data sender is ready to set the CWR bit after reducing
 * the congestion window, it SHOULD set the CWR bit only on the first
 * new data packet that it transmits."
 * We accept CWR on pure ACKs to be more robust
 * with widely-deployed TCP implementations that do this.
 */
3808 tcp_ecn_accept_cwr(sk, skb);
3809
/* We passed data and got it acked, remove any soft error
 * log. Something worked...
 */
3813 sk->sk_err_soft = 0;
3814 icsk->icsk_probes_out = 0;
3815 tp->rcv_tstamp = tcp_jiffies32;
3816 if (!prior_packets)
3817 goto no_queue;
3818
/* See if we can take anything off of the retransmit queue. */
3820 flag |= tcp_clean_rtx_queue(sk, skb, prior_fack, prior_snd_una,
3821 &sack_state, flag & FLAG_ECE);
3822
3823 tcp_rack_update_reo_wnd(sk, &rs);
3824
3825 if (tp->tlp_high_seq)
3826 tcp_process_tlp_ack(sk, ack, flag);
3827
3828 if (tcp_ack_is_dubious(sk, flag)) {
3829 if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
3830 num_dupack = 1;
/* Consider if pure acks were aggregated in tcp_add_backlog() */
3832 if (!(flag & FLAG_DATA))
3833 num_dupack = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
3834 }
3835 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
3836 &rexmit);
3837 }
3838
/* If needed, reset the TLP/RTO timer; RACK may have already armed its
 * reorder timer instead (which clears FLAG_SET_XMIT_TIMER).
 */
3840 if (flag & FLAG_SET_XMIT_TIMER)
3841 tcp_set_xmit_timer(sk);
3842
3843 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3844 sk_dst_confirm(sk);
3845
3846 delivered = tcp_newly_delivered(sk, delivered, flag);
3847 lost = tp->lost - lost;
3848 rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
3849 tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
3850 tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
3851 tcp_xmit_recovery(sk, rexmit);
3852 return 1;
3853
3854no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
3856 if (flag & FLAG_DSACKING_ACK) {
3857 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
3858 &rexmit);
3859 tcp_newly_delivered(sk, delivered, flag);
3860 }
3861
/* If this ack opens up a zero window, clear backoff.  It was
 * being used to time the probes, and is probably far higher than
 * it needs to be for normal retransmission.
 */
3865 tcp_ack_probe(sk);
3866
3867 if (tp->tlp_high_seq)
3868 tcp_process_tlp_ack(sk, ack, flag);
3869 return 1;
3870
3871old_ack:
/* If data was SACKed, tag it and see if we should send more data.
 * If data was DSACKed, see if we can undo a cwnd reduction.
 */
3875 if (TCP_SKB_CB(skb)->sacked) {
3876 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3877 &sack_state);
3878 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
3879 &rexmit);
3880 tcp_newly_delivered(sk, delivered, flag);
3881 tcp_xmit_recovery(sk, rexmit);
3882 }
3883
3884 return 0;
3885}
3886
3887static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
3888 bool syn, struct tcp_fastopen_cookie *foc,
3889 bool exp_opt)
3890{
/* Valid only in SYN or SYN-ACK, and only with an even length. */
3892 if (!foc || !syn || len < 0 || (len & 1))
3893 return;
3894
3895 if (len >= TCP_FASTOPEN_COOKIE_MIN &&
3896 len <= TCP_FASTOPEN_COOKIE_MAX)
3897 memcpy(foc->val, cookie, len);
3898 else if (len != 0)
3899 len = -1;
3900 foc->len = len;
3901 foc->exp = exp_opt;
3902}
3903
3904static bool smc_parse_options(const struct tcphdr *th,
3905 struct tcp_options_received *opt_rx,
3906 const unsigned char *ptr,
3907 int opsize)
3908{
3909#if IS_ENABLED(CONFIG_SMC)
3910 if (static_branch_unlikely(&tcp_have_smc)) {
3911 if (th->syn && !(opsize & 1) &&
3912 opsize >= TCPOLEN_EXP_SMC_BASE &&
3913 get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) {
3914 opt_rx->smc_ok = 1;
3915 return true;
3916 }
3917 }
3918#endif
3919 return false;
3920}
3921
/* Try to parse the MSS option from the TCP header. Return 0 on failure,
 * the (possibly user_mss-clamped) value on success.
 */
3925static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
3926{
3927 const unsigned char *ptr = (const unsigned char *)(th + 1);
3928 int length = (th->doff * 4) - sizeof(struct tcphdr);
3929 u16 mss = 0;
3930
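/* Walk the option area as opcode/length pairs, bounds-checking each one. */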
3931 while (length > 0) {
3932 int opcode = *ptr++;
3933 int opsize;
3934
3935 switch (opcode) {
3936 case TCPOPT_EOL:
3937 return mss;
3938 case TCPOPT_NOP:
3939 length--;
3940 continue;
3941 default:
3942 if (length < 2)
3943 return mss;
3944 opsize = *ptr++;
3945 if (opsize < 2)
3946 return mss;
3947 if (opsize > length)
3948 return mss;
3949 if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) {
3950 u16 in_mss = get_unaligned_be16(ptr);
3951
3952 if (in_mss) {
3953 if (user_mss && user_mss < in_mss)
3954 in_mss = user_mss;
3955 mss = in_mss;
3956 }
3957 }
3958 ptr += opsize - 2;
3959 length -= opsize;
3960 }
3961 }
3962 return mss;
3963}
3964
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
 * But, this can also be called on packets in the established flow when
 * the fast version below fails.
 */
3969void tcp_parse_options(const struct net *net,
3970 const struct sk_buff *skb,
3971 struct tcp_options_received *opt_rx, int estab,
3972 struct tcp_fastopen_cookie *foc)
3973{
3974 const unsigned char *ptr;
3975 const struct tcphdr *th = tcp_hdr(skb);
3976 int length = (th->doff * 4) - sizeof(struct tcphdr);
3977
3978 ptr = (const unsigned char *)(th + 1);
3979 opt_rx->saw_tstamp = 0;
3980 opt_rx->saw_unknown = 0;
3981
3982 while (length > 0) {
3983 int opcode = *ptr++;
3984 int opsize;
3985
3986 switch (opcode) {
3987 case TCPOPT_EOL:
3988 return;
3989 case TCPOPT_NOP:
3990 length--;
3991 continue;
3992 default:
3993 if (length < 2)
3994 return;
3995 opsize = *ptr++;
3996 if (opsize < 2)
3997 return;
3998 if (opsize > length)
3999 return;
4000 switch (opcode) {
4001 case TCPOPT_MSS:
4002 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
4003 u16 in_mss = get_unaligned_be16(ptr);
4004 if (in_mss) {
4005 if (opt_rx->user_mss &&
4006 opt_rx->user_mss < in_mss)
4007 in_mss = opt_rx->user_mss;
4008 opt_rx->mss_clamp = in_mss;
4009 }
4010 }
4011 break;
4012 case TCPOPT_WINDOW:
4013 if (opsize == TCPOLEN_WINDOW && th->syn &&
4014 !estab && net->ipv4.sysctl_tcp_window_scaling) {
4015 __u8 snd_wscale = *(__u8 *)ptr;
4016 opt_rx->wscale_ok = 1;
4017 if (snd_wscale > TCP_MAX_WSCALE) {
4018 net_info_ratelimited("%s: Illegal window scaling value %d > %u received\n",
4019 __func__,
4020 snd_wscale,
4021 TCP_MAX_WSCALE);
4022 snd_wscale = TCP_MAX_WSCALE;
4023 }
4024 opt_rx->snd_wscale = snd_wscale;
4025 }
4026 break;
4027 case TCPOPT_TIMESTAMP:
4028 if ((opsize == TCPOLEN_TIMESTAMP) &&
4029 ((estab && opt_rx->tstamp_ok) ||
4030 (!estab && net->ipv4.sysctl_tcp_timestamps))) {
4031 opt_rx->saw_tstamp = 1;
4032 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
4033 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
4034 }
4035 break;
4036 case TCPOPT_SACK_PERM:
4037 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
4038 !estab && net->ipv4.sysctl_tcp_sack) {
4039 opt_rx->sack_ok = TCP_SACK_SEEN;
4040 tcp_sack_reset(opt_rx);
4041 }
4042 break;
4043
4044 case TCPOPT_SACK:
4045 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
4046 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
4047 opt_rx->sack_ok) {
4048 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
4049 }
4050 break;
4051#ifdef CONFIG_TCP_MD5SIG
4052 case TCPOPT_MD5SIG:
/*
 * The MD5 Hash has already been
 * checked (see tcp_v{4,6}_do_rcv()).
 */
4057 break;
4058#endif
4059 case TCPOPT_FASTOPEN:
4060 tcp_parse_fastopen_option(
4061 opsize - TCPOLEN_FASTOPEN_BASE,
4062 ptr, th->syn, foc, false);
4063 break;
4064
4065 case TCPOPT_EXP:
/* The Fast Open option shares code 254 using a
 * 16-bit magic number.
 */
4069 if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE &&
4070 get_unaligned_be16(ptr) ==
4071 TCPOPT_FASTOPEN_MAGIC) {
4072 tcp_parse_fastopen_option(opsize -
4073 TCPOLEN_EXP_FASTOPEN_BASE,
4074 ptr + 2, th->syn, foc, true);
4075 break;
4076 }
4077
4078 if (smc_parse_options(th, opt_rx, ptr, opsize))
4079 break;
4080
4081 opt_rx->saw_unknown = 1;
4082 break;
4083
4084 default:
4085 opt_rx->saw_unknown = 1;
4086 }
4087 ptr += opsize-2;
4088 length -= opsize;
4089 }
4090 }
4091}
4092EXPORT_SYMBOL(tcp_parse_options);
4093
4094static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
4095{
4096 const __be32 *ptr = (const __be32 *)(th + 1);
4097
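/* Match the canonical NOP, NOP, TIMESTAMP layout that the fast path emits. */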
4098 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
4099 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
4100 tp->rx_opt.saw_tstamp = 1;
4101 ++ptr;
4102 tp->rx_opt.rcv_tsval = ntohl(*ptr);
4103 ++ptr;
4104 if (*ptr)
4105 tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
4106 else
4107 tp->rx_opt.rcv_tsecr = 0;
4108 return true;
4109 }
4110 return false;
4111}
4112
/* Fast parse options. This hopes to only see timestamps.
 * If it is wrong it falls back on tcp_parse_options().
 */
4116static bool tcp_fast_parse_options(const struct net *net,
4117 const struct sk_buff *skb,
4118 const struct tcphdr *th, struct tcp_sock *tp)
4119{
/* In the spirit of fast parsing, compare doff directly to constant
 * values.  Because equality is used, short doff can be ignored here.
 */
4123 if (th->doff == (sizeof(*th) / 4)) {
4124 tp->rx_opt.saw_tstamp = 0;
4125 return false;
4126 } else if (tp->rx_opt.tstamp_ok &&
4127 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
4128 if (tcp_parse_aligned_timestamp(tp, th))
4129 return true;
4130 }
4131
4132 tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL);
4133 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
4134 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
4135
4136 return true;
4137}
4138
4139#ifdef CONFIG_TCP_MD5SIG
/*
 * Parse the MD5 Signature option
 */
4143const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
4144{
4145 int length = (th->doff << 2) - sizeof(*th);
4146 const u8 *ptr = (const u8 *)(th + 1);
4147
/* Not enough option space left for an MD5 option: we can short-cut. */
4149 while (length >= TCPOLEN_MD5SIG) {
4150 int opcode = *ptr++;
4151 int opsize;
4152
4153 switch (opcode) {
4154 case TCPOPT_EOL:
4155 return NULL;
4156 case TCPOPT_NOP:
4157 length--;
4158 continue;
4159 default:
4160 opsize = *ptr++;
4161 if (opsize < 2 || opsize > length)
4162 return NULL;
4163 if (opcode == TCPOPT_MD5SIG)
4164 return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
4165 }
4166 ptr += opsize - 2;
4167 length -= opsize;
4168 }
4169 return NULL;
4170}
4171EXPORT_SYMBOL(tcp_parse_md5sig_option);
4172#endif
4173
/* Sorry, PAWS as specified is broken wrt. pure ACKs -DaveM
 *
 * It is not fatal. If this ACK does _not_ change critical state (seqs,
 * window), it can pass through the stack in any order: a sender may emit
 * a burst of pure ACKs whose timestamps get reordered in flight, and
 * discarding them on a strict PAWS check would only hurt.
 *
 * tcp_disordered_ack() below recognizes the harmless case: a pure ACK
 * (no data) that duplicates the current SND.UNA, does not update the
 * window, and whose timestamp lags ts_recent by no more than roughly one
 * RTO (assuming a timestamp clock of about 1 kHz). Such a segment is
 * tolerated by tcp_paws_discard() even though PAWS proper would drop it.
 */
4197static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
4198{
4199 const struct tcp_sock *tp = tcp_sk(sk);
4200 const struct tcphdr *th = tcp_hdr(skb);
4201 u32 seq = TCP_SKB_CB(skb)->seq;
4202 u32 ack = TCP_SKB_CB(skb)->ack_seq;
4203
4204 return (
4205 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
/* ... and it is a duplicate ACK (acknowledges nothing new), */
4208 ack == tp->snd_una &&
/* ... and it does not update the send window, */
4211 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
/* ... and its timestamp sits within the replay window (roughly one RTO). */
4214 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
4215}
4216
4217static inline bool tcp_paws_discard(const struct sock *sk,
4218 const struct sk_buff *skb)
4219{
4220 const struct tcp_sock *tp = tcp_sk(sk);
4221
4222 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
4223 !tcp_disordered_ack(sk, skb);
4224}
4225
4226
4227
4228
4229
4230
4231
4232
4233