/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 */
#define pr_fmt(fmt) "TCP: " fmt

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/kernel.h>
#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <linux/errqueue.h>
#include <trace/events/tcp.h>
#include <linux/jump_label_ratelimit.h>
#include <net/busy_poll.h>
#include <net/mptcp.h>

int sysctl_tcp_max_orphans __read_mostly = NR_FILE;

#define FLAG_DATA		0x01	/* Incoming frame contained data.	*/
#define FLAG_WIN_UPDATE		0x02	/* Incoming ACK was a window update.	*/
#define FLAG_DATA_ACKED		0x04	/* This ACK acknowledged new data.	*/
#define FLAG_RETRANS_DATA_ACKED	0x08	/* "" "" some of which was retransmitted. */
#define FLAG_SYN_ACKED		0x10	/* This ACK acknowledged SYN.		*/
#define FLAG_DATA_SACKED	0x20	/* New SACK.				*/
#define FLAG_ECE		0x40	/* ECE in this ACK.			*/
#define FLAG_LOST_RETRANS	0x80	/* This ACK marks some retransmission lost. */
#define FLAG_SLOWPATH		0x100	/* Do not skip RFC checks for window update. */
#define FLAG_ORIG_SACK_ACKED	0x200	/* Never-retransmitted data were (s)acked. */
#define FLAG_SND_UNA_ADVANCED	0x400	/* Snd_una was changed (!= FLAG_DATA_ACKED). */
#define FLAG_DSACKING_ACK	0x800	/* SACK blocks contained D-SACK info.	*/
#define FLAG_SET_XMIT_TIMER	0x1000	/* Set TLP or RTO timer.		*/
#define FLAG_SACK_RENEGING	0x2000	/* snd_una advanced to a sacked seq.	*/
#define FLAG_UPDATE_TS_RECENT	0x4000	/* tcp_replace_ts_recent().		*/
#define FLAG_NO_CHALLENGE_ACK	0x8000	/* Do not call tcp_send_challenge_ack().*/
#define FLAG_ACK_MAYBE_DELAYED	0x10000	/* Likely a delayed ACK.		*/
#define FLAG_DSACK_TLP		0x20000	/* DSACK for tail loss probe.		*/

#define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
#define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)

#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))

#define REXMIT_NONE	0 /* no loss recovery to do */
#define REXMIT_LOST	1 /* retransmit packets marked lost */
#define REXMIT_NEW	2 /* FRTO-style transmit of unsent/new packets */

#if IS_ENABLED(CONFIG_TLS_DEVICE)
static DEFINE_STATIC_KEY_DEFERRED_FALSE(clean_acked_data_enabled, HZ);

void clean_acked_data_enable(struct inet_connection_sock *icsk,
			     void (*cad)(struct sock *sk, u32 ack_seq))
{
	icsk->icsk_clean_acked = cad;
	static_branch_deferred_inc(&clean_acked_data_enabled);
}
EXPORT_SYMBOL_GPL(clean_acked_data_enable);

void clean_acked_data_disable(struct inet_connection_sock *icsk)
{
	static_branch_slow_dec_deferred(&clean_acked_data_enabled);
	icsk->icsk_clean_acked = NULL;
}
EXPORT_SYMBOL_GPL(clean_acked_data_disable);

void clean_acked_data_flush(void)
{
	static_key_deferred_flush(&clean_acked_data_enabled);
}
EXPORT_SYMBOL_GPL(clean_acked_data_flush);
#endif

#ifdef CONFIG_CGROUP_BPF
static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
{
	bool unknown_opt = tcp_sk(sk)->rx_opt.saw_unknown &&
			   BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
						  BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG);
	bool parse_all_opt = BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
						    BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
	struct bpf_sock_ops_kern sock_ops;

	if (likely(!unknown_opt && !parse_all_opt))
		return;

	/* Header options of SYN/SYN-ACK packets are handled by the
	 * handshake-time sock_ops callbacks, so only run the parse
	 * callback for fully established sockets.
	 */
	switch (sk->sk_state) {
	case TCP_SYN_RECV:
	case TCP_SYN_SENT:
	case TCP_LISTEN:
		return;
	}

	sock_owned_by_me(sk);

	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
	sock_ops.op = BPF_SOCK_OPS_PARSE_HDR_OPT_CB;
	sock_ops.is_fullsock = 1;
	sock_ops.sk = sk;
	bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));

	BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
}

static void bpf_skops_established(struct sock *sk, int bpf_op,
				  struct sk_buff *skb)
{
	struct bpf_sock_ops_kern sock_ops;

	sock_owned_by_me(sk);

	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
	sock_ops.op = bpf_op;
	sock_ops.is_fullsock = 1;
	sock_ops.sk = sk;

	/* skb may be NULL for callers that have no packet to hand to the
	 * BPF program.
	 */
	if (skb)
		bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));

	BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
}
#else
static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
{
}

static void bpf_skops_established(struct sock *sk, int bpf_op,
				  struct sk_buff *skb)
{
}
#endif

static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
			     unsigned int len)
{
	static bool __once __read_mostly;

	if (!__once) {
		struct net_device *dev;

		__once = true;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
		if (!dev || len >= dev->mtu)
			pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
				dev ? dev->name : "Unknown driver");
		rcu_read_unlock();
	}
}

/* Adapt the MSS value used to make delayed ack decision to the
 * real world.
 */
static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const unsigned int lss = icsk->icsk_ack.last_seg_size;
	unsigned int len;

	icsk->icsk_ack.last_seg_size = 0;

	/* skb->len may jitter because of SACKs, even if peer
	 * sends good full-sized frames.
	 */
	len = skb_shinfo(skb)->gso_size ? : skb->len;
	if (len >= icsk->icsk_ack.rcv_mss) {
		/* Divides are costly, so only recompute scaling_ratio
		 * when rcv_mss actually changes.
		 */
		if (unlikely(len != icsk->icsk_ack.rcv_mss)) {
			u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE;

			do_div(val, skb->truesize);
			tcp_sk(sk)->scaling_ratio = val ? val : 1;
		}
		icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
					       tcp_sk(sk)->advmss);
		/* Warn about drivers whose GRO produces over-sized segments. */
		if (unlikely(len > icsk->icsk_ack.rcv_mss +
				   MAX_TCP_OPTION_SPACE))
			tcp_gro_dev_warn(sk, skb, len);
		/* A segment of about one MSS with PSH set is likely the end
		 * of an application write, so request a prompt ACK instead
		 * of waiting for more data to justify a delayed ACK.
		 */
		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_PSH)
			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
	} else {
		/* Otherwise, make a more careful check taking into account
		 * that the SACK block length is variable.
		 *
		 * "len" is the invariant segment length, including the TCP
		 * header.
		 */
		len += skb->data - skb_transport_header(skb);
		if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
		    /* If PSH is not set, the packet should be full sized,
		     * provided the peer TCP is not badly broken.  This
		     * observation helps handle super-low MTU links fairly.
		     */
		    (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
		     !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
			/* Subtract the invariant part: the TCP header plus
			 * fixed option length.  The result is an MSS estimate
			 * free of SACK jitter.
			 */
			len -= tcp_sk(sk)->tcp_header_len;
			icsk->icsk_ack.last_seg_size = len;
			if (len == lss) {
				icsk->icsk_ack.rcv_mss = len;
				return;
			}
		}
		if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
		icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
	}
}
300
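/* Grant extra "quick ACK" credit: up to one immediate ACK per two
 * receiver-MSS worth of window, bounded by @max_quickacks.
 */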
301static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
302{
303 struct inet_connection_sock *icsk = inet_csk(sk);
304 unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
305
306 if (quickacks == 0)
307 quickacks = 2;
308 quickacks = min(quickacks, max_quickacks);
309 if (quickacks > icsk->icsk_ack.quick)
310 icsk->icsk_ack.quick = quickacks;
311}
312
313static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
314{
315 struct inet_connection_sock *icsk = inet_csk(sk);
316
317 tcp_incr_quickack(sk, max_quickacks);
318 inet_csk_exit_pingpong_mode(sk);
319 icsk->icsk_ack.ato = TCP_ATO_MIN;
320}

/* Send ACKs quickly, if the "quick" count is not exhausted and the session
 * is not interactive, or if the route explicitly requests it (RTAX_QUICKACK).
 */
326static bool tcp_in_quickack_mode(struct sock *sk)
327{
328 const struct inet_connection_sock *icsk = inet_csk(sk);
329 const struct dst_entry *dst = __sk_dst_get(sk);
330
331 return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
332 (icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk));
333}
334
335static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
336{
337 if (tp->ecn_flags & TCP_ECN_OK)
338 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
339}
340
341static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb)
342{
343 if (tcp_hdr(skb)->cwr) {
344 tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
345
346
347
348
349
350 if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
351 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
352 }
353}
354
355static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
356{
357 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
358}
359
360static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
361{
362 struct tcp_sock *tp = tcp_sk(sk);
363
364 switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
365 case INET_ECN_NOT_ECT:
366
367
368
369
370 if (tp->ecn_flags & TCP_ECN_SEEN)
371 tcp_enter_quickack_mode(sk, 2);
372 break;
373 case INET_ECN_CE:
374 if (tcp_ca_needs_ecn(sk))
375 tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
376
377 if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
378
379 tcp_enter_quickack_mode(sk, 2);
380 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
381 }
382 tp->ecn_flags |= TCP_ECN_SEEN;
383 break;
384 default:
385 if (tcp_ca_needs_ecn(sk))
386 tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
387 tp->ecn_flags |= TCP_ECN_SEEN;
388 break;
389 }
390}
391
392static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
393{
394 if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK)
395 __tcp_ecn_check_ce(sk, skb);
396}
397
398static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
399{
400 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
401 tp->ecn_flags &= ~TCP_ECN_OK;
402}
403
404static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
405{
406 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
407 tp->ecn_flags &= ~TCP_ECN_OK;
408}
409
410static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
411{
412 if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
413 return true;
414 return false;
415}

/* Buffer size and advertised window tuning.
 *
 * 1. Tuning sk->sk_sndbuf, when connection enters established state.
 */
422static void tcp_sndbuf_expand(struct sock *sk)
423{
424 const struct tcp_sock *tp = tcp_sk(sk);
425 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
426 int sndmem, per_mss;
427 u32 nr_segs;
428
429
430
431
432 per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
433 MAX_TCP_HEADER +
434 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
435
436 per_mss = roundup_pow_of_two(per_mss) +
437 SKB_DATA_ALIGN(sizeof(struct sk_buff));
438
439 nr_segs = max_t(u32, TCP_INIT_CWND, tcp_snd_cwnd(tp));
440 nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
441
442
443
444
445
446 sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2;
447 sndmem *= nr_segs * per_mss;
448
449 if (sk->sk_sndbuf < sndmem)
450 WRITE_ONCE(sk->sk_sndbuf,
451 min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2])));
452}

/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
 *
 * The window advertised to the peer is allowed to grow only while incoming
 * skbs use receive-buffer memory efficiently (payload vs. skb truesize).
 * rcv_ssthresh acts as a "slow start" limit on the advertised window: it
 * starts conservatively, and __tcp_grow_window() decides how much it may be
 * raised for a given skb, returning 0 when no growth is justified by the
 * memory the skb actually consumes.
 */
480static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
481 unsigned int skbtruesize)
482{
483 const struct tcp_sock *tp = tcp_sk(sk);
484
485 int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
486 int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1;
487
488 while (tp->rcv_ssthresh <= window) {
489 if (truesize <= skb->len)
490 return 2 * inet_csk(sk)->icsk_ack.rcv_mss;
491
492 truesize >>= 1;
493 window >>= 1;
494 }
495 return 0;
496}
497

/* If the skb carries all of its payload in page frags (e.g. after GRO),
 * the unused linear head area inflates skb->truesize.  Discount it here so
 * window-growth decisions are not penalized for memory holding no data,
 * but never report a truesize smaller than the payload itself.
 */
504static u32 truesize_adjust(bool adjust, const struct sk_buff *skb)
505{
506 u32 truesize = skb->truesize;
507
508 if (adjust && !skb_headlen(skb)) {
509 truesize -= SKB_TRUESIZE(skb_end_offset(skb));
510
511 if (unlikely((int)truesize < (int)skb->len))
512 truesize = skb->truesize;
513 }
514 return truesize;
515}
516
517static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
518 bool adjust)
519{
520 struct tcp_sock *tp = tcp_sk(sk);
521 int room;
522
523 room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh;
524
525 if (room <= 0)
526 return;
527
528
529 if (!tcp_under_memory_pressure(sk)) {
530 unsigned int truesize = truesize_adjust(adjust, skb);
531 int incr;
532
533
534
535
536 if (tcp_win_from_space(sk, truesize) <= skb->len)
537 incr = 2 * tp->advmss;
538 else
539 incr = __tcp_grow_window(sk, skb, truesize);
540
541 if (incr) {
542 incr = max_t(int, incr, 2 * skb->len);
543 tp->rcv_ssthresh += min(room, incr);
544 inet_csk(sk)->icsk_ack.quick |= 1;
545 }
546 } else {
547
548
549
550 tcp_adjust_rcv_ssthresh(sk);
551 }
552}

/* 3. Initializing the connection's buffer space and window clamp when it
 *    enters established state; called right after the handshake completes.
 */
557static void tcp_init_buffer_space(struct sock *sk)
558{
559 int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win);
560 struct tcp_sock *tp = tcp_sk(sk);
561 int maxwin;
562
563 if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
564 tcp_sndbuf_expand(sk);
565
566 tcp_mstamp_refresh(tp);
567 tp->rcvq_space.time = tp->tcp_mstamp;
568 tp->rcvq_space.seq = tp->copied_seq;
569
570 maxwin = tcp_full_space(sk);
571
572 if (tp->window_clamp >= maxwin) {
573 tp->window_clamp = maxwin;
574
575 if (tcp_app_win && maxwin > 4 * tp->advmss)
576 tp->window_clamp = max(maxwin -
577 (maxwin >> tcp_app_win),
578 4 * tp->advmss);
579 }
580
581
582 if (tcp_app_win &&
583 tp->window_clamp > 2 * tp->advmss &&
584 tp->window_clamp + tp->advmss > maxwin)
585 tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
586
587 tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
588 tp->snd_cwnd_stamp = tcp_jiffies32;
589 tp->rcvq_space.space = min3(tp->rcv_ssthresh, tp->rcv_wnd,
590 (u32)TCP_INIT_CWND * tp->advmss);
591}

/* 4. Recalculate window clamp after socket hit its memory bounds. */
594static void tcp_clamp_window(struct sock *sk)
595{
596 struct tcp_sock *tp = tcp_sk(sk);
597 struct inet_connection_sock *icsk = inet_csk(sk);
598 struct net *net = sock_net(sk);
599 int rmem2;
600
601 icsk->icsk_ack.quick = 0;
602 rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
603
604 if (sk->sk_rcvbuf < rmem2 &&
605 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
606 !tcp_under_memory_pressure(sk) &&
607 sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
608 WRITE_ONCE(sk->sk_rcvbuf,
609 min(atomic_read(&sk->sk_rmem_alloc), rmem2));
610 }
611 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
612 tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
613}

/* Initialize RCV_MSS value.
 * RCV_MSS is our guess about the MSS used by the peer: we have no direct
 * information about it, and overestimating makes us ACK less often than
 * needed, so it is better to underestimate; underestimates are easy to
 * detect and fix by tcp_measure_rcv_mss().
 */
622void tcp_initialize_rcv_mss(struct sock *sk)
623{
624 const struct tcp_sock *tp = tcp_sk(sk);
625 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
626
627 hint = min(hint, tp->rcv_wnd / 2);
628 hint = min(hint, TCP_MSS_DEFAULT);
629 hint = max(hint, TCP_MIN_MSS);
630
631 inet_csk(sk)->icsk_ack.rcv_mss = hint;
632}
633EXPORT_SYMBOL(tcp_initialize_rcv_mss);

/* Receiver-side RTT estimation ("autotuning"), used for receive buffer
 * sizing when the peer does not send timestamps.  Each window worth of
 * received data yields one RTT sample; samples are smoothed below.
 *
 * win_dep selects the update rule: a classic 1/8 EWMA for timestamp-based
 * samples, and a minimum-biased update for window-based samples, which are
 * only upper bounds on the true RTT.
 */
646static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
647{
648 u32 new_sample = tp->rcv_rtt_est.rtt_us;
649 long m = sample;
650
651 if (new_sample != 0) {
652
653
654
655
656
657
658
659
660
661
662 if (!win_dep) {
663 m -= (new_sample >> 3);
664 new_sample += m;
665 } else {
666 m <<= 3;
667 if (m < new_sample)
668 new_sample = m;
669 }
670 } else {
671
672 new_sample = m << 3;
673 }
674
675 tp->rcv_rtt_est.rtt_us = new_sample;
676}
677
678static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
679{
680 u32 delta_us;
681
682 if (tp->rcv_rtt_est.time == 0)
683 goto new_measure;
684 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
685 return;
686 delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
687 if (!delta_us)
688 delta_us = 1;
689 tcp_rcv_rtt_update(tp, delta_us, 1);
690
691new_measure:
692 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
693 tp->rcv_rtt_est.time = tp->tcp_mstamp;
694}
695
696static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp)
697{
698 u32 delta, delta_us;
699
700 delta = tcp_time_stamp_ts(tp) - tp->rx_opt.rcv_tsecr;
701 if (tp->tcp_usec_ts)
702 return delta;
703
704 if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
705 if (!delta)
706 delta = 1;
707 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
708 return delta_us;
709 }
710 return -1;
711}
712
713static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
714 const struct sk_buff *skb)
715{
716 struct tcp_sock *tp = tcp_sk(sk);
717
718 if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr)
719 return;
720 tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
721
722 if (TCP_SKB_CB(skb)->end_seq -
723 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
724 s32 delta = tcp_rtt_tsopt_us(tp);
725
726 if (delta >= 0)
727 tcp_rcv_rtt_update(tp, delta, 0);
728 }
729}

/*
 * This function should be called every time data is copied to user space.
 * It calculates the appropriate TCP receive buffer space.
 */
735void tcp_rcv_space_adjust(struct sock *sk)
736{
737 struct tcp_sock *tp = tcp_sk(sk);
738 u32 copied;
739 int time;
740
741 trace_tcp_rcv_space_adjust(sk);
742
743 tcp_mstamp_refresh(tp);
744 time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
745 if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
746 return;
747
748
749 copied = tp->copied_seq - tp->rcvq_space.seq;
750 if (copied <= tp->rcvq_space.space)
751 goto new_measure;
752
753
754
755
756
757
758
759
760
761
762 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
763 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
764 u64 rcvwin, grow;
765 int rcvbuf;
766
767
768
769
770 rcvwin = ((u64)copied << 1) + 16 * tp->advmss;
771
772
773 grow = rcvwin * (copied - tp->rcvq_space.space);
774 do_div(grow, tp->rcvq_space.space);
775 rcvwin += (grow << 1);
776
777 rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin),
778 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
779 if (rcvbuf > sk->sk_rcvbuf) {
780 WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
781
782
783 tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
784 }
785 }
786 tp->rcvq_space.space = copied;
787
788new_measure:
789 tp->rcvq_space.seq = tp->copied_seq;
790 tp->rcvq_space.time = tp->tcp_mstamp;
791}
792
793static void tcp_save_lrcv_flowlabel(struct sock *sk, const struct sk_buff *skb)
794{
795#if IS_ENABLED(CONFIG_IPV6)
796 struct inet_connection_sock *icsk = inet_csk(sk);
797
798 if (skb->protocol == htons(ETH_P_IPV6))
799 icsk->icsk_ack.lrcv_flowlabel = ntohl(ip6_flowlabel(ipv6_hdr(skb)));
800#endif
801}

/* Called for every data segment we accept: update the delayed-ACK state
 * (quick-ACK credit and the ATO estimate derived from observed inter-packet
 * gaps), take a receiver RTT sample, check for ECN CE marks, and possibly
 * grow the advertised receive window.
 */
813static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
814{
815 struct tcp_sock *tp = tcp_sk(sk);
816 struct inet_connection_sock *icsk = inet_csk(sk);
817 u32 now;
818
819 inet_csk_schedule_ack(sk);
820
821 tcp_measure_rcv_mss(sk, skb);
822
823 tcp_rcv_rtt_measure(tp);
824
825 now = tcp_jiffies32;
826
827 if (!icsk->icsk_ack.ato) {
828
829
830
831 tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
832 icsk->icsk_ack.ato = TCP_ATO_MIN;
833 } else {
834 int m = now - icsk->icsk_ack.lrcvtime;
835
836 if (m <= TCP_ATO_MIN / 2) {
837
838 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
839 } else if (m < icsk->icsk_ack.ato) {
840 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
841 if (icsk->icsk_ack.ato > icsk->icsk_rto)
842 icsk->icsk_ack.ato = icsk->icsk_rto;
843 } else if (m > icsk->icsk_rto) {
844
845
846
847 tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
848 }
849 }
850 icsk->icsk_ack.lrcvtime = now;
851 tcp_save_lrcv_flowlabel(sk, skb);
852
853 tcp_ecn_check_ce(sk, skb);
854
855 if (skb->len >= 128)
856 tcp_grow_window(sk, skb, true);
857}

/* Called to compute a smoothed RTT estimate.  The data fed to this routine
 * either comes from timestamps or from segments that were known _not_ to
 * have been retransmitted (Karn's algorithm).  The smoothing follows Van
 * Jacobson's SIGCOMM '88 scheme: srtt is an EWMA of the samples and rttvar
 * tracks the mean deviation, both kept in fixed point (srtt scaled by 8,
 * mdev by 4).
 */
868static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
869{
870 struct tcp_sock *tp = tcp_sk(sk);
871 long m = mrtt_us;
872 u32 srtt = tp->srtt_us;
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890 if (srtt != 0) {
891 m -= (srtt >> 3);
892 srtt += m;
893 if (m < 0) {
894 m = -m;
895 m -= (tp->mdev_us >> 2);
896
897
898
899
900
901
902
903
904 if (m > 0)
905 m >>= 3;
906 } else {
907 m -= (tp->mdev_us >> 2);
908 }
909 tp->mdev_us += m;
910 if (tp->mdev_us > tp->mdev_max_us) {
911 tp->mdev_max_us = tp->mdev_us;
912 if (tp->mdev_max_us > tp->rttvar_us)
913 tp->rttvar_us = tp->mdev_max_us;
914 }
915 if (after(tp->snd_una, tp->rtt_seq)) {
916 if (tp->mdev_max_us < tp->rttvar_us)
917 tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
918 tp->rtt_seq = tp->snd_nxt;
919 tp->mdev_max_us = tcp_rto_min_us(sk);
920
921 tcp_bpf_rtt(sk);
922 }
923 } else {
924
925 srtt = m << 3;
926 tp->mdev_us = m << 1;
927 tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
928 tp->mdev_max_us = tp->rttvar_us;
929 tp->rtt_seq = tp->snd_nxt;
930
931 tcp_bpf_rtt(sk);
932 }
933 tp->srtt_us = max(1U, srtt);
934}
935
936static void tcp_update_pacing_rate(struct sock *sk)
937{
938 const struct tcp_sock *tp = tcp_sk(sk);
939 u64 rate;
940
941
942 rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);
943
944
945
946
947
948
949
950
951
952 if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2)
953 rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio);
954 else
955 rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio);
956
957 rate *= max(tcp_snd_cwnd(tp), tp->packets_out);
958
959 if (likely(tp->srtt_us))
960 do_div(rate, tp->srtt_us);
961
962
963
964
965
966 WRITE_ONCE(sk->sk_pacing_rate,
967 min_t(u64, rate, READ_ONCE(sk->sk_max_pacing_rate)));
968}

/* Calculate the RTO without backoff: effectively srtt + 4 * rttvar, then
 * clamped by tcp_bound_rto().  This is the second half of Van Jacobson's
 * routine referred to above.
 */
973static void tcp_set_rto(struct sock *sk)
974{
975 const struct tcp_sock *tp = tcp_sk(sk);
976
977
978
979
980
981
982
983
984
985
986 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
987
988
989
990
991
992
993
994
995
996
997 tcp_bound_rto(sk);
998}
999
1000__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
1001{
1002 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
1003
1004 if (!cwnd)
1005 cwnd = TCP_INIT_CWND;
1006 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
1007}
1008
1009struct tcp_sacktag_state {
1010
1011
1012
1013
1014 u64 first_sackt;
1015 u64 last_sackt;
1016 u32 reord;
1017 u32 sack_delivered;
1018 int flag;
1019 unsigned int mss_now;
1020 struct rate_sample *rate;
1021};

/* Take notice that the peer is sending D-SACKs.  Skip updating delivery and
 * spurious-retransmission state when the D-SACK is unlikely to be caused by
 * this sender's own (re)transmissions:
 * - the D-SACKed range is larger than the receiver's maximum window, or
 * - total D-SACKed segments exceed the total number of retransmitted ones.
 */
1029static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
1030 u32 end_seq, struct tcp_sacktag_state *state)
1031{
1032 u32 seq_len, dup_segs = 1;
1033
1034 if (!before(start_seq, end_seq))
1035 return 0;
1036
1037 seq_len = end_seq - start_seq;
1038
1039 if (seq_len > tp->max_window)
1040 return 0;
1041 if (seq_len > tp->mss_cache)
1042 dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache);
1043 else if (tp->tlp_high_seq && tp->tlp_high_seq == end_seq)
1044 state->flag |= FLAG_DSACK_TLP;
1045
1046 tp->dsack_dups += dup_segs;
1047
1048 if (tp->dsack_dups > tp->total_retrans)
1049 return 0;
1050
1051 tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
1052
1053
1054
1055
1056
1057
1058 if (tp->reord_seen && !(state->flag & FLAG_DSACK_TLP))
1059 tp->rack.dsack_seen = 1;
1060
1061 state->flag |= FLAG_DSACKING_ACK;
1062
1063 state->sack_delivered += dup_segs;
1064
1065 return dup_segs;
1066}

/* It is reordering when a higher sequence was delivered (i.e. SACKed)
 * before some lower, never-retransmitted sequence ("low_seq").  Record the
 * observed distance in tp->reordering (capped by the sysctl) and bump the
 * reord_seen / MIB counters.
 */
1072static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
1073 const int ts)
1074{
1075 struct tcp_sock *tp = tcp_sk(sk);
1076 const u32 mss = tp->mss_cache;
1077 u32 fack, metric;
1078
1079 fack = tcp_highest_sack_seq(tp);
1080 if (!before(low_seq, fack))
1081 return;
1082
1083 metric = fack - low_seq;
1084 if ((metric > tp->reordering * mss) && mss) {
1085#if FASTRETRANS_DEBUG > 1
1086 pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
1087 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
1088 tp->reordering,
1089 0,
1090 tp->sacked_out,
1091 tp->undo_marker ? tp->undo_retrans : 0);
1092#endif
1093 tp->reordering = min_t(u32, (metric + mss - 1) / mss,
1094 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
1095 }
1096
1097
1098 tp->reord_seen++;
1099 NET_INC_STATS(sock_net(sk),
1100 ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
1101}
1102
1103
1104
1105
1106
1107
1108static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
1109{
1110 if ((!tp->retransmit_skb_hint && tp->retrans_out >= tp->lost_out) ||
1111 (tp->retransmit_skb_hint &&
1112 before(TCP_SKB_CB(skb)->seq,
1113 TCP_SKB_CB(tp->retransmit_skb_hint)->seq)))
1114 tp->retransmit_skb_hint = skb;
1115}

/* Account newly marked-lost packets in tp->lost (used by rate sampling). */
1120static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb)
1121{
1122 tp->lost += tcp_skb_pcount(skb);
1123}
1124
1125void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
1126{
1127 __u8 sacked = TCP_SKB_CB(skb)->sacked;
1128 struct tcp_sock *tp = tcp_sk(sk);
1129
1130 if (sacked & TCPCB_SACKED_ACKED)
1131 return;
1132
1133 tcp_verify_retransmit_hint(tp, skb);
1134 if (sacked & TCPCB_LOST) {
1135 if (sacked & TCPCB_SACKED_RETRANS) {
1136
1137 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1138 tp->retrans_out -= tcp_skb_pcount(skb);
1139 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT,
1140 tcp_skb_pcount(skb));
1141 tcp_notify_skb_loss_event(tp, skb);
1142 }
1143 } else {
1144 tp->lost_out += tcp_skb_pcount(skb);
1145 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1146 tcp_notify_skb_loss_event(tp, skb);
1147 }
1148}
1149
1150
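/* Account newly delivered segments; when the ACK carried an ECN echo, also
 * count them in delivered_ce.
 */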
1151static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
1152 bool ece_ack)
1153{
1154 tp->delivered += delivered;
1155 if (ece_ack)
1156 tp->delivered_ce += delivered;
1157}

/* SACK processing ("sacktag") and block validation.
 *
 * Each skb in the retransmit queue carries tag bits in
 * TCP_SKB_CB(skb)->sacked: SACKED_ACKED (S) - the peer reported receiving
 * it, RETRANS (R) - it was retransmitted, LOST (L) - it is considered lost.
 * The sacktag code below updates these tags and the derived counters
 * (sacked_out, retrans_out, lost_out) as SACK blocks arrive.
 *
 * A SACK block is only trusted if it fits the current send window:
 *
 *	SND.UNA - MAX.WND <= start_seq < end_seq <= SND.NXT
 *
 * D-SACK blocks below snd_una are additionally accepted while an undo is in
 * progress (tp->undo_marker set), since they describe segments the receiver
 * had already received and are what makes undo of spurious retransmissions
 * possible.
 */
1252static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
1253 u32 start_seq, u32 end_seq)
1254{
1255
1256 if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
1257 return false;
1258
1259
1260 if (!before(start_seq, tp->snd_nxt))
1261 return false;
1262
1263
1264
1265
1266 if (after(start_seq, tp->snd_una))
1267 return true;
1268
1269 if (!is_dsack || !tp->undo_marker)
1270 return false;
1271
1272
1273 if (after(end_seq, tp->snd_una))
1274 return false;
1275
1276 if (!before(start_seq, tp->undo_marker))
1277 return true;
1278
1279
1280 if (!after(end_seq, tp->undo_marker))
1281 return false;
1282
1283
1284
1285
1286 return !before(start_seq, end_seq - tp->max_window);
1287}
1288
1289static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1290 struct tcp_sack_block_wire *sp, int num_sacks,
1291 u32 prior_snd_una, struct tcp_sacktag_state *state)
1292{
1293 struct tcp_sock *tp = tcp_sk(sk);
1294 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
1295 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
1296 u32 dup_segs;
1297
1298 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1299 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
1300 } else if (num_sacks > 1) {
1301 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
1302 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
1303
1304 if (after(end_seq_0, end_seq_1) || before(start_seq_0, start_seq_1))
1305 return false;
1306 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKOFORECV);
1307 } else {
1308 return false;
1309 }
1310
1311 dup_segs = tcp_dsack_seen(tp, start_seq_0, end_seq_0, state);
1312 if (!dup_segs) {
1313 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKIGNOREDDUBIOUS);
1314 return false;
1315 }
1316
1317 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECVSEGS, dup_segs);
1318
1319
1320 if (tp->undo_marker && tp->undo_retrans > 0 &&
1321 !after(end_seq_0, prior_snd_una) &&
1322 after(end_seq_0, tp->undo_marker))
1323 tp->undo_retrans = max_t(int, 0, tp->undo_retrans - dup_segs);
1324
1325 return true;
1326}

/* Check if skb is fully within the SACK block.  In the presence of GSO skbs,
 * the incoming SACK may not exactly match, but we can find a smaller,
 * MSS-aligned portion of it that matches.  Therefore we might need to
 * fragment, which may fail; the caller must handle the error return.
 */
1336static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1337 u32 start_seq, u32 end_seq)
1338{
1339 int err;
1340 bool in_sack;
1341 unsigned int pkt_len;
1342 unsigned int mss;
1343
1344 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1345 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1346
1347 if (tcp_skb_pcount(skb) > 1 && !in_sack &&
1348 after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
1349 mss = tcp_skb_mss(skb);
1350 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1351
1352 if (!in_sack) {
1353 pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
1354 if (pkt_len < mss)
1355 pkt_len = mss;
1356 } else {
1357 pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
1358 if (pkt_len < mss)
1359 return -EINVAL;
1360 }
1361
1362
1363
1364
1365 if (pkt_len > mss) {
1366 unsigned int new_len = (pkt_len / mss) * mss;
1367 if (!in_sack && new_len < pkt_len)
1368 new_len += mss;
1369 pkt_len = new_len;
1370 }
1371
1372 if (pkt_len >= skb->len && !in_sack)
1373 return 0;
1374
1375 err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
1376 pkt_len, mss, GFP_ATOMIC);
1377 if (err < 0)
1378 return err;
1379 }
1380
1381 return in_sack;
1382}
1383
1384
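/* Tag the range [start_seq, end_seq) covered by the current SACK block:
 * update undo/reordering bookkeeping, clear LOST/RETRANS state where the
 * SACK proves delivery, and return the new sacked flags for the skb.
 */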
1385static u8 tcp_sacktag_one(struct sock *sk,
1386 struct tcp_sacktag_state *state, u8 sacked,
1387 u32 start_seq, u32 end_seq,
1388 int dup_sack, int pcount,
1389 u64 xmit_time)
1390{
1391 struct tcp_sock *tp = tcp_sk(sk);
1392
1393
1394 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1395 if (tp->undo_marker && tp->undo_retrans > 0 &&
1396 after(end_seq, tp->undo_marker))
1397 tp->undo_retrans = max_t(int, 0, tp->undo_retrans - pcount);
1398 if ((sacked & TCPCB_SACKED_ACKED) &&
1399 before(start_seq, state->reord))
1400 state->reord = start_seq;
1401 }
1402
1403
1404 if (!after(end_seq, tp->snd_una))
1405 return sacked;
1406
1407 if (!(sacked & TCPCB_SACKED_ACKED)) {
1408 tcp_rack_advance(tp, sacked, end_seq, xmit_time);
1409
1410 if (sacked & TCPCB_SACKED_RETRANS) {
1411
1412
1413
1414
1415 if (sacked & TCPCB_LOST) {
1416 sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1417 tp->lost_out -= pcount;
1418 tp->retrans_out -= pcount;
1419 }
1420 } else {
1421 if (!(sacked & TCPCB_RETRANS)) {
1422
1423
1424
1425 if (before(start_seq,
1426 tcp_highest_sack_seq(tp)) &&
1427 before(start_seq, state->reord))
1428 state->reord = start_seq;
1429
1430 if (!after(end_seq, tp->high_seq))
1431 state->flag |= FLAG_ORIG_SACK_ACKED;
1432 if (state->first_sackt == 0)
1433 state->first_sackt = xmit_time;
1434 state->last_sackt = xmit_time;
1435 }
1436
1437 if (sacked & TCPCB_LOST) {
1438 sacked &= ~TCPCB_LOST;
1439 tp->lost_out -= pcount;
1440 }
1441 }
1442
1443 sacked |= TCPCB_SACKED_ACKED;
1444 state->flag |= FLAG_DATA_SACKED;
1445 tp->sacked_out += pcount;
1446
1447 state->sack_delivered += pcount;
1448
1449
1450 if (tp->lost_skb_hint &&
1451 before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
1452 tp->lost_cnt_hint += pcount;
1453 }
1454
1455
1456
1457
1458
1459 if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
1460 sacked &= ~TCPCB_SACKED_RETRANS;
1461 tp->retrans_out -= pcount;
1462 }
1463
1464 return sacked;
1465}

/* Shift newly-SACKed bytes from this skb to the immediately previous
 * already-SACKed skb.  Mark the newly-SACKed bytes as such.
 */
1470static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
1471 struct sk_buff *skb,
1472 struct tcp_sacktag_state *state,
1473 unsigned int pcount, int shifted, int mss,
1474 bool dup_sack)
1475{
1476 struct tcp_sock *tp = tcp_sk(sk);
1477 u32 start_seq = TCP_SKB_CB(skb)->seq;
1478 u32 end_seq = start_seq + shifted;
1479
1480 BUG_ON(!pcount);
1481
1482
1483
1484
1485
1486
1487
1488 tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
1489 start_seq, end_seq, dup_sack, pcount,
1490 tcp_skb_timestamp_us(skb));
1491 tcp_rate_skb_delivered(sk, skb, state->rate);
1492
1493 if (skb == tp->lost_skb_hint)
1494 tp->lost_cnt_hint += pcount;
1495
1496 TCP_SKB_CB(prev)->end_seq += shifted;
1497 TCP_SKB_CB(skb)->seq += shifted;
1498
1499 tcp_skb_pcount_add(prev, pcount);
1500 WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
1501 tcp_skb_pcount_add(skb, -pcount);
1502
1503
1504
1505
1506
1507
1508 if (!TCP_SKB_CB(prev)->tcp_gso_size)
1509 TCP_SKB_CB(prev)->tcp_gso_size = mss;
1510
1511
1512 if (tcp_skb_pcount(skb) <= 1)
1513 TCP_SKB_CB(skb)->tcp_gso_size = 0;
1514
1515
1516 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
1517
1518 if (skb->len > 0) {
1519 BUG_ON(!tcp_skb_pcount(skb));
1520 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED);
1521 return false;
1522 }
1523
1524
1525
1526 if (skb == tp->retransmit_skb_hint)
1527 tp->retransmit_skb_hint = prev;
1528 if (skb == tp->lost_skb_hint) {
1529 tp->lost_skb_hint = prev;
1530 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
1531 }
1532
1533 TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1534 TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor;
1535 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1536 TCP_SKB_CB(prev)->end_seq++;
1537
1538 if (skb == tcp_highest_sack(sk))
1539 tcp_advance_highest_sack(sk, skb);
1540
1541 tcp_skb_collapse_tstamp(prev, skb);
1542 if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
1543 TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
1544
1545 tcp_rtx_queue_unlink_and_free(skb, sk);
1546
1547 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
1548
1549 return true;
1550}
1551
1552
1553
1554
1555static int tcp_skb_seglen(const struct sk_buff *skb)
1556{
1557 return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
1558}
1559
1560
1561static int skb_can_shift(const struct sk_buff *skb)
1562{
1563 return !skb_headlen(skb) && skb_is_nonlinear(skb);
1564}
1565
1566int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
1567 int pcount, int shiftlen)
1568{
1569
1570
1571
1572
1573
1574 if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
1575 return 0;
1576 if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
1577 return 0;
1578 return skb_shift(to, from, shiftlen);
1579}

/* Try collapsing SACK blocks spanning across multiple skbs to a single
 * skb.
 */
1584static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1585 struct tcp_sacktag_state *state,
1586 u32 start_seq, u32 end_seq,
1587 bool dup_sack)
1588{
1589 struct tcp_sock *tp = tcp_sk(sk);
1590 struct sk_buff *prev;
1591 int mss;
1592 int pcount = 0;
1593 int len;
1594 int in_sack;
1595
1596
1597 if (!dup_sack &&
1598 (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
1599 goto fallback;
1600 if (!skb_can_shift(skb))
1601 goto fallback;
1602
1603 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1604 goto fallback;
1605
1606
1607 prev = skb_rb_prev(skb);
1608 if (!prev)
1609 goto fallback;
1610
1611 if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
1612 goto fallback;
1613
1614 if (!tcp_skb_can_collapse(prev, skb))
1615 goto fallback;
1616
1617 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1618 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1619
1620 if (in_sack) {
1621 len = skb->len;
1622 pcount = tcp_skb_pcount(skb);
1623 mss = tcp_skb_seglen(skb);
1624
1625
1626
1627
1628 if (mss != tcp_skb_seglen(prev))
1629 goto fallback;
1630 } else {
1631 if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
1632 goto noop;
1633
1634
1635
1636
1637 if (tcp_skb_pcount(skb) <= 1)
1638 goto noop;
1639
1640 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1641 if (!in_sack) {
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653 goto fallback;
1654 }
1655
1656 len = end_seq - TCP_SKB_CB(skb)->seq;
1657 BUG_ON(len < 0);
1658 BUG_ON(len > skb->len);
1659
1660
1661
1662
1663
1664 mss = tcp_skb_mss(skb);
1665
1666
1667
1668
1669 if (mss != tcp_skb_seglen(prev))
1670 goto fallback;
1671
1672 if (len == mss) {
1673 pcount = 1;
1674 } else if (len < mss) {
1675 goto noop;
1676 } else {
1677 pcount = len / mss;
1678 len = pcount * mss;
1679 }
1680 }
1681
1682
1683 if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
1684 goto fallback;
1685
1686 if (!tcp_skb_shift(prev, skb, pcount, len))
1687 goto fallback;
1688 if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
1689 goto out;
1690
1691
1692
1693
1694 skb = skb_rb_next(prev);
1695 if (!skb)
1696 goto out;
1697
1698 if (!skb_can_shift(skb) ||
1699 ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
1700 (mss != tcp_skb_seglen(skb)))
1701 goto out;
1702
1703 if (!tcp_skb_can_collapse(prev, skb))
1704 goto out;
1705 len = skb->len;
1706 pcount = tcp_skb_pcount(skb);
1707 if (tcp_skb_shift(prev, skb, pcount, len))
1708 tcp_shifted_skb(sk, prev, skb, state, pcount,
1709 len, mss, 0);
1710
1711out:
1712 return prev;
1713
1714noop:
1715 return skb;
1716
1717fallback:
1718 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
1719 return NULL;
1720}
1721
1722static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1723 struct tcp_sack_block *next_dup,
1724 struct tcp_sacktag_state *state,
1725 u32 start_seq, u32 end_seq,
1726 bool dup_sack_in)
1727{
1728 struct tcp_sock *tp = tcp_sk(sk);
1729 struct sk_buff *tmp;
1730
1731 skb_rbtree_walk_from(skb) {
1732 int in_sack = 0;
1733 bool dup_sack = dup_sack_in;
1734
1735
1736 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
1737 break;
1738
1739 if (next_dup &&
1740 before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
1741 in_sack = tcp_match_skb_to_sack(sk, skb,
1742 next_dup->start_seq,
1743 next_dup->end_seq);
1744 if (in_sack > 0)
1745 dup_sack = true;
1746 }
1747
1748
1749
1750
1751
1752 if (in_sack <= 0) {
1753 tmp = tcp_shift_skb_data(sk, skb, state,
1754 start_seq, end_seq, dup_sack);
1755 if (tmp) {
1756 if (tmp != skb) {
1757 skb = tmp;
1758 continue;
1759 }
1760
1761 in_sack = 0;
1762 } else {
1763 in_sack = tcp_match_skb_to_sack(sk, skb,
1764 start_seq,
1765 end_seq);
1766 }
1767 }
1768
1769 if (unlikely(in_sack < 0))
1770 break;
1771
1772 if (in_sack) {
1773 TCP_SKB_CB(skb)->sacked =
1774 tcp_sacktag_one(sk,
1775 state,
1776 TCP_SKB_CB(skb)->sacked,
1777 TCP_SKB_CB(skb)->seq,
1778 TCP_SKB_CB(skb)->end_seq,
1779 dup_sack,
1780 tcp_skb_pcount(skb),
1781 tcp_skb_timestamp_us(skb));
1782 tcp_rate_skb_delivered(sk, skb, state->rate);
1783 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1784 list_del_init(&skb->tcp_tsorted_anchor);
1785
1786 if (!before(TCP_SKB_CB(skb)->seq,
1787 tcp_highest_sack_seq(tp)))
1788 tcp_advance_highest_sack(sk, skb);
1789 }
1790 }
1791 return skb;
1792}
1793
1794static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, u32 seq)
1795{
1796 struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
1797 struct sk_buff *skb;
1798
1799 while (*p) {
1800 parent = *p;
1801 skb = rb_to_skb(parent);
1802 if (before(seq, TCP_SKB_CB(skb)->seq)) {
1803 p = &parent->rb_left;
1804 continue;
1805 }
1806 if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
1807 p = &parent->rb_right;
1808 continue;
1809 }
1810 return skb;
1811 }
1812 return NULL;
1813}
1814
1815static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
1816 u32 skip_to_seq)
1817{
1818 if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
1819 return skb;
1820
1821 return tcp_sacktag_bsearch(sk, skip_to_seq);
1822}
1823
1824static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1825 struct sock *sk,
1826 struct tcp_sack_block *next_dup,
1827 struct tcp_sacktag_state *state,
1828 u32 skip_to_seq)
1829{
1830 if (!next_dup)
1831 return skb;
1832
1833 if (before(next_dup->start_seq, skip_to_seq)) {
1834 skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
1835 skb = tcp_sacktag_walk(skb, sk, NULL, state,
1836 next_dup->start_seq, next_dup->end_seq,
1837 1);
1838 }
1839
1840 return skb;
1841}
1842
1843static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
1844{
1845 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1846}
1847
1848static int
1849tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1850 u32 prior_snd_una, struct tcp_sacktag_state *state)
1851{
1852 struct tcp_sock *tp = tcp_sk(sk);
1853 const unsigned char *ptr = (skb_transport_header(ack_skb) +
1854 TCP_SKB_CB(ack_skb)->sacked);
1855 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
1856 struct tcp_sack_block sp[TCP_NUM_SACKS];
1857 struct tcp_sack_block *cache;
1858 struct sk_buff *skb;
1859 int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
1860 int used_sacks;
1861 bool found_dup_sack = false;
1862 int i, j;
1863 int first_sack_index;
1864
1865 state->flag = 0;
1866 state->reord = tp->snd_nxt;
1867
1868 if (!tp->sacked_out)
1869 tcp_highest_sack_reset(sk);
1870
1871 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
1872 num_sacks, prior_snd_una, state);
1873
1874
1875
1876
1877
1878 if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
1879 return 0;
1880
1881 if (!tp->packets_out)
1882 goto out;
1883
1884 used_sacks = 0;
1885 first_sack_index = 0;
1886 for (i = 0; i < num_sacks; i++) {
1887 bool dup_sack = !i && found_dup_sack;
1888
1889 sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
1890 sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);
1891
1892 if (!tcp_is_sackblock_valid(tp, dup_sack,
1893 sp[used_sacks].start_seq,
1894 sp[used_sacks].end_seq)) {
1895 int mib_idx;
1896
1897 if (dup_sack) {
1898 if (!tp->undo_marker)
1899 mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
1900 else
1901 mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
1902 } else {
1903
1904 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
1905 !after(sp[used_sacks].end_seq, tp->snd_una))
1906 continue;
1907 mib_idx = LINUX_MIB_TCPSACKDISCARD;
1908 }
1909
1910 NET_INC_STATS(sock_net(sk), mib_idx);
1911 if (i == 0)
1912 first_sack_index = -1;
1913 continue;
1914 }
1915
1916
1917 if (!after(sp[used_sacks].end_seq, prior_snd_una)) {
1918 if (i == 0)
1919 first_sack_index = -1;
1920 continue;
1921 }
1922
1923 used_sacks++;
1924 }
1925
1926
1927 for (i = used_sacks - 1; i > 0; i--) {
1928 for (j = 0; j < i; j++) {
1929 if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
1930 swap(sp[j], sp[j + 1]);
1931
1932
1933 if (j == first_sack_index)
1934 first_sack_index = j + 1;
1935 }
1936 }
1937 }
1938
1939 state->mss_now = tcp_current_mss(sk);
1940 skb = NULL;
1941 i = 0;
1942
1943 if (!tp->sacked_out) {
1944
1945 cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1946 } else {
1947 cache = tp->recv_sack_cache;
1948
1949 while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
1950 !cache->end_seq)
1951 cache++;
1952 }
1953
1954 while (i < used_sacks) {
1955 u32 start_seq = sp[i].start_seq;
1956 u32 end_seq = sp[i].end_seq;
1957 bool dup_sack = (found_dup_sack && (i == first_sack_index));
1958 struct tcp_sack_block *next_dup = NULL;
1959
1960 if (found_dup_sack && ((i + 1) == first_sack_index))
1961 next_dup = &sp[i + 1];
1962
1963
1964 while (tcp_sack_cache_ok(tp, cache) &&
1965 !before(start_seq, cache->end_seq))
1966 cache++;
1967
1968
1969 if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
1970 after(end_seq, cache->start_seq)) {
1971
1972
1973 if (before(start_seq, cache->start_seq)) {
1974 skb = tcp_sacktag_skip(skb, sk, start_seq);
1975 skb = tcp_sacktag_walk(skb, sk, next_dup,
1976 state,
1977 start_seq,
1978 cache->start_seq,
1979 dup_sack);
1980 }
1981
1982
1983 if (!after(end_seq, cache->end_seq))
1984 goto advance_sp;
1985
1986 skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
1987 state,
1988 cache->end_seq);
1989
1990
1991 if (tcp_highest_sack_seq(tp) == cache->end_seq) {
1992
1993 skb = tcp_highest_sack(sk);
1994 if (!skb)
1995 break;
1996 cache++;
1997 goto walk;
1998 }
1999
2000 skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
2001
2002 cache++;
2003 continue;
2004 }
2005
2006 if (!before(start_seq, tcp_highest_sack_seq(tp))) {
2007 skb = tcp_highest_sack(sk);
2008 if (!skb)
2009 break;
2010 }
2011 skb = tcp_sacktag_skip(skb, sk, start_seq);
2012
2013walk:
2014 skb = tcp_sacktag_walk(skb, sk, next_dup, state,
2015 start_seq, end_seq, dup_sack);
2016
2017advance_sp:
2018 i++;
2019 }
2020
2021
2022 for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
2023 tp->recv_sack_cache[i].start_seq = 0;
2024 tp->recv_sack_cache[i].end_seq = 0;
2025 }
2026 for (j = 0; j < used_sacks; j++)
2027 tp->recv_sack_cache[i++] = sp[j];
2028
2029 if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
2030 tcp_check_sack_reordering(sk, state->reord, 0);
2031
2032 tcp_verify_left_out(tp);
2033out:
2034
2035#if FASTRETRANS_DEBUG > 0
2036 WARN_ON((int)tp->sacked_out < 0);
2037 WARN_ON((int)tp->lost_out < 0);
2038 WARN_ON((int)tp->retrans_out < 0);
2039 WARN_ON((int)tcp_packets_in_flight(tp) < 0);
2040#endif
2041 return state->flag;
2042}

/* Limit sacked_out so that its sum with lost_out is never larger than
 * packets_out.  Returns false if no adjustment was necessary.
 */
2047static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
2048{
2049 u32 holes;
2050
2051 holes = max(tp->lost_out, 1U);
2052 holes = min(holes, tp->packets_out);
2053
2054 if ((tp->sacked_out + holes) > tp->packets_out) {
2055 tp->sacked_out = tp->packets_out - holes;
2056 return true;
2057 }
2058 return false;
2059}

/* If we receive more dupACKs than we expected, counting segments under the
 * assumption of no reordering, interpret this as reordering.  The only
 * other plausible cause is a bug in the receiver's TCP.
 */
2065static void tcp_check_reno_reordering(struct sock *sk, const int addend)
2066{
2067 struct tcp_sock *tp = tcp_sk(sk);
2068
2069 if (!tcp_limit_reno_sacked(tp))
2070 return;
2071
2072 tp->reordering = min_t(u32, tp->packets_out + addend,
2073 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
2074 tp->reord_seen++;
2075 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
2076}
2077
2078
2079
2080static void tcp_add_reno_sack(struct sock *sk, int num_dupack, bool ece_ack)
2081{
2082 if (num_dupack) {
2083 struct tcp_sock *tp = tcp_sk(sk);
2084 u32 prior_sacked = tp->sacked_out;
2085 s32 delivered;
2086
2087 tp->sacked_out += num_dupack;
2088 tcp_check_reno_reordering(sk, 0);
2089 delivered = tp->sacked_out - prior_sacked;
2090 if (delivered > 0)
2091 tcp_count_delivered(tp, delivered, ece_ack);
2092 tcp_verify_left_out(tp);
2093 }
2094}
2095
2096
2097
2098static void tcp_remove_reno_sacks(struct sock *sk, int acked, bool ece_ack)
2099{
2100 struct tcp_sock *tp = tcp_sk(sk);
2101
2102 if (acked > 0) {
2103
2104 tcp_count_delivered(tp, max_t(int, acked - tp->sacked_out, 1),
2105 ece_ack);
2106 if (acked - 1 >= tp->sacked_out)
2107 tp->sacked_out = 0;
2108 else
2109 tp->sacked_out -= acked - 1;
2110 }
2111 tcp_check_reno_reordering(sk, acked);
2112 tcp_verify_left_out(tp);
2113}
2114
2115static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
2116{
2117 tp->sacked_out = 0;
2118}
2119
2120void tcp_clear_retrans(struct tcp_sock *tp)
2121{
2122 tp->retrans_out = 0;
2123 tp->lost_out = 0;
2124 tp->undo_marker = 0;
2125 tp->undo_retrans = -1;
2126 tp->sacked_out = 0;
2127 tp->rto_stamp = 0;
2128 tp->total_rto = 0;
2129 tp->total_rto_recoveries = 0;
2130 tp->total_rto_time = 0;
2131}
2132
2133static inline void tcp_init_undo(struct tcp_sock *tp)
2134{
2135 tp->undo_marker = tp->snd_una;
2136
2137 tp->undo_retrans = tp->retrans_out ? : -1;
2138}
2139
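/* True if RACK (RFC 8985 time-based loss detection) is enabled. */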
2140static bool tcp_is_rack(const struct sock *sk)
2141{
2142 return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
2143 TCP_RACK_LOSS_DETECTION;
2144}
2145
2146
2147
2148
2149
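/* On RTO (or detected SACK reneging) mark packets lost: everything if the
 * receiver reneged, otherwise the head plus the segments whose RACK timeout
 * has already expired.
 */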
2150static void tcp_timeout_mark_lost(struct sock *sk)
2151{
2152 struct tcp_sock *tp = tcp_sk(sk);
2153 struct sk_buff *skb, *head;
2154 bool is_reneg;
2155
2156 head = tcp_rtx_queue_head(sk);
2157 is_reneg = head && (TCP_SKB_CB(head)->sacked & TCPCB_SACKED_ACKED);
2158 if (is_reneg) {
2159 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
2160 tp->sacked_out = 0;
2161
2162 tp->is_sack_reneg = 1;
2163 } else if (tcp_is_reno(tp)) {
2164 tcp_reset_reno_sack(tp);
2165 }
2166
2167 skb = head;
2168 skb_rbtree_walk_from(skb) {
2169 if (is_reneg)
2170 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
2171 else if (tcp_is_rack(sk) && skb != head &&
2172 tcp_rack_skb_timeout(tp, skb, 0) > 0)
2173 continue;
2174 tcp_mark_skb_lost(sk, skb);
2175 }
2176 tcp_verify_left_out(tp);
2177 tcp_clear_all_retrans_hints(tp);
2178}
2179
2180
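/* Enter Loss state after an RTO: mark segments lost, reduce cwnd/ssthresh
 * unless that was already done for this loss episode, and decide whether
 * F-RTO may later detect the timeout as spurious.
 */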
2181void tcp_enter_loss(struct sock *sk)
2182{
2183 const struct inet_connection_sock *icsk = inet_csk(sk);
2184 struct tcp_sock *tp = tcp_sk(sk);
2185 struct net *net = sock_net(sk);
2186 bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
2187 u8 reordering;
2188
2189 tcp_timeout_mark_lost(sk);
2190
2191
2192 if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
2193 !after(tp->high_seq, tp->snd_una) ||
2194 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
2195 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2196 tp->prior_cwnd = tcp_snd_cwnd(tp);
2197 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2198 tcp_ca_event(sk, CA_EVENT_LOSS);
2199 tcp_init_undo(tp);
2200 }
2201 tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + 1);
2202 tp->snd_cwnd_cnt = 0;
2203 tp->snd_cwnd_stamp = tcp_jiffies32;
2204
2205
2206
2207
2208 reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering);
2209 if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
2210 tp->sacked_out >= reordering)
2211 tp->reordering = min_t(unsigned int, tp->reordering,
2212 reordering);
2213
2214 tcp_set_ca_state(sk, TCP_CA_Loss);
2215 tp->high_seq = tp->snd_nxt;
2216 tcp_ecn_queue_cwr(tp);
2217
2218
2219
2220
2221
2222 tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) &&
2223 (new_recovery || icsk->icsk_retransmits) &&
2224 !inet_csk(sk)->icsk_mtup.probe_size;
2225}

/* If the ACK points to a sequence we remember as SACKed, the receiver has
 * "reneged" and discarded data from its out-of-order queue.  Treat this
 * like a timeout: re-arm the retransmission timer shortly instead of acting
 * on the now-unreliable SACK information.
 */
2237static bool tcp_check_sack_reneging(struct sock *sk, int *ack_flag)
2238{
2239 if (*ack_flag & FLAG_SACK_RENEGING &&
2240 *ack_flag & FLAG_SND_UNA_ADVANCED) {
2241 struct tcp_sock *tp = tcp_sk(sk);
2242 unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
2243 msecs_to_jiffies(10));
2244
2245 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2246 delay, TCP_RTO_MAX);
2247 *ack_flag &= ~FLAG_SET_XMIT_TIMER;
2248 return true;
2249 }
2250 return false;
2251}

/* Heuristic "duplicate ACK" count.  With SACK there is no explicit dupACK
 * counter, so use the number of SACKed segments (+1 for the ACK itself) as
 * the equivalent measure of evidence for loss or reordering.
 */
2264static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
2265{
2266 return tp->sacked_out + 1;
2267}

/* The Linux TCP sender congestion state machine (icsk_ca_state):
 *
 * "Open"	- normal state, no dubious events.
 * "Disorder"	- some SACKs or duplicate ACKs were seen, suggesting
 *		  reordering; nothing is considered lost yet.
 * "CWR"	- cwnd is being reduced after a congestion notification
 *		  (ECE or local device congestion), without retransmits.
 * "Recovery"	- fast retransmit / fast recovery: cwnd is reduced and
 *		  segments marked lost are retransmitted.
 * "Loss"	- an RTO fired (or SACK reneging was detected); everything
 *		  in flight is marked lost and resent from snd_una.
 *
 * tcp_time_to_recover() below decides when Disorder turns into Recovery:
 * either some segment has already been marked lost (e.g. by RACK), or,
 * without RACK, the duplicate-ACK/SACK count exceeds the reordering
 * threshold (tp->reordering).
 */
2366static bool tcp_time_to_recover(struct sock *sk, int flag)
2367{
2368 struct tcp_sock *tp = tcp_sk(sk);
2369
2370
2371 if (tp->lost_out)
2372 return true;
2373
2374
2375 if (!tcp_is_rack(sk) && tcp_dupack_heuristics(tp) > tp->reordering)
2376 return true;
2377
2378 return false;
2379}
2380
2381
2382
2383
2384
2385
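/* Non-RACK loss marking: walk from the head of the retransmit queue and
 * mark segments lost until more than @packets SACKed segments lie above
 * them; with @mark_head, stop after the first skb marked.
 */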
2386static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2387{
2388 struct tcp_sock *tp = tcp_sk(sk);
2389 struct sk_buff *skb;
2390 int cnt;
2391
2392 const u32 loss_high = tp->snd_nxt;
2393
2394 WARN_ON(packets > tp->packets_out);
2395 skb = tp->lost_skb_hint;
2396 if (skb) {
2397
2398 if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
2399 return;
2400 cnt = tp->lost_cnt_hint;
2401 } else {
2402 skb = tcp_rtx_queue_head(sk);
2403 cnt = 0;
2404 }
2405
2406 skb_rbtree_walk_from(skb) {
2407
2408
2409 tp->lost_skb_hint = skb;
2410 tp->lost_cnt_hint = cnt;
2411
2412 if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
2413 break;
2414
2415 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2416 cnt += tcp_skb_pcount(skb);
2417
2418 if (cnt > packets)
2419 break;
2420
2421 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST))
2422 tcp_mark_skb_lost(sk, skb);
2423
2424 if (mark_head)
2425 break;
2426 }
2427 tcp_verify_left_out(tp);
2428}
2429
2430
2431
2432static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2433{
2434 struct tcp_sock *tp = tcp_sk(sk);
2435
2436 if (tcp_is_sack(tp)) {
2437 int sacked_upto = tp->sacked_out - tp->reordering;
2438 if (sacked_upto >= 0)
2439 tcp_mark_head_lost(sk, sacked_upto, 0);
2440 else if (fast_rexmit)
2441 tcp_mark_head_lost(sk, 1, 1);
2442 }
2443}
2444
2445static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when)
2446{
2447 return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2448 before(tp->rx_opt.rcv_tsecr, when);
2449}
2450
2451
2452
2453
2454static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
2455 const struct sk_buff *skb)
2456{
2457 return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) &&
2458 tcp_tsopt_ecr_before(tp, tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb));
2459}
2460
2461
2462
2463
2464static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
2465{
2466 return tp->retrans_stamp &&
2467 tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
2468}

/* Undo heuristics.  A cwnd reduction can be undone when we learn that the
 * retransmissions were spurious: either timestamps show the echoed value
 * predates our first retransmission (tcp_packet_delayed()), or D-SACKs have
 * accounted for every retransmitted segment (undo_retrans reached zero).
 *
 * tcp_any_retrans_done() reports whether any segment still in the
 * retransmit queue has ever been retransmitted; if not, retrans_stamp can
 * safely be cleared.
 */
2486static bool tcp_any_retrans_done(const struct sock *sk)
2487{
2488 const struct tcp_sock *tp = tcp_sk(sk);
2489 struct sk_buff *skb;
2490
2491 if (tp->retrans_out)
2492 return true;
2493
2494 skb = tcp_rtx_queue_head(sk);
2495 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2496 return true;
2497
2498 return false;
2499}
2500
2501static void DBGUNDO(struct sock *sk, const char *msg)
2502{
2503#if FASTRETRANS_DEBUG > 1
2504 struct tcp_sock *tp = tcp_sk(sk);
2505 struct inet_sock *inet = inet_sk(sk);
2506
2507 if (sk->sk_family == AF_INET) {
2508 pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
2509 msg,
2510 &inet->inet_daddr, ntohs(inet->inet_dport),
2511 tcp_snd_cwnd(tp), tcp_left_out(tp),
2512 tp->snd_ssthresh, tp->prior_ssthresh,
2513 tp->packets_out);
2514 }
2515#if IS_ENABLED(CONFIG_IPV6)
2516 else if (sk->sk_family == AF_INET6) {
2517 pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
2518 msg,
2519 &sk->sk_v6_daddr, ntohs(inet->inet_dport),
2520 tcp_snd_cwnd(tp), tcp_left_out(tp),
2521 tp->snd_ssthresh, tp->prior_ssthresh,
2522 tp->packets_out);
2523 }
2524#endif
2525#endif
2526}
2527
2528static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
2529{
2530 struct tcp_sock *tp = tcp_sk(sk);
2531
2532 if (unmark_loss) {
2533 struct sk_buff *skb;
2534
2535 skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
2536 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2537 }
2538 tp->lost_out = 0;
2539 tcp_clear_all_retrans_hints(tp);
2540 }
2541
2542 if (tp->prior_ssthresh) {
2543 const struct inet_connection_sock *icsk = inet_csk(sk);
2544
2545 tcp_snd_cwnd_set(tp, icsk->icsk_ca_ops->undo_cwnd(sk));
2546
2547 if (tp->prior_ssthresh > tp->snd_ssthresh) {
2548 tp->snd_ssthresh = tp->prior_ssthresh;
2549 tcp_ecn_withdraw_cwr(tp);
2550 }
2551 }
2552 tp->snd_cwnd_stamp = tcp_jiffies32;
2553 tp->undo_marker = 0;
2554 tp->rack.advanced = 1;
2555}
2556
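/* Undo is allowed if nothing retransmitted remains unproven: either every
 * retransmission was D-SACKed (undo_retrans hit zero) or timestamps show
 * the original transmissions were the ones acknowledged.
 */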
2557static inline bool tcp_may_undo(const struct tcp_sock *tp)
2558{
2559 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2560}
2561
2562static bool tcp_is_non_sack_preventing_reopen(struct sock *sk)
2563{
2564 struct tcp_sock *tp = tcp_sk(sk);
2565
2566 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
2567
2568
2569
2570 if (!tcp_any_retrans_done(sk))
2571 tp->retrans_stamp = 0;
2572 return true;
2573 }
2574 return false;
2575}
2576
2577
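/* Try to undo the cwnd reduction when leaving Recovery or Loss: if the
 * retransmissions are proven spurious, restore cwnd and ssthresh.  Returns
 * true when recovery must be kept open (non-SACK reopen corner case).
 */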
2578static bool tcp_try_undo_recovery(struct sock *sk)
2579{
2580 struct tcp_sock *tp = tcp_sk(sk);
2581
2582 if (tcp_may_undo(tp)) {
2583 int mib_idx;
2584
2585
2586
2587
2588 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2589 tcp_undo_cwnd_reduction(sk, false);
2590 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2591 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2592 else
2593 mib_idx = LINUX_MIB_TCPFULLUNDO;
2594
2595 NET_INC_STATS(sock_net(sk), mib_idx);
2596 } else if (tp->rack.reo_wnd_persist) {
2597 tp->rack.reo_wnd_persist--;
2598 }
2599 if (tcp_is_non_sack_preventing_reopen(sk))
2600 return true;
2601 tcp_set_ca_state(sk, TCP_CA_Open);
2602 tp->is_sack_reneg = 0;
2603 return false;
2604}
2605
2606
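/* Try to undo cwnd reduction because D-SACKs acked all retransmitted data. */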
2607static bool tcp_try_undo_dsack(struct sock *sk)
2608{
2609 struct tcp_sock *tp = tcp_sk(sk);
2610
2611 if (tp->undo_marker && !tp->undo_retrans) {
2612 tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
2613 tp->rack.reo_wnd_persist + 1);
2614 DBGUNDO(sk, "D-SACK");
2615 tcp_undo_cwnd_reduction(sk, false);
2616 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2617 return true;
2618 }
2619 return false;
2620}
2621
2622
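/* Undo during loss recovery, after a partial ACK or thanks to F-RTO. */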
2623static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
2624{
2625 struct tcp_sock *tp = tcp_sk(sk);
2626
2627 if (frto_undo || tcp_may_undo(tp)) {
2628 tcp_undo_cwnd_reduction(sk, true);
2629
2630 DBGUNDO(sk, "partial loss");
2631 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2632 if (frto_undo)
2633 NET_INC_STATS(sock_net(sk),
2634 LINUX_MIB_TCPSPURIOUSRTOS);
2635 inet_csk(sk)->icsk_retransmits = 0;
2636 if (tcp_is_non_sack_preventing_reopen(sk))
2637 return true;
2638 if (frto_undo || tcp_is_sack(tp)) {
2639 tcp_set_ca_state(sk, TCP_CA_Open);
2640 tp->is_sack_reneg = 0;
2641 }
2642 return true;
2643 }
2644 return false;
2645}

/* The cwnd reduction in CWR and Recovery states uses the PRR algorithm
 * (RFC 6937): instead of halving cwnd at once, packets are paced out so
 * that the amount of data in flight converges on ssthresh.
 * tcp_init_cwnd_reduction() snapshots the state the reduction needs
 * (prior_cwnd and the PRR counters) when the episode starts.
 */
2656static void tcp_init_cwnd_reduction(struct sock *sk)
2657{
2658 struct tcp_sock *tp = tcp_sk(sk);
2659
2660 tp->high_seq = tp->snd_nxt;
2661 tp->tlp_high_seq = 0;
2662 tp->snd_cwnd_cnt = 0;
2663 tp->prior_cwnd = tcp_snd_cwnd(tp);
2664 tp->prr_delivered = 0;
2665 tp->prr_out = 0;
2666 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
2667 tcp_ecn_queue_cwr(tp);
2668}
2669
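/* PRR (RFC 6937): compute how many segments may be sent for this ACK so the
 * amount of data in flight approaches ssthresh smoothly during recovery.
 */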
2670void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag)
2671{
2672 struct tcp_sock *tp = tcp_sk(sk);
2673 int sndcnt = 0;
2674 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
2675
2676 if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
2677 return;
2678
2679 tp->prr_delivered += newly_acked_sacked;
2680 if (delta < 0) {
2681 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
2682 tp->prior_cwnd - 1;
2683 sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
2684 } else {
2685 sndcnt = max_t(int, tp->prr_delivered - tp->prr_out,
2686 newly_acked_sacked);
2687 if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost)
2688 sndcnt++;
2689 sndcnt = min(delta, sndcnt);
2690 }
2691
2692 sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
2693 tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + sndcnt);
2694}
2695
2696static inline void tcp_end_cwnd_reduction(struct sock *sk)
2697{
2698 struct tcp_sock *tp = tcp_sk(sk);
2699
2700 if (inet_csk(sk)->icsk_ca_ops->cong_control)
2701 return;
2702
/* Reset cwnd to ssthresh in CWR or Recovery (unless it is being undone) */
2704 if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
2705 (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
2706 tcp_snd_cwnd_set(tp, tp->snd_ssthresh);
2707 tp->snd_cwnd_stamp = tcp_jiffies32;
2708 }
2709 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2710}
2711
/* Enter CWR state: congestion was signalled (e.g. by ECN), so cwnd undo is disabled. */
2713void tcp_enter_cwr(struct sock *sk)
2714{
2715 struct tcp_sock *tp = tcp_sk(sk);
2716
2717 tp->prior_ssthresh = 0;
2718 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2719 tp->undo_marker = 0;
2720 tcp_init_cwnd_reduction(sk);
2721 tcp_set_ca_state(sk, TCP_CA_CWR);
2722 }
2723}
2724EXPORT_SYMBOL(tcp_enter_cwr);
2725
2726static void tcp_try_keep_open(struct sock *sk)
2727{
2728 struct tcp_sock *tp = tcp_sk(sk);
2729 int state = TCP_CA_Open;
2730
2731 if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
2732 state = TCP_CA_Disorder;
2733
2734 if (inet_csk(sk)->icsk_ca_state != state) {
2735 tcp_set_ca_state(sk, state);
2736 tp->high_seq = tp->snd_nxt;
2737 }
2738}
2739
2740static void tcp_try_to_open(struct sock *sk, int flag)
2741{
2742 struct tcp_sock *tp = tcp_sk(sk);
2743
2744 tcp_verify_left_out(tp);
2745
2746 if (!tcp_any_retrans_done(sk))
2747 tp->retrans_stamp = 0;
2748
2749 if (flag & FLAG_ECE)
2750 tcp_enter_cwr(sk);
2751
2752 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2753 tcp_try_keep_open(sk);
2754 }
2755}
2756
2757static void tcp_mtup_probe_failed(struct sock *sk)
2758{
2759 struct inet_connection_sock *icsk = inet_csk(sk);
2760
2761 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
2762 icsk->icsk_mtup.probe_size = 0;
2763 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPFAIL);
2764}
2765
2766static void tcp_mtup_probe_success(struct sock *sk)
2767{
2768 struct tcp_sock *tp = tcp_sk(sk);
2769 struct inet_connection_sock *icsk = inet_csk(sk);
2770 u64 val;
2771
2772 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2773
2774 val = (u64)tcp_snd_cwnd(tp) * tcp_mss_to_mtu(sk, tp->mss_cache);
2775 do_div(val, icsk->icsk_mtup.probe_size);
2776 DEBUG_NET_WARN_ON_ONCE((u32)val != val);
2777 tcp_snd_cwnd_set(tp, max_t(u32, 1U, val));
2778
2779 tp->snd_cwnd_cnt = 0;
2780 tp->snd_cwnd_stamp = tcp_jiffies32;
2781 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2782
2783 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2784 icsk->icsk_mtup.probe_size = 0;
2785 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2786 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
2787}
2788
/* Do a simple retransmit without using the backoff mechanisms in
 * tcp_timer.  This is used to speed up path MTU discovery recovery.
 * The socket is already locked here.
 */
2793void tcp_simple_retransmit(struct sock *sk)
2794{
2795 const struct inet_connection_sock *icsk = inet_csk(sk);
2796 struct tcp_sock *tp = tcp_sk(sk);
2797 struct sk_buff *skb;
2798 int mss;
2799
/* A Fast Open SYN request is stored as two separate packets in the
 * retransmit queue (see tcp_send_syn_data()), so checking the MSS of the
 * queued frames does not work for the SYN packet.
 *
 * Being here indicates a path MTU issue, so assume the Fast Open SYN and
 * its data need to be retransmitted to fit the new MTU: use an MSS of -1
 * so every queued frame is marked lost below.
 */
2810 if (tp->syn_data && sk->sk_state == TCP_SYN_SENT)
2811 mss = -1;
2812 else
2813 mss = tcp_current_mss(sk);
2814
2815 skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
2816 if (tcp_skb_seglen(skb) > mss)
2817 tcp_mark_skb_lost(sk, skb);
2818 }
2819
2820 tcp_clear_retrans_hints_partial(tp);
2821
2822 if (!tp->lost_out)
2823 return;
2824
2825 if (tcp_is_reno(tp))
2826 tcp_limit_reno_sacked(tp);
2827
2828 tcp_verify_left_out(tp);
2829
/* Don't touch the congestion window here: this was caused by MTU
 * shrinkage, not congestion.  Just move to the Loss state (keeping the
 * current ssthresh and with undo disabled) so the oversized segments get
 * retransmitted.
 */
2835 if (icsk->icsk_ca_state != TCP_CA_Loss) {
2836 tp->high_seq = tp->snd_nxt;
2837 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2838 tp->prior_ssthresh = 0;
2839 tp->undo_marker = 0;
2840 tcp_set_ca_state(sk, TCP_CA_Loss);
2841 }
2842 tcp_xmit_retransmit_queue(sk);
2843}
2844EXPORT_SYMBOL(tcp_simple_retransmit);
2845
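/* Enter the Recovery state on detected loss: count the event, arm the undo
 * state, and start PRR cwnd reduction unless a reduction (e.g. CWR) is
 * already in progress.
 */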
2846void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2847{
2848 struct tcp_sock *tp = tcp_sk(sk);
2849 int mib_idx;
2850
2851 if (tcp_is_reno(tp))
2852 mib_idx = LINUX_MIB_TCPRENORECOVERY;
2853 else
2854 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
2855
2856 NET_INC_STATS(sock_net(sk), mib_idx);
2857
2858 tp->prior_ssthresh = 0;
2859 tcp_init_undo(tp);
2860
2861 if (!tcp_in_cwnd_reduction(sk)) {
2862 if (!ece_ack)
2863 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2864 tcp_init_cwnd_reduction(sk);
2865 }
2866 tcp_set_ca_state(sk, TCP_CA_Recovery);
2867}
2868
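/* If an RTO episode is in progress (rto_stamp is set), fold its duration
 * into total_rto_time and clear the stamp.
 */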
2869static void tcp_update_rto_time(struct tcp_sock *tp)
2870{
2871 if (tp->rto_stamp) {
2872 tp->total_rto_time += tcp_time_stamp_ms(tp) - tp->rto_stamp;
2873 tp->rto_stamp = 0;
2874 }
2875}
2876
/* Process an ACK while in the CA_Loss state.  Move to CA_Open if the lost
 * data was recovered or the RTO turns out to be spurious; otherwise keep
 * retransmitting on partial ACKs.
 */
2880static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
2881 int *rexmit)
2882{
2883 struct tcp_sock *tp = tcp_sk(sk);
2884 bool recovered = !before(tp->snd_una, tp->high_seq);
2885
2886 if ((flag & FLAG_SND_UNA_ADVANCED || rcu_access_pointer(tp->fastopen_rsk)) &&
2887 tcp_try_undo_loss(sk, false))
2888 return;
2889
2890 if (tp->frto) {
/* F-RTO (RFC 5682 sec 3.1, SACK-enhanced version): the timeout is
 * spurious if never-retransmitted data is (s)acked.
 */
2894 if ((flag & FLAG_ORIG_SACK_ACKED) &&
2895 tcp_try_undo_loss(sk, true))
2896 return;
2897
2898 if (after(tp->snd_nxt, tp->high_seq)) {
2899 if (flag & FLAG_DATA_SACKED || num_dupack)
2900 tp->frto = 0;
2901 } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
2902 tp->high_seq = tp->snd_nxt;
/* Step 2.b: try to send new data (deferred until cwnd is updated in
 * tcp_ack()); otherwise fall back to conventional loss recovery.
 */
2907 if (!tcp_write_queue_empty(sk) &&
2908 after(tcp_wnd_end(tp), tp->snd_nxt)) {
2909 *rexmit = REXMIT_NEW;
2910 return;
2911 }
2912 tp->frto = 0;
2913 }
2914 }
2915
2916 if (recovered) {
/* high_seq has been cumulatively ACKed: try to undo, then exit Loss */
2918 tcp_try_undo_recovery(sk);
2919 return;
2920 }
2921 if (tcp_is_reno(tp)) {
/* A Reno dupACK means the new data sent in F-RTO step 2.b above was
 * delivered.  Lower inflight to clock out (re)transmissions.
 */
2925 if (after(tp->snd_nxt, tp->high_seq) && num_dupack)
2926 tcp_add_reno_sack(sk, num_dupack, flag & FLAG_ECE);
2927 else if (flag & FLAG_SND_UNA_ADVANCED)
2928 tcp_reset_reno_sack(tp);
2929 }
2930 *rexmit = REXMIT_LOST;
2931}
2932
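/* True when the highest SACKed sequence is more than 'reordering' * MSS
 * beyond SND.UNA, i.e. there is enough dupACK-equivalent evidence to force
 * a fast retransmit.
 */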
2933static bool tcp_force_fast_retransmit(struct sock *sk)
2934{
2935 struct tcp_sock *tp = tcp_sk(sk);
2936
2937 return after(tcp_highest_sack_seq(tp),
2938 tp->snd_una + tp->reordering * tp->mss_cache);
2939}
2940
/* Try to undo the cwnd reduction during fast recovery, after a partial ACK. */
2942static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una,
2943 bool *do_lost)
2944{
2945 struct tcp_sock *tp = tcp_sk(sk);
2946
2947 if (tp->undo_marker && tcp_packet_delayed(tp)) {
/* Plain luck: the hole was filled by a delayed original packet, not
 * by a retransmit.  Check for reordering.
 */
2951 tcp_check_sack_reordering(sk, prior_snd_una, 1);
2952
/* We are getting evidence that the reordering degree is higher than
 * we realized.  If there are no retransmits out we can undo;
 * otherwise just clock out new packets without marking more packets
 * lost or retransmitting further.
 */
2958 if (tp->retrans_out)
2959 return true;
2960
2961 if (!tcp_any_retrans_done(sk))
2962 tp->retrans_stamp = 0;
2963
2964 DBGUNDO(sk, "partial recovery");
2965 tcp_undo_cwnd_reduction(sk, true);
2966 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2967 tcp_try_keep_open(sk);
2968 } else {
/* Partial ACK arrived.  Force a fast retransmit. */
2970 *do_lost = tcp_force_fast_retransmit(sk);
2971 }
2972 return false;
2973}
2974
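/* Detect newly lost packets: the NewReno heuristic for non-SACK flows, or
 * RACK time-based marking for SACK flows (which may arm its own reordering
 * timer instead of the RTO/TLP timer).
 */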
2975static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
2976{
2977 struct tcp_sock *tp = tcp_sk(sk);
2978
2979 if (tcp_rtx_queue_empty(sk))
2980 return;
2981
2982 if (unlikely(tcp_is_reno(tp))) {
2983 tcp_newreno_mark_lost(sk, *ack_flag & FLAG_SND_UNA_ADVANCED);
2984 } else if (tcp_is_rack(sk)) {
2985 u32 prior_retrans = tp->retrans_out;
2986
2987 if (tcp_rack_mark_lost(sk))
2988 *ack_flag &= ~FLAG_SET_XMIT_TIMER;
2989 if (prior_retrans > tp->retrans_out)
2990 *ack_flag |= FLAG_LOST_RETRANS;
2991 }
2992}
2993
/* Process an event that can change packets-in-flight non-trivially.
 * The main goal is to compute a new estimate of what is "left out", taking
 * into account both packets sitting in the receiver's buffer and packets
 * still in flight to the receiver.
 *
 * Besides that, it performs the cwnd reduction when packet loss is detected
 * and drives the congestion-state machine.
 *
 * It does _not_ decide what to send; that is done in
 * tcp_xmit_retransmit_queue().
 */
3006static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
3007 int num_dupack, int *ack_flag, int *rexmit)
3008{
3009 struct inet_connection_sock *icsk = inet_csk(sk);
3010 struct tcp_sock *tp = tcp_sk(sk);
3011 int fast_rexmit = 0, flag = *ack_flag;
3012 bool ece_ack = flag & FLAG_ECE;
3013 bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
3014 tcp_force_fast_retransmit(sk));
3015
3016 if (!tp->packets_out && tp->sacked_out)
3017 tp->sacked_out = 0;
3018
/* Now the state machine starts.
 * A. ECE: the reduction is mandatory, so prohibit cwnd undo. */
3021 if (ece_ack)
3022 tp->prior_ssthresh = 0;
3023
/* B. In all states, check for reneging SACKs. */
3025 if (tcp_check_sack_reneging(sk, ack_flag))
3026 return;
3027
/* C. Check consistency of the current state. */
3029 tcp_verify_left_out(tp);
3030
/* D. Check state exit conditions.  A state can be terminated
 *    once high_seq is ACKed. */
3033 if (icsk->icsk_ca_state == TCP_CA_Open) {
3034 WARN_ON(tp->retrans_out != 0 && !tp->syn_data);
3035 tp->retrans_stamp = 0;
3036 } else if (!before(tp->snd_una, tp->high_seq)) {
3037 switch (icsk->icsk_ca_state) {
3038 case TCP_CA_CWR:
/* CWR is held until something *above* high_seq is ACKed, so the
 * CWR bit is guaranteed to have reached the receiver. */
3041 if (tp->snd_una != tp->high_seq) {
3042 tcp_end_cwnd_reduction(sk);
3043 tcp_set_ca_state(sk, TCP_CA_Open);
3044 }
3045 break;
3046
3047 case TCP_CA_Recovery:
3048 if (tcp_is_reno(tp))
3049 tcp_reset_reno_sack(tp);
3050 if (tcp_try_undo_recovery(sk))
3051 return;
3052 tcp_end_cwnd_reduction(sk);
3053 break;
3054 }
3055 }
3056
/* E. Process the state. */
3058 switch (icsk->icsk_ca_state) {
3059 case TCP_CA_Recovery:
3060 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
3061 if (tcp_is_reno(tp))
3062 tcp_add_reno_sack(sk, num_dupack, ece_ack);
3063 } else if (tcp_try_undo_partial(sk, prior_snd_una, &do_lost))
3064 return;
3065
3066 if (tcp_try_undo_dsack(sk))
3067 tcp_try_keep_open(sk);
3068
3069 tcp_identify_packet_loss(sk, ack_flag);
3070 if (icsk->icsk_ca_state != TCP_CA_Recovery) {
3071 if (!tcp_time_to_recover(sk, flag))
3072 return;
3073
/* An undo reverts the recovery state.  If loss is then evident,
 * start a fresh recovery (e.g. reordering followed by loss). */
3076 tcp_enter_recovery(sk, ece_ack);
3077 }
3078 break;
3079 case TCP_CA_Loss:
3080 tcp_process_loss(sk, flag, num_dupack, rexmit);
3081 if (icsk->icsk_ca_state != TCP_CA_Loss)
3082 tcp_update_rto_time(tp);
3083 tcp_identify_packet_loss(sk, ack_flag);
3084 if (!(icsk->icsk_ca_state == TCP_CA_Open ||
3085 (*ack_flag & FLAG_LOST_RETRANS)))
3086 return;
/* Change state if cwnd was undone or retransmits were lost */
3088 fallthrough;
3089 default:
3090 if (tcp_is_reno(tp)) {
3091 if (flag & FLAG_SND_UNA_ADVANCED)
3092 tcp_reset_reno_sack(tp);
3093 tcp_add_reno_sack(sk, num_dupack, ece_ack);
3094 }
3095
3096 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
3097 tcp_try_undo_dsack(sk);
3098
3099 tcp_identify_packet_loss(sk, ack_flag);
3100 if (!tcp_time_to_recover(sk, flag)) {
3101 tcp_try_to_open(sk, flag);
3102 return;
3103 }
3104
/* MTU probe failure: don't reduce cwnd */
3106 if (icsk->icsk_ca_state < TCP_CA_CWR &&
3107 icsk->icsk_mtup.probe_size &&
3108 tp->snd_una == tp->mtu_probe.probe_seq_start) {
3109 tcp_mtup_probe_failed(sk);
/* Restore the reduction we did in tcp_mtup_probe() */
3111 tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
3112 tcp_simple_retransmit(sk);
3113 return;
3114 }
3115
/* Otherwise enter the Recovery state */
3117 tcp_enter_recovery(sk, ece_ack);
3118 fast_rexmit = 1;
3119 }
3120
3121 if (!tcp_is_rack(sk) && do_lost)
3122 tcp_update_scoreboard(sk, fast_rexmit);
3123 *rexmit = REXMIT_LOST;
3124}
3125
3126static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
3127{
3128 u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
3129 struct tcp_sock *tp = tcp_sk(sk);
3130
3131 if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
/* A possibly delayed ACK inflates the RTT sample, so only let it
 * update the windowed min-RTT filter if it would actually lower the
 * current minimum.
 */
3136 return;
3137 }
3138 minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
3139 rtt_us ? : jiffies_to_usecs(1));
3140}
3141
3142static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
3143 long seq_rtt_us, long sack_rtt_us,
3144 long ca_rtt_us, struct rate_sample *rs)
3145{
3146 const struct tcp_sock *tp = tcp_sk(sk);
3147
/* Prefer an RTT measured from the ACK's timing over TSecr, since broken
 * middleboxes or peers may corrupt the TS-ECR field.  But Karn's
 * algorithm forbids taking an RTT sample when retransmitted data is
 * acked (RFC 6298).
 */
3153 if (seq_rtt_us < 0)
3154 seq_rtt_us = sack_rtt_us;
3155
/* RTTM rule: a TSecr value received in a segment updates the smoothed
 * RTT only if the segment acknowledges new data, i.e. only if it
 * advances the left edge of the send window (RFC 7323).
 */
3162 if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp &&
3163 tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED)
3164 seq_rtt_us = ca_rtt_us = tcp_rtt_tsopt_us(tp);
3165
	rs->rtt_us = ca_rtt_us; /* RTT of the last (S)ACKed packet, or -1 */
3167 if (seq_rtt_us < 0)
3168 return false;
3169
/* ca_rtt_us >= 0 relies on the invariant that ca_rtt_us is always taken
 * together with an ACK, SACK, or TS option; negative values were already
 * filtered by the seq_rtt_us < 0 check above.
 */
3174 tcp_update_rtt_min(sk, ca_rtt_us, flag);
3175 tcp_rtt_estimator(sk, seq_rtt_us);
3176 tcp_set_rto(sk);
3177
/* RFC 6298: only reset the backoff on a valid RTT measurement. */
3179 inet_csk(sk)->icsk_backoff = 0;
3180 return true;
3181}
3182
/* Compute the time elapsed between the (last) SYNACK send and the ACK that completes the 3WHS */
3184void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
3185{
3186 struct rate_sample rs;
3187 long rtt_us = -1L;
3188
3189 if (req && !req->num_retrans && tcp_rsk(req)->snt_synack)
3190 rtt_us = tcp_stamp_us_delta(tcp_clock_us(), tcp_rsk(req)->snt_synack);
3191
3192 tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us, &rs);
3193}
3194
3195
3196static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
3197{
3198 const struct inet_connection_sock *icsk = inet_csk(sk);
3199
3200 icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
3201 tcp_sk(sk)->snd_cwnd_stamp = tcp_jiffies32;
3202}
3203
/* Restart the retransmit timer after forward progress on the connection.
 * RFC 6298 recommends restarting the timer to now + RTO.
 */
3207void tcp_rearm_rto(struct sock *sk)
3208{
3209 const struct inet_connection_sock *icsk = inet_csk(sk);
3210 struct tcp_sock *tp = tcp_sk(sk);
3211
/* If the retrans timer is currently being used by Fast Open for SYN-ACK
 * retransmits, leave it alone.
 */
3215 if (rcu_access_pointer(tp->fastopen_rsk))
3216 return;
3217
3218 if (!tp->packets_out) {
3219 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
3220 } else {
3221 u32 rto = inet_csk(sk)->icsk_rto;
3222
3223 if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
3224 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
3225 s64 delta_us = tcp_rto_delta_us(sk);
3226
/* delta_us may not be positive if the socket was locked when the
 * retrans timer fired and got rescheduled.
 */
3229 rto = usecs_to_jiffies(max_t(int, delta_us, 1));
3230 }
3231 tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
3232 TCP_RTO_MAX);
3233 }
3234}
3235
/* Try to schedule a loss probe (TLP); if that is not possible, arm the RTO timer. */
3237static void tcp_set_xmit_timer(struct sock *sk)
3238{
3239 if (!tcp_schedule_loss_probe(sk, true))
3240 tcp_rearm_rto(sk);
3241}
3242
/* Part of a TSO packet was ACKed: trim the acked head and return how many
 * packets that covered.
 */
3244static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3245{
3246 struct tcp_sock *tp = tcp_sk(sk);
3247 u32 packets_acked;
3248
3249 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
3250
3251 packets_acked = tcp_skb_pcount(skb);
3252 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
3253 return 0;
3254 packets_acked -= tcp_skb_pcount(skb);
3255
3256 if (packets_acked) {
3257 BUG_ON(tcp_skb_pcount(skb) == 0);
3258 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
3259 }
3260
3261 return packets_acked;
3262}
3263
3264static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
3265 const struct sk_buff *ack_skb, u32 prior_snd_una)
3266{
3267 const struct skb_shared_info *shinfo;
3268
/* Avoid touching skb_shinfo() when no TX timestamp was requested */
3270 if (likely(!TCP_SKB_CB(skb)->txstamp_ack))
3271 return;
3272
3273 shinfo = skb_shinfo(skb);
3274 if (!before(shinfo->tskey, prior_snd_una) &&
3275 before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
3276 tcp_skb_tsorted_save(skb) {
3277 __skb_tstamp_tx(skb, ack_skb, NULL, sk, SCM_TSTAMP_ACK);
3278 } tcp_skb_tsorted_restore(skb);
3279 }
3280}
3281
/* Remove acknowledged frames from the retransmission queue.  If a packet
 * ends before the ACK sequence it is confirmed to have arrived at the other
 * end and can be freed.
 */
3286static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
3287 u32 prior_fack, u32 prior_snd_una,
3288 struct tcp_sacktag_state *sack, bool ece_ack)
3289{
3290 const struct inet_connection_sock *icsk = inet_csk(sk);
3291 u64 first_ackt, last_ackt;
3292 struct tcp_sock *tp = tcp_sk(sk);
3293 u32 prior_sacked = tp->sacked_out;
3294 u32 reord = tp->snd_nxt;
3295 struct sk_buff *skb, *next;
3296 bool fully_acked = true;
3297 long sack_rtt_us = -1L;
3298 long seq_rtt_us = -1L;
3299 long ca_rtt_us = -1L;
3300 u32 pkts_acked = 0;
3301 bool rtt_update;
3302 int flag = 0;
3303
3304 first_ackt = 0;
3305
3306 for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
3307 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3308 const u32 start_seq = scb->seq;
3309 u8 sacked = scb->sacked;
3310 u32 acked_pcount;
3311
3312
3313 if (after(scb->end_seq, tp->snd_una)) {
3314 if (tcp_skb_pcount(skb) == 1 ||
3315 !after(tp->snd_una, scb->seq))
3316 break;
3317
3318 acked_pcount = tcp_tso_acked(sk, skb);
3319 if (!acked_pcount)
3320 break;
3321 fully_acked = false;
3322 } else {
3323 acked_pcount = tcp_skb_pcount(skb);
3324 }
3325
3326 if (unlikely(sacked & TCPCB_RETRANS)) {
3327 if (sacked & TCPCB_SACKED_RETRANS)
3328 tp->retrans_out -= acked_pcount;
3329 flag |= FLAG_RETRANS_DATA_ACKED;
3330 } else if (!(sacked & TCPCB_SACKED_ACKED)) {
3331 last_ackt = tcp_skb_timestamp_us(skb);
3332 WARN_ON_ONCE(last_ackt == 0);
3333 if (!first_ackt)
3334 first_ackt = last_ackt;
3335
3336 if (before(start_seq, reord))
3337 reord = start_seq;
3338 if (!after(scb->end_seq, tp->high_seq))
3339 flag |= FLAG_ORIG_SACK_ACKED;
3340 }
3341
3342 if (sacked & TCPCB_SACKED_ACKED) {
3343 tp->sacked_out -= acked_pcount;
3344 } else if (tcp_is_sack(tp)) {
3345 tcp_count_delivered(tp, acked_pcount, ece_ack);
3346 if (!tcp_skb_spurious_retrans(tp, skb))
3347 tcp_rack_advance(tp, sacked, scb->end_seq,
3348 tcp_skb_timestamp_us(skb));
3349 }
3350 if (sacked & TCPCB_LOST)
3351 tp->lost_out -= acked_pcount;
3352
3353 tp->packets_out -= acked_pcount;
3354 pkts_acked += acked_pcount;
3355 tcp_rate_skb_delivered(sk, skb, sack->rate);
3356
/* The initial outgoing SYN sits on the rtx queue just like anything
 * else we transmit.  It is not real data, and if we told our callers
 * that this ACK acked real data we would erroneously exit connection
 * startup slow start one packet too early.
 */
3364 if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
3365 flag |= FLAG_DATA_ACKED;
3366 } else {
3367 flag |= FLAG_SYN_ACKED;
3368 tp->retrans_stamp = 0;
3369 }
3370
3371 if (!fully_acked)
3372 break;
3373
3374 tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una);
3375
3376 next = skb_rb_next(skb);
3377 if (unlikely(skb == tp->retransmit_skb_hint))
3378 tp->retransmit_skb_hint = NULL;
3379 if (unlikely(skb == tp->lost_skb_hint))
3380 tp->lost_skb_hint = NULL;
3381 tcp_highest_sack_replace(sk, skb, next);
3382 tcp_rtx_queue_unlink_and_free(skb, sk);
3383 }
3384
3385 if (!skb)
3386 tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
3387
3388 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3389 tp->snd_up = tp->snd_una;
3390
3391 if (skb) {
3392 tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una);
3393 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
3394 flag |= FLAG_SACK_RENEGING;
3395 }
3396
3397 if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
3398 seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
3399 ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
3400
3401 if (pkts_acked == 1 && fully_acked && !prior_sacked &&
3402 (tp->snd_una - prior_snd_una) < tp->mss_cache &&
3403 sack->rate->prior_delivered + 1 == tp->delivered &&
3404 !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
/* Conservatively mark a possibly delayed ACK: typically a lone small
 * packet over one round trip to a receiver with no out-of-order or CE
 * events.
 */
3409 flag |= FLAG_ACK_MAYBE_DELAYED;
3410 }
3411 }
3412 if (sack->first_sackt) {
3413 sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt);
3414 ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->last_sackt);
3415 }
3416 rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
3417 ca_rtt_us, sack->rate);
3418
3419 if (flag & FLAG_ACKED) {
3420 flag |= FLAG_SET_XMIT_TIMER;
3421 if (unlikely(icsk->icsk_mtup.probe_size &&
3422 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3423 tcp_mtup_probe_success(sk);
3424 }
3425
3426 if (tcp_is_reno(tp)) {
3427 tcp_remove_reno_sacks(sk, pkts_acked, ece_ack);
3428
/* If any cumulatively ACKed segment was retransmitted, the non-SACK
 * case cannot confirm that progress came from the original
 * transmissions, since there are no TCPCB_SACKED_ACKED bits to tell
 * them apart.
 */
3435 if (flag & FLAG_RETRANS_DATA_ACKED)
3436 flag &= ~FLAG_ORIG_SACK_ACKED;
3437 } else {
3438 int delta;
3439
/* A non-retransmitted hole got filled?  That's reordering */
3441 if (before(reord, prior_fack))
3442 tcp_check_sack_reordering(sk, reord, 0);
3443
3444 delta = prior_sacked - tp->sacked_out;
3445 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3446 }
3447 } else if (skb && rtt_update && sack_rtt_us >= 0 &&
3448 sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp,
3449 tcp_skb_timestamp_us(skb))) {
/* Re-arm the timer only when the SACK RTT was measured from data sent
 * no later than the head's last (re)transmission; otherwise the
 * timeout could keep extending during loss recovery.
 */
3454 flag |= FLAG_SET_XMIT_TIMER;
3455 }
3456
3457 if (icsk->icsk_ca_ops->pkts_acked) {
3458 struct ack_sample sample = { .pkts_acked = pkts_acked,
3459 .rtt_us = sack->rate->rtt_us };
3460
3461 sample.in_flight = tp->mss_cache *
3462 (tp->delivered - sack->rate->prior_delivered);
3463 icsk->icsk_ca_ops->pkts_acked(sk, &sample);
3464 }
3465
3466#if FASTRETRANS_DEBUG > 0
3467 WARN_ON((int)tp->sacked_out < 0);
3468 WARN_ON((int)tp->lost_out < 0);
3469 WARN_ON((int)tp->retrans_out < 0);
3470 if (!tp->packets_out && tcp_is_sack(tp)) {
3471 icsk = inet_csk(sk);
3472 if (tp->lost_out) {
3473 pr_debug("Leak l=%u %d\n",
3474 tp->lost_out, icsk->icsk_ca_state);
3475 tp->lost_out = 0;
3476 }
3477 if (tp->sacked_out) {
3478 pr_debug("Leak s=%u %d\n",
3479 tp->sacked_out, icsk->icsk_ca_state);
3480 tp->sacked_out = 0;
3481 }
3482 if (tp->retrans_out) {
3483 pr_debug("Leak r=%u %d\n",
3484 tp->retrans_out, icsk->icsk_ca_state);
3485 tp->retrans_out = 0;
3486 }
3487 }
3488#endif
3489 return flag;
3490}
3491
3492static void tcp_ack_probe(struct sock *sk)
3493{
3494 struct inet_connection_sock *icsk = inet_csk(sk);
3495 struct sk_buff *head = tcp_send_head(sk);
3496 const struct tcp_sock *tp = tcp_sk(sk);
3497
/* Was it a usable window open? */
3499 if (!head)
3500 return;
3501 if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
3502 icsk->icsk_backoff = 0;
3503 icsk->icsk_probes_tstamp = 0;
3504 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
/* The socket must be woken up by a subsequent tcp_data_snd_check();
 * this function is not for general use.
 */
3508 } else {
3509 unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
3510
3511 when = tcp_clamp_probe0_to_user_timeout(sk, when);
3512 tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX);
3513 }
3514}
3515
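/* An ACK is "dubious" if it is a duplicate, carries a congestion signal, or
 * arrives while we are not in the Open state.
 */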
3516static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
3517{
3518 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3519 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3520}
3521
/* Decide whether to run the cwnd increase function of the congestion control. */
3523static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3524{
/* If reordering is high, grow cwnd whenever data is delivered, regardless
 * of ordering.  Otherwise stay conservative and only grow cwnd on
 * in-order delivery (RFC 5681).  A stretched ACK with a new SACK or ECE
 * mark may first advance cwnd here and later reduce it in
 * tcp_fastretrans_alert() based on more state.
 */
3531 if (tcp_sk(sk)->reordering >
3532 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering))
3533 return flag & FLAG_FORWARD_PROGRESS;
3534
3535 return flag & FLAG_DATA_ACKED;
3536}
3537
/* The congestion-control hook that replaces the rigid cwnd
 * increase/decrease logic (tcp_cong_avoid, tcp_cwnd_reduction) when the CC
 * module provides cong_control().  It is called near the end of ACK
 * processing with precise rate information; transmissions and
 * retransmissions are deferred until afterwards.
 */
3543static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
3544 int flag, const struct rate_sample *rs)
3545{
3546 const struct inet_connection_sock *icsk = inet_csk(sk);
3547
3548 if (icsk->icsk_ca_ops->cong_control) {
3549 icsk->icsk_ca_ops->cong_control(sk, rs);
3550 return;
3551 }
3552
3553 if (tcp_in_cwnd_reduction(sk)) {
/* Reduce cwnd if the state mandates it */
3555 tcp_cwnd_reduction(sk, acked_sacked, rs->losses, flag);
3556 } else if (tcp_may_raise_cwnd(sk, flag)) {
/* Advance cwnd if the state allows it */
3558 tcp_cong_avoid(sk, ack, acked_sacked);
3559 }
3560 tcp_update_pacing_rate(sk);
3561}
3562
/* Check that a window update is acceptable.
 * Assumes that snd_una <= ack <= snd_nxt.
 */
3566static inline bool tcp_may_update_window(const struct tcp_sock *tp,
3567 const u32 ack, const u32 ack_seq,
3568 const u32 nwin)
3569{
3570 return after(ack, tp->snd_una) ||
3571 after(ack_seq, tp->snd_wl1) ||
3572 (ack_seq == tp->snd_wl1 && (nwin > tp->snd_wnd || !nwin));
3573}
3574
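/* TCP-AO: bump the send-side Sequence Number Extension (SNE) when SND.UNA
 * wraps around, i.e. the new ack value is numerically below snd_una.
 */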
3575static void tcp_snd_sne_update(struct tcp_sock *tp, u32 ack)
3576{
3577#ifdef CONFIG_TCP_AO
3578 struct tcp_ao_info *ao;
3579
3580 if (!static_branch_unlikely(&tcp_ao_needed.key))
3581 return;
3582
3583 ao = rcu_dereference_protected(tp->ao_info,
3584 lockdep_sock_is_held((struct sock *)tp));
3585 if (ao && ack < tp->snd_una)
3586 ao->snd_sne++;
3587#endif
3588}
3589
/* When updating tp->snd_una, also update tp->bytes_acked (and the AO SNE) */
3591static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
3592{
3593 u32 delta = ack - tp->snd_una;
3594
3595 sock_owned_by_me((struct sock *)tp);
3596 tp->bytes_acked += delta;
3597 tcp_snd_sne_update(tp, ack);
3598 tp->snd_una = ack;
3599}
3600
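/* TCP-AO: bump the receive-side Sequence Number Extension (SNE) when
 * RCV.NXT wraps around.
 */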
3601static void tcp_rcv_sne_update(struct tcp_sock *tp, u32 seq)
3602{
3603#ifdef CONFIG_TCP_AO
3604 struct tcp_ao_info *ao;
3605
3606 if (!static_branch_unlikely(&tcp_ao_needed.key))
3607 return;
3608
3609 ao = rcu_dereference_protected(tp->ao_info,
3610 lockdep_sock_is_held((struct sock *)tp));
3611 if (ao && seq < tp->rcv_nxt)
3612 ao->rcv_sne++;
3613#endif
3614}
3615
/* When updating tp->rcv_nxt, also update tp->bytes_received (and the AO SNE) */
3617static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
3618{
3619 u32 delta = seq - tp->rcv_nxt;
3620
3621 sock_owned_by_me((struct sock *)tp);
3622 tp->bytes_received += delta;
3623 tcp_rcv_sne_update(tp, seq);
3624 WRITE_ONCE(tp->rcv_nxt, seq);
3625}
3626
/* Update our send window (SND.WND, SND.WL1) and SND.UNA from the incoming
 * ACK, using the window update acceptability check above.
 */
3632static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
3633 u32 ack_seq)
3634{
3635 struct tcp_sock *tp = tcp_sk(sk);
3636 int flag = 0;
3637 u32 nwin = ntohs(tcp_hdr(skb)->window);
3638
3639 if (likely(!tcp_hdr(skb)->syn))
3640 nwin <<= tp->rx_opt.snd_wscale;
3641
3642 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3643 flag |= FLAG_WIN_UPDATE;
3644 tcp_update_wl(tp, ack_seq);
3645
3646 if (tp->snd_wnd != nwin) {
3647 tp->snd_wnd = nwin;
3648
/* Clear the prediction flags and re-check whether the sender-side
 * header-prediction fast path can be re-enabled.
 */
3652 tp->pred_flags = 0;
3653 tcp_fast_path_check(sk);
3654
3655 if (!tcp_write_queue_empty(sk))
3656 tcp_slow_start_after_idle_check(sk);
3657
3658 if (nwin > tp->max_window) {
3659 tp->max_window = nwin;
3660 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3661 }
3662 }
3663 }
3664
3665 tcp_snd_una_update(tp, ack);
3666
3667 return flag;
3668}
3669
3670static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
3671 u32 *last_oow_ack_time)
3672{
/* Paired with the WRITE_ONCE() below, as this may run locklessly. */
3674 u32 val = READ_ONCE(*last_oow_ack_time);
3675
3676 if (val) {
3677 s32 elapsed = (s32)(tcp_jiffies32 - val);
3678
3679 if (0 <= elapsed &&
3680 elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
3681 NET_INC_STATS(net, mib_idx);
3682 return true;
3683 }
3684 }
3685
/* Paired with the earlier READ_ONCE() and with itself, since we may be
 * running without the socket lock.
 */
3689 WRITE_ONCE(*last_oow_ack_time, tcp_jiffies32);
3690
3691 return false;
3692}
3693
3694
/* Return true if we are currently rate-limiting out-of-window ACKs and thus
 * should not send a dupack right now.  We rate-limit dupacks in response to
 * out-of-window SYNs or ACKs to mitigate ACK loops or DoS attacks: no
 * duplicate SYNACK or ACK is sent while the remote endpoint is emitting
 * out-of-window SYNs or pure ACKs at a high rate.
 */
3701bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
3702 int mib_idx, u32 *last_oow_ack_time)
3703{
/* Data packets without SYNs are not likely part of an ACK loop. */
3705 if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
3706 !tcp_hdr(skb)->syn)
3707 return false;
3708
3709 return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
3710}
3711
/* RFC 5961 7 [ACK Throttling] */
3713static void tcp_send_challenge_ack(struct sock *sk)
3714{
3715 struct tcp_sock *tp = tcp_sk(sk);
3716 struct net *net = sock_net(sk);
3717 u32 count, now, ack_limit;
3718
/* First check our per-socket dupack rate limit. */
3720 if (__tcp_oow_rate_limited(net,
3721 LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
3722 &tp->last_oow_ack_time))
3723 return;
3724
3725 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit);
3726 if (ack_limit == INT_MAX)
3727 goto send_ack;
3728
/* Then check the host-wide RFC 5961 rate limit. */
3730 now = jiffies / HZ;
3731 if (now != READ_ONCE(net->ipv4.tcp_challenge_timestamp)) {
3732 u32 half = (ack_limit + 1) >> 1;
3733
3734 WRITE_ONCE(net->ipv4.tcp_challenge_timestamp, now);
3735 WRITE_ONCE(net->ipv4.tcp_challenge_count,
3736 get_random_u32_inclusive(half, ack_limit + half - 1));
3737 }
3738 count = READ_ONCE(net->ipv4.tcp_challenge_count);
3739 if (count > 0) {
3740 WRITE_ONCE(net->ipv4.tcp_challenge_count, count - 1);
3741send_ack:
3742 NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
3743 tcp_send_ack(sk);
3744 }
3745}
3746
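/* Remember the peer's most recent TSval and when it was stored, for PAWS
 * checks and for echoing in TSecr.
 */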
3747static void tcp_store_ts_recent(struct tcp_sock *tp)
3748{
3749 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3750 tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
3751}
3752
3753static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3754{
3755 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
/* Only accept a new ts_recent when the segment does not advance past
 * rcv_wup (e.g. a pure ACK or retransmit) and its timestamp passes the
 * PAWS check, so an old or bogus TSval cannot clobber ts_recent.
 */
3763 if (tcp_paws_check(&tp->rx_opt, 0))
3764 tcp_store_ts_recent(tp);
3765 }
3766}
3767
/* Handle ACKs during a TLP episode and end the episode by clearing
 * tlp_high_seq.  See the RACK-TLP specification (RFC 8985).
 */
3771static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
3772{
3773 struct tcp_sock *tp = tcp_sk(sk);
3774
3775 if (before(ack, tp->tlp_high_seq))
3776 return;
3777
3778 if (!tp->tlp_retrans) {
/* A TLP probe carrying new data was acknowledged */
3780 tp->tlp_high_seq = 0;
3781 } else if (flag & FLAG_DSACK_TLP) {
/* The DSACK shows that both the original and the TLP probe arrived: no loss */
3783 tp->tlp_high_seq = 0;
3784 } else if (after(ack, tp->tlp_high_seq)) {
/* The ACK advanced past the TLP probe without a DSACK: something was
 * lost, so reduce cwnd (tlp_high_seq is cleared in
 * tcp_init_cwnd_reduction()).
 */
3788 tcp_init_cwnd_reduction(sk);
3789 tcp_set_ca_state(sk, TCP_CA_CWR);
3790 tcp_end_cwnd_reduction(sk);
3791 tcp_try_keep_open(sk);
3792 NET_INC_STATS(sock_net(sk),
3793 LINUX_MIB_TCPLOSSPROBERECOVERY);
3794 } else if (!(flag & (FLAG_SND_UNA_ADVANCED |
3795 FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
/* Pure dupack: the original and the TLP probe both arrived; no loss */
3797 tp->tlp_high_seq = 0;
3798 }
3799}
3800
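/* Let the congestion control module observe this ACK, if it implements the
 * in_ack_event() hook.
 */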
3801static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
3802{
3803 const struct inet_connection_sock *icsk = inet_csk(sk);
3804
3805 if (icsk->icsk_ca_ops->in_ack_event)
3806 icsk->icsk_ca_ops->in_ack_event(sk, flags);
3807}
3808
/* Congestion control has already updated cwnd, so if we are in loss recovery
 * do any new sends (for F-RTO) or retransmits (for CA_Loss or CA_Recovery)
 * that now make sense.
 */
3813static void tcp_xmit_recovery(struct sock *sk, int rexmit)
3814{
3815 struct tcp_sock *tp = tcp_sk(sk);
3816
3817 if (rexmit == REXMIT_NONE || sk->sk_state == TCP_SYN_SENT)
3818 return;
3819
3820 if (unlikely(rexmit == REXMIT_NEW)) {
3821 __tcp_push_pending_frames(sk, tcp_current_mss(sk),
3822 TCP_NAGLE_OFF);
3823 if (after(tp->snd_nxt, tp->high_seq))
3824 return;
3825 tp->frto = 0;
3826 }
3827 tcp_xmit_retransmit_queue(sk);
3828}
3829
/* Return the number of packets newly acked or sacked by the current ACK */
3831static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
3832{
3833 const struct net *net = sock_net(sk);
3834 struct tcp_sock *tp = tcp_sk(sk);
3835 u32 delivered;
3836
3837 delivered = tp->delivered - prior_delivered;
3838 NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered);
3839 if (flag & FLAG_ECE)
3840 NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered);
3841
3842 return delivered;
3843}
3844
/* This routine deals with incoming ACKs, but not outgoing ones. */
3846static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3847{
3848 struct inet_connection_sock *icsk = inet_csk(sk);
3849 struct tcp_sock *tp = tcp_sk(sk);
3850 struct tcp_sacktag_state sack_state;
3851 struct rate_sample rs = { .prior_delivered = 0 };
3852 u32 prior_snd_una = tp->snd_una;
3853 bool is_sack_reneg = tp->is_sack_reneg;
3854 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3855 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3856 int num_dupack = 0;
3857 int prior_packets = tp->packets_out;
3858 u32 delivered = tp->delivered;
3859 u32 lost = tp->lost;
3860 int rexmit = REXMIT_NONE;
3861 u32 prior_fack;
3862
3863 sack_state.first_sackt = 0;
3864 sack_state.rate = &rs;
3865 sack_state.sack_delivered = 0;
3866
3867
3868 prefetch(sk->tcp_rtx_queue.rb_node);
3869
/* An ACK older than previous ACKs can usually be ignored. */
3873 if (before(ack, prior_snd_una)) {
3874 u32 max_window;
3875
/* Do not accept an ACK for bytes we never sent. */
3877 max_window = min_t(u64, tp->max_window, tp->bytes_acked);
/* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
3879 if (before(ack, prior_snd_una - max_window)) {
3880 if (!(flag & FLAG_NO_CHALLENGE_ACK))
3881 tcp_send_challenge_ack(sk);
3882 return -SKB_DROP_REASON_TCP_TOO_OLD_ACK;
3883 }
3884 goto old_ack;
3885 }
3886
/* If the ACK covers data we have not sent yet, discard this segment
 * (RFC 793 Section 3.9).
 */
3890 if (after(ack, tp->snd_nxt))
3891 return -SKB_DROP_REASON_TCP_ACK_UNSENT_DATA;
3892
3893 if (after(ack, prior_snd_una)) {
3894 flag |= FLAG_SND_UNA_ADVANCED;
3895 icsk->icsk_retransmits = 0;
3896
3897#if IS_ENABLED(CONFIG_TLS_DEVICE)
3898 if (static_branch_unlikely(&clean_acked_data_enabled.key))
3899 if (icsk->icsk_clean_acked)
3900 icsk->icsk_clean_acked(sk, ack);
3901#endif
3902 }
3903
3904 prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
3905 rs.prior_in_flight = tcp_packets_in_flight(tp);
3906
/* The ts_recent update must be made after we are sure the packet is in
 * the window.
 */
3910 if (flag & FLAG_UPDATE_TS_RECENT)
3911 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
3912
3913 if ((flag & (FLAG_SLOWPATH | FLAG_SND_UNA_ADVANCED)) ==
3914 FLAG_SND_UNA_ADVANCED) {
/* Window is constant: pure forward advance, so no further checks are
 * required.  Note that SND.UNA >= SND.WL2 here.
 */
3919 tcp_update_wl(tp, ack_seq);
3920 tcp_snd_una_update(tp, ack);
3921 flag |= FLAG_WIN_UPDATE;
3922
3923 tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
3924
3925 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
3926 } else {
3927 u32 ack_ev_flags = CA_ACK_SLOWPATH;
3928
3929 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3930 flag |= FLAG_DATA;
3931 else
3932 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3933
3934 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3935
3936 if (TCP_SKB_CB(skb)->sacked)
3937 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3938 &sack_state);
3939
3940 if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
3941 flag |= FLAG_ECE;
3942 ack_ev_flags |= CA_ACK_ECE;
3943 }
3944
3945 if (sack_state.sack_delivered)
3946 tcp_count_delivered(tp, sack_state.sack_delivered,
3947 flag & FLAG_ECE);
3948
3949 if (flag & FLAG_WIN_UPDATE)
3950 ack_ev_flags |= CA_ACK_WIN_UPDATE;
3951
3952 tcp_in_ack_event(sk, ack_ev_flags);
3953 }
3954
/* This deviates from RFC 3168, which says the sender "SHOULD set the CWR
 * bit only on the first new data packet" after reducing the congestion
 * window: we also accept CWR on pure ACKs, to be robust against widely
 * deployed implementations that do this.
 */
3962 tcp_ecn_accept_cwr(sk, skb);
3963
/* We passed data and got it acked, so clear any soft error log:
 * something worked.
 */
3967 WRITE_ONCE(sk->sk_err_soft, 0);
3968 icsk->icsk_probes_out = 0;
3969 tp->rcv_tstamp = tcp_jiffies32;
3970 if (!prior_packets)
3971 goto no_queue;
3972
/* See if we can take anything off of the retransmit queue. */
3974 flag |= tcp_clean_rtx_queue(sk, skb, prior_fack, prior_snd_una,
3975 &sack_state, flag & FLAG_ECE);
3976
3977 tcp_rack_update_reo_wnd(sk, &rs);
3978
3979 if (tp->tlp_high_seq)
3980 tcp_process_tlp_ack(sk, ack, flag);
3981
3982 if (tcp_ack_is_dubious(sk, flag)) {
3983 if (!(flag & (FLAG_SND_UNA_ADVANCED |
3984 FLAG_NOT_DUP | FLAG_DSACKING_ACK))) {
3985 num_dupack = 1;
/* Account for pure ACKs that were aggregated in tcp_add_backlog() */
3987 if (!(flag & FLAG_DATA))
3988 num_dupack = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
3989 }
3990 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
3991 &rexmit);
3992 }
3993
/* If needed, reset the TLP/RTO timer when RACK did not set it. */
3995 if (flag & FLAG_SET_XMIT_TIMER)
3996 tcp_set_xmit_timer(sk);
3997
3998 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3999 sk_dst_confirm(sk);
4000
4001 delivered = tcp_newly_delivered(sk, delivered, flag);
4002 lost = tp->lost - lost;
4003 rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
4004 tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
4005 tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
4006 tcp_xmit_recovery(sk, rexmit);
4007 return 1;
4008
4009no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
4011 if (flag & FLAG_DSACKING_ACK) {
4012 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
4013 &rexmit);
4014 tcp_newly_delivered(sk, delivered, flag);
4015 }
4016
/* If this ACK opens up a zero window, clear the backoff; it was being
 * used to time the probes and is probably far higher than needed for
 * normal retransmission.
 */
4020 tcp_ack_probe(sk);
4021
4022 if (tp->tlp_high_seq)
4023 tcp_process_tlp_ack(sk, ack, flag);
4024 return 1;
4025
4026old_ack:
/* If data was SACKed, tag it and see if we should send more data.
 * If data was DSACKed, see if we can undo a cwnd reduction.
 */
4030 if (TCP_SKB_CB(skb)->sacked) {
4031 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
4032 &sack_state);
4033 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
4034 &rexmit);
4035 tcp_newly_delivered(sk, delivered, flag);
4036 tcp_xmit_recovery(sk, rexmit);
4037 }
4038
4039 return 0;
4040}
4041
4042static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
4043 bool syn, struct tcp_fastopen_cookie *foc,
4044 bool exp_opt)
4045{
/* The Fast Open cookie option is valid only in a SYN or SYN-ACK and with an even cookie length. */
4047 if (!foc || !syn || len < 0 || (len & 1))
4048 return;
4049
4050 if (len >= TCP_FASTOPEN_COOKIE_MIN &&
4051 len <= TCP_FASTOPEN_COOKIE_MAX)
4052 memcpy(foc->val, cookie, len);
4053 else if (len != 0)
4054 len = -1;
4055 foc->len = len;
4056 foc->exp = exp_opt;
4057}
4058
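/* Check a SYN option for the experimental SMC indication (a TCPOPT_EXP
 * option carrying TCPOPT_SMC_MAGIC).
 */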
4059static bool smc_parse_options(const struct tcphdr *th,
4060 struct tcp_options_received *opt_rx,
4061 const unsigned char *ptr,
4062 int opsize)
4063{
4064#if IS_ENABLED(CONFIG_SMC)
4065 if (static_branch_unlikely(&tcp_have_smc)) {
4066 if (th->syn && !(opsize & 1) &&
4067 opsize >= TCPOLEN_EXP_SMC_BASE &&
4068 get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) {
4069 opt_rx->smc_ok = 1;
4070 return true;
4071 }
4072 }
4073#endif
4074 return false;
4075}
4076
/* Try to parse the MSS option from the TCP header.  Return 0 on failure, or
 * the (possibly user_mss-clamped) value on success.
 */
4080u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
4081{
4082 const unsigned char *ptr = (const unsigned char *)(th + 1);
4083 int length = (th->doff * 4) - sizeof(struct tcphdr);
4084 u16 mss = 0;
4085
4086 while (length > 0) {
4087 int opcode = *ptr++;
4088 int opsize;
4089
4090 switch (opcode) {
4091 case TCPOPT_EOL:
4092 return mss;
4093 case TCPOPT_NOP:
4094 length--;
4095 continue;
4096 default:
4097 if (length < 2)
4098 return mss;
4099 opsize = *ptr++;
4100 if (opsize < 2)
4101 return mss;
4102 if (opsize > length)
4103 return mss;
4104 if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) {
4105 u16 in_mss = get_unaligned_be16(ptr);
4106
4107 if (in_mss) {
4108 if (user_mss && user_mss < in_mss)
4109 in_mss = user_mss;
4110 mss = in_mss;
4111 }
4112 }
4113 ptr += opsize - 2;
4114 length -= opsize;
4115 }
4116 }
4117 return mss;
4118}
4119EXPORT_SYMBOL_GPL(tcp_parse_mss_option);
4120
/* Look for TCP options.  Normally only called on SYN and SYN-ACK packets,
 * but may also be called on packets in the established flow when the fast
 * parsing path fails.
 */
4125void tcp_parse_options(const struct net *net,
4126 const struct sk_buff *skb,
4127 struct tcp_options_received *opt_rx, int estab,
4128 struct tcp_fastopen_cookie *foc)
4129{
4130 const unsigned char *ptr;
4131 const struct tcphdr *th = tcp_hdr(skb);
4132 int length = (th->doff * 4) - sizeof(struct tcphdr);
4133
4134 ptr = (const unsigned char *)(th + 1);
4135 opt_rx->saw_tstamp = 0;
4136 opt_rx->saw_unknown = 0;
4137
4138 while (length > 0) {
4139 int opcode = *ptr++;
4140 int opsize;
4141
4142 switch (opcode) {
4143 case TCPOPT_EOL:
4144 return;
4145 case TCPOPT_NOP:
4146 length--;
4147 continue;
4148 default:
4149 if (length < 2)
4150 return;
4151 opsize = *ptr++;
4152 if (opsize < 2)
4153 return;
4154 if (opsize > length)
4155 return;
4156 switch (opcode) {
4157 case TCPOPT_MSS:
4158 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
4159 u16 in_mss = get_unaligned_be16(ptr);
4160 if (in_mss) {
4161 if (opt_rx->user_mss &&
4162 opt_rx->user_mss < in_mss)
4163 in_mss = opt_rx->user_mss;
4164 opt_rx->mss_clamp = in_mss;
4165 }
4166 }
4167 break;
4168 case TCPOPT_WINDOW:
4169 if (opsize == TCPOLEN_WINDOW && th->syn &&
4170 !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) {
4171 __u8 snd_wscale = *(__u8 *)ptr;
4172 opt_rx->wscale_ok = 1;
4173 if (snd_wscale > TCP_MAX_WSCALE) {
4174 net_info_ratelimited("%s: Illegal window scaling value %d > %u received\n",
4175 __func__,
4176 snd_wscale,
4177 TCP_MAX_WSCALE);
4178 snd_wscale = TCP_MAX_WSCALE;
4179 }
4180 opt_rx->snd_wscale = snd_wscale;
4181 }
4182 break;
4183 case TCPOPT_TIMESTAMP:
4184 if ((opsize == TCPOLEN_TIMESTAMP) &&
4185 ((estab && opt_rx->tstamp_ok) ||
4186 (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) {
4187 opt_rx->saw_tstamp = 1;
4188 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
4189 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
4190 }
4191 break;
4192 case TCPOPT_SACK_PERM:
4193 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
4194 !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) {
4195 opt_rx->sack_ok = TCP_SACK_SEEN;
4196 tcp_sack_reset(opt_rx);
4197 }
4198 break;
4199
4200 case TCPOPT_SACK:
4201 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
4202 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
4203 opt_rx->sack_ok) {
4204 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
4205 }
4206 break;
4207#ifdef CONFIG_TCP_MD5SIG
4208 case TCPOPT_MD5SIG:
/* The MD5 hash has already been checked
 * (see tcp_v{4,6}_rcv()).
 */
4212 break;
4213#endif
4214 case TCPOPT_FASTOPEN:
4215 tcp_parse_fastopen_option(
4216 opsize - TCPOLEN_FASTOPEN_BASE,
4217 ptr, th->syn, foc, false);
4218 break;
4219
4220 case TCPOPT_EXP:
/* The Fast Open option shares experimental option kind 254,
 * distinguished by a 16-bit magic number.
 */