// SPDX-License-Identifier: GPL-2.0
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 */
#define pr_fmt(fmt) "TCP: " fmt

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/kernel.h>
#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <linux/errqueue.h>
#include <trace/events/tcp.h>
#include <linux/jump_label_ratelimit.h>
#include <net/busy_poll.h>
#include <net/mptcp.h>

int sysctl_tcp_max_orphans __read_mostly = NR_FILE;

#define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
#define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
#define FLAG_DATA_ACKED		0x04 /* This ACK acknowledged new data.		*/
#define FLAG_RETRANS_DATA_ACKED	0x08 /* "" "" some of which was retransmitted.	*/
#define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN.		*/
#define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
#define FLAG_ECE		0x40 /* ECE in this ACK				*/
#define FLAG_LOST_RETRANS	0x80 /* This ACK marks some retransmission lost */
#define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
#define FLAG_ORIG_SACK_ACKED	0x200 /* Never retransmitted data are (s)acked	*/
#define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
#define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
#define FLAG_SET_XMIT_TIMER	0x1000 /* Set TLP or RTO timer */
#define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq */
#define FLAG_UPDATE_TS_RECENT	0x4000 /* tcp_replace_ts_recent() */
#define FLAG_NO_CHALLENGE_ACK	0x8000 /* do not call tcp_send_challenge_ack()	*/
#define FLAG_ACK_MAYBE_DELAYED	0x10000 /* Likely a delayed ACK */

#define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
#define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)

#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))

#define REXMIT_NONE	0 /* no loss recovery to do */
#define REXMIT_LOST	1 /* retransmit packets marked lost */
#define REXMIT_NEW	2 /* FRTO-style transmit of unsent/new packets */

#if IS_ENABLED(CONFIG_TLS_DEVICE)
static DEFINE_STATIC_KEY_DEFERRED_FALSE(clean_acked_data_enabled, HZ);

void clean_acked_data_enable(struct inet_connection_sock *icsk,
			     void (*cad)(struct sock *sk, u32 ack_seq))
{
	icsk->icsk_clean_acked = cad;
	static_branch_deferred_inc(&clean_acked_data_enabled);
}
EXPORT_SYMBOL_GPL(clean_acked_data_enable);

void clean_acked_data_disable(struct inet_connection_sock *icsk)
{
	static_branch_slow_dec_deferred(&clean_acked_data_enabled);
	icsk->icsk_clean_acked = NULL;
}
EXPORT_SYMBOL_GPL(clean_acked_data_disable);

void clean_acked_data_flush(void)
{
	static_key_deferred_flush(&clean_acked_data_enabled);
}
EXPORT_SYMBOL_GPL(clean_acked_data_flush);
#endif

#ifdef CONFIG_CGROUP_BPF
static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
{
	bool unknown_opt = tcp_sk(sk)->rx_opt.saw_unknown &&
			   BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
						  BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG);
	bool parse_all_opt = BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
						    BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
	struct bpf_sock_ops_kern sock_ops;

	if (likely(!unknown_opt && !parse_all_opt))
		return;

	/* The skb will be handled in the
	 * bpf_skops_established() or
	 * bpf_skops_write_hdr_opt().
	 */
	switch (sk->sk_state) {
	case TCP_SYN_RECV:
	case TCP_SYN_SENT:
	case TCP_LISTEN:
		return;
	}

	sock_owned_by_me(sk);

	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
	sock_ops.op = BPF_SOCK_OPS_PARSE_HDR_OPT_CB;
	sock_ops.is_fullsock = 1;
	sock_ops.sk = sk;
	bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));

	BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
}

static void bpf_skops_established(struct sock *sk, int bpf_op,
				  struct sk_buff *skb)
{
	struct bpf_sock_ops_kern sock_ops;

	sock_owned_by_me(sk);

	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
	sock_ops.op = bpf_op;
	sock_ops.is_fullsock = 1;
	sock_ops.sk = sk;

	if (skb)
		bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));

	BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
}
#else
static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
{
}

static void bpf_skops_established(struct sock *sk, int bpf_op,
				  struct sk_buff *skb)
{
}
#endif

static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
			     unsigned int len)
{
	static bool __once __read_mostly;

	if (!__once) {
		struct net_device *dev;

		__once = true;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
		if (!dev || len >= dev->mtu)
			pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
				dev ? dev->name : "Unknown driver");
		rcu_read_unlock();
	}
}

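/* Adapt the MSS value used to make delayed ack decision to the
 * real world.
 */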
static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const unsigned int lss = icsk->icsk_ack.last_seg_size;
	unsigned int len;

	icsk->icsk_ack.last_seg_size = 0;

	/* skb->len may jitter because of SACKs, even if peer
	 * sends good full-sized frames.
	 */
	len = skb_shinfo(skb)->gso_size ? : skb->len;
	if (len >= icsk->icsk_ack.rcv_mss) {
		icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
					       tcp_sk(sk)->advmss);
		/* Account for possibly-removed options */
		if (unlikely(len > icsk->icsk_ack.rcv_mss +
				   MAX_TCP_OPTION_SPACE))
			tcp_gro_dev_warn(sk, skb, len);
	} else {
		/* Otherwise, we make more careful check taking into
		 * account, that SACKs block is variable.
		 *
		 * "len" is invariant segment length, including TCP header.
		 */
		len += skb->data - skb_transport_header(skb);
		if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
		    /* If PSH is not set, packet should be
		     * full sized, provided peer TCP is not badly broken.
		     * This observation (if it is correct 8)) allows
		     * to handle super-low mtu links fairly.
		     */
		    (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
		     !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
			/* Subtract also invariant (if peer is RFC compliant),
			 * tcp header plus fixed timestamp option length.
			 * Resulting "len" is MSS free of SACK jitter.
			 */
			len -= tcp_sk(sk)->tcp_header_len;
			icsk->icsk_ack.last_seg_size = len;
			if (len == lss) {
				icsk->icsk_ack.rcv_mss = len;
				return;
			}
		}
		if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
		icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
	}
}

static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);

	if (quickacks == 0)
		quickacks = 2;
	quickacks = min(quickacks, max_quickacks);
	if (quickacks > icsk->icsk_ack.quick)
		icsk->icsk_ack.quick = quickacks;
}

void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_incr_quickack(sk, max_quickacks);
	inet_csk_exit_pingpong_mode(sk);
	icsk->icsk_ack.ato = TCP_ATO_MIN;
}
EXPORT_SYMBOL(tcp_enter_quickack_mode);

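/* Send ACKs quickly, if "quick" count is not exhausted
 * and the session is not interactive.
 */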
static bool tcp_in_quickack_mode(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct dst_entry *dst = __sk_dst_get(sk);

	return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
		(icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk));
}

static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
{
	if (tp->ecn_flags & TCP_ECN_OK)
		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
}

static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb)
{
	if (tcp_hdr(skb)->cwr) {
		tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR;

		/* If the sender is telling us it has entered CWR, then its
		 * cwnd may be very low (even just 1 packet), so we should ACK
		 * immediately.
		 */
		if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
			inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
	}
}

static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
{
	tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
}

static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
	case INET_ECN_NOT_ECT:
		/* Funny extension: if ECT is not set on a segment,
		 * and we already seen ECT on a previous segment,
		 * it is probably a retransmit.
		 */
		if (tp->ecn_flags & TCP_ECN_SEEN)
			tcp_enter_quickack_mode(sk, 2);
		break;
	case INET_ECN_CE:
		if (tcp_ca_needs_ecn(sk))
			tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);

		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
			/* Better not delay ACK here: the sender needs the
			 * ECN echo as soon as possible to reduce its cwnd.
			 */
			tcp_enter_quickack_mode(sk, 2);
			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
		}
		tp->ecn_flags |= TCP_ECN_SEEN;
		break;
	default:
		if (tcp_ca_needs_ecn(sk))
			tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
		tp->ecn_flags |= TCP_ECN_SEEN;
		break;
	}
}

static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
{
	if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK)
		__tcp_ecn_check_ce(sk, skb);
}

static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
{
	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
		tp->ecn_flags &= ~TCP_ECN_OK;
}

static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
{
	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
		tp->ecn_flags &= ~TCP_ECN_OK;
}

static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
{
	if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
		return true;
	return false;
}

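/* Buffer size and advertised window tuning.
 *
 * 1. Tuning sk->sk_sndbuf, when connection enters established state.
 */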
static void tcp_sndbuf_expand(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
	int sndmem, per_mss;
	u32 nr_segs;

	/* Worst case is non GSO/TSO : each frame consumes one skb
	 * and skb->head is kmalloced using power of two area of memory
	 */
	per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
		  MAX_TCP_HEADER +
		  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	per_mss = roundup_pow_of_two(per_mss) +
		  SKB_DATA_ALIGN(sizeof(struct sk_buff));

	nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
	nr_segs = max_t(u32, nr_segs, tp->reordering + 1);

	/* Fast Recovery (RFC 5681 3.2) :
	 * Cubic needs 1.7 factor, rounded to 2 to include
	 * extra cushion (application might react slowly to EPOLLOUT)
	 */
	sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2;
	sndmem *= nr_segs * per_mss;

	if (sk->sk_sndbuf < sndmem)
		WRITE_ONCE(sk->sk_sndbuf,
			   min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
}

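/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
 *
 * All of tcp_full_space() is split into two parts: the "network" buffer,
 * allocated forward and advertised in the receiver window (tp->rcv_wnd),
 * and the "application buffer", required to isolate scheduling/application
 * latencies from the network.
 * window_clamp is the maximal advertised window. It can be less than
 * tcp_full_space(), in which case tcp_full_space() - window_clamp
 * is reserved for the "application" buffer.
 *
 * rcv_ssthresh is a stricter window_clamp used at "slow start"
 * phase to predict further behaviour of this connection.
 * It is used for two goals:
 * - to enforce header prediction at the sender, even when the application
 *   requires a significant "application buffer" (check #1);
 * - to prevent pruning of the receive queue because of misprediction
 *   of the receiver window (check #2).
 */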
static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	/* Optimize this! */
	int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
	int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;

	while (tp->rcv_ssthresh <= window) {
		if (truesize <= skb->len)
			return 2 * inet_csk(sk)->icsk_ack.rcv_mss;

		truesize >>= 1;
		window >>= 1;
	}
	return 0;
}

static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int room;

	room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh;

	/* Check #1 */
	if (room > 0 && !tcp_under_memory_pressure(sk)) {
		int incr;

		/* Check #2. Increase window, if skb with such overhead
		 * will fit to rcvbuf in future.
		 */
		if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
			incr = 2 * tp->advmss;
		else
			incr = __tcp_grow_window(sk, skb);

		if (incr) {
			incr = max_t(int, incr, 2 * skb->len);
			tp->rcv_ssthresh += min(room, incr);
			inet_csk(sk)->icsk_ack.quick |= 1;
		}
	}
}

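/* 3. Try to fixup all. It is made immediately after connection enters
 *    established state.
 */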
static void tcp_init_buffer_space(struct sock *sk)
{
	int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
	struct tcp_sock *tp = tcp_sk(sk);
	int maxwin;

	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
		tcp_sndbuf_expand(sk);

	tcp_mstamp_refresh(tp);
	tp->rcvq_space.time = tp->tcp_mstamp;
	tp->rcvq_space.seq = tp->copied_seq;

	maxwin = tcp_full_space(sk);

	if (tp->window_clamp >= maxwin) {
		tp->window_clamp = maxwin;

		if (tcp_app_win && maxwin > 4 * tp->advmss)
			tp->window_clamp = max(maxwin -
					       (maxwin >> tcp_app_win),
					       4 * tp->advmss);
	}

	/* Force reservation of one segment. */
	if (tcp_app_win &&
	    tp->window_clamp > 2 * tp->advmss &&
	    tp->window_clamp + tp->advmss > maxwin)
		tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);

	tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
	tp->snd_cwnd_stamp = tcp_jiffies32;
	tp->rcvq_space.space = min3(tp->rcv_ssthresh, tp->rcv_wnd,
				    (u32)TCP_INIT_CWND * tp->advmss);
}

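/* 4. Recalculate window clamp after socket hit its memory bounds. */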
static void tcp_clamp_window(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct net *net = sock_net(sk);

	icsk->icsk_ack.quick = 0;

	if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
	    !tcp_under_memory_pressure(sk) &&
	    sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
		WRITE_ONCE(sk->sk_rcvbuf,
			   min(atomic_read(&sk->sk_rmem_alloc),
			       net->ipv4.sysctl_tcp_rmem[2]));
	}
	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
		tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
}

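/* Initialize RCV_MSS value.
 * RCV_MSS is an our guess about MSS used by the peer.
 * We haven't any direct information about the MSS.
 * It's better to underestimate the RCV_MSS rather than overestimate.
 * Overestimations make us ACKing less frequently than needed.
 * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss().
 */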
void tcp_initialize_rcv_mss(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);

	hint = min(hint, tp->rcv_wnd / 2);
	hint = min(hint, TCP_MSS_DEFAULT);
	hint = max(hint, TCP_MIN_MSS);

	inet_csk(sk)->icsk_ack.rcv_mss = hint;
}
EXPORT_SYMBOL(tcp_initialize_rcv_mss);

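/* Receiver "autotuning" code.
 *
 * The algorithm for RTT estimation w/o timestamps is based on
 * Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL.
 * <https://public.lanl.gov/radiant/pubs.html#DRS>
 */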
static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
{
	u32 new_sample = tp->rcv_rtt_est.rtt_us;
	long m = sample;

	if (new_sample != 0) {
		/* If we sample in larger samples in the non-timestamp
		 * case, we could grossly overestimate the RTT especially
		 * with chatty applications or bulk transfer apps which
		 * are stalled on filesystem I/O.
		 *
		 * Also, since we are only going for a minimum in the
		 * non-timestamp case, we do not smooth things out
		 * else with timestamps disabled convergence takes too
		 * long.
		 */
		if (!win_dep) {
			m -= (new_sample >> 3);
			new_sample += m;
		} else {
			m <<= 3;
			if (m < new_sample)
				new_sample = m;
		}
	} else {
		/* No previous measure. */
		new_sample = m << 3;
	}

	tp->rcv_rtt_est.rtt_us = new_sample;
}

static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
{
	u32 delta_us;

	if (tp->rcv_rtt_est.time == 0)
		goto new_measure;
	if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
		return;
	delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
	if (!delta_us)
		delta_us = 1;
	tcp_rcv_rtt_update(tp, delta_us, 1);

new_measure:
	tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
	tp->rcv_rtt_est.time = tp->tcp_mstamp;
}

static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
					  const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr)
		return;
	tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;

	if (TCP_SKB_CB(skb)->end_seq -
	    TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
		u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
		u32 delta_us;

		if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
			if (!delta)
				delta = 1;
			delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
			tcp_rcv_rtt_update(tp, delta_us, 0);
		}
	}
}

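/* This function should be called every time data is copied to user space.
 * It calculates the appropriate TCP receive buffer space.
 */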
void tcp_rcv_space_adjust(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 copied;
	int time;

	trace_tcp_rcv_space_adjust(sk);

	tcp_mstamp_refresh(tp);
	time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
	if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
		return;

	/* Number of bytes copied to user in last RTT */
	copied = tp->copied_seq - tp->rcvq_space.seq;
	if (copied <= tp->rcvq_space.space)
		goto new_measure;

	/* A bit of theory :
	 * copied = bytes received in previous RTT, our base window
	 * To cope with packet losses, we need a 2x factor
	 * To cope with slow start, and sender growing its cwin by 100 %
	 * every RTT, we need a 4x factor, because the ACK we are sending
	 * now is for the next RTT, not the current one :
	 * <prev RTT . ><current RTT .. ><next RTT .... >
	 */
	if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
		int rcvmem, rcvbuf;
		u64 rcvwin, grow;

		/* minimal window to cope with packet losses, assuming
		 * steady state. Add some cushion because of small variations.
		 */
		rcvwin = ((u64)copied << 1) + 16 * tp->advmss;

		/* Accommodate for sender rate increase (eg. slow start) */
		grow = rcvwin * (copied - tp->rcvq_space.space);
		do_div(grow, tp->rcvq_space.space);
		rcvwin += (grow << 1);

		rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
		while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
			rcvmem += 128;

		do_div(rcvwin, tp->advmss);
		rcvbuf = min_t(u64, rcvwin * rcvmem,
			       sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
		if (rcvbuf > sk->sk_rcvbuf) {
			WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);

			/* Make the window clamp follow along. */
			tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
		}
	}
	tp->rcvq_space.space = copied;

new_measure:
	tp->rcvq_space.seq = tp->copied_seq;
	tp->rcvq_space.time = tp->tcp_mstamp;
}

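/* There is something which you must keep in mind when you analyze the
 * behavior of the tp->ato delayed ack timeout interval.  When a
 * connection starts up, we want to ack as quickly as possible.  The
 * problem is that "good" TCP's do slow start at the beginning of data
 * transmission.  The means that until we send the first few ACK's the
 * sender will sit on his end and only queue most of his data, because
 * he can only send snd_cwnd unacked packets at any given time.  For
 * each ACK we send, he increments snd_cwnd and transmits more of his
 * queue.  -DaveM
 */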
static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	u32 now;

	inet_csk_schedule_ack(sk);

	tcp_measure_rcv_mss(sk, skb);

	tcp_rcv_rtt_measure(tp);

	now = tcp_jiffies32;

	if (!icsk->icsk_ack.ato) {
		/* The _first_ data packet received, initialize
		 * delayed ACK engine.
		 */
		tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
		icsk->icsk_ack.ato = TCP_ATO_MIN;
	} else {
		int m = now - icsk->icsk_ack.lrcvtime;

		if (m <= TCP_ATO_MIN / 2) {
			/* The fastest case is the first. */
			icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
		} else if (m < icsk->icsk_ack.ato) {
			icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
			if (icsk->icsk_ack.ato > icsk->icsk_rto)
				icsk->icsk_ack.ato = icsk->icsk_rto;
		} else if (m > icsk->icsk_rto) {
			/* Too long gap. Apparently sender failed to
			 * restart window, so that we send ACKs quickly.
			 */
			tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
			sk_mem_reclaim(sk);
		}
	}
	icsk->icsk_ack.lrcvtime = now;

	tcp_ecn_check_ce(sk, skb);

	if (skb->len >= 128)
		tcp_grow_window(sk, skb);
}

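/* Called to compute a smoothed rtt estimate. The data fed to this
 * routine either comes from timestamps, or from segments that were
 * known _not_ to have been retransmitted [see Karn/Partridge
 * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
 * piece by Van Jacobson.
 */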
static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
{
	struct tcp_sock *tp = tcp_sk(sk);
	long m = mrtt_us; /* RTT */
	u32 srtt = tp->srtt_us;

	/* The following code comes from Jacobson's article in SIGCOMM '88.
	 * Note that rtt and mdev are scaled versions of rtt and mean
	 * deviation.  This is designed to be as fast as possible;
	 * m stands for "measurement".
	 */
	if (srtt != 0) {
		m -= (srtt >> 3);	/* m is now error in rtt est */
		srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
		if (m < 0) {
			m = -m;		/* m is now abs(error) */
			m -= (tp->mdev_us >> 2);   /* similar update on mdev */
			/* This is similar to one of Eifel findings.
			 * Eifel blocks mdev updates when rtt decreases.
			 * This solution is a bit different: we use finer gain
			 * for mdev in this case (alpha*beta).
			 * Like Eifel it also prevents growth of rto,
			 * but also it limits too fast rto decreases,
			 * happening in pure Eifel.
			 */
			if (m > 0)
				m >>= 3;
		} else {
			m -= (tp->mdev_us >> 2);   /* similar update on mdev */
		}
		tp->mdev_us += m;		/* mdev = 3/4 mdev + 1/4 new */
		if (tp->mdev_us > tp->mdev_max_us) {
			tp->mdev_max_us = tp->mdev_us;
			if (tp->mdev_max_us > tp->rttvar_us)
				tp->rttvar_us = tp->mdev_max_us;
		}
		if (after(tp->snd_una, tp->rtt_seq)) {
			if (tp->mdev_max_us < tp->rttvar_us)
				tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
			tp->rtt_seq = tp->snd_nxt;
			tp->mdev_max_us = tcp_rto_min_us(sk);

			tcp_bpf_rtt(sk);
		}
	} else {
		/* no previous measure. */
		srtt = m << 3;		/* take the measured time to be rtt */
		tp->mdev_us = m << 1;	/* make sure rto = 3*rtt */
		tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
		tp->mdev_max_us = tp->rttvar_us;
		tp->rtt_seq = tp->snd_nxt;

		tcp_bpf_rtt(sk);
	}
	tp->srtt_us = max(1U, srtt);
}

static void tcp_update_pacing_rate(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	u64 rate;

	/* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
	rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);

	/* current rate is (cwnd * mss) / srtt
	 * In Slow Start [1], set sk_pacing_rate to 200 % of current rate.
	 * In Congestion Avoidance phase, set it to 120 % the current rate.
	 *
	 * [1] : Normal Slow Start condition is (tp->snd_cwnd < tp->snd_ssthresh)
	 *	 If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching
	 *	 end of slow start and should slow down.
	 */
	if (tp->snd_cwnd < tp->snd_ssthresh / 2)
		rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
	else
		rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;

	rate *= max(tp->snd_cwnd, tp->packets_out);

	if (likely(tp->srtt_us))
		do_div(rate, tp->srtt_us);

	/* WRITE_ONCE() is needed because sch_fq fetches sk_pacing_rate
	 * without any lock. We want to make sure compiler wont store
	 * intermediate values in this location.
	 */
	WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate,
					     sk->sk_max_pacing_rate));
}

/* Calculate rto without backoff.  This is the second half of Van Jacobson's
 * routine referred to above.
 */
static void tcp_set_rto(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);

	/* NOTE: clamping at TCP_RTO_MIN is not required, current algo
	 * guarantees that rto is higher.
	 */
	tcp_bound_rto(sk);
}

__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
{
	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);

	if (!cwnd)
		cwnd = TCP_INIT_CWND;
	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
}

struct tcp_sacktag_state {
	/* Timestamps for earliest and latest never-retransmitted segment
	 * that was SACKed. RTO needs the earliest RTT to stay conservative,
	 * but congestion control should still get an accurate delay signal.
	 */
	u64	first_sackt;
	u64	last_sackt;
	u32	reord;
	u32	sack_delivered;
	int	flag;
	unsigned int mss_now;
	struct rate_sample *rate;
};

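/* Take a notice that peer is sending D-SACKs. Returns the number of
 * duplicate segments covered by the D-SACK, or 0 if the block is bogus
 * or looks too suspicious to process.
 */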
static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
			  u32 end_seq, struct tcp_sacktag_state *state)
{
	u32 seq_len, dup_segs = 1;

	if (!before(start_seq, end_seq))
		return 0;

	seq_len = end_seq - start_seq;
	/* Dubious D-SACK: D-SACKed range greater than maximum advertised rwnd */
	if (seq_len > tp->max_window)
		return 0;
	if (seq_len > tp->mss_cache)
		dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache);

	tp->dsack_dups += dup_segs;
	/* Skip the DSACK if dup segs weren't retransmitted by sender */
	if (tp->dsack_dups > tp->total_retrans)
		return 0;

	tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
	tp->rack.dsack_seen = 1;

	state->flag |= FLAG_DSACKING_ACK;
	/* A spurious retransmission is delivered */
	state->sack_delivered += dup_segs;

	return dup_segs;
}

/* It's reordering when higher sequence was delivered (i.e. sacked) before
 * some lower never-retransmitted sequence ("low_seq"). The maximum reordering
 * distance is approximated in fack_count units (TSO pcount) of sequence space.
 */
static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
				      const int ts)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const u32 mss = tp->mss_cache;
	u32 fack, metric;

	fack = tcp_highest_sack_seq(tp);
	if (!before(low_seq, fack))
		return;

	metric = fack - low_seq;
	if ((metric > tp->reordering * mss) && mss) {
#if FASTRETRANS_DEBUG > 1
		pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
			 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
			 tp->reordering,
			 0,
			 tp->sacked_out,
			 tp->undo_marker ? tp->undo_retrans : 0);
#endif
		tp->reordering = min_t(u32, (metric + mss - 1) / mss,
				       sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
	}

	/* This exciting event is worth to be remembered. 8) */
	tp->reord_seen++;
	NET_INC_STATS(sock_net(sk),
		      ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
}

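/* This must be called before lost_out or retrans_out are updated
 * on a new loss, because we want to know if all skbs previously
 * known to be lost have already been retransmitted, indicating
 * that this newly lost skb is our next skb to retransmit.
 */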
static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
{
	if ((!tp->retransmit_skb_hint && tp->retrans_out >= tp->lost_out) ||
	    (tp->retransmit_skb_hint &&
	     before(TCP_SKB_CB(skb)->seq,
		    TCP_SKB_CB(tp->retransmit_skb_hint)->seq)))
		tp->retransmit_skb_hint = skb;
}

/* Sum the number of packets on the wire we have marked as lost, so that
 * rate sampling (and rate-based congestion control) can see how many
 * packets were lost over this round trip.
 */
static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb)
{
	tp->lost += tcp_skb_pcount(skb);
}

void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
{
	__u8 sacked = TCP_SKB_CB(skb)->sacked;
	struct tcp_sock *tp = tcp_sk(sk);

	if (sacked & TCPCB_SACKED_ACKED)
		return;

	tcp_verify_retransmit_hint(tp, skb);
	if (sacked & TCPCB_LOST) {
		if (sacked & TCPCB_SACKED_RETRANS) {
			/* Account for retransmits that are lost again */
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
			tp->retrans_out -= tcp_skb_pcount(skb);
			NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT,
				      tcp_skb_pcount(skb));
			tcp_notify_skb_loss_event(tp, skb);
		}
	} else {
		tp->lost_out += tcp_skb_pcount(skb);
		TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
		tcp_notify_skb_loss_event(tp, skb);
	}
}

/* Updates the delivered and delivered_ce counts */
static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
				bool ece_ack)
{
	tp->delivered += delivered;
	if (ece_ack)
		tp->delivered_ce += delivered;
}

/* This procedure tags the retransmission queue when SACKs arrive.
 *
 * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
 * Packets in queue with these bits set are counted in variables
 * sacked_out, retrans_out and lost_out, correspondingly.
 *
 * Valid combinations are:
 * Tag  InFlight	Description
 * 0	1		- orig segment is in flight.
 * S	0		- nothing flies, orig reached receiver.
 * L	0		- nothing flies, orig lost by net.
 * R	2		- both orig and retransmit are in flight.
 * L|R	1		- orig is lost, retransmit is in flight.
 * S|R	1		- orig reached receiver, retrans is still in flight.
 * (L|S|R is logically valid, it could occur when L|R is sacked,
 *  but it is equivalent to plain S and code short-circuits it to S.
 *  L|S is logically invalid, it would mean -1 packet in flight 8))
 *
 * These 6 states form finite state machine, controlled by the following
 * events:
 * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue())
 * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue())
 * 3. Loss detection event of two flavors:
 *	A. Scoreboard estimator decided the packet is lost.
 *	B. SACK arrives sacking SND.NXT at the moment, when the
 *	   segment was retransmitted.
 * 4. D-SACK added new rule: D-SACK changes any tag to S.
 *
 * Reordering detection: we look for SACKs of never-retransmitted data
 * arriving for sequences below the highest SACKed sequence; the distance
 * between them bounds the reordering degree.
 *
 * SACK block range validation (tcp_is_sackblock_valid() below) checks that
 * the received SACK block fits the expected sequence limits, i.e., it is
 * between SND.UNA and SND.NXT, with special handling for D-SACK blocks that
 * may legitimately sit below SND.UNA (bounded by undo_marker and max_window
 * to limit the damage from sequence-space wrap-around).
 */
static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
				   u32 start_seq, u32 end_seq)
{
	/* Too far in future, or reversed (interpretation is ambiguous) */
	if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
		return false;

	/* Nasty start_seq wrap-around check (see comments above) */
	if (!before(start_seq, tp->snd_nxt))
		return false;

	/* In outstanding window? ...This is valid exit for D-SACKs too.
	 * start_seq == snd_una is non-sensical (see comments above)
	 */
	if (after(start_seq, tp->snd_una))
		return true;

	if (!is_dsack || !tp->undo_marker)
		return false;

	/* ...Then it's D-SACK, and must reside below snd_una completely */
	if (after(end_seq, tp->snd_una))
		return false;

	if (!before(start_seq, tp->undo_marker))
		return true;

	/* Too old */
	if (!after(end_seq, tp->undo_marker))
		return false;

	/* Undo_marker boundary crossing (overestimates a lot). Known already:
	 *   start_seq < undo_marker and end_seq >= undo_marker.
	 */
	return !before(start_seq, end_seq - tp->max_window);
}

static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
			    struct tcp_sack_block_wire *sp, int num_sacks,
			    u32 prior_snd_una, struct tcp_sacktag_state *state)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
	u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
	u32 dup_segs;

	if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
	} else if (num_sacks > 1) {
		u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
		u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);

		if (after(end_seq_0, end_seq_1) || before(start_seq_0, start_seq_1))
			return false;
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKOFORECV);
	} else {
		return false;
	}

	dup_segs = tcp_dsack_seen(tp, start_seq_0, end_seq_0, state);
	if (!dup_segs) {	/* Skip dubious DSACK */
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKIGNOREDDUBIOUS);
		return false;
	}

	NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECVSEGS, dup_segs);

	/* D-SACK for already forgotten data... Do dumb counting. */
	if (tp->undo_marker && tp->undo_retrans > 0 &&
	    !after(end_seq_0, prior_snd_una) &&
	    after(end_seq_0, tp->undo_marker))
		tp->undo_retrans = max_t(int, 0, tp->undo_retrans - dup_segs);

	return true;
}

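/* Check if skb is fully within the SACK block. In presence of GSO skbs,
 * the incoming SACK may not exactly match but we can find smaller MSS
 * aligned portion of it that matches. Therefore we might need to fragment
 * which may fail and creates some hassle (caller must handle error case
 * returns).
 */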
static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
				 u32 start_seq, u32 end_seq)
{
	int err;
	bool in_sack;
	unsigned int pkt_len;
	unsigned int mss;

	in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
		  !before(end_seq, TCP_SKB_CB(skb)->end_seq);

	if (tcp_skb_pcount(skb) > 1 && !in_sack &&
	    after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
		mss = tcp_skb_mss(skb);
		in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);

		if (!in_sack) {
			pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
			if (pkt_len < mss)
				pkt_len = mss;
		} else {
			pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
			if (pkt_len < mss)
				return -EINVAL;
		}

		/* Round if necessary so that SACKs cover only full MSSes
		 * and/or the remaining small portion (if present)
		 */
		if (pkt_len > mss) {
			unsigned int new_len = (pkt_len / mss) * mss;
			if (!in_sack && new_len < pkt_len)
				new_len += mss;
			pkt_len = new_len;
		}

		if (pkt_len >= skb->len && !in_sack)
			return 0;

		err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
				   pkt_len, mss, GFP_ATOMIC);
		if (err < 0)
			return err;
	}

	return in_sack;
}

/* Mark the given newly-SACKed range as such, adjusting counters and hints. */
static u8 tcp_sacktag_one(struct sock *sk,
			  struct tcp_sacktag_state *state, u8 sacked,
			  u32 start_seq, u32 end_seq,
			  int dup_sack, int pcount,
			  u64 xmit_time)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Account D-SACK for retransmitted packet. */
	if (dup_sack && (sacked & TCPCB_RETRANS)) {
		if (tp->undo_marker && tp->undo_retrans > 0 &&
		    after(end_seq, tp->undo_marker))
			tp->undo_retrans--;
		if ((sacked & TCPCB_SACKED_ACKED) &&
		    before(start_seq, state->reord))
			state->reord = start_seq;
	}

	/* Nothing to do; acked frame is about to be dropped (was ACKed). */
	if (!after(end_seq, tp->snd_una))
		return sacked;

	if (!(sacked & TCPCB_SACKED_ACKED)) {
		tcp_rack_advance(tp, sacked, end_seq, xmit_time);

		if (sacked & TCPCB_SACKED_RETRANS) {
			/* If the segment is not tagged as lost,
			 * we do not clear RETRANS, believing
			 * that retransmission is still in flight.
			 */
			if (sacked & TCPCB_LOST) {
				sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
				tp->lost_out -= pcount;
				tp->retrans_out -= pcount;
			}
		} else {
			if (!(sacked & TCPCB_RETRANS)) {
				/* New sack for not retransmitted frame,
				 * which was in hole. It is reordering.
				 */
				if (before(start_seq,
					   tcp_highest_sack_seq(tp)) &&
				    before(start_seq, state->reord))
					state->reord = start_seq;

				if (!after(end_seq, tp->high_seq))
					state->flag |= FLAG_ORIG_SACK_ACKED;
				if (state->first_sackt == 0)
					state->first_sackt = xmit_time;
				state->last_sackt = xmit_time;
			}

			if (sacked & TCPCB_LOST) {
				sacked &= ~TCPCB_LOST;
				tp->lost_out -= pcount;
			}
		}

		sacked |= TCPCB_SACKED_ACKED;
		state->flag |= FLAG_DATA_SACKED;
		tp->sacked_out += pcount;
		/* Out-of-order packets delivered */
		state->sack_delivered += pcount;

		/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
		if (tp->lost_skb_hint &&
		    before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
			tp->lost_cnt_hint += pcount;
	}

	/* D-SACK. We can detect redundant retransmission in S|R and plain R
	 * frames and clear it. undo_retrans is decreased above, L|R frames
	 * are accounted above as well.
	 */
	if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
		sacked &= ~TCPCB_SACKED_RETRANS;
		tp->retrans_out -= pcount;
	}

	return sacked;
}

/* Shift newly-SACKed bytes from this skb to the immediately previous
 * already-SACKed skb, and then merge this skb if possible.
 */
static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
			    struct sk_buff *skb,
			    struct tcp_sacktag_state *state,
			    unsigned int pcount, int shifted, int mss,
			    bool dup_sack)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 start_seq = TCP_SKB_CB(skb)->seq;	/* start of newly-SACKed */
	u32 end_seq = start_seq + shifted;	/* end of newly-SACKed */

	BUG_ON(!pcount);

	/* Adjust counters and hints for the newly sacked sequence
	 * range but discard the return value since prev is already
	 * marked. We must tag the range first because the seq
	 * advancement below implicitly advances
	 * tcp_highest_sack_seq() when skb is highest_sack.
	 */
	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
			start_seq, end_seq, dup_sack, pcount,
			tcp_skb_timestamp_us(skb));
	tcp_rate_skb_delivered(sk, skb, state->rate);

	if (skb == tp->lost_skb_hint)
		tp->lost_cnt_hint += pcount;

	TCP_SKB_CB(prev)->end_seq += shifted;
	TCP_SKB_CB(skb)->seq += shifted;

	tcp_skb_pcount_add(prev, pcount);
	WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
	tcp_skb_pcount_add(skb, -pcount);

	/* When we're adding to gso_segs == 1, gso_size will be just the
	 * skb length. Keep gso_size set to something sane so that later
	 * merges and DSACK handling keep working.
	 */
	if (!TCP_SKB_CB(prev)->tcp_gso_size)
		TCP_SKB_CB(prev)->tcp_gso_size = mss;

	/* CHECKME: To clear or not to clear? Mimics normal skb currently */
	if (tcp_skb_pcount(skb) <= 1)
		TCP_SKB_CB(skb)->tcp_gso_size = 0;

	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
	TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);

	if (skb->len > 0) {
		BUG_ON(!tcp_skb_pcount(skb));
		NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED);
		return false;
	}

	/* Whole SKB was eaten :-) */

	if (skb == tp->retransmit_skb_hint)
		tp->retransmit_skb_hint = prev;
	if (skb == tp->lost_skb_hint) {
		tp->lost_skb_hint = prev;
		tp->lost_cnt_hint -= tcp_skb_pcount(prev);
	}

	TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
	TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor;
	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
		TCP_SKB_CB(prev)->end_seq++;

	if (skb == tcp_highest_sack(sk))
		tcp_advance_highest_sack(sk, skb);

	tcp_skb_collapse_tstamp(prev, skb);
	if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
		TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;

	tcp_rtx_queue_unlink_and_free(skb, sk);

	NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);

	return true;
}

/* I wish gso_size would have a bit more sane initialization than
 * something-or-zero which complicates things
 */
static int tcp_skb_seglen(const struct sk_buff *skb)
{
	return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
}

/* Shifting pages past head area doesn't work */
static int skb_can_shift(const struct sk_buff *skb)
{
	return !skb_headlen(skb) && skb_is_nonlinear(skb);
}

int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
		  int pcount, int shiftlen)
{
	/* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE)
	 * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
	 * to make sure not storing more than 65535 * 8 bytes per skb,
	 * even if current MSS is bigger.
	 */
	if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
		return 0;
	if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
		return 0;
	return skb_shift(to, from, shiftlen);
}

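/* Try collapsing SACK blocks spanning across multiple skbs to a single
 * skb.
 */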
static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
					  struct tcp_sacktag_state *state,
					  u32 start_seq, u32 end_seq,
					  bool dup_sack)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *prev;
	int mss;
	int pcount = 0;
	int len;
	int in_sack;

	/* Normally R but no L won't result in plain S */
	if (!dup_sack &&
	    (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
		goto fallback;
	if (!skb_can_shift(skb))
		goto fallback;
	/* This frame is about to be dropped (was ACKed). */
	if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
		goto fallback;

	/* Can only happen with delayed DSACK + discard craziness */
	prev = skb_rb_prev(skb);
	if (!prev)
		goto fallback;

	if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
		goto fallback;

	if (!tcp_skb_can_collapse(prev, skb))
		goto fallback;

	in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
		  !before(end_seq, TCP_SKB_CB(skb)->end_seq);

	if (in_sack) {
		len = skb->len;
		pcount = tcp_skb_pcount(skb);
		mss = tcp_skb_seglen(skb);

		/* TODO: Fix DSACKs to not fragment already SACKed and we can
		 * drop this restriction as unnecessary
		 */
		if (mss != tcp_skb_seglen(prev))
			goto fallback;
	} else {
		if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
			goto noop;
		/* CHECKME: This is non-MSS split case only?, this will
		 * cause skipped skbs due to advancing loop btw, original
		 * has that feature. Don't do it this time.
		 */
		if (tcp_skb_pcount(skb) <= 1)
			goto noop;

		in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
		if (!in_sack) {
			/* Head merge to next could be attempted here,
			 * though it might not be worth of the additional
			 * hassle. Merging non-SACKed ones probably isn't
			 * going to buy anything either, because later SACKs
			 * might again split them, and it would make skb
			 * timestamp tracking considerably harder.
			 */
			goto fallback;
		}

		len = end_seq - TCP_SKB_CB(skb)->seq;
		BUG_ON(len < 0);
		BUG_ON(len > skb->len);

		/* MSS boundaries should be honoured or else pcount will
		 * severely break even though it makes things bit trickier.
		 * Optimize common case to avoid most of the divides
		 */
		mss = tcp_skb_mss(skb);

		/* TODO: Fix DSACKs to not fragment already SACKed and we can
		 * drop this restriction as unnecessary
		 */
		if (mss != tcp_skb_seglen(prev))
			goto fallback;

		if (len == mss) {
			pcount = 1;
		} else if (len < mss) {
			goto noop;
		} else {
			pcount = len / mss;
			len = pcount * mss;
		}
	}

	/* tcp_sacktag_one() won't SACK-tag ranges below snd_una */
	if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
		goto fallback;

	if (!tcp_skb_shift(prev, skb, pcount, len))
		goto fallback;
	if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
		goto out;

	/* Hole filled allows collapsing with the next as well, this is very
	 * useful when hole on every nth skb pattern happens
	 */
	skb = skb_rb_next(prev);
	if (!skb)
		goto out;

	if (!skb_can_shift(skb) ||
	    ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
	    (mss != tcp_skb_seglen(skb)))
		goto out;

	len = skb->len;
	pcount = tcp_skb_pcount(skb);
	if (tcp_skb_shift(prev, skb, pcount, len))
		tcp_shifted_skb(sk, prev, skb, state, pcount,
				len, mss, 0);

out:
	return prev;

noop:
	return skb;

fallback:
	NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
	return NULL;
}

static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
					struct tcp_sack_block *next_dup,
					struct tcp_sacktag_state *state,
					u32 start_seq, u32 end_seq,
					bool dup_sack_in)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *tmp;

	skb_rbtree_walk_from(skb) {
		int in_sack = 0;
		bool dup_sack = dup_sack_in;

		/* queue is in-order => we can short-circuit the walk early */
		if (!before(TCP_SKB_CB(skb)->seq, end_seq))
			break;

		if (next_dup &&
		    before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
			in_sack = tcp_match_skb_to_sack(sk, skb,
							next_dup->start_seq,
							next_dup->end_seq);
			if (in_sack > 0)
				dup_sack = true;
		}

		/* skb reference here is a bit tricky to get right, since
		 * shifting can eat and free both this skb and the next,
		 * so not even _safe variant of the loop is enough.
		 */
		if (in_sack <= 0) {
			tmp = tcp_shift_skb_data(sk, skb, state,
						 start_seq, end_seq, dup_sack);
			if (tmp) {
				if (tmp != skb) {
					skb = tmp;
					continue;
				}

				in_sack = 0;
			} else {
				in_sack = tcp_match_skb_to_sack(sk, skb,
								start_seq,
								end_seq);
			}
		}

		if (unlikely(in_sack < 0))
			break;

		if (in_sack) {
			TCP_SKB_CB(skb)->sacked =
				tcp_sacktag_one(sk,
						state,
						TCP_SKB_CB(skb)->sacked,
						TCP_SKB_CB(skb)->seq,
						TCP_SKB_CB(skb)->end_seq,
						dup_sack,
						tcp_skb_pcount(skb),
						tcp_skb_timestamp_us(skb));
			tcp_rate_skb_delivered(sk, skb, state->rate);
			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
				list_del_init(&skb->tcp_tsorted_anchor);

			if (!before(TCP_SKB_CB(skb)->seq,
				    tcp_highest_sack_seq(tp)))
				tcp_advance_highest_sack(sk, skb);
		}
	}
	return skb;
}

static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, u32 seq)
{
	struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
	struct sk_buff *skb;

	while (*p) {
		parent = *p;
		skb = rb_to_skb(parent);
		if (before(seq, TCP_SKB_CB(skb)->seq)) {
			p = &parent->rb_left;
			continue;
		}
		if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
			p = &parent->rb_right;
			continue;
		}
		return skb;
	}
	return NULL;
}

static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
					u32 skip_to_seq)
{
	if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
		return skb;

	return tcp_sacktag_bsearch(sk, skip_to_seq);
}

static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
						struct sock *sk,
						struct tcp_sack_block *next_dup,
						struct tcp_sacktag_state *state,
						u32 skip_to_seq)
{
	if (!next_dup)
		return skb;

	if (before(next_dup->start_seq, skip_to_seq)) {
		skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
		skb = tcp_sacktag_walk(skb, sk, NULL, state,
				       next_dup->start_seq, next_dup->end_seq,
				       1);
	}

	return skb;
}

static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
{
	return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
}

static int
tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
			u32 prior_snd_una, struct tcp_sacktag_state *state)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const unsigned char *ptr = (skb_transport_header(ack_skb) +
				    TCP_SKB_CB(ack_skb)->sacked);
	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
	struct tcp_sack_block sp[TCP_NUM_SACKS];
	struct tcp_sack_block *cache;
	struct sk_buff *skb;
	int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
	int used_sacks;
	bool found_dup_sack = false;
	int i, j;
	int first_sack_index;

	state->flag = 0;
	state->reord = tp->snd_nxt;

	if (!tp->sacked_out)
		tcp_highest_sack_reset(sk);

	found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
					 num_sacks, prior_snd_una, state);

	/* Eliminate too old ACKs, but take into
	 * account more or less fresh ones, they can
	 * contain valid SACK info.
	 */
	if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
		return 0;

	if (!tp->packets_out)
		goto out;

	used_sacks = 0;
	first_sack_index = 0;
	for (i = 0; i < num_sacks; i++) {
		bool dup_sack = !i && found_dup_sack;

		sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
		sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);

		if (!tcp_is_sackblock_valid(tp, dup_sack,
					    sp[used_sacks].start_seq,
					    sp[used_sacks].end_seq)) {
			int mib_idx;

			if (dup_sack) {
				if (!tp->undo_marker)
					mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
				else
					mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
			} else {
				/* Don't count olds caused by ACK reordering */
				if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
				    !after(sp[used_sacks].end_seq, tp->snd_una))
					continue;
				mib_idx = LINUX_MIB_TCPSACKDISCARD;
			}

			NET_INC_STATS(sock_net(sk), mib_idx);
			if (i == 0)
				first_sack_index = -1;
			continue;
		}

		/* Ignore very old stuff early */
		if (!after(sp[used_sacks].end_seq, prior_snd_una)) {
			if (i == 0)
				first_sack_index = -1;
			continue;
		}

		used_sacks++;
	}

	/* order SACK blocks to allow in order walk of the retrans queue */
	for (i = used_sacks - 1; i > 0; i--) {
		for (j = 0; j < i; j++) {
			if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
				swap(sp[j], sp[j + 1]);

				/* Track where the first SACK block goes to */
				if (j == first_sack_index)
					first_sack_index = j + 1;
			}
		}
	}

	state->mss_now = tcp_current_mss(sk);
	skb = NULL;
	i = 0;

	if (!tp->sacked_out) {
		/* It's already past, so skip checking against it */
		cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
	} else {
		cache = tp->recv_sack_cache;
		/* Skip empty blocks at the head of the cache */
		while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
		       !cache->end_seq)
			cache++;
	}

	while (i < used_sacks) {
		u32 start_seq = sp[i].start_seq;
		u32 end_seq = sp[i].end_seq;
		bool dup_sack = (found_dup_sack && (i == first_sack_index));
		struct tcp_sack_block *next_dup = NULL;

		if (found_dup_sack && ((i + 1) == first_sack_index))
			next_dup = &sp[i + 1];

		/* Skip too early cached blocks */
		while (tcp_sack_cache_ok(tp, cache) &&
		       !before(start_seq, cache->end_seq))
			cache++;

		/* Can skip some work by looking recv_sack_cache? */
		if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
		    after(end_seq, cache->start_seq)) {

			/* Head todo? */
			if (before(start_seq, cache->start_seq)) {
				skb = tcp_sacktag_skip(skb, sk, start_seq);
				skb = tcp_sacktag_walk(skb, sk, next_dup,
						       state,
						       start_seq,
						       cache->start_seq,
						       dup_sack);
			}

			/* Rest of the block already fully processed? */
			if (!after(end_seq, cache->end_seq))
				goto advance_sp;

			skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
						       state,
						       cache->end_seq);

			/* ...tail remains todo... */
			if (tcp_highest_sack_seq(tp) == cache->end_seq) {
				/* ...but better entrypoint exists! */
				skb = tcp_highest_sack(sk);
				if (!skb)
					break;
				cache++;
				goto walk;
			}

			skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
			/* Check overlap against next cached too (past this one already) */
			cache++;
			continue;
		}

		if (!before(start_seq, tcp_highest_sack_seq(tp))) {
			skb = tcp_highest_sack(sk);
			if (!skb)
				break;
		}
		skb = tcp_sacktag_skip(skb, sk, start_seq);

walk:
		skb = tcp_sacktag_walk(skb, sk, next_dup, state,
				       start_seq, end_seq, dup_sack);

advance_sp:
		i++;
	}

	/* Clear the head of the cache sack blocks so we can skip it next time */
	for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
		tp->recv_sack_cache[i].start_seq = 0;
		tp->recv_sack_cache[i].end_seq = 0;
	}
	for (j = 0; j < used_sacks; j++)
		tp->recv_sack_cache[i++] = sp[j];

	if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
		tcp_check_sack_reordering(sk, state->reord, 0);

	tcp_verify_left_out(tp);
out:

#if FASTRETRANS_DEBUG > 0
	WARN_ON((int)tp->sacked_out < 0);
	WARN_ON((int)tp->lost_out < 0);
	WARN_ON((int)tp->retrans_out < 0);
	WARN_ON((int)tcp_packets_in_flight(tp) < 0);
#endif
	return state->flag;
}
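/* Limits sacked_out so that sum with lost_out isn't ever larger than
 * packets_out. Returns true if sacked_out adjustement was necessary.
 */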
static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
{
	u32 holes;

	holes = max(tp->lost_out, 1U);
	holes = min(holes, tp->packets_out);

	if ((tp->sacked_out + holes) > tp->packets_out) {
		tp->sacked_out = tp->packets_out - holes;
		return true;
	}
	return false;
}

/* If we receive more dupacks than we expected counting segments
 * in assumption of absent reordering, interpret this as reordering.
 * The only another reason could be bug in receiver TCP.
 */
static void tcp_check_reno_reordering(struct sock *sk, const int addend)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!tcp_limit_reno_sacked(tp))
		return;

	tp->reordering = min_t(u32, tp->packets_out + addend,
			       sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
	tp->reord_seen++;
	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
}

/* Emulate SACKs for SACKless connection: account for a new dupack. */
static void tcp_add_reno_sack(struct sock *sk, int num_dupack, bool ece_ack)
{
	if (num_dupack) {
		struct tcp_sock *tp = tcp_sk(sk);
		u32 prior_sacked = tp->sacked_out;
		s32 delivered;

		tp->sacked_out += num_dupack;
		tcp_check_reno_reordering(sk, 0);
		delivered = tp->sacked_out - prior_sacked;
		if (delivered > 0)
			tcp_count_delivered(tp, delivered, ece_ack);
		tcp_verify_left_out(tp);
	}
}

/* Account for ACK, ACKing some data in Reno Recovery phase. */
static void tcp_remove_reno_sacks(struct sock *sk, int acked, bool ece_ack)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (acked > 0) {
		/* One ACK acked some data */
		tcp_count_delivered(tp, max_t(int, acked - tp->sacked_out, 1),
				    ece_ack);
		if (acked - 1 >= tp->sacked_out)
			tp->sacked_out = 0;
		else
			tp->sacked_out -= acked - 1;
	}
	tcp_check_reno_reordering(sk, acked);
	tcp_verify_left_out(tp);
}

static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
{
	tp->sacked_out = 0;
}

void tcp_clear_retrans(struct tcp_sock *tp)
{
	tp->retrans_out = 0;
	tp->lost_out = 0;
	tp->undo_marker = 0;
	tp->undo_retrans = -1;
	tp->sacked_out = 0;
}

static inline void tcp_init_undo(struct tcp_sock *tp)
{
	tp->undo_marker = tp->snd_una;
	/* Retransmission still in flight may cause DSACKs later. */
	tp->undo_retrans = tp->retrans_out ? : -1;
}

static bool tcp_is_rack(const struct sock *sk)
{
	return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
}

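/* Mark packets lost on an RTO: if SACK reneging is detected, drop all
 * SACK tags first (the receiver may have discarded data it had SACKed);
 * otherwise keep SACK state and, with RACK, spare packets sent recently
 * enough that their RACK timeout has not yet elapsed.
 */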
static void tcp_timeout_mark_lost(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb, *head;
	bool is_reneg;			/* is receiver reneging on SACKs? */

	head = tcp_rtx_queue_head(sk);
	is_reneg = head && (TCP_SKB_CB(head)->sacked & TCPCB_SACKED_ACKED);
	if (is_reneg) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
		tp->sacked_out = 0;
		/* Mark SACK reneging until we recover from this loss event. */
		tp->is_sack_reneg = 1;
	} else if (tcp_is_reno(tp)) {
		tcp_reset_reno_sack(tp);
	}

	skb = head;
	skb_rbtree_walk_from(skb) {
		if (is_reneg)
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
		else if (tcp_is_rack(sk) && skb != head &&
			 tcp_rack_skb_timeout(tp, skb, 0) > 0)
			continue; /* Don't mark recently sent ones lost yet */
		tcp_mark_skb_lost(sk, skb);
	}
	tcp_verify_left_out(tp);
	tcp_clear_all_retrans_hints(tp);
}

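/* Enter Loss state. */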
void tcp_enter_loss(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;

	tcp_timeout_mark_lost(sk);

	/* Reduce ssthresh if it has not yet been made inside this window. */
	if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
	    !after(tp->high_seq, tp->snd_una) ||
	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
		tp->prior_ssthresh = tcp_current_ssthresh(sk);
		tp->prior_cwnd = tp->snd_cwnd;
		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
		tcp_ca_event(sk, CA_EVENT_LOSS);
		tcp_init_undo(tp);
	}
	tp->snd_cwnd = tcp_packets_in_flight(tp) + 1;
	tp->snd_cwnd_cnt = 0;
	tp->snd_cwnd_stamp = tcp_jiffies32;

	/* Timeout in disordered state after receiving substantial DUPACKs
	 * suggests that the degree of reordering is over-estimated.
	 */
	if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
	    tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
		tp->reordering = min_t(unsigned int, tp->reordering,
				       net->ipv4.sysctl_tcp_reordering);
	tcp_set_ca_state(sk, TCP_CA_Loss);
	tp->high_seq = tp->snd_nxt;
	tcp_ecn_queue_cwr(tp);

	/* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
	 * loss recovery is underway except recurring timeout(s) on
	 * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing.
	 */
	tp->frto = net->ipv4.sysctl_tcp_frto &&
		   (new_recovery || icsk->icsk_retransmits) &&
		   !inet_csk(sk)->icsk_mtup.probe_size;
}

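/* If ACK arrived pointing to a remembered SACK, it means that our
 * remembered SACKs do not reflect real state of receiver i.e.
 * receiver _host_ is heavily congested (or buggy).
 *
 * To avoid big spurious retransmission bursts due to transient SACK
 * scoreboard oddities that look like reneging, we give the receiver a
 * little time (max(RTT/2, 10ms)) to send us some more ACKs that will
 * restore sanity to the SACK scoreboard. If the apparent reneging
 * persists until this RTO then we'll clear the SACK scoreboard.
 */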
static bool tcp_check_sack_reneging(struct sock *sk, int flag)
{
	if (flag & FLAG_SACK_RENEGING) {
		struct tcp_sock *tp = tcp_sk(sk);
		unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
					  msecs_to_jiffies(10));

		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  delay, TCP_RTO_MAX);
		return true;
	}
	return false;
}

/* Heuristic for the number of duplicate ACKs. There is no explicit
 * dupack counter when SACK is enabled: every SACKed segment counts as
 * a dupack, plus one for the ACK that carried the SACK information.
 */
static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
{
	return tp->sacked_out + 1;
}

/* Linux NewReno/SACK/ECN state machine.
 * --------------------------------------
 *
 * "Open"	Normal state, no dubious events, fast path.
 * "Disorder"	In all the respects it is "Open",
 *		but requires a bit more attention. It is entered when
 *		we see some SACKs or dupacks. It is split of "Open"
 *		mainly to move some processing from fast path to slow one.
 * "CWR"	CWND was reduced due to some Congestion Notification event.
 *		It can be ECN, ICMP source quench, local device congestion.
 * "Recovery"	CWND was reduced, we are fast-retransmitting.
 * "Loss"	CWND was reduced due to RTO timeout or SACK reneging.
 *
 * tcp_fastretrans_alert() is entered:
 * - each incoming ACK, if state is not "Open"
 * - when arrived ACK is unusual, namely:
 *	* SACK
 *	* Duplicate ACK.
 *	* ECN ECE.
 *
 * Counting packets in flight is pretty simple.
 *
 *	in_flight = packets_out - left_out + retrans_out
 *
 *	packets_out is SND.NXT-SND.UNA counted in packets.
 *	retrans_out is number of retransmitted segments.
 *	left_out is number of segments left network, but not ACKed yet.
 *
 *		left_out = sacked_out + lost_out
 *
 *     sacked_out: Packets, which arrived to receiver out of order
 *		   and hence not ACKed. With SACKs this number is simply
 *		   amount of SACKed data. Even without SACKs
 *		   it is easy to give pretty reliable estimate of this number,
 *		   counting duplicate ACKs.
 *
 *       lost_out: Packets lost by network. TCP has no explicit
 *		   "loss notification" feedback from network (for now).
 *		   It means that this number can be only _guessed_.
 *		   Actually, it is the heuristics to predict lossage that
 *		   distinguishes different algorithms.
 *
 * The decision to enter Recovery is made either by RACK loss detection
 * or, in the classic heuristic, when the number of SACKed/dup-ACKed
 * segments exceeds the reordering degree (see tcp_time_to_recover()).
 */
static bool tcp_time_to_recover(struct sock *sk, int flag)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Trick#1: The loss is proven. */
	if (tp->lost_out)
		return true;

	/* Not-A-Trick#2 : Classic rule... */
	if (!tcp_is_rack(sk) && tcp_dupack_heuristics(tp) > tp->reordering)
		return true;

	return false;
}
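/* Detect loss in event "A" above by marking head of queue up as lost.
 * For RFC3517 SACK, a segment is considered lost if it
 * has at least tp->reordering SACKed segments above it; "packets" refers to
 * the max sacked segment count to mark.
 */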
static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int cnt;
	/* Use SACK to deduce losses of new sequences sent during recovery */
	const u32 loss_high = tp->snd_nxt;

	WARN_ON(packets > tp->packets_out);
	skb = tp->lost_skb_hint;
	if (skb) {
		/* Head already handled? */
		if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
			return;
		cnt = tp->lost_cnt_hint;
	} else {
		skb = tcp_rtx_queue_head(sk);
		cnt = 0;
	}

	skb_rbtree_walk_from(skb) {
		/* TODO: do this better */
		/* this is not the most efficient way to do this... */
		tp->lost_skb_hint = skb;
		tp->lost_cnt_hint = cnt;

		if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
			break;

		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
			cnt += tcp_skb_pcount(skb);

		if (cnt > packets)
			break;

		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST))
			tcp_mark_skb_lost(sk, skb);

		if (mark_head)
			break;
	}
	tcp_verify_left_out(tp);
}

/* Account newly detected lost packet(s) */
static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tcp_is_sack(tp)) {
		int sacked_upto = tp->sacked_out - tp->reordering;
		if (sacked_upto >= 0)
			tcp_mark_head_lost(sk, sacked_upto, 0);
		else if (fast_rexmit)
			tcp_mark_head_lost(sk, 1, 1);
	}
}

static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when)
{
	return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
	       before(tp->rx_opt.rcv_tsecr, when);
}

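/* skb is spurious retransmitted if the returned timestamp echo
 * reply is prior to the skb transmission time
 */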
static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
				     const struct sk_buff *skb)
{
	return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) &&
	       tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb));
}

/* Nothing was retransmitted or returned timestamp is less
 * than timestamp of the first retransmission.
 */
static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
{
	return tp->retrans_stamp &&
	       tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
}

/* Undo procedures. */

/* We can clear retrans_stamp only when there are no retransmissions still
 * in flight. tp->retrans_out alone is not reliable for this: if sending a
 * retransmission itself fails, retrans_out stays zero while the skb is
 * still tagged TCPCB_EVER_RETRANS, hence the rtx queue head check below.
 */
static bool tcp_any_retrans_done(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	if (tp->retrans_out)
		return true;

	skb = tcp_rtx_queue_head(sk);
	if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
		return true;

	return false;
}

static void DBGUNDO(struct sock *sk, const char *msg)
{
#if FASTRETRANS_DEBUG > 1
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_sock *inet = inet_sk(sk);

	if (sk->sk_family == AF_INET) {
		pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
			 msg,
			 &inet->inet_daddr, ntohs(inet->inet_dport),
			 tp->snd_cwnd, tcp_left_out(tp),
			 tp->snd_ssthresh, tp->prior_ssthresh,
			 tp->packets_out);
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (sk->sk_family == AF_INET6) {
		pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
			 msg,
			 &sk->sk_v6_daddr, ntohs(inet->inet_dport),
			 tp->snd_cwnd, tcp_left_out(tp),
			 tp->snd_ssthresh, tp->prior_ssthresh,
			 tp->packets_out);
	}
#endif
#endif
}

static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (unmark_loss) {
		struct sk_buff *skb;

		skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
		}
		tp->lost_out = 0;
		tcp_clear_all_retrans_hints(tp);
	}

	if (tp->prior_ssthresh) {
		const struct inet_connection_sock *icsk = inet_csk(sk);

		tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);

		if (tp->prior_ssthresh > tp->snd_ssthresh) {
			tp->snd_ssthresh = tp->prior_ssthresh;
			tcp_ecn_withdraw_cwr(tp);
		}
	}
	tp->snd_cwnd_stamp = tcp_jiffies32;
	tp->undo_marker = 0;
	tp->rack.advanced = 1; /* Force RACK to re-exam losses */
}

static inline bool tcp_may_undo(const struct tcp_sock *tp)
{
	return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
}

/* People celebrate: "We love our President!" */
static bool tcp_try_undo_recovery(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tcp_may_undo(tp)) {
		int mib_idx;

		/* Happy end! We did not retransmit anything
		 * or our original transmission succeeded.
		 */
		DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
		tcp_undo_cwnd_reduction(sk, false);
		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
			mib_idx = LINUX_MIB_TCPLOSSUNDO;
		else
			mib_idx = LINUX_MIB_TCPFULLUNDO;

		NET_INC_STATS(sock_net(sk), mib_idx);
	} else if (tp->rack.reo_wnd_persist) {
		tp->rack.reo_wnd_persist--;
	}
	if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
		/* Hold old state until something *above* high_seq
		 * is ACKed. For Reno it is MUST to prevent false
		 * fast retransmits (RFC2582). SACK TCP is safe. */
		if (!tcp_any_retrans_done(sk))
			tp->retrans_stamp = 0;
		return true;
	}
	tcp_set_ca_state(sk, TCP_CA_Open);
	tp->is_sack_reneg = 0;
	return false;
}

/* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */
static bool tcp_try_undo_dsack(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tp->undo_marker && !tp->undo_retrans) {
		tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
					       tp->rack.reo_wnd_persist + 1);
		DBGUNDO(sk, "D-SACK");
		tcp_undo_cwnd_reduction(sk, false);
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
		return true;
	}
	return false;
}

/* Undo during loss recovery after partial ACK or using F-RTO. */
static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (frto_undo || tcp_may_undo(tp)) {
		tcp_undo_cwnd_reduction(sk, true);

		DBGUNDO(sk, "partial loss");
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
		if (frto_undo)
			NET_INC_STATS(sock_net(sk),
				      LINUX_MIB_TCPSPURIOUSRTOS);
		inet_csk(sk)->icsk_retransmits = 0;
		if (frto_undo || tcp_is_sack(tp)) {
			tcp_set_ca_state(sk, TCP_CA_Open);
			tp->is_sack_reneg = 0;
		}
		return true;
	}
	return false;
}
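/* The cwnd reduction in CWR and Recovery uses the PRR algorithm in RFC 6937.
 * It computes the number of packets to send (sndcnt) based on packets newly
 * delivered:
 *   1) If the packets in flight is larger than ssthresh, PRR spreads the
 *	cwnd reductions across a full RTT.
 *   2) Otherwise PRR uses packet conservation to send as much as delivered.
 *	But when SND_UNA is acked without further losses,
 *	slow starts cwnd up to ssthresh to speed up the recovery.
 */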
static void tcp_init_cwnd_reduction(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tp->high_seq = tp->snd_nxt;
	tp->tlp_high_seq = 0;
	tp->snd_cwnd_cnt = 0;
	tp->prior_cwnd = tp->snd_cwnd;
	tp->prr_delivered = 0;
	tp->prr_out = 0;
	tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
	tcp_ecn_queue_cwr(tp);
}

void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int sndcnt = 0;
	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);

	if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
		return;

	tp->prr_delivered += newly_acked_sacked;
	if (delta < 0) {
		/* Reduction in progress: spread it across the RTT */
		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
			       tp->prior_cwnd - 1;
		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
	} else if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost) {
		sndcnt = min_t(int, delta,
			       max_t(int, tp->prr_delivered - tp->prr_out,
				     newly_acked_sacked) + 1);
	} else {
		sndcnt = min(delta, newly_acked_sacked);
	}
	/* Force a fast retransmit upon entering fast recovery */
	sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
}

static inline void tcp_end_cwnd_reduction(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (inet_csk(sk)->icsk_ca_ops->cong_control)
		return;

	/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
	if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
	    (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
		tp->snd_cwnd = tp->snd_ssthresh;
		tp->snd_cwnd_stamp = tcp_jiffies32;
	}
	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
}

/* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */
void tcp_enter_cwr(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tp->prior_ssthresh = 0;
	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
		tp->undo_marker = 0;
		tcp_init_cwnd_reduction(sk);
		tcp_set_ca_state(sk, TCP_CA_CWR);
	}
}
EXPORT_SYMBOL(tcp_enter_cwr);

static void tcp_try_keep_open(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int state = TCP_CA_Open;

	if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
		state = TCP_CA_Disorder;

	if (inet_csk(sk)->icsk_ca_state != state) {
		tcp_set_ca_state(sk, state);
		tp->high_seq = tp->snd_nxt;
	}
}

static void tcp_try_to_open(struct sock *sk, int flag)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_verify_left_out(tp);

	if (!tcp_any_retrans_done(sk))
		tp->retrans_stamp = 0;

	if (flag & FLAG_ECE)
		tcp_enter_cwr(sk);

	if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
		tcp_try_keep_open(sk);
	}
}

static void tcp_mtup_probe_failed(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
	icsk->icsk_mtup.probe_size = 0;
	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPFAIL);
}

static void tcp_mtup_probe_success(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	/* FIXME: breaks with very large cwnd */
	tp->prior_ssthresh = tcp_current_ssthresh(sk);
	tp->snd_cwnd = tp->snd_cwnd *
		       tcp_mss_to_mtu(sk, tp->mss_cache) /
		       icsk->icsk_mtup.probe_size;
	tp->snd_cwnd_cnt = 0;
	tp->snd_cwnd_stamp = tcp_jiffies32;
	tp->snd_ssthresh = tcp_current_ssthresh(sk);

	icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
	icsk->icsk_mtup.probe_size = 0;
	tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
}
/* Do a simple retransmit without using the backoff mechanisms in
 * tcp_timer. This is used for path mtu discovery.
 * The socket is already locked here.
 */
2687void tcp_simple_retransmit(struct sock *sk)
2688{
2689 const struct inet_connection_sock *icsk = inet_csk(sk);
2690 struct tcp_sock *tp = tcp_sk(sk);
2691 struct sk_buff *skb;
2692 int mss;
2693
 /* A fastopen SYN request is stored as two separate packets within
  * the retransmit queue, this is done by tcp_send_syn_data().
  * As a result simply checking the MSS of the frames in the queue
  * will not work for the SYN packet.
  *
  * Us being here is an indication of a path MTU issue so we can
  * assume that the fastopen SYN was lost and just mark all the
  * frames in the retransmit queue as lost: an MSS of -1 is smaller
  * than any real segment length, so every frame qualifies.
  */
2704 if (tp->syn_data && sk->sk_state == TCP_SYN_SENT)
2705 mss = -1;
2706 else
2707 mss = tcp_current_mss(sk);
2708
2709 skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
2710 if (tcp_skb_seglen(skb) > mss)
2711 tcp_mark_skb_lost(sk, skb);
2712 }
2713
2714 tcp_clear_retrans_hints_partial(tp);
2715
2716 if (!tp->lost_out)
2717 return;
2718
2719 if (tcp_is_reno(tp))
2720 tcp_limit_reno_sacked(tp);
2721
2722 tcp_verify_left_out(tp);
2723
 /* Don't muck with the congestion window here.
  * Reason is that we do not increase amount of _data_
  * in network, but units changed and effective
  * cwnd/ssthresh really reduced now.
  */
2729 if (icsk->icsk_ca_state != TCP_CA_Loss) {
2730 tp->high_seq = tp->snd_nxt;
2731 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2732 tp->prior_ssthresh = 0;
2733 tp->undo_marker = 0;
2734 tcp_set_ca_state(sk, TCP_CA_Loss);
2735 }
2736 tcp_xmit_retransmit_queue(sk);
2737}
2738EXPORT_SYMBOL(tcp_simple_retransmit);
2739
2740void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2741{
2742 struct tcp_sock *tp = tcp_sk(sk);
2743 int mib_idx;
2744
2745 if (tcp_is_reno(tp))
2746 mib_idx = LINUX_MIB_TCPRENORECOVERY;
2747 else
2748 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
2749
2750 NET_INC_STATS(sock_net(sk), mib_idx);
2751
2752 tp->prior_ssthresh = 0;
2753 tcp_init_undo(tp);
2754
2755 if (!tcp_in_cwnd_reduction(sk)) {
2756 if (!ece_ack)
2757 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2758 tcp_init_cwnd_reduction(sk);
2759 }
2760 tcp_set_ca_state(sk, TCP_CA_Recovery);
2761}
2762
/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
 * recovered or spurious. Otherwise retransmits more on partial ACKs.
 */
2766static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
2767 int *rexmit)
2768{
2769 struct tcp_sock *tp = tcp_sk(sk);
2770 bool recovered = !before(tp->snd_una, tp->high_seq);
2771
2772 if ((flag & FLAG_SND_UNA_ADVANCED || rcu_access_pointer(tp->fastopen_rsk)) &&
2773 tcp_try_undo_loss(sk, false))
2774 return;
2775
2776 if (tp->frto) {
 /* F-RTO (RFC 5682) step 3.b: the timeout was spurious if data
  * that was never retransmitted got (s)acked.
  */
2780 if ((flag & FLAG_ORIG_SACK_ACKED) &&
2781 tcp_try_undo_loss(sk, true))
2782 return;
2783
2784 if (after(tp->snd_nxt, tp->high_seq)) {
2785 if (flag & FLAG_DATA_SACKED || num_dupack)
2786 tp->frto = 0;
2787 } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
2788 tp->high_seq = tp->snd_nxt;
 /* Step 2.b. Try send new data (but deferred until cwnd
  * is updated in tcp_ack()); otherwise fall back to
  * fast recovery.
  */
2793 if (!tcp_write_queue_empty(sk) &&
2794 after(tcp_wnd_end(tp), tp->snd_nxt)) {
2795 *rexmit = REXMIT_NEW;
2796 return;
2797 }
2798 tp->frto = 0;
2799 }
2800 }
2801
2802 if (recovered) {
 /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
2804 tcp_try_undo_recovery(sk);
2805 return;
2806 }
2807 if (tcp_is_reno(tp)) {
 /* A Reno DUPACK means new data in F-RTO step 2.b above are
  * delivered. Lower inflight to clock out (re)transmissions.
  */
2811 if (after(tp->snd_nxt, tp->high_seq) && num_dupack)
2812 tcp_add_reno_sack(sk, num_dupack, flag & FLAG_ECE);
2813 else if (flag & FLAG_SND_UNA_ADVANCED)
2814 tcp_reset_reno_sack(tp);
2815 }
2816 *rexmit = REXMIT_LOST;
2817}
2818
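/* The highest SACKed sequence runs more than one reordering window past
 * snd_una, so the hole is presumed lost rather than reordered: force a
 * fast retransmit without waiting for more duplicate ACKs.
 */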
2819static bool tcp_force_fast_retransmit(struct sock *sk)
2820{
2821 struct tcp_sock *tp = tcp_sk(sk);
2822
2823 return after(tcp_highest_sack_seq(tp),
2824 tp->snd_una + tp->reordering * tp->mss_cache);
2825}
2826
/* Undo during fast recovery after partial ACK. */
2828static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una,
2829 bool *do_lost)
2830{
2831 struct tcp_sock *tp = tcp_sk(sk);
2832
2833 if (tp->undo_marker && tcp_packet_delayed(tp)) {
 /* Plain luck! Hole if filled with delayed
  * packet, rather than with a retransmit. Check reordering.
  */
2837 tcp_check_sack_reordering(sk, prior_snd_una, 1);
2838
 /* We are getting evidence that the reordering degree is higher
  * than we realized. If there are no retransmits out then we
  * can undo. Otherwise we clock out new packets but do not
  * mark more packets lost or retransmit more.
  */
2844 if (tp->retrans_out)
2845 return true;
2846
2847 if (!tcp_any_retrans_done(sk))
2848 tp->retrans_stamp = 0;
2849
2850 DBGUNDO(sk, "partial recovery");
2851 tcp_undo_cwnd_reduction(sk, true);
2852 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2853 tcp_try_keep_open(sk);
2854 } else {
 /* Partial ACK arrived. Force fast retransmit. */
2856 *do_lost = tcp_force_fast_retransmit(sk);
2857 }
2858 return false;
2859}
2860
2861static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
2862{
2863 struct tcp_sock *tp = tcp_sk(sk);
2864
2865 if (tcp_rtx_queue_empty(sk))
2866 return;
2867
2868 if (unlikely(tcp_is_reno(tp))) {
2869 tcp_newreno_mark_lost(sk, *ack_flag & FLAG_SND_UNA_ADVANCED);
2870 } else if (tcp_is_rack(sk)) {
2871 u32 prior_retrans = tp->retrans_out;
2872
2873 if (tcp_rack_mark_lost(sk))
2874 *ack_flag &= ~FLAG_SET_XMIT_TIMER;
2875 if (prior_retrans > tp->retrans_out)
2876 *ack_flag |= FLAG_LOST_RETRANS;
2877 }
2878}
2879
/* Process an event, which can update packets-in-flight not trivially.
 * Main goal of this function is to calculate new estimate for left_out,
 * taking into account both packets sitting in receiver's buffer and
 * packets which are "in flight" now, and to reduce (or not reduce)
 * cwnd accordingly.
 *
 * tcp_fastretrans_alert() is entered:
 * - on each incoming ACK, if state is not "Open"
 * - when an arrived ACK is unusual, namely:
 *	* SACK
 *	* Duplicate ACK.
 *	* ECN ECE.
 */
2892static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
2893 int num_dupack, int *ack_flag, int *rexmit)
2894{
2895 struct inet_connection_sock *icsk = inet_csk(sk);
2896 struct tcp_sock *tp = tcp_sk(sk);
2897 int fast_rexmit = 0, flag = *ack_flag;
2898 bool ece_ack = flag & FLAG_ECE;
2899 bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
2900 tcp_force_fast_retransmit(sk));
2901
2902 if (!tp->packets_out && tp->sacked_out)
2903 tp->sacked_out = 0;
2904
 /* Now the state machine starts.
  * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
2907 if (ece_ack)
2908 tp->prior_ssthresh = 0;
2909
 /* B. In all the states check for reneging SACKs. */
2911 if (tcp_check_sack_reneging(sk, flag))
2912 return;
2913
 /* C. Check consistency of the current state. */
2915 tcp_verify_left_out(tp);
2916
 /* D. Check state exit conditions. State can be terminated
  *    when high_seq is ACKed. */
2919 if (icsk->icsk_ca_state == TCP_CA_Open) {
2920 WARN_ON(tp->retrans_out != 0 && !tp->syn_data);
2921 tp->retrans_stamp = 0;
2922 } else if (!before(tp->snd_una, tp->high_seq)) {
2923 switch (icsk->icsk_ca_state) {
2924 case TCP_CA_CWR:
 /* CWR is to be held something *above* high_seq
  * is ACKed for CWR bit to reach receiver. */
2927 if (tp->snd_una != tp->high_seq) {
2928 tcp_end_cwnd_reduction(sk);
2929 tcp_set_ca_state(sk, TCP_CA_Open);
2930 }
2931 break;
2932
2933 case TCP_CA_Recovery:
2934 if (tcp_is_reno(tp))
2935 tcp_reset_reno_sack(tp);
2936 if (tcp_try_undo_recovery(sk))
2937 return;
2938 tcp_end_cwnd_reduction(sk);
2939 break;
2940 }
2941 }
2942
 /* E. Process state. */
2944 switch (icsk->icsk_ca_state) {
2945 case TCP_CA_Recovery:
2946 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
2947 if (tcp_is_reno(tp))
2948 tcp_add_reno_sack(sk, num_dupack, ece_ack);
2949 } else if (tcp_try_undo_partial(sk, prior_snd_una, &do_lost))
2950 return;
2951
2952 if (tcp_try_undo_dsack(sk))
2953 tcp_try_keep_open(sk);
2954
2955 tcp_identify_packet_loss(sk, ack_flag);
2956 if (icsk->icsk_ca_state != TCP_CA_Recovery) {
2957 if (!tcp_time_to_recover(sk, flag))
2958 return;
2959
 /* An undo moved us out of Recovery, but losses still
  * warrant recovery: step back in. */
2962 tcp_enter_recovery(sk, ece_ack);
2963 }
2964 break;
2965 case TCP_CA_Loss:
2966 tcp_process_loss(sk, flag, num_dupack, rexmit);
2967 tcp_identify_packet_loss(sk, ack_flag);
2968 if (!(icsk->icsk_ca_state == TCP_CA_Open ||
2969 (*ack_flag & FLAG_LOST_RETRANS)))
2970 return;
2971
2972 fallthrough;
2973 default:
2974 if (tcp_is_reno(tp)) {
2975 if (flag & FLAG_SND_UNA_ADVANCED)
2976 tcp_reset_reno_sack(tp);
2977 tcp_add_reno_sack(sk, num_dupack, ece_ack);
2978 }
2979
2980 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
2981 tcp_try_undo_dsack(sk);
2982
2983 tcp_identify_packet_loss(sk, ack_flag);
2984 if (!tcp_time_to_recover(sk, flag)) {
2985 tcp_try_to_open(sk, flag);
2986 return;
2987 }
2988
 /* MTU probe failure: don't reduce cwnd */
2990 if (icsk->icsk_ca_state < TCP_CA_CWR &&
2991 icsk->icsk_mtup.probe_size &&
2992 tp->snd_una == tp->mtu_probe.probe_seq_start) {
2993 tcp_mtup_probe_failed(sk);
 /* Restores the reduction we did in tcp_mtup_probe() */
2995 tp->snd_cwnd++;
2996 tcp_simple_retransmit(sk);
2997 return;
2998 }
2999
 /* Otherwise enter Recovery state */
3001 tcp_enter_recovery(sk, ece_ack);
3002 fast_rexmit = 1;
3003 }
3004
3005 if (!tcp_is_rack(sk) && do_lost)
3006 tcp_update_scoreboard(sk, fast_rexmit);
3007 *rexmit = REXMIT_LOST;
3008}
3009
3010static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
3011{
3012 u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
3013 struct tcp_sock *tp = tcp_sk(sk);
3014
3015 if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
 /* An ACK that may have been delayed (e.g. a lone runt carrying
  * no SACK/CE information) would inflate the RTT sample; only
  * let it lower, never raise, the min RTT estimate.
  */
3020 return;
3021 }
3022 minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
3023 rtt_us ? : jiffies_to_usecs(1));
3024}
3025
3026static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
3027 long seq_rtt_us, long sack_rtt_us,
3028 long ca_rtt_us, struct rate_sample *rs)
3029{
3030 const struct tcp_sock *tp = tcp_sk(sk);
3031
 /* Prefer RTT measured from ACK's timing to TS-ECR. This is because
  * broken middle-boxes or peers may corrupt TS-ECR fields. But
  * Karn's algorithm forbids taking RTT if some retransmitted data
  * is acked (RFC6298).
  */
3037 if (seq_rtt_us < 0)
3038 seq_rtt_us = sack_rtt_us;
3039
 /* RTTM Rule: A TSecr value received in a segment is used to
  * update the averaged RTT measurement only if the segment
  * acknowledges some new data, i.e., only if it advances the
  * left edge of the send window.
  * See draft-ietf-tcplw-high-performance-00, section 3.3.
  */
3046 if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
3047 flag & FLAG_ACKED) {
3048 u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
3049
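 /* rcv_tsecr is in TCP_TS_HZ ticks; convert the echo delay to usecs,
  * clamping a zero delta to one tick and discarding values that would
  * overflow the conversion (e.g. a corrupted or very stale echo).
  */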
3050 if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
3051 if (!delta)
3052 delta = 1;
3053 seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
3054 ca_rtt_us = seq_rtt_us;
3055 }
3056 }
3057 rs->rtt_us = ca_rtt_us;
3058 if (seq_rtt_us < 0)
3059 return false;
3060
 /* ca_rtt_us >= 0 is counting on the invariant that ca_rtt_us is
  * always taken together with ACK, SACK, or TS-opts. Any negative
  * values will be skipped with the seq_rtt_us < 0 check above.
  */
3065 tcp_update_rtt_min(sk, ca_rtt_us, flag);
3066 tcp_rtt_estimator(sk, seq_rtt_us);
3067 tcp_set_rto(sk);
3068
 /* RFC6298: only reset backoff on valid RTT measurement. */
3070 inet_csk(sk)->icsk_backoff = 0;
3071 return true;
3072}
3073
/* Compute time elapsed between (last) SYNACK skb and ack */
3075void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
3076{
3077 struct rate_sample rs;
3078 long rtt_us = -1L;
3079
3080 if (req && !req->num_retrans && tcp_rsk(req)->snt_synack)
3081 rtt_us = tcp_stamp_us_delta(tcp_clock_us(), tcp_rsk(req)->snt_synack);
3082
3083 tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us, &rs);
3084}
3085
3086
3087static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
3088{
3089 const struct inet_connection_sock *icsk = inet_csk(sk);
3090
3091 icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
3092 tcp_sk(sk)->snd_cwnd_stamp = tcp_jiffies32;
3093}
3094
/* Restart timer after forward progress on connection.
 * RFC2988 recommends to restart timer to now+rto.
 */
3098void tcp_rearm_rto(struct sock *sk)
3099{
3100 const struct inet_connection_sock *icsk = inet_csk(sk);
3101 struct tcp_sock *tp = tcp_sk(sk);
3102
 /* If the retrans timer is currently being used by Fast Open
  * for SYN-ACK retrans purpose, stay put.
  */
3106 if (rcu_access_pointer(tp->fastopen_rsk))
3107 return;
3108
3109 if (!tp->packets_out) {
3110 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
3111 } else {
3112 u32 rto = inet_csk(sk)->icsk_rto;
3113
3114 if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
3115 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
3116 s64 delta_us = tcp_rto_delta_us(sk);
3117
 /* delta_us may not be positive if the socket is locked
  * when the retrans timer fires and is rescheduled. */
3120 rto = usecs_to_jiffies(max_t(int, delta_us, 1));
3121 }
3122 tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
3123 TCP_RTO_MAX);
3124 }
3125}
3126
3127
3128static void tcp_set_xmit_timer(struct sock *sk)
3129{
3130 if (!tcp_schedule_loss_probe(sk, true))
3131 tcp_rearm_rto(sk);
3132}
3133
/* If we get here, the whole TSO packet has not been acked. */
3135static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3136{
3137 struct tcp_sock *tp = tcp_sk(sk);
3138 u32 packets_acked;
3139
3140 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
3141
3142 packets_acked = tcp_skb_pcount(skb);
3143 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
3144 return 0;
3145 packets_acked -= tcp_skb_pcount(skb);
3146
3147 if (packets_acked) {
3148 BUG_ON(tcp_skb_pcount(skb) == 0);
3149 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
3150 }
3151
3152 return packets_acked;
3153}
3154
3155static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
3156 const struct sk_buff *ack_skb, u32 prior_snd_una)
3157{
3158 const struct skb_shared_info *shinfo;
3159
 /* Avoid cache line misses to get skb_shinfo() and shinfo->tx_flags */
3161 if (likely(!TCP_SKB_CB(skb)->txstamp_ack))
3162 return;
3163
3164 shinfo = skb_shinfo(skb);
3165 if (!before(shinfo->tskey, prior_snd_una) &&
3166 before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
3167 tcp_skb_tsorted_save(skb) {
3168 __skb_tstamp_tx(skb, ack_skb, NULL, sk, SCM_TSTAMP_ACK);
3169 } tcp_skb_tsorted_restore(skb);
3170 }
3171}
3172
/* Remove acknowledged frames from the retransmission queue. If our packet
 * is before the ack sequence we can discard it as it's confirmed to have
 * arrived at the other end.
 */
3177static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
3178 u32 prior_fack, u32 prior_snd_una,
3179 struct tcp_sacktag_state *sack, bool ece_ack)
3180{
3181 const struct inet_connection_sock *icsk = inet_csk(sk);
3182 u64 first_ackt, last_ackt;
3183 struct tcp_sock *tp = tcp_sk(sk);
3184 u32 prior_sacked = tp->sacked_out;
3185 u32 reord = tp->snd_nxt;
3186 struct sk_buff *skb, *next;
3187 bool fully_acked = true;
3188 long sack_rtt_us = -1L;
3189 long seq_rtt_us = -1L;
3190 long ca_rtt_us = -1L;
3191 u32 pkts_acked = 0;
3192 u32 last_in_flight = 0;
3193 bool rtt_update;
3194 int flag = 0;
3195
3196 first_ackt = 0;
3197
3198 for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
3199 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3200 const u32 start_seq = scb->seq;
3201 u8 sacked = scb->sacked;
3202 u32 acked_pcount;
3203
 /* Determine how many packets and what bytes were acked, tso and else */
3205 if (after(scb->end_seq, tp->snd_una)) {
3206 if (tcp_skb_pcount(skb) == 1 ||
3207 !after(tp->snd_una, scb->seq))
3208 break;
3209
3210 acked_pcount = tcp_tso_acked(sk, skb);
3211 if (!acked_pcount)
3212 break;
3213 fully_acked = false;
3214 } else {
3215 acked_pcount = tcp_skb_pcount(skb);
3216 }
3217
3218 if (unlikely(sacked & TCPCB_RETRANS)) {
3219 if (sacked & TCPCB_SACKED_RETRANS)
3220 tp->retrans_out -= acked_pcount;
3221 flag |= FLAG_RETRANS_DATA_ACKED;
3222 } else if (!(sacked & TCPCB_SACKED_ACKED)) {
3223 last_ackt = tcp_skb_timestamp_us(skb);
3224 WARN_ON_ONCE(last_ackt == 0);
3225 if (!first_ackt)
3226 first_ackt = last_ackt;
3227
3228 last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
3229 if (before(start_seq, reord))
3230 reord = start_seq;
3231 if (!after(scb->end_seq, tp->high_seq))
3232 flag |= FLAG_ORIG_SACK_ACKED;
3233 }
3234
3235 if (sacked & TCPCB_SACKED_ACKED) {
3236 tp->sacked_out -= acked_pcount;
3237 } else if (tcp_is_sack(tp)) {
3238 tcp_count_delivered(tp, acked_pcount, ece_ack);
3239 if (!tcp_skb_spurious_retrans(tp, skb))
3240 tcp_rack_advance(tp, sacked, scb->end_seq,
3241 tcp_skb_timestamp_us(skb));
3242 }
3243 if (sacked & TCPCB_LOST)
3244 tp->lost_out -= acked_pcount;
3245
3246 tp->packets_out -= acked_pcount;
3247 pkts_acked += acked_pcount;
3248 tcp_rate_skb_delivered(sk, skb, sack->rate);
3249
 /* A SYN carries no data: acking it confirms the handshake
  * (FLAG_SYN_ACKED) and clears retrans_stamp, while any other
  * fully acked skb counts as acked data.
  */
3257 if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
3258 flag |= FLAG_DATA_ACKED;
3259 } else {
3260 flag |= FLAG_SYN_ACKED;
3261 tp->retrans_stamp = 0;
3262 }
3263
3264 if (!fully_acked)
3265 break;
3266
3267 tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una);
3268
3269 next = skb_rb_next(skb);
3270 if (unlikely(skb == tp->retransmit_skb_hint))
3271 tp->retransmit_skb_hint = NULL;
3272 if (unlikely(skb == tp->lost_skb_hint))
3273 tp->lost_skb_hint = NULL;
3274 tcp_highest_sack_replace(sk, skb, next);
3275 tcp_rtx_queue_unlink_and_free(skb, sk);
3276 }
3277
3278 if (!skb)
3279 tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
3280
3281 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3282 tp->snd_up = tp->snd_una;
3283
3284 if (skb) {
3285 tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una);
3286 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
3287 flag |= FLAG_SACK_RENEGING;
3288 }
3289
3290 if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
3291 seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
3292 ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
3293
3294 if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
3295 last_in_flight && !prior_sacked && fully_acked &&
3296 sack->rate->prior_delivered + 1 == tp->delivered &&
3297 !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
 /* Conservatively mark a delayed ACK. It's typically
  * from a lone runt packet over the round trip to
  * a receiver w/o out-of-order or CE information.
  */
3302 flag |= FLAG_ACK_MAYBE_DELAYED;
3303 }
3304 }
3305 if (sack->first_sackt) {
3306 sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt);
3307 ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->last_sackt);
3308 }
3309 rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
3310 ca_rtt_us, sack->rate);
3311
3312 if (flag & FLAG_ACKED) {
3313 flag |= FLAG_SET_XMIT_TIMER;
3314 if (unlikely(icsk->icsk_mtup.probe_size &&
3315 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3316 tcp_mtup_probe_success(sk);
3317 }
3318
3319 if (tcp_is_reno(tp)) {
3320 tcp_remove_reno_sacks(sk, pkts_acked, ece_ack);
3321
 /* If any of the cumulatively ACKed segments was
  * retransmitted, non-SACK case cannot confirm that
  * progress was due to original transmission due to
  * lack of TCPCB_SACKED_ACKED bits even if some of
  * the packets may have been never retransmitted.
  */
3328 if (flag & FLAG_RETRANS_DATA_ACKED)
3329 flag &= ~FLAG_ORIG_SACK_ACKED;
3330 } else {
3331 int delta;
3332
 /* Non-retransmitted hole got filled? That's reordering */
3334 if (before(reord, prior_fack))
3335 tcp_check_sack_reordering(sk, reord, 0);
3336
3337 delta = prior_sacked - tp->sacked_out;
3338 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3339 }
3340 } else if (skb && rtt_update && sack_rtt_us >= 0 &&
3341 sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp,
3342 tcp_skb_timestamp_us(skb))) {
 /* Do not re-arm RTO when the sack RTT is measured from data sent
  * after when the head was last (re)transmitted. Otherwise the
  * timeout may continue to extend in loss recovery.
  */
3347 flag |= FLAG_SET_XMIT_TIMER;
3348 }
3349
3350 if (icsk->icsk_ca_ops->pkts_acked) {
3351 struct ack_sample sample = { .pkts_acked = pkts_acked,
3352 .rtt_us = sack->rate->rtt_us,
3353 .in_flight = last_in_flight };
3354
3355 icsk->icsk_ca_ops->pkts_acked(sk, &sample);
3356 }
3357
3358#if FASTRETRANS_DEBUG > 0
3359 WARN_ON((int)tp->sacked_out < 0);
3360 WARN_ON((int)tp->lost_out < 0);
3361 WARN_ON((int)tp->retrans_out < 0);
3362 if (!tp->packets_out && tcp_is_sack(tp)) {
3363 icsk = inet_csk(sk);
3364 if (tp->lost_out) {
3365 pr_debug("Leak l=%u %d\n",
3366 tp->lost_out, icsk->icsk_ca_state);
3367 tp->lost_out = 0;
3368 }
3369 if (tp->sacked_out) {
3370 pr_debug("Leak s=%u %d\n",
3371 tp->sacked_out, icsk->icsk_ca_state);
3372 tp->sacked_out = 0;
3373 }
3374 if (tp->retrans_out) {
3375 pr_debug("Leak r=%u %d\n",
3376 tp->retrans_out, icsk->icsk_ca_state);
3377 tp->retrans_out = 0;
3378 }
3379 }
3380#endif
3381 return flag;
3382}
3383
3384static void tcp_ack_probe(struct sock *sk)
3385{
3386 struct inet_connection_sock *icsk = inet_csk(sk);
3387 struct sk_buff *head = tcp_send_head(sk);
3388 const struct tcp_sock *tp = tcp_sk(sk);
3389
 /* Was it a usable window open? */
3391 if (!head)
3392 return;
3393 if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
3394 icsk->icsk_backoff = 0;
3395 icsk->icsk_probes_tstamp = 0;
3396 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
 /* Socket must be waked up by subsequent tcp_data_snd_check().
  * This function is not for random using!
  */
3400 } else {
3401 unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
3402
3403 when = tcp_clamp_probe0_to_user_timeout(sk, when);
3404 tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX);
3405 }
3406}
3407
3408static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
3409{
3410 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3411 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3412}
3413
/* Decide whether to run the increase function of congestion control. */
3415static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3416{
 /* If reordering is high then always grow cwnd whenever data is
  * delivered regardless of its ordering. Otherwise stay conservative
  * and only grow cwnd on in-order delivery (RFC5681). A stretched ACK w/
  * new SACK or ECE mark may first advance cwnd here and later reduce
  * cwnd in tcp_fastretrans_alert() based on more states.
  */
3423 if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
3424 return flag & FLAG_FORWARD_PROGRESS;
3425
3426 return flag & FLAG_DATA_ACKED;
3427}
3428
/* The "ultimate" congestion control function that aims to replace the rigid
 * cwnd increase and decrease control (tcp_cong_avoid,tcp_cwnd_reduction).
 * It's called toward the end of processing an ACK with precise rate
 * information. All transmission or retransmission are delayed afterwards.
 */
3434static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
3435 int flag, const struct rate_sample *rs)
3436{
3437 const struct inet_connection_sock *icsk = inet_csk(sk);
3438
3439 if (icsk->icsk_ca_ops->cong_control) {
3440 icsk->icsk_ca_ops->cong_control(sk, rs);
3441 return;
3442 }
3443
3444 if (tcp_in_cwnd_reduction(sk)) {
 /* Reduce cwnd if state mandates */
3446 tcp_cwnd_reduction(sk, acked_sacked, rs->losses, flag);
3447 } else if (tcp_may_raise_cwnd(sk, flag)) {
 /* Advance cwnd if state allows */
3449 tcp_cong_avoid(sk, ack, acked_sacked);
3450 }
3451 tcp_update_pacing_rate(sk);
3452}
3453
/* Check that window update is acceptable.
 * The function assumes that snd_una<=ack<=snd_next.
 */
3457static inline bool tcp_may_update_window(const struct tcp_sock *tp,
3458 const u32 ack, const u32 ack_seq,
3459 const u32 nwin)
3460{
3461 return after(ack, tp->snd_una) ||
3462 after(ack_seq, tp->snd_wl1) ||
3463 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
3464}
3465
/* If we update tp->snd_una, also update tp->bytes_acked */
3467static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
3468{
3469 u32 delta = ack - tp->snd_una;
3470
3471 sock_owned_by_me((struct sock *)tp);
3472 tp->bytes_acked += delta;
3473 tp->snd_una = ack;
3474}
3475
/* If we update tp->rcv_nxt, also update tp->bytes_received */
3477static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
3478{
3479 u32 delta = seq - tp->rcv_nxt;
3480
3481 sock_owned_by_me((struct sock *)tp);
3482 tp->bytes_received += delta;
3483 WRITE_ONCE(tp->rcv_nxt, seq);
3484}
3485
/* Update our send window.
 *
 * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
 * and in FreeBSD. NetBSD's one is even worse.) is wrong.
 */
3491static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
3492 u32 ack_seq)
3493{
3494 struct tcp_sock *tp = tcp_sk(sk);
3495 int flag = 0;
3496 u32 nwin = ntohs(tcp_hdr(skb)->window);
3497
3498 if (likely(!tcp_hdr(skb)->syn))
3499 nwin <<= tp->rx_opt.snd_wscale;
3500
3501 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3502 flag |= FLAG_WIN_UPDATE;
3503 tcp_update_wl(tp, ack_seq);
3504
3505 if (tp->snd_wnd != nwin) {
3506 tp->snd_wnd = nwin;
3507
 /* Note, it is the only place, where
  * fast path is recovered for sending TCP.
  */
3511 tp->pred_flags = 0;
3512 tcp_fast_path_check(sk);
3513
3514 if (!tcp_write_queue_empty(sk))
3515 tcp_slow_start_after_idle_check(sk);
3516
3517 if (nwin > tp->max_window) {
3518 tp->max_window = nwin;
3519 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3520 }
3521 }
3522 }
3523
3524 tcp_snd_una_update(tp, ack);
3525
3526 return flag;
3527}
3528
3529static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
3530 u32 *last_oow_ack_time)
3531{
3532 if (*last_oow_ack_time) {
3533 s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
3534
3535 if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
3536 NET_INC_STATS(net, mib_idx);
3537 return true;
3538 }
3539 }
3540
3541 *last_oow_ack_time = tcp_jiffies32;
3542
3543 return false;
3544}
3545
/* Return true if we're currently rate-limiting out-of-window ACKs and
 * thus shouldn't send a dupack right now. We rate-limit dupacks in
 * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
 * attacks that send repeated SYNs or ACKs for the same connection. To
 * do this, we do not send a duplicate SYNACK or ACK if the remote
 * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
 */
3553bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
3554 int mib_idx, u32 *last_oow_ack_time)
3555{
 /* Data packets without SYNs are not likely part of an ACK loop. */
3557 if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
3558 !tcp_hdr(skb)->syn)
3559 return false;
3560
3561 return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
3562}
3563
/* RFC 5961 7 [ACK Throttling] */
3565static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
3566{
 /* unprotected vars, we dont care of overwrites */
3568 static u32 challenge_timestamp;
3569 static unsigned int challenge_count;
3570 struct tcp_sock *tp = tcp_sk(sk);
3571 struct net *net = sock_net(sk);
3572 u32 count, now;
3573
 /* First check our per-socket dupack rate limit. */
3575 if (__tcp_oow_rate_limited(net,
3576 LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
3577 &tp->last_oow_ack_time))
3578 return;
3579
 /* Then check host-wide RFC 5961 rate limit. */
3581 now = jiffies / HZ;
3582 if (now != challenge_timestamp) {
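 /* Once per second, refill the global challenge-ACK budget with a
  * randomized count in roughly [limit/2, 3*limit/2) so an off-path
  * attacker cannot infer the exact counter value (RFC 5961 hardening).
  */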
3583 u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
3584 u32 half = (ack_limit + 1) >> 1;
3585
3586 challenge_timestamp = now;
3587 WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
3588 }
3589 count = READ_ONCE(challenge_count);
3590 if (count > 0) {
3591 WRITE_ONCE(challenge_count, count - 1);
3592 NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
3593 tcp_send_ack(sk);
3594 }
3595}
3596
3597static void tcp_store_ts_recent(struct tcp_sock *tp)
3598{
3599 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3600 tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
3601}
3602
3603static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3604{
3605 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
 /* PAWS bug workaround wrt. ACK frames, the PAWS discard
  * extra check below makes sure this can only happen
  * for pure ACK frames.  -DaveM
  *
  * Not only, also it occurs for expired timestamps.
  */
3613 if (tcp_paws_check(&tp->rx_opt, 0))
3614 tcp_store_ts_recent(tp);
3615 }
3616}
3617
/* This routine deals with acks during a TLP episode and ends an episode by
 * resetting tlp_high_seq. Ref: the TLP algorithm in draft-ietf-tcpm-rack.
 */
3621static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
3622{
3623 struct tcp_sock *tp = tcp_sk(sk);
3624
3625 if (before(ack, tp->tlp_high_seq))
3626 return;
3627
3628 if (!tp->tlp_retrans) {
 /* TLP of new data has been acknowledged */
3630 tp->tlp_high_seq = 0;
3631 } else if (flag & FLAG_DSACKING_ACK) {
 /* This DSACK means original and TLP probe arrived; no loss */
3633 tp->tlp_high_seq = 0;
3634 } else if (after(ack, tp->tlp_high_seq)) {
 /* ACK advances: there was a loss, so reduce cwnd. Reset
  * tlp_high_seq in tcp_init_cwnd_reduction()
  */
3638 tcp_init_cwnd_reduction(sk);
3639 tcp_set_ca_state(sk, TCP_CA_CWR);
3640 tcp_end_cwnd_reduction(sk);
3641 tcp_try_keep_open(sk);
3642 NET_INC_STATS(sock_net(sk),
3643 LINUX_MIB_TCPLOSSPROBERECOVERY);
3644 } else if (!(flag & (FLAG_SND_UNA_ADVANCED |
3645 FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
 /* Pure dupack: original and TLP probe arrived; no loss */
3647 tp->tlp_high_seq = 0;
3648 }
3649}
3650
3651static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
3652{
3653 const struct inet_connection_sock *icsk = inet_csk(sk);
3654
3655 if (icsk->icsk_ca_ops->in_ack_event)
3656 icsk->icsk_ca_ops->in_ack_event(sk, flags);
3657}
3658
/* Congestion control has updated the cwnd already. So if we're in
 * loss recovery then now we do any new sends (for FRTO) or
 * retransmits (for CA_Loss or CA_recovery) that make sense.
 */
3663static void tcp_xmit_recovery(struct sock *sk, int rexmit)
3664{
3665 struct tcp_sock *tp = tcp_sk(sk);
3666
3667 if (rexmit == REXMIT_NONE || sk->sk_state == TCP_SYN_SENT)
3668 return;
3669
3670 if (unlikely(rexmit == REXMIT_NEW)) {
3671 __tcp_push_pending_frames(sk, tcp_current_mss(sk),
3672 TCP_NAGLE_OFF);
3673 if (after(tp->snd_nxt, tp->high_seq))
3674 return;
3675 tp->frto = 0;
3676 }
3677 tcp_xmit_retransmit_queue(sk);
3678}
3679
/* Returns the number of packets newly acked or sacked by the current ACK */
3681static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
3682{
3683 const struct net *net = sock_net(sk);
3684 struct tcp_sock *tp = tcp_sk(sk);
3685 u32 delivered;
3686
3687 delivered = tp->delivered - prior_delivered;
3688 NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered);
3689 if (flag & FLAG_ECE)
3690 NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered);
3691
3692 return delivered;
3693}
3694
/* This routine deals with incoming acks, but not outgoing ones. */
3696static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3697{
3698 struct inet_connection_sock *icsk = inet_csk(sk);
3699 struct tcp_sock *tp = tcp_sk(sk);
3700 struct tcp_sacktag_state sack_state;
3701 struct rate_sample rs = { .prior_delivered = 0 };
3702 u32 prior_snd_una = tp->snd_una;
3703 bool is_sack_reneg = tp->is_sack_reneg;
3704 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3705 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3706 int num_dupack = 0;
3707 int prior_packets = tp->packets_out;
3708 u32 delivered = tp->delivered;
3709 u32 lost = tp->lost;
3710 int rexmit = REXMIT_NONE;
3711 u32 prior_fack;
3712
3713 sack_state.first_sackt = 0;
3714 sack_state.rate = &rs;
3715 sack_state.sack_delivered = 0;
3716
 /* We very likely will need to access rtx queue. */
3718 prefetch(sk->tcp_rtx_queue.rb_node);
3719
 /* If the ack is older than previous acks
  * then we can probably ignore it.
  */
3723 if (before(ack, prior_snd_una)) {
 /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
3725 if (before(ack, prior_snd_una - tp->max_window)) {
3726 if (!(flag & FLAG_NO_CHALLENGE_ACK))
3727 tcp_send_challenge_ack(sk, skb);
3728 return -1;
3729 }
3730 goto old_ack;
3731 }
3732
 /* If the ack includes data we haven't sent yet, discard
  * this segment (RFC793 Section 3.9).
  */
3736 if (after(ack, tp->snd_nxt))
3737 return -1;
3738
3739 if (after(ack, prior_snd_una)) {
3740 flag |= FLAG_SND_UNA_ADVANCED;
3741 icsk->icsk_retransmits = 0;
3742
3743#if IS_ENABLED(CONFIG_TLS_DEVICE)
3744 if (static_branch_unlikely(&clean_acked_data_enabled.key))
3745 if (icsk->icsk_clean_acked)
3746 icsk->icsk_clean_acked(sk, ack);
3747#endif
3748 }
3749
3750 prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
3751 rs.prior_in_flight = tcp_packets_in_flight(tp);
3752
 /* ts_recent update must be made after we are sure that the packet
  * is in window.
  */
3756 if (flag & FLAG_UPDATE_TS_RECENT)
3757 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
3758
3759 if ((flag & (FLAG_SLOWPATH | FLAG_SND_UNA_ADVANCED)) ==
3760 FLAG_SND_UNA_ADVANCED) {
 /* Window is constant, pure forward advance.
  * No more checks are required.
  * Note, we use the fact that SND.UNA>=SND.WL2.
  */
3765 tcp_update_wl(tp, ack_seq);
3766 tcp_snd_una_update(tp, ack);
3767 flag |= FLAG_WIN_UPDATE;
3768
3769 tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
3770
3771 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
3772 } else {
3773 u32 ack_ev_flags = CA_ACK_SLOWPATH;
3774
3775 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3776 flag |= FLAG_DATA;
3777 else
3778 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3779
3780 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3781
3782 if (TCP_SKB_CB(skb)->sacked)
3783 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3784 &sack_state);
3785
3786 if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
3787 flag |= FLAG_ECE;
3788 ack_ev_flags |= CA_ACK_ECE;
3789 }
3790
3791 if (sack_state.sack_delivered)
3792 tcp_count_delivered(tp, sack_state.sack_delivered,
3793 flag & FLAG_ECE);
3794
3795 if (flag & FLAG_WIN_UPDATE)
3796 ack_ev_flags |= CA_ACK_WIN_UPDATE;
3797
3798 tcp_in_ack_event(sk, ack_ev_flags);
3799 }
3800
 /* This is a deviation from RFC3168 since it states that:
  * "When the TCP data sender is ready to set the CWR bit after reducing
  * the congestion window, it SHOULD set the CWR bit only on the first
  * new data packet that it transmits."
  * We accept CWR on pure ACKs to be more robust
  * with widely-deployed TCP implementations that do it.
  */
3808 tcp_ecn_accept_cwr(sk, skb);
3809
 /* We passed data and got it acked, remove any soft error
  * log. Something worked...
  */
3813 sk->sk_err_soft = 0;
3814 icsk->icsk_probes_out = 0;
3815 tp->rcv_tstamp = tcp_jiffies32;
3816 if (!prior_packets)
3817 goto no_queue;
3818
 /* See if we can take anything off of the retransmit queue. */
3820 flag |= tcp_clean_rtx_queue(sk, skb, prior_fack, prior_snd_una,
3821 &sack_state, flag & FLAG_ECE);
3822
3823 tcp_rack_update_reo_wnd(sk, &rs);
3824
3825 if (tp->tlp_high_seq)
3826 tcp_process_tlp_ack(sk, ack, flag);
3827
3828 if (tcp_ack_is_dubious(sk, flag)) {
3829 if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
3830 num_dupack = 1;
3831
3832 if (!(flag & FLAG_DATA))
3833 num_dupack = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
3834 }
3835 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
3836 &rexmit);
3837 }
3838
 /* If needed, reset TLP/RTO timer; RACK may later override this. */
3840 if (flag & FLAG_SET_XMIT_TIMER)
3841 tcp_set_xmit_timer(sk);
3842
3843 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3844 sk_dst_confirm(sk);
3845
3846 delivered = tcp_newly_delivered(sk, delivered, flag);
3847 lost = tp->lost - lost;
3848 rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
3849 tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
3850 tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
3851 tcp_xmit_recovery(sk, rexmit);
3852 return 1;
3853
3854no_queue:
 /* If data was DSACKed, see if we can undo a cwnd reduction. */
3856 if (flag & FLAG_DSACKING_ACK) {
3857 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
3858 &rexmit);
3859 tcp_newly_delivered(sk, delivered, flag);
3860 }
3861
 /* If this ack opens up a zero window, clear backoff.  It was
  * being used to time the probes, and is probably far higher than
  * it needs to be for normal retransmission.
  */
3865 tcp_ack_probe(sk);
3866
3867 if (tp->tlp_high_seq)
3868 tcp_process_tlp_ack(sk, ack, flag);
3869 return 1;
3870
3871old_ack:
 /* If data was SACKed, tag it and see if we should send more data.
  * If data was DSACKed, see if we can undo a cwnd reduction.
  */
3875 if (TCP_SKB_CB(skb)->sacked) {
3876 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3877 &sack_state);
3878 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
3879 &rexmit);
3880 tcp_newly_delivered(sk, delivered, flag);
3881 tcp_xmit_recovery(sk, rexmit);
3882 }
3883
3884 return 0;
3885}
3886
3887static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
3888 bool syn, struct tcp_fastopen_cookie *foc,
3889 bool exp_opt)
3890{
 /* Valid only in SYN or SYN-ACK with an even length. */
3892 if (!foc || !syn || len < 0 || (len & 1))
3893 return;
3894
3895 if (len >= TCP_FASTOPEN_COOKIE_MIN &&
3896 len <= TCP_FASTOPEN_COOKIE_MAX)
3897 memcpy(foc->val, cookie, len);
3898 else if (len != 0)
3899 len = -1;
3900 foc->len = len;
3901 foc->exp = exp_opt;
3902}
3903
3904static bool smc_parse_options(const struct tcphdr *th,
3905 struct tcp_options_received *opt_rx,
3906 const unsigned char *ptr,
3907 int opsize)
3908{
3909#if IS_ENABLED(CONFIG_SMC)
3910 if (static_branch_unlikely(&tcp_have_smc)) {
3911 if (th->syn && !(opsize & 1) &&
3912 opsize >= TCPOLEN_EXP_SMC_BASE &&
3913 get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) {
3914 opt_rx->smc_ok = 1;
3915 return true;
3916 }
3917 }
3918#endif
3919 return false;
3920}
3921
/* Try to parse the MSS option from the TCP header. Return 0 on failure,
 * the clamped MSS value on success.
 */
3925static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
3926{
3927 const unsigned char *ptr = (const unsigned char *)(th + 1);
3928 int length = (th->doff * 4) - sizeof(struct tcphdr);
3929 u16 mss = 0;
3930
3931 while (length > 0) {
3932 int opcode = *ptr++;
3933 int opsize;
3934
3935 switch (opcode) {
3936 case TCPOPT_EOL:
3937 return mss;
3938 case TCPOPT_NOP:
3939 length--;
3940 continue;
3941 default:
3942 if (length < 2)
3943 return mss;
3944 opsize = *ptr++;
3945 if (opsize < 2)
3946 return mss;
3947 if (opsize > length)
3948 return mss;
3949 if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) {
3950 u16 in_mss = get_unaligned_be16(ptr);
3951
3952 if (in_mss) {
3953 if (user_mss && user_mss < in_mss)
3954 in_mss = user_mss;
3955 mss = in_mss;
3956 }
3957 }
3958 ptr += opsize - 2;
3959 length -= opsize;
3960 }
3961 }
3962 return mss;
3963}
3964
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
 * But, this can also be called on packets in the established flow when
 * the fast version below fails.
 */
3969void tcp_parse_options(const struct net *net,
3970 const struct sk_buff *skb,
3971 struct tcp_options_received *opt_rx, int estab,
3972 struct tcp_fastopen_cookie *foc)
3973{
3974 const unsigned char *ptr;
3975 const struct tcphdr *th = tcp_hdr(skb);
3976 int length = (th->doff * 4) - sizeof(struct tcphdr);
3977
3978 ptr = (const unsigned char *)(th + 1);
3979 opt_rx->saw_tstamp = 0;
3980 opt_rx->saw_unknown = 0;
3981
3982 while (length > 0) {
3983 int opcode = *ptr++;
3984 int opsize;
3985
3986 switch (opcode) {
3987 case TCPOPT_EOL:
3988 return;
3989 case TCPOPT_NOP:
3990 length--;
3991 continue;
3992 default:
3993 if (length < 2)
3994 return;
3995 opsize = *ptr++;
3996 if (opsize < 2)
3997 return;
3998 if (opsize > length)
3999 return;
4000 switch (opcode) {
4001 case TCPOPT_MSS:
4002 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
4003 u16 in_mss = get_unaligned_be16(ptr);
4004 if (in_mss) {
4005 if (opt_rx->user_mss &&
4006 opt_rx->user_mss < in_mss)
4007 in_mss = opt_rx->user_mss;
4008 opt_rx->mss_clamp = in_mss;
4009 }
4010 }
4011 break;
4012 case TCPOPT_WINDOW:
4013 if (opsize == TCPOLEN_WINDOW && th->syn &&
4014 !estab && net->ipv4.sysctl_tcp_window_scaling) {
4015 __u8 snd_wscale = *(__u8 *)ptr;
4016 opt_rx->wscale_ok = 1;
4017 if (snd_wscale > TCP_MAX_WSCALE) {
4018 net_info_ratelimited("%s: Illegal window scaling value %d > %u received\n",
4019 __func__,
4020 snd_wscale,
4021 TCP_MAX_WSCALE);
4022 snd_wscale = TCP_MAX_WSCALE;
4023 }
4024 opt_rx->snd_wscale = snd_wscale;
4025 }
4026 break;
4027 case TCPOPT_TIMESTAMP:
4028 if ((opsize == TCPOLEN_TIMESTAMP) &&
4029 ((estab && opt_rx->tstamp_ok) ||
4030 (!estab && net->ipv4.sysctl_tcp_timestamps))) {
4031 opt_rx->saw_tstamp = 1;
4032 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
4033 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
4034 }
4035 break;
4036 case TCPOPT_SACK_PERM:
4037 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
4038 !estab && net->ipv4.sysctl_tcp_sack) {
4039 opt_rx->sack_ok = TCP_SACK_SEEN;
4040 tcp_sack_reset(opt_rx);
4041 }
4042 break;
4043
4044 case TCPOPT_SACK:
4045 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
4046 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
4047 opt_rx->sack_ok) {
4048 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
4049 }
4050 break;
4051#ifdef CONFIG_TCP_MD5SIG
4052 case TCPOPT_MD5SIG:
 /* The MD5 Hash has already been
  * checked (see tcp_v{4,6}_do_rcv()).
  */
4057 break;
4058#endif
4059 case TCPOPT_FASTOPEN:
4060 tcp_parse_fastopen_option(
4061 opsize - TCPOLEN_FASTOPEN_BASE,
4062 ptr, th->syn, foc, false);
4063 break;
4064
4065 case TCPOPT_EXP:
 /* Fast Open option shares code 254 using a
  * 16 bits magic number.
  */
4069 if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE &&
4070 get_unaligned_be16(ptr) ==
4071 TCPOPT_FASTOPEN_MAGIC) {
4072 tcp_parse_fastopen_option(opsize -
4073 TCPOLEN_EXP_FASTOPEN_BASE,
4074 ptr + 2, th->syn, foc, true);
4075 break;
4076 }
4077
4078 if (smc_parse_options(th, opt_rx, ptr, opsize))
4079 break;
4080
4081 opt_rx->saw_unknown = 1;
4082 break;
4083
4084 default:
4085 opt_rx->saw_unknown = 1;
4086 }
4087 ptr += opsize-2;
4088 length -= opsize;
4089 }
4090 }
4091}
4092EXPORT_SYMBOL(tcp_parse_options);
4093
4094static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
4095{
4096 const __be32 *ptr = (const __be32 *)(th + 1);
4097
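 /* Fast path: match the single 32-bit word that a timestamp-only option
  * block starts with (NOP, NOP, TIMESTAMP, length 10), then pull tsval
  * and tsecr out of the two words that follow.
  */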
4098 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
4099 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
4100 tp->rx_opt.saw_tstamp = 1;
4101 ++ptr;
4102 tp->rx_opt.rcv_tsval = ntohl(*ptr);
4103 ++ptr;
4104 if (*ptr)
4105 tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
4106 else
4107 tp->rx_opt.rcv_tsecr = 0;
4108 return true;
4109 }
4110 return false;
4111}
4112
/* Fast parse options. This hopes to only see timestamps.
 * If it is wrong it falls back on tcp_parse_options().
 */
4116static bool tcp_fast_parse_options(const struct net *net,
4117 const struct sk_buff *skb,
4118 const struct tcphdr *th, struct tcp_sock *tp)
4119{
 /* In the spirit of fast parsing, compare doff directly to constant
  * values.  Because equality is used, short doff can be ignored here.
  */
4123 if (th->doff == (sizeof(*th) / 4)) {
4124 tp->rx_opt.saw_tstamp = 0;
4125 return false;
4126 } else if (tp->rx_opt.tstamp_ok &&
4127 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
4128 if (tcp_parse_aligned_timestamp(tp, th))
4129 return true;
4130 }
4131
4132 tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL);
4133 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
4134 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
4135
4136 return true;
4137}
4138
4139#ifdef CONFIG_TCP_MD5SIG
/*
 * Parse MD5 Signature option
 */
4143const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
4144{
4145 int length = (th->doff << 2) - sizeof(*th);
4146 const u8 *ptr = (const u8 *)(th + 1);
4147
 /* If not enough option space remains, we can short cut. */
4149 while (length >= TCPOLEN_MD5SIG) {
4150 int opcode = *ptr++;
4151 int opsize;
4152
4153 switch (opcode) {
4154 case TCPOPT_EOL:
4155 return NULL;
4156 case TCPOPT_NOP:
4157 length--;
4158 continue;
4159 default:
4160 opsize = *ptr++;
4161 if (opsize < 2 || opsize > length)
4162 return NULL;
4163 if (opcode == TCPOPT_MD5SIG)
4164 return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
4165 }
4166 ptr += opsize - 2;
4167 length -= opsize;
4168 }
4169 return NULL;
4170}
4171EXPORT_SYMBOL(tcp_parse_md5sig_option);
4172#endif
4173
/* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM
 *
 * It is not fatal. If this ACK does _not_ change critical state (seqs, window)
 * it can pass through the stack. So, the following predicate verifies that
 * this segment is not used for anything but congestion avoidance or
 * fast retransmit:
 *
 *   1. It is a pure ACK carrying the next expected sequence number,
 *   2. it is a duplicate ACK (does not advance snd_una),
 *   3. it does not update the window, and
 *   4. its timestamp still sits inside the replay window (one RTO).
 */
4197static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
4198{
4199 const struct tcp_sock *tp = tcp_sk(sk);
4200 const struct tcphdr *th = tcp_hdr(skb);
4201 u32 seq = TCP_SKB_CB(skb)->seq;
4202 u32 ack = TCP_SKB_CB(skb)->ack_seq;
4203
4204 return (
4205 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
4206
 /* 2. ... and duplicate ACK. */
4208 ack == tp->snd_una &&
4209
 /* 3. ... and does not update window. */
4211 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
4212
 /* 4. ... and sits in replay window. */
4214 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
4215}
4216
4217static inline bool tcp_paws_discard(const struct sock *sk,
4218 const struct sk_buff *skb)
4219{
4220 const struct tcp_sock *tp = tcp_sk(sk);
4221
4222 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
4223 !tcp_disordered_ack(sk, skb);
4224}
4225
4226
/* Check segment sequence number for validity.
 *
 * Segment controls are considered valid, if the segment
 * fits to the window after truncation to the window. Acceptability
 * of data (and SYN, FIN, of course) is checked separately.
 * See tcp_data_queue(), for example.
 *
 * Also, controls (RST is main one) are accepted using RCV.WUP instead
 * of RCV.NXT. Peer still did not advance his SND.UNA when we
 * delayed ACK, so that hisSND.UNA<=ourRCV.WUP.
 * (borrowed from freebsd)
 */
4239static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
4240{
4241 return !before(end_seq, tp->rcv_wup) &&
4242 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
4243}
4244
/* When we get a reset we do this. */
4246void tcp_reset(struct sock *sk, struct sk_buff *skb)
4247{
4248 trace_tcp_receive_reset(sk);
4249
 /* mptcp can't tell us to ignore reset pkts,
  * so just ignore the return value of mptcp_incoming_options().
  */
4253 if (sk_is_mptcp(sk))
4254 mptcp_incoming_options(sk, skb);
4255
 /* We want the right error as BSD sees it (and indeed as we do). */
4257 switch (sk->sk_state) {
4258 case TCP_SYN_SENT:
4259 sk->sk_err = ECONNREFUSED;
4260 break;
4261 case TCP_CLOSE_WAIT:
4262 sk->sk_err = EPIPE;
4263 break;
4264 case TCP_CLOSE:
4265 return;
4266 default:
4267 sk->sk_err = ECONNRESET;
4268 }
 /* This barrier is coupled with smp_rmb() in tcp_poll() */
4270 smp_wmb();
4271
4272 tcp_write_queue_purge(sk);
4273 tcp_done(sk);
4274
4275 if (!sock_flag(sk, SOCK_DEAD))
4276 sk_error_report(sk);
4277}
4278
/*
 *	Process the FIN bit. This now behaves as it is supposed to work
 *	and the FIN takes effect when it is validly part of sequence
 *	space. Not before when we get holes.
 *
 *	If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
 *	(and thence onto LAST-ACK and finally, CLOSE, we never enter
 *	TIME-WAIT)
 *
 *	If we are in FINWAIT-1, a received FIN indicates simultaneous
 *	close and we go into CLOSING (and later onto TIME-WAIT)
 *
 *	If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
 */
4293void tcp_fin(struct sock *sk)
4294{
4295 struct tcp_sock *tp = tcp_sk(sk);
4296
4297 inet_csk_schedule_ack(sk);
4298
4299 sk->sk_shutdown |= RCV_SHUTDOWN;
4300 sock_set_flag(sk, SOCK_DONE);
4301
4302 switch (sk->sk_state) {
4303 case TCP_SYN_RECV:
4304 case TCP_ESTABLISHED:
 /* Move to CLOSE_WAIT */
4306 tcp_set_state(sk, TCP_CLOSE_WAIT);
4307 inet_csk_enter_pingpong_mode(sk);
4308 break;
4309
4310 case TCP_CLOSE_WAIT:
4311 case TCP_CLOSING:
 /* Received a retransmission of the FIN, do
  * nothing.
  */
4315 break;
4316 case TCP_LAST_ACK:
 /* RFC793: Remain in the LAST-ACK state. */
4318 break;
4319
4320 case TCP_FIN_WAIT1:
 /* This case occurs when a simultaneous close
  * happens, we must ack the received FIN and
  * enter the CLOSING state.
  */
4325 tcp_send_ack(sk);
4326 tcp_set_state(sk, TCP_CLOSING);
4327 break;
4328 case TCP_FIN_WAIT2:
 /* Received a FIN -- send ACK and enter TIME_WAIT. */
4330 tcp_send_ack(sk);
4331 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
4332 break;
4333 default:
 /* Only TCP_LISTEN and TCP_CLOSE are left, in these
  * cases we should never reach this piece of code.
  */
4337 pr_err("%s: Impossible, sk->sk_state=%d\n",
4338 __func__, sk->sk_state);
4339 break;
4340 }
4341
 /* It _is_ possible, that we have something out-of-order _after_ FIN.
  * Probably, we should reset in this case. For now drop them.
  */
4345 skb_rbtree_purge(&tp->out_of_order_queue);
4346 if (tcp_is_sack(tp))
4347 tcp_sack_reset(&tp->rx_opt);
4348 sk_mem_reclaim(sk);
4349
4350 if (!sock_flag(sk, SOCK_DEAD)) {
4351 sk->sk_state_change(sk);
4352
 /* Do not send POLL_HUP for half duplex close. */
4354 if (sk->sk_shutdown == SHUTDOWN_MASK ||
4355 sk->sk_state == TCP_CLOSE)
4356 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
4357 else
4358 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
4359 }
4360}
4361
4362static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4363 u32 end_seq)
4364{
4365 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
4366 if (before(seq, sp->start_seq))
4367 sp->start_seq = seq;
4368 if (after(end_seq, sp->end_seq))
4369 sp->end_seq = end_seq;
4370 return true;
4371 }
4372 return false;
4373}
4374
4375static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4376{
4377 struct tcp_sock *tp = tcp_sk(sk);
4378
4379 if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
4380 int mib_idx;
4381
4382 if (before(seq, tp->rcv_nxt))
4383 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
4384 else
4385 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
4386
4387 NET_INC_STATS(sock_net(sk), mib_idx);
4388
4389 tp->rx_opt.dsack = 1;
4390 tp->duplicate_sack[0].start_seq = seq;
4391 tp->duplicate_sack[0].end_seq = end_seq;
4392 }
4393}
4394
4395static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4396{
4397 struct tcp_sock *tp = tcp_sk(sk);
4398
4399 if (!tp->rx_opt.dsack)
4400 tcp_dsack_set(sk, seq, end_seq);
4401 else
4402 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
4403}
4404
4405static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
4406{
 /* When the ACK path fails or drops most ACKs, the sender would
  * timeout and spuriously retransmit the same segment repeatedly.
  * The receiver remembers and reflects via DSACKs. Leverage the
  * DSACK state and change the txhash to re-route speculatively.
  */
4412 if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq &&
4413 sk_rethink_txhash(sk))
4414 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);
4415}
4416
4417static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
4418{
4419 struct tcp_sock *tp = tcp_sk(sk);
4420
4421 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4422 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4423 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4424 tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
4425
4426 if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
4427 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4428
4429 tcp_rcv_spurious_retrans(sk, skb);
4430 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
4431 end_seq = tp->rcv_nxt;
4432 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
4433 }
4434 }
4435
4436 tcp_send_ack(sk);
4437}
4438
/* These routines update the SACK block as out-of-order packets arrive or
 * in-order packets close up the sequence space.
 */
4442static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4443{
4444 int this_sack;
4445 struct tcp_sack_block *sp = &tp->selective_acks[0];
4446 struct tcp_sack_block *swalk = sp + 1;
4447
 /* See if the recent change to the first SACK eats into
  * or hits the sequence space of other SACK blocks, if so coalesce.
  */
4451 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
4452 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
4453 int i;
4454
 /* Zap SWALK, by moving every further SACK up by one slot.
  * Decrease num_sacks.
  */
4458 tp->rx_opt.num_sacks--;
4459 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4460 sp[i] = sp[i + 1];
4461 continue;
4462 }
4463 this_sack++;
4464 swalk++;
4465 }
4466}
4467
4468static void tcp_sack_compress_send_ack(struct sock *sk)
4469{
4470 struct tcp_sock *tp = tcp_sk(sk);
4471
4472 if (!tp->compressed_ack)
4473 return;
4474
4475 if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
4476 __sock_put(sk);
4477
 /* Since we have to send one ack finally,
  * subtract one from tp->compressed_ack to keep
  * LINUX_MIB_TCPACKCOMPRESSED accurate.
  */
4482 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
4483 tp->compressed_ack - 1);
4484
4485 tp->compressed_ack = 0;
4486 tcp_send_ack(sk);
4487}
4488
/* Reasonable amount of sack blocks included in TCP SACK option
 * The max is 4, but this becomes 3 if TCP timestamps are there.
 * Given that SACK packets might be lost, be conservative and use 2.
 */
4493#define TCP_SACK_BLOCKS_EXPECTED 2
4494
4495static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4496{
4497 struct tcp_sock *tp = tcp_sk(sk);
4498 struct tcp_sack_block *sp = &tp->selective_acks[0];
4499 int cur_sacks = tp->rx_opt.num_sacks;
4500 int this_sack;
4501
4502 if (!cur_sacks)
4503 goto new_sack;
4504
4505 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
4506 if (tcp_sack_extend(sp, seq, end_seq)) {
4507 if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
4508 tcp_sack_compress_send_ack(sk);
 /* Rotate this_sack to the first one. */
4510 for (; this_sack > 0; this_sack--, sp--)
4511 swap(*sp, *(sp - 1));
4512 if (cur_sacks > 1)
4513 tcp_sack_maybe_coalesce(tp);
4514 return;
4515 }
4516 }
4517
4518 if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
4519 tcp_sack_compress_send_ack(sk);
4520
 /* Could not find an adjacent existing SACK, build a new one,
  * put it at the front, and shift everyone else down.  We
  * always know there is at least one SACK present already here.
  *
  * If the sack array is full, forget about the last one.
  */
4527 if (this_sack >= TCP_NUM_SACKS) {
4528 this_sack--;
4529 tp->rx_opt.num_sacks--;
4530 sp--;
4531 }
4532 for (; this_sack > 0; this_sack--, sp--)
4533 *sp = *(sp - 1);
4534
4535new_sack:
 /* Build the new head SACK, and we're done. */
4537 sp->start_seq = seq;
4538 sp->end_seq = end_seq;
4539 tp->rx_opt.num_sacks++;
4540}
4541
4542
/* RCV.NXT advances, some SACKs should be eaten. */
4544static void tcp_sack_remove(struct tcp_sock *tp)
4545{
4546 struct tcp_sack_block *sp = &tp->selective_acks[0];
4547 int num_sacks = tp->rx_opt.num_sacks;
4548 int this_sack;
4549
 /* With an empty out-of-order queue, every SACK block is obsolete. */
4551 if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
4552 tp->rx_opt.num_sacks = 0;
4553 return;
4554 }
4555
4556 for (this_sack = 0; this_sack < num_sacks;) {
 /* Check if the start of the sack is covered by RCV.NXT. */
4558 if (!before(tp->rcv_nxt, sp->start_seq)) {
4559 int i;
4560
 /* RCV.NXT must cover all the block! */
4562 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4563
 /* Zap this SACK, by moving forward any other SACKS. */
4565 for (i = this_sack+1; i < num_sacks; i++)
4566 tp->selective_acks[i-1] = tp->selective_acks[i];
4567 num_sacks--;
4568 continue;
4569 }
4570 this_sack++;
4571 sp++;
4572 }
4573 tp->rx_opt.num_sacks = num_sacks;
4574}
4575
4576
/**
 * tcp_try_coalesce - try to merge skb to prior one
 * @sk: socket
 * @to: prior buffer
 * @from: buffer to add in queue
 * @fragstolen: pointer to boolean
 *
 * Before queueing skb @from after @to, try to merge them
 * to reduce overall memory use and queue lengths, if cost is small.
 * Packets in ofo or receive queues can stay a long time.
 * Better try to coalesce them right now to avoid future collapses.
 * Returns true if caller should free @from instead of queueing it
 */
4589static bool tcp_try_coalesce(struct sock *sk,
4590 struct sk_buff *to,
4591 struct sk_buff *from,
4592 bool *fragstolen)
4593{
4594 int delta;
4595
4596 *fragstolen = false;
4597
 /* It's possible this segment overlaps with a prior segment in the queue */
4599 if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
4600 return false;
4601
4602 if (!mptcp_skb_can_collapse(to, from))
4603 return false;
4604
4605#ifdef CONFIG_TLS_DEVICE
4606 if (from->decrypted != to->decrypted)
4607 return false;
4608#endif
4609
4610 if (!skb_try_coalesce(to, from, fragstolen, &delta))
4611 return false;
4612
4613 atomic_add(delta, &sk->sk_rmem_alloc);
4614 sk_mem_charge(sk, delta);
4615 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
4616 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
4617 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
4618 TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
4619
4620 if (TCP_SKB_CB(from)->has_rxtstamp) {
4621 TCP_SKB_CB(to)->has_rxtstamp = true;
4622 to->tstamp = from->tstamp;
4623 skb_hwtstamps(to)->hwtstamp = skb_hwtstamps(from)->hwtstamp;
4624 }
4625
4626 return true;
4627}
4628
4629static bool tcp_ooo_try_coalesce(struct sock *sk,
4630 struct sk_buff *to,
4631 struct sk_buff *from,
4632 bool *fragstolen)
4633{
4634 bool res = tcp_try_coalesce(sk, to, from, fragstolen);
4635
 /* In case tcp_drop() is called later, update to->gso_segs */
4637 if (res) {
4638 u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
4639 max_t(u16, 1, skb_shinfo(from)->gso_segs);
4640
4641 skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
4642 }
4643 return res;
4644}
4645
4646static void tcp_drop(struct sock *sk, struct sk_buff *skb)
4647{
4648 sk_drops_add(sk, skb);
4649 __kfree_skb(skb);
4650}
4651
/* This one checks to see if we can put data from the
 * out_of_order queue into the receive_queue.
 */
4655static void tcp_ofo_queue(struct sock *sk)
4656{
4657 struct tcp_sock *tp = tcp_sk(sk);
4658 __u32 dsack_high = tp->rcv_nxt;
4659 bool fin, fragstolen, eaten;
4660 struct sk_buff *skb, *tail;
4661 struct rb_node *p;
4662
4663 p = rb_first(&tp->out_of_order_queue);
4664 while (p) {
4665 skb = rb_to_skb(p);
4666 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
4667 break;
4668
4669 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
4670 __u32 dsack = dsack_high;
4671 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
4672 dsack_high = TCP_SKB_CB(skb)->end_seq;
4673 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
4674 }
4675 p = rb_next(p);
4676 rb_erase(&skb->rbnode, &tp->out_of_order_queue);
4677
4678 if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
4679 tcp_drop(sk, skb);
4680 continue;
4681 }
4682
4683 tail = skb_peek_tail(&sk->sk_receive_queue);
4684 eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
4685 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
4686 fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
4687 if (!eaten)
4688 __skb_queue_tail(&sk->sk_receive_queue, skb);
4689 else
4690 kfree_skb_partial(skb, fragstolen);
4691
4692 if (unlikely(fin)) {
4693 tcp_fin(sk);
 /* tcp_fin() purges tp->out_of_order_queue,
  * so we must end this loop right now.
  */
4697 break;
4698 }
4699 }
4700}
4701
4702static bool tcp_prune_ofo_queue(struct sock *sk);
4703static int tcp_prune_queue(struct sock *sk);
4704
4705static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
4706 unsigned int size)
4707{
4708 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4709 !sk_rmem_schedule(sk, skb, size)) {
4710
4711 if (tcp_prune_queue(sk) < 0)
4712 return -1;
4713
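 /* Collapsing the receive queues (above) was not enough: keep
  * dropping from the out-of-order queue until the new skb fits,
  * or give up and let the caller drop it.
  */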
4714 while (!sk_rmem_schedule(sk, skb, size)) {
4715 if (!tcp_prune_ofo_queue(sk))
4716 return -1;
4717 }
4718 }
4719 return 0;
4720}
4721
4722static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4723{
4724 struct tcp_sock *tp = tcp_sk(sk);
4725 struct rb_node **p, *parent;
4726 struct sk_buff *skb1;
4727 u32 seq, end_seq;
4728 bool fragstolen;
4729
4730 tcp_ecn_check_ce(sk, skb);
4731
4732 if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
4733 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
4734 sk->sk_data_ready(sk);
4735 tcp_drop(sk, skb);
4736 return;
4737 }
4738
 /* Disable header prediction. */
4740 tp->pred_flags = 0;
4741 inet_csk_schedule_ack(sk);
4742
4743 tp->rcv_ooopack += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
4744 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
4745 seq = TCP_SKB_CB(skb)->seq;
4746 end_seq = TCP_SKB_CB(skb)->end_seq;
4747
4748 p = &tp->out_of_order_queue.rb_node;
4749 if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
 /* Initial out of order segment, build 1 SACK. */
4751 if (tcp_is_sack(tp)) {
4752 tp->rx_opt.num_sacks = 1;
4753 tp->selective_acks[0].start_seq = seq;
4754 tp->selective_acks[0].end_seq = end_seq;
4755 }
4756 rb_link_node(&skb->rbnode, NULL, p);
4757 rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
4758 tp->ooo_last_skb = skb;
4759 goto end;
4760 }
4761
 /* In the typical case, we are adding an skb to the end of the list.
  * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
  */
4765 if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
4766 skb, &fragstolen)) {
4767coalesce_done:
 /* For non sack flows, do not grow window to force DUPACK
  * and trigger fast retransmit.
  */
4771 if (tcp_is_sack(tp))
4772 tcp_grow_window(sk, skb);
4773 kfree_skb_partial(skb, fragstolen);
4774 skb = NULL;
4775 goto add_sack;
4776 }
4777
4778 if (!before(seq, TCP_SKB_CB(tp->ooo_last_skb)->end_seq)) {
4779 parent = &tp->ooo_last_skb->rbnode;
4780 p = &parent->rb_right;
4781 goto insert;
4782 }
4783
 /* Find place to insert this segment. Handle overlaps in both directions. */
4785 parent = NULL;
4786 while (*p) {
4787 parent = *p;
4788 skb1 = rb_to_skb(parent);
4789 if (before(seq, TCP_SKB_CB(skb1)->seq)) {
4790 p = &parent->rb_left;
4791 continue;
4792 }
4793 if (before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4794 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
 /* All the bits are present. Drop. */
4796 NET_INC_STATS(sock_net(sk),
4797 LINUX_MIB_TCPOFOMERGE);
4798 tcp_drop(sk, skb);
4799 skb = NULL;
4800 tcp_dsack_set(sk, seq, end_seq);
4801 goto add_sack;
4802 }
4803 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
 /* Partial overlap. */
4805 tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq);
4806 } else {
 /* skb's seq == skb1's seq and skb covers skb1.
  * Replace skb1 with skb.
  */
4810 rb_replace_node(&skb1->rbnode, &skb->rbnode,
4811 &tp->out_of_order_queue);
4812 tcp_dsack_extend(sk,
4813 TCP_SKB_CB(skb1)->seq,
4814 TCP_SKB_CB(skb1)->end_seq);
4815 NET_INC_STATS(sock_net(sk),
4816 LINUX_MIB_TCPOFOMERGE);
4817 tcp_drop(sk, skb1);
4818 goto merge_right;
4819 }
4820 } else if (tcp_ooo_try_coalesce(sk, skb1,
4821 skb, &fragstolen)) {
4822 goto coalesce_done;
4823 }
4824 p = &parent->rb_right;
4825 }
4826insert:
 /* Insert segment into RB tree. */
4828 rb_link_node(&skb->rbnode, parent, p);
4829 rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
4830
4831merge_right:
 /* Remove other segments covered by skb. */
4833 while ((skb1 = skb_rb_next(skb)) != NULL) {
4834 if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
4835 break;
4836 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4837 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4838 end_seq);
4839 break;
4840 }
4841 rb_erase(&skb1->rbnode, &tp->out_of_order_queue);
4842 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4843 TCP_SKB_CB(skb1)->end_seq);
4844 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4845 tcp_drop(sk, skb1);
4846 }
 /* If there is no skb after us, we are the last_skb ! */
4848 if (!skb1)
4849 tp->ooo_last_skb = skb;
4850
4851add_sack:
4852 if (tcp_is_sack(tp))
4853 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4854end:
4855 if (skb) {
 /* For non sack flows, do not grow window to force DUPACK
  * and trigger fast retransmit.
  */
4859 if (tcp_is_sack(tp))
4860 tcp_grow_window(sk, skb);
4861 skb_condense(skb);
4862 skb_set_owner_r(skb, sk);
4863 }
4864}
4865
4866static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
4867 bool *fragstolen)
4868{
4869 int eaten;
4870 struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
4871
4872 eaten = (tail &&
4873 tcp_try_coalesce(sk, tail,
4874 skb, fragstolen)) ? 1 : 0;
4875 tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
4876 if (!eaten) {
4877 __skb_queue_tail(&sk->sk_receive_queue, skb);
4878 skb_set_owner_r(skb, sk);
4879 }
4880 return eaten;
4881}
4882
4883int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
4884{
4885 struct sk_buff *skb;
4886 int err = -ENOMEM;
4887 int data_len = 0;
4888 bool fragstolen;
4889
4890 if (size == 0)
4891 return 0;
4892
4893 if (size > PAGE_SIZE) {
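 /* Large payloads go into page frags: reserve whole pages for the
  * bulk and keep only the sub-page remainder in the linear area.
  */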
4894 int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
4895
4896 data_len = npages << PAGE_SHIFT;
4897 size = data_len + (size & ~PAGE_MASK);
4898 }
4899 skb = alloc_skb_with_frags(size - data_len, data_len,
4900 PAGE_ALLOC_COSTLY_ORDER,
4901 &err, sk->sk_allocation);
4902 if (!skb)
4903 goto err;
4904
4905 skb_put(skb, size - data_len);
4906 skb->data_len = data_len;
4907 skb->len = size;
4908
4909 if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
4910 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
4911 goto err_free;
4912 }
4913
4914 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
4915 if (err)
4916 goto err_free;
4917
4918 TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
4919 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
4920 TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
4921
4922 if (tcp_queue_rcv(sk, skb, &fragstolen)) {
4923 WARN_ON_ONCE(fragstolen);
4924 __kfree_skb(skb);
4925 }
4926 return size;
4927
4928err_free:
4929 kfree_skb(skb);
4930err:
4931 return err;
4932
4933}
4934
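/* Wake the receiving task only once enough data is queued to satisfy
 * sk_rcvlowat (or no more data will arrive, e.g. after FIN).
 */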
4935void tcp_data_ready(struct sock *sk)
4936{
4937 if (tcp_epollin_ready(sk, sk->sk_rcvlowat) || sock_flag(sk, SOCK_DONE))
4938 sk->sk_data_ready(sk);
4939}
4940
4941static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4942{
4943 struct tcp_sock *tp = tcp_sk(sk);
4944 bool fragstolen;
4945 int eaten;
4946
 /* If a subflow has been reset, the packet should not continue
  * to be processed, drop the packet.
  */
4950 if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb)) {
4951 __kfree_skb(skb);
4952 return;
4953 }
4954
4955 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
4956 __kfree_skb(skb);
4957 return;
4958 }
4959 skb_dst_drop(skb);
4960 __skb_pull(skb, tcp_hdr(skb)->doff * 4);
4961
4962 tp->rx_opt.dsack = 0;
4963
 /*  Queue data for delivery to the user.
  *  Packets in sequence go to the receive queue.
  *  Out of sequence packets to the out_of_order_queue.
  */
4968 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
4969 if (tcp_receive_window(tp) == 0) {
4970 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
4971 goto out_of_window;
4972 }
4973
 /* Ok. In sequence. In window. */
4975queue_and_out:
4976 if (skb_queue_len(&sk->sk_receive_queue) == 0)
4977 sk_forced_mem_schedule(sk, skb->truesize);
4978 else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
4979 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
4980 sk->sk_data_ready(sk);
4981 goto drop;
4982 }
4983
4984 eaten = tcp_queue_rcv(sk, skb, &fragstolen);
4985 if (skb->len)
4986 tcp_event_data_recv(sk, skb);
4987 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
4988 tcp_fin(sk);
4989
4990 if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
4991 tcp_ofo_queue(sk);
4992
 /* RFC 5681 4.2. Send an immediate ACK when
  * the out-of-order queue has just been flushed.
  */
4996 if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
4997 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
4998 }
4999
5000 if (tp->rx_opt.num_sacks)
5001 tcp_sack_remove(tp);
5002
5003 tcp_fast_path_check(sk);
5004
5005 if (eaten > 0)
5006 kfree_skb_partial(skb, fragstolen);
5007 if (!sock_flag(sk, SOCK_DEAD))
5008 tcp_data_ready(sk);
5009 return;
5010 }
5011
5012 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
5013 tcp_rcv_spurious_retrans(sk, skb);
 /* A retransmit, 2nd most common case.  Force an immediate ack. */
5015 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
5016 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
5017
5018out_of_window:
5019 tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
5020 inet_csk_schedule_ack(sk);
5021drop:
5022 tcp_drop(sk, skb);
5023 return;
5024 }
5025
 /* Out of window. F.e. zero window probe. */
5027 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
5028 goto out_of_window;
5029
5030 if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
 /* Partial packet, seq < rcv_next < end_seq */
5032 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
5033
 /* If window is closed, drop tail of packet. But after
  * remembering D-SACK for its head made in previous line.
  */
5037 if (!tcp_receive_window(tp)) {
5038 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
5039 goto out_of_window;
5040 }
5041 goto queue_and_out;
5042 }
5043
5044 tcp_data_queue_ofo(sk, skb);
5045}
5046
5047static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list)
5048{
5049 if (list)
5050 return !skb_queue_is_last(list, skb) ? skb->next : NULL;
5051
5052 return skb_rb_next(skb);
5053}
5054
5055static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
5056 struct sk_buff_head *list,
5057 struct rb_root *root)
5058{
5059 struct sk_buff *next = tcp_skb_next(skb, list);
5060
5061 if (list)
5062 __skb_unlink(skb, list);
5063 else
5064 rb_erase(&skb->rbnode, root);
5065
5066 __kfree_skb(skb);
5067 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
5068
5069 return next;
5070}
5071
5072
5073void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
5074{
5075 struct rb_node **p = &root->rb_node;
5076 struct rb_node *parent = NULL;
5077 struct sk_buff *skb1;
5078
5079 while (*p) {
5080 parent = *p;
5081 skb1 = rb_to_skb(parent);
5082 if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
5083 p = &parent->rb_left;
5084 else
5085 p = &parent->rb_right;
5086 }
5087 rb_link_node(&skb->rbnode, parent, p);
5088 rb_insert_color(&skb->rbnode, root);
5089}
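
/* Illustrative sketch (not part of the original source): because the tree
 * is keyed by TCP_SKB_CB(skb)->seq via before(), which compares sequence
 * numbers modulo 2^32, walking the out-of-order queue in sequence order
 * only needs the generic rbtree iterators already used in this file:
 *
 *	struct sk_buff *skb;
 *
 *	for (skb = skb_rb_first(&tp->out_of_order_queue); skb;
 *	     skb = skb_rb_next(skb))
 *		pr_debug("ofo: seq %u end_seq %u\n",
 *			 TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
 */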
5090
/* Collapse contiguous sequence of skbs head..tail with
 * sequence numbers start..end.
 *
 * If tail is NULL, this means until the end of the queue.
 *
 * Segments with FIN/SYN are not collapsed (only because this
 * simplifies code).
 */
5099static void
5100tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
5101 struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end)
5102{
5103 struct sk_buff *skb = head, *n;
5104 struct sk_buff_head tmp;
5105 bool end_of_skbs;
5106
	/* First, check that queue is collapsible and find
	 * the point where collapsing can be useful.
	 */
5110restart:
5111 for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) {
5112 n = tcp_skb_next(skb, list);
5113
		/* No new bits? It is possible on ofo queue. */
5115 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
5116 skb = tcp_collapse_one(sk, skb, list, root);
5117 if (!skb)
5118 break;
5119 goto restart;
5120 }
5121
		/* The first skb to collapse is:
		 * - not SYN/FIN and
		 * - bloated or contains data before "start" or
		 *   overlaps to the next one and MPTCP allows collapsing.
		 */
5127 if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
5128 (tcp_win_from_space(sk, skb->truesize) > skb->len ||
5129 before(TCP_SKB_CB(skb)->seq, start))) {
5130 end_of_skbs = false;
5131 break;
5132 }
5133
5134 if (n && n != tail && mptcp_skb_can_collapse(skb, n) &&
5135 TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
5136 end_of_skbs = false;
5137 break;
5138 }
5139
		/* Decided to skip this, advance start seq. */
5141 start = TCP_SKB_CB(skb)->end_seq;
5142 }
5143 if (end_of_skbs ||
5144 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
5145 return;
5146
5147 __skb_queue_head_init(&tmp);
5148
5149 while (before(start, end)) {
5150 int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
5151 struct sk_buff *nskb;
5152
5153 nskb = alloc_skb(copy, GFP_ATOMIC);
5154 if (!nskb)
5155 break;
5156
5157 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
5158#ifdef CONFIG_TLS_DEVICE
5159 nskb->decrypted = skb->decrypted;
5160#endif
5161 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
5162 if (list)
5163 __skb_queue_before(list, skb, nskb);
5164 else
5165 __skb_queue_tail(&tmp, nskb);
5166 skb_set_owner_r(nskb, sk);
5167 mptcp_skb_ext_move(nskb, skb);
5168
		/* Copy data, releasing collapsed skbs. */
5170 while (copy > 0) {
5171 int offset = start - TCP_SKB_CB(skb)->seq;
5172 int size = TCP_SKB_CB(skb)->end_seq - start;
5173
5174 BUG_ON(offset < 0);
5175 if (size > 0) {
5176 size = min(copy, size);
5177 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
5178 BUG();
5179 TCP_SKB_CB(nskb)->end_seq += size;
5180 copy -= size;
5181 start += size;
5182 }
5183 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
5184 skb = tcp_collapse_one(sk, skb, list, root);
5185 if (!skb ||
5186 skb == tail ||
5187 !mptcp_skb_can_collapse(nskb, skb) ||
5188 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
5189 goto end;
5190#ifdef CONFIG_TLS_DEVICE
5191 if (skb->decrypted != nskb->decrypted)
5192 goto end;
5193#endif
5194 }
5195 }
5196 }
5197end:
5198 skb_queue_walk_safe(&tmp, skb, n)
5199 tcp_rbtree_insert(root, skb);
5200}
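
/* Sizing note (illustrative, not part of the original source): each
 * replacement skb allocated above carries at most SKB_MAX_ORDER(0, 0)
 * bytes of linear payload, i.e. what fits in a single order-0 page after
 * the struct skb_shared_info overhead, so a collapsed queue approaches a
 * truesize/len ratio near 1 instead of one truesize-heavy skb per small
 * received segment.
 */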
5201
/* Collapse ofo queue. Algorithm: select contiguous sequence of skbs
 * and tcp_collapse() them until all the queue is collapsed.
 */
5205static void tcp_collapse_ofo_queue(struct sock *sk)
5206{
5207 struct tcp_sock *tp = tcp_sk(sk);
5208 u32 range_truesize, sum_tiny = 0;
5209 struct sk_buff *skb, *head;
5210 u32 start, end;
5211
5212 skb = skb_rb_first(&tp->out_of_order_queue);
5213new_range:
5214 if (!skb) {
5215 tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
5216 return;
5217 }
5218 start = TCP_SKB_CB(skb)->seq;
5219 end = TCP_SKB_CB(skb)->end_seq;
5220 range_truesize = skb->truesize;
5221
5222 for (head = skb;;) {
5223 skb = skb_rb_next(skb);
5224
		/* Range is terminated when we see a gap or when
		 * we are at the queue end.
		 */
5228 if (!skb ||
5229 after(TCP_SKB_CB(skb)->seq, end) ||
5230 before(TCP_SKB_CB(skb)->end_seq, start)) {
			/* Do not attempt collapsing tiny skbs */
5232 if (range_truesize != head->truesize ||
5233 end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
5234 tcp_collapse(sk, NULL, &tp->out_of_order_queue,
5235 head, skb, start, end);
5236 } else {
5237 sum_tiny += range_truesize;
5238 if (sum_tiny > sk->sk_rcvbuf >> 3)
5239 return;
5240 }
5241 goto new_range;
5242 }
5243
5244 range_truesize += skb->truesize;
5245 if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
5246 start = TCP_SKB_CB(skb)->seq;
5247 if (after(TCP_SKB_CB(skb)->end_seq, end))
5248 end = TCP_SKB_CB(skb)->end_seq;
5249 }
5250}
5251
/*
 * Clean the out-of-order queue to make room.
 * We drop high sequences packets to :
 * 1) Let a chance for holes to be filled.
 * 2) not use memory used by packets not in receive queue yet
 * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
 *
 * Return true if queue has shrunk.
 */
5263static bool tcp_prune_ofo_queue(struct sock *sk)
5264{
5265 struct tcp_sock *tp = tcp_sk(sk);
5266 struct rb_node *node, *prev;
5267 int goal;
5268
5269 if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
5270 return false;
5271
5272 NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
5273 goal = sk->sk_rcvbuf >> 3;
5274 node = &tp->ooo_last_skb->rbnode;
5275 do {
5276 prev = rb_prev(node);
5277 rb_erase(node, &tp->out_of_order_queue);
5278 goal -= rb_to_skb(node)->truesize;
5279 tcp_drop(sk, rb_to_skb(node));
5280 if (!prev || goal <= 0) {
5281 sk_mem_reclaim(sk);
5282 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
5283 !tcp_under_memory_pressure(sk))
5284 break;
5285 goal = sk->sk_rcvbuf >> 3;
5286 }
5287 node = prev;
5288 } while (node);
5289 tp->ooo_last_skb = rb_to_skb(prev);
5290
	/* Reset SACK state.  A conforming SACK implementation will
	 * do the same at a timeout based retransmit.  When a connection
	 * is in a sad state like this, we care only about integrity
	 * of the connection not performance.
	 */
5296 if (tp->rx_opt.sack_ok)
5297 tcp_sack_reset(&tp->rx_opt);
5298 return true;
5299}
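
/* Worked example (illustrative, not part of the original source): with
 * sk_rcvbuf = 1 MB, each pass sets goal = 1 MB >> 3 = 128 KB, so at least
 * 12.5% of the receive buffer worth of truesize is freed before the
 * rmem/memory-pressure condition is re-checked.
 */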
5300
/* Reduce allocated memory if we can, trying to get
 * the socket within its memory limits again.
 *
 * Return less than zero if we should start dropping frames
 * until the socket owning process reads some of the data
 * to stabilize the situation.
 */
5308static int tcp_prune_queue(struct sock *sk)
5309{
5310 struct tcp_sock *tp = tcp_sk(sk);
5311
5312 NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
5313
5314 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
5315 tcp_clamp_window(sk);
5316 else if (tcp_under_memory_pressure(sk))
5317 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
5318
5319 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
5320 return 0;
5321
5322 tcp_collapse_ofo_queue(sk);
5323 if (!skb_queue_empty(&sk->sk_receive_queue))
5324 tcp_collapse(sk, &sk->sk_receive_queue, NULL,
5325 skb_peek(&sk->sk_receive_queue),
5326 NULL,
5327 tp->copied_seq, tp->rcv_nxt);
5328 sk_mem_reclaim(sk);
5329
5330 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
5331 return 0;
5332
5333
	/* Collapsing did not help, destructive actions follow.
	 * This must not ever occur. */
5336 tcp_prune_ofo_queue(sk);
5337
5338 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
5339 return 0;
5340
5341
	/* If we are really being abused, tell the caller to silently
	 * drop receive data on the floor.  It will get retransmitted
	 * and hopefully then we'll have sufficient space.
	 */
5345 NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);
5346
	/* Massive buffer overcommit. */
5348 tp->pred_flags = 0;
5349 return -1;
5350}
5351
5352static bool tcp_should_expand_sndbuf(const struct sock *sk)
5353{
5354 const struct tcp_sock *tp = tcp_sk(sk);
5355
	/* If the user specified a specific send buffer setting, do
	 * not modify it.
	 */
5359 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
5360 return false;
5361
	/* If we are under global TCP memory pressure, do not expand.  */
5363 if (tcp_under_memory_pressure(sk))
5364 return false;
5365
	/* If we are under soft global TCP memory pressure, do not expand.  */
5367 if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
5368 return false;
5369
	/* If we filled the congestion window, do not expand.  */
5371 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
5372 return false;
5373
5374 return true;
5375}
5376
5377static void tcp_new_space(struct sock *sk)
5378{
5379 struct tcp_sock *tp = tcp_sk(sk);
5380
5381 if (tcp_should_expand_sndbuf(sk)) {
5382 tcp_sndbuf_expand(sk);
5383 tp->snd_cwnd_stamp = tcp_jiffies32;
5384 }
5385
5386 sk->sk_write_space(sk);
5387}
5388
5389static void tcp_check_space(struct sock *sk)
5390{
	/* pairs with tcp_poll() */
5392 smp_mb();
5393 if (sk->sk_socket &&
5394 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
5395 tcp_new_space(sk);
5396 if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
5397 tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
5398 }
5399}
5400
5401static inline void tcp_data_snd_check(struct sock *sk)
5402{
5403 tcp_push_pending_frames(sk);
5404 tcp_check_space(sk);
5405}
5406
/*
 * Check if sending an ack is needed.
 */
5410static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
5411{
5412 struct tcp_sock *tp = tcp_sk(sk);
5413 unsigned long rtt, delay;
5414
	    /* More than one full frame received... */
5416 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
	     /* ... and right edge of window advances far enough.
	      * (tcp_recvmsg() will send ACK otherwise).
	      * If application uses SO_RCVLOWAT, we want to send an ACK now
	      * if we have not received enough bytes to satisfy the condition.
	      */
5422 (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
5423 __tcp_select_window(sk) >= tp->rcv_wnd)) ||
	    /* We ACK each frame or... */
5425 tcp_in_quickack_mode(sk) ||
	    /* Protocol state mandates a one-time immediate ACK */
5427 inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) {
5428send_now:
5429 tcp_send_ack(sk);
5430 return;
5431 }
5432
5433 if (!ofo_possible || RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
5434 tcp_send_delayed_ack(sk);
5435 return;
5436 }
5437
5438 if (!tcp_is_sack(tp) ||
5439 tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
5440 goto send_now;
5441
5442 if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
5443 tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
5444 tp->dup_ack_counter = 0;
5445 }
5446 if (tp->dup_ack_counter < TCP_FASTRETRANS_THRESH) {
5447 tp->dup_ack_counter++;
5448 goto send_now;
5449 }
5450 tp->compressed_ack++;
5451 if (hrtimer_is_queued(&tp->compressed_ack_timer))
5452 return;
5453
	/* compress ack timer : 5 % of rtt, but no more than tcp_comp_sack_delay_ns */

5456 rtt = tp->rcv_rtt_est.rtt_us;
5457 if (tp->srtt_us && tp->srtt_us < rtt)
5458 rtt = tp->srtt_us;
5459
5460 delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
5461 rtt * (NSEC_PER_USEC >> 3)/20);
5462 sock_hold(sk);
5463 hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
5464 sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
5465 HRTIMER_MODE_REL_PINNED_SOFT);
5466}
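
/* Units check (illustrative, not part of the original source):
 * rcv_rtt_est.rtt_us and srtt_us are stored left-shifted by 3, so
 * rtt * (NSEC_PER_USEC >> 3) == rtt_us * 1000, the RTT in nanoseconds;
 * dividing by 20 yields 5% of the RTT, clamped from above by
 * sysctl_tcp_comp_sack_delay_ns.
 */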
5467
5468static inline void tcp_ack_snd_check(struct sock *sk)
5469{
5470 if (!inet_csk_ack_scheduled(sk)) {
		/* We sent a data segment already. */
5472 return;
5473 }
5474 __tcp_ack_snd_check(sk, 1);
5475}
5476
5477
/*
 *	This routine is only called when we have urgent data
 *	signalled. It's the 'slow' part of tcp_urg. It could be
 *	moved inline now as tcp_urg is only called from one
 *	place. We handle URGent data wrong. We have to - as
 *	BSD still doesn't use the correction from RFC961.
 *	For 1003.1g we should support a new option TCP_STDURG to permit
 *	only RFC1122 URG handling. Yet anyway this facility should be
 *	dumb, because it doesn't use the urgent pointer.
 */
5487static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
5488{
5489 struct tcp_sock *tp = tcp_sk(sk);
5490 u32 ptr = ntohs(th->urg_ptr);
5491
5492 if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
5493 ptr--;
5494 ptr += ntohl(th->seq);
5495
	/* Ignore urgent data that we've already seen and read. */
5497 if (after(tp->copied_seq, ptr))
5498 return;
5499
	/* Do not replay urg ptr.
	 *
	 * NOTE: interesting situation not covered by specs.
	 * Misbehaving sender may send urg ptr, pointing to segment,
	 * which we already have in ofo queue. We are not able to fetch
	 * such data and will stay in TCP_URG_NOTYET until will be eaten
	 * by recvmsg(). Seems, we are not obliged to handle such wicked
	 * situations. But it is worth to think about possibility of some
	 * DoSes using some hypothetical application level deadlock.
	 */
5510 if (before(ptr, tp->rcv_nxt))
5511 return;
5512
	/* Do we already have a newer (or duplicate) urgent pointer? */
5514 if (tp->urg_data && !after(ptr, tp->urg_seq))
5515 return;
5516
	/* Tell the world about our new urgent pointer. */
5518 sk_send_sigurg(sk);
5519
	/* We may be adding urgent data when the last byte read was
	 * urgent. To do this requires some care. We cannot just ignore
	 * tp->copied_seq since we would read the last urgent byte again
	 * as data, nor can we alter copied_seq until this data arrives
	 * or we break the semantics of SIOCATMARK (and thus sockatmark()).
	 *
	 * The code below skips the in-band copy of an urgent byte that
	 * was already consumed: copied_seq is advanced past it, and the
	 * skb is freed once it has been fully read.
	 */
5535 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
5536 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
5537 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
5538 tp->copied_seq++;
5539 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
5540 __skb_unlink(skb, &sk->sk_receive_queue);
5541 __kfree_skb(skb);
5542 }
5543 }
5544
5545 tp->urg_data = TCP_URG_NOTYET;
5546 WRITE_ONCE(tp->urg_seq, ptr);
5547
	/* Disable header prediction. */
5549 tp->pred_flags = 0;
5550}
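
/* Worked example (illustrative, not part of the original source): for a
 * segment with seq = 1000 and urg_ptr = 5, the default BSD-compatible
 * interpretation (sysctl_tcp_stdurg == 0) marks byte 1000 + 5 - 1 = 1004
 * as the urgent byte, while the strict RFC 1122 interpretation (sysctl
 * set) marks 1005.
 */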
5551
/* This is the 'fast' part of urgent handling. */
5553static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
5554{
5555 struct tcp_sock *tp = tcp_sk(sk);
5556
	/* Check if we get a new urgent pointer - normally not. */
5558 if (th->urg)
5559 tcp_check_urg(sk, th);
5560
	/* Do we wait for any urgent data? - normally not... */
5562 if (tp->urg_data == TCP_URG_NOTYET) {
5563 u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
5564 th->syn;
5565
		/* Is the urgent pointer pointing into this packet? */
5567 if (ptr < skb->len) {
5568 u8 tmp;
5569 if (skb_copy_bits(skb, ptr, &tmp, 1))
5570 BUG();
5571 tp->urg_data = TCP_URG_VALID | tmp;
5572 if (!sock_flag(sk, SOCK_DEAD))
5573 sk->sk_data_ready(sk);
5574 }
5575 }
5576}
5577
/* Accept RST for rcv_nxt - 1 after a FIN.
 * When tcp connections are abruptly terminated from Mac OSX (via ^C), a
 * FIN is sent followed by a RST packet. The RST is sent with the same
 * sequence number as the FIN, and thus according to RFC 5961 a challenge
 * ACK should be sent. However, Mac OSX rejects the challenge ACK and
 * resends the RST to retry closing the connection. So we accept a RST
 * whose sequence number is rcv_nxt - 1 while the socket is in one of
 * the closing states (CLOSE_WAIT, LAST_ACK, CLOSING).
 */
5586static bool tcp_reset_check(const struct sock *sk, const struct sk_buff *skb)
5587{
5588 struct tcp_sock *tp = tcp_sk(sk);
5589
5590 return unlikely(TCP_SKB_CB(skb)->seq == (tp->rcv_nxt - 1) &&
5591 (1 << sk->sk_state) & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK |
5592 TCPF_CLOSING));
5593}
5594
/* Does PAWS and seqno based validation of an incoming segment, flags will
 * play significant role here.
 */
5598static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5599 const struct tcphdr *th, int syn_inerr)
5600{
5601 struct tcp_sock *tp = tcp_sk(sk);
5602 bool rst_seq_match = false;
5603
	/* RFC1323: H1. Apply PAWS check first. */
5605 if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
5606 tp->rx_opt.saw_tstamp &&
5607 tcp_paws_discard(sk, skb)) {
5608 if (!th->rst) {
5609 NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
5610 if (!tcp_oow_rate_limited(sock_net(sk), skb,
5611 LINUX_MIB_TCPACKSKIPPEDPAWS,
5612 &tp->last_oow_ack_time))
5613 tcp_send_dupack(sk, skb);
5614 goto discard;
5615 }
		/* Reset is accepted even if it did not pass PAWS. */
5617 }
5618
	/* Step 1: check sequence number */
5620 if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
		/* RFC793, page 37: "In all states except SYN-SENT, all reset
		 * (RST) segments are validated by checking their SEQ-fields."
		 * And page 69: "If an incoming segment is not acceptable,
		 * an acknowledgment should be sent in reply (unless the RST
		 * bit is set, if so drop the segment and return)".
		 */
5627 if (!th->rst) {
5628 if (th->syn)
5629 goto syn_challenge;
5630 if (!tcp_oow_rate_limited(sock_net(sk), skb,
5631 LINUX_MIB_TCPACKSKIPPEDSEQ,
5632 &tp->last_oow_ack_time))
5633 tcp_send_dupack(sk, skb);
5634 } else if (tcp_reset_check(sk, skb)) {
5635 tcp_reset(sk, skb);
5636 }
5637 goto discard;
5638 }
5639
	/* Step 2: check RST bit */
5641 if (th->rst) {
		/* RFC 5961 3.2 (extend to match against (RCV.NXT - 1) after a
		 * FIN and SACK too if available):
		 * If seq num matches RCV.NXT or (RCV.NXT - 1) after a FIN, or
		 * the right-most SACK block,
		 * then
		 *     RESET the connection
		 * else
		 *     Send a challenge ACK
		 */
5651 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt ||
5652 tcp_reset_check(sk, skb)) {
5653 rst_seq_match = true;
5654 } else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
5655 struct tcp_sack_block *sp = &tp->selective_acks[0];
5656 int max_sack = sp[0].end_seq;
5657 int this_sack;
5658
5659 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;
5660 ++this_sack) {
5661 max_sack = after(sp[this_sack].end_seq,
5662 max_sack) ?
5663 sp[this_sack].end_seq : max_sack;
5664 }
5665
5666 if (TCP_SKB_CB(skb)->seq == max_sack)
5667 rst_seq_match = true;
5668 }
5669
5670 if (rst_seq_match)
5671 tcp_reset(sk, skb);
5672 else {
			/* Disable TFO if RST is out-of-order
			 * and no data has been received
			 * for current active TFO socket
			 */
5677 if (tp->syn_fastopen && !tp->data_segs_in &&
5678 sk->sk_state == TCP_ESTABLISHED)
5679 tcp_fastopen_active_disable(sk);
5680 tcp_send_challenge_ack(sk, skb);
5681 }
5682 goto discard;
5683 }
5684
	/* step 3: check security and precedence [ignored] */

	/* step 4: Check for a SYN
	 * RFC 5961 4.2 : Send a challenge ack
	 */
5690 if (th->syn) {
5691syn_challenge:
5692 if (syn_inerr)
5693 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
5694 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
5695 tcp_send_challenge_ack(sk, skb);
5696 goto discard;
5697 }
5698
5699 bpf_skops_parse_hdr(sk, skb);
5700
5701 return true;
5702
5703discard:
5704 tcp_drop(sk, skb);
5705 return false;
5706}
5707
5708
/*
 *	TCP receive function for the ESTABLISHED state.
 *
 *	It is split into a fast path and a slow path. The fast path is
 *	disabled when:
 *	- A zero window was announced from us - zero window probing
 *	  is only handled properly in the slow path.
 *	- Out of order segments arrived.
 *	- Urgent data is expected.
 *	- There is no buffer space left.
 *	- Unexpected TCP flags/window values/header lengths are received
 *	  (detected by checking the TCP header against pred_flags).
 *	- Data is sent in both directions. The fast path only supports pure
 *	  senders or pure receivers (this means either the sequence number
 *	  or the ack value must stay constant).
 *	- An unexpected TCP option arrived.
 *
 *	When these conditions are not satisfied it drops into a standard
 *	receive procedure patterned after RFC793 to handle all cases.
 *	The first three cases are guaranteed by proper pred_flags setting,
 *	the rest is checked inline. Fast processing is turned on in
 *	tcp_data_queue when everything is OK.
 */
5731void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
5732{
5733 const struct tcphdr *th = (const struct tcphdr *)skb->data;
5734 struct tcp_sock *tp = tcp_sk(sk);
5735 unsigned int len = skb->len;
5736
	/* TCP congestion window tracking */
5738 trace_tcp_probe(sk, skb);
5739
5740 tcp_mstamp_refresh(tp);
5741 if (unlikely(!sk->sk_rx_dst))
5742 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
5743
	/*
	 *	Header prediction.
	 *	The code loosely follows the one in the famous
	 *	"30 instruction TCP receive" Van Jacobson mail.
	 *
	 *	Van's trick is to deposit buffers into socket queue
	 *	on a device interrupt, to call tcp_recv function
	 *	on the receive process context and checksum and copy
	 *	the buffer to user space. smart...
	 *
	 *	Our current scheme is not silly either but we take the
	 *	extra cost of the net_bh soft interrupt processing...
	 *	We do checksum and copy also but from device to kernel.
	 */
5757
5758 tp->rx_opt.saw_tstamp = 0;
5759
	/*	pred_flags is 0xS?10 << 16 + snd_wnd
	 *	if header_prediction is to be made
	 *	'S' will always be tp->tcp_header_len >> 2
	 *	'?' will be 0 for the fast path, otherwise pred_flags is 0 to
	 *	turn it off	(when there are holes in the receive
	 *	 space for instance)
	 *	PSH flag is ignored.
	 */
5768
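	/* Illustrative example (not part of the original source): with
	 * timestamps negotiated, tcp_header_len is 32 so doff is 8, and
	 * __tcp_fast_path_on() stores
	 *	pred_flags = htonl((32 << 26) | ntohl(TCP_FLAG_ACK) | snd_wnd)
	 * i.e. 0x8010 << 16 | snd_wnd; any other doff, flags or window in
	 * the incoming flag word falls back to the slow path below.
	 */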
5769 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
5770 TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
5771 !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
5772 int tcp_header_len = tp->tcp_header_len;
5773
		/* Timestamp header prediction: tcp_header_len
		 * is automatically equal to th->doff*4 due to pred_flags
		 * match.
		 */

		/* Check timestamp */
5780 if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
			/* No? Slow path! */
5782 if (!tcp_parse_aligned_timestamp(tp, th))
5783 goto slow_path;
5784
			/* If PAWS failed, check it more carefully in slow path */
5786 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
5787 goto slow_path;
5788
			/* DO NOT update ts_recent here, if checksum fails
			 * and timestamp was corrupted part, it will result
			 * in a hung connection since we will drop all
			 * future packets due to the PAWS test.
			 */
5794 }
5795
5796 if (len <= tcp_header_len) {
			/* Bulk data transfer: sender */
5798 if (len == tcp_header_len) {
				/* Predicted packet is in window by definition.
				 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
				 * Hence, check seq<=rcv_wup reduces to:
				 */
5803 if (tcp_header_len ==
5804 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5805 tp->rcv_nxt == tp->rcv_wup)
5806 tcp_store_ts_recent(tp);
5807
				/* We know that such packets are checksummed
				 * on entry.
				 */
5811 tcp_ack(sk, skb, 0);
5812 __kfree_skb(skb);
5813 tcp_data_snd_check(sk);
				/* When receiving pure ack in fast path, update
				 * last ts ecr directly instead of calling
				 * tcp_rcv_rtt_measure_ts()
				 */
5818 tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
5819 return;
			} else { /* Header too small */
5821 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
5822 goto discard;
5823 }
5824 } else {
5825 int eaten = 0;
5826 bool fragstolen = false;
5827
5828 if (tcp_checksum_complete(skb))
5829 goto csum_error;
5830
5831 if ((int)skb->truesize > sk->sk_forward_alloc)
5832 goto step5;
5833
			/* Predicted packet is in window by definition.
			 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
			 * Hence, check seq<=rcv_wup reduces to:
			 */
5838 if (tcp_header_len ==
5839 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5840 tp->rcv_nxt == tp->rcv_wup)
5841 tcp_store_ts_recent(tp);
5842
5843 tcp_rcv_rtt_measure_ts(sk, skb);
5844
5845 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
5846
			/* Bulk data transfer: receiver */
5848 __skb_pull(skb, tcp_header_len);
5849 eaten = tcp_queue_rcv(sk, skb, &fragstolen);
5850
5851 tcp_event_data_recv(sk, skb);
5852
5853 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
				/* Well, only one small jumplet in fast path... */
5855 tcp_ack(sk, skb, FLAG_DATA);
5856 tcp_data_snd_check(sk);
5857 if (!inet_csk_ack_scheduled(sk))
5858 goto no_ack;
5859 } else {
5860 tcp_update_wl(tp, TCP_SKB_CB(skb)->seq);
5861 }
5862
5863 __tcp_ack_snd_check(sk, 0);
5864no_ack:
5865 if (eaten)
5866 kfree_skb_partial(skb, fragstolen);
5867 tcp_data_ready(sk);
5868 return;
5869 }
5870 }
5871
5872slow_path:
5873 if (len < (th->doff << 2) || tcp_checksum_complete(skb))
5874 goto csum_error;
5875
5876 if (!th->ack && !th->rst && !th->syn)
5877 goto discard;
5878
	/*
	 *	Standard slow path.
	 */
5883 if (!tcp_validate_incoming(sk, skb, th, 1))
5884 return;
5885
5886step5:
5887 if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
5888 goto discard;
5889
5890 tcp_rcv_rtt_measure_ts(sk, skb);
5891
	/* Process urgent data. */
5893 tcp_urg(sk, skb, th);
5894
	/* step 7: process the segment text */
5896 tcp_data_queue(sk, skb);
5897
5898 tcp_data_snd_check(sk);
5899 tcp_ack_snd_check(sk);
5900 return;
5901
5902csum_error:
5903 trace_tcp_bad_csum(skb);
5904 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
5905 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
5906
5907discard:
5908 tcp_drop(sk, skb);
5909}
5910EXPORT_SYMBOL(tcp_rcv_established);
5911
5912void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
5913{
5914 struct inet_connection_sock *icsk = inet_csk(sk);
5915 struct tcp_sock *tp = tcp_sk(sk);
5916
5917 tcp_mtup_init(sk);
5918 icsk->icsk_af_ops->rebuild_header(sk);
5919 tcp_init_metrics(sk);
5920
	/* Initialize the congestion window to start the transfer.
	 * Cut cwnd down to 1 per RFC5681 if the SYN or SYN-ACK has been
	 * retransmitted more than once and the retransmission has not
	 * been proven spurious (undo_marker is still set), since the
	 * network is then presumed congested.
	 */
5927 if (tp->total_retrans > 1 && tp->undo_marker)
5928 tp->snd_cwnd = 1;
5929 else
5930 tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
5931 tp->snd_cwnd_stamp = tcp_jiffies32;
5932
5933 bpf_skops_established(sk, bpf_op, skb);
5934
5935 if (!icsk->icsk_ca_initialized)
5936 tcp_init_congestion_control(sk);
5937 tcp_init_buffer_space(sk);
5938}
5939
5940void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
5941{
5942 struct tcp_sock *tp = tcp_sk(sk);
5943 struct inet_connection_sock *icsk = inet_csk(sk);
5944
5945 tcp_set_state(sk, TCP_ESTABLISHED);
5946 icsk->icsk_ack.lrcvtime = tcp_jiffies32;
5947
5948 if (skb) {
5949 icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
5950 security_inet_conn_established(sk, skb);
5951 sk_mark_napi_id(sk, skb);
5952 }
5953
5954 tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, skb);
5955
	/* Prevent spurious tcp_cwnd_restart() on first data
	 * packet.
	 */
5959 tp->lsndtime = tcp_jiffies32;
5960
5961 if (sock_flag(sk, SOCK_KEEPOPEN))
5962 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
5963
5964 if (!tp->rx_opt.snd_wscale)
5965 __tcp_fast_path_on(tp, tp->snd_wnd);
5966 else
5967 tp->pred_flags = 0;
5968}
5969
5970static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5971 struct tcp_fastopen_cookie *cookie)
5972{
5973 struct tcp_sock *tp = tcp_sk(sk);
5974 struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
5975 u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
5976 bool syn_drop = false;
5977
5978 if (mss == tp->rx_opt.user_mss) {
5979 struct tcp_options_received opt;
5980
		/* Get original SYNACK MSS value if user MSS sets mss_clamp */
5982 tcp_clear_options(&opt);
5983 opt.user_mss = opt.mss_clamp = 0;
5984 tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL);
5985 mss = opt.mss_clamp;
5986 }
5987
5988 if (!tp->syn_fastopen) {
		/* Ignore an unsolicited cookie */
5990 cookie->len = -1;
5991 } else if (tp->total_retrans) {
		/* SYN timed out and the SYN-ACK neither has a cookie nor
		 * acknowledges data. Presumably the remote received only
		 * the retransmitted (regular) SYNs: either the original
		 * SYN-data or the corresponding SYN-ACK was dropped.
		 */
5997 syn_drop = (cookie->len < 0 && data);
5998 } else if (cookie->len < 0 && !tp->syn_data) {
		/* We requested a cookie but didn't get it. If we did not use
		 * the (old) exp opt format then try so next time (try_exp=1).
		 * Otherwise we go back to use the RFC7413 opt (try_exp=2).
		 */
6003 try_exp = tp->syn_fastopen_exp ? 2 : 1;
6004 }
6005
6006 tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
6007
6008 if (data) {
6009 if (tp->total_retrans)
6010 tp->fastopen_client_fail = TFO_SYN_RETRANSMITTED;
6011 else
6012 tp->fastopen_client_fail = TFO_DATA_NOT_ACKED;
6013 skb_rbtree_walk_from(data)
6014 tcp_mark_skb_lost(sk, data);
6015 tcp_xmit_retransmit_queue(sk);
6016 NET_INC_STATS(sock_net(sk),
6017 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
6018 return true;
6019 }
6020 tp->syn_data_acked = tp->syn_data;
6021 if (tp->syn_data_acked) {
6022 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
		/* SYN-data is counted as two separate packets in tcp_ack() */
6024 if (tp->delivered > 1)
6025 --tp->delivered;
6026 }
6027
6028 tcp_fastopen_add_skb(sk, synack);
6029
6030 return false;
6031}
6032
6033static void smc_check_reset_syn(struct tcp_sock *tp)
6034{
6035#if IS_ENABLED(CONFIG_SMC)
6036 if (static_branch_unlikely(&tcp_have_smc)) {
6037 if (tp->syn_smc && !tp->rx_opt.smc_ok)
6038 tp->syn_smc = 0;
6039 }
6040#endif
6041}
6042
6043static void tcp_try_undo_spurious_syn(struct sock *sk)
6044{
6045 struct tcp_sock *tp = tcp_sk(sk);
6046 u32 syn_stamp;
6047
6048
	/* undo_marker is set when SYN or SYNACK times out. The timeout is
	 * spurious if the ACK's timestamp option echo value matches the
	 * original SYN timestamp.
	 */
6052 syn_stamp = tp->retrans_stamp;
6053 if (tp->undo_marker && syn_stamp && tp->rx_opt.saw_tstamp &&
6054 syn_stamp == tp->rx_opt.rcv_tsecr)
6055 tp->undo_marker = 0;
6056}
6057
6058static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
6059 const struct tcphdr *th)
6060{
6061 struct inet_connection_sock *icsk = inet_csk(sk);
6062 struct tcp_sock *tp = tcp_sk(sk);
6063 struct tcp_fastopen_cookie foc = { .len = -1 };
6064 int saved_clamp = tp->rx_opt.mss_clamp;
6065 bool fastopen_fail;
6066
6067 tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
6068 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
6069 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
6070
6071 if (th->ack) {
		/* rfc793:
		 * "If the state is SYN-SENT then
		 *    first check the ACK bit
		 *      If the ACK bit is set
		 *	  If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
		 *        a reset (unless the RST bit is set, if so drop
		 *        the segment and return)"
		 */
6080 if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) ||
6081 after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
			/* Previous FIN/ACK or RST/ACK might be ignored. */
6083 if (icsk->icsk_retransmits == 0)
6084 inet_csk_reset_xmit_timer(sk,
6085 ICSK_TIME_RETRANS,
6086 TCP_TIMEOUT_MIN, TCP_RTO_MAX);
6087 goto reset_and_undo;
6088 }
6089
6090 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
6091 !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
6092 tcp_time_stamp(tp))) {
6093 NET_INC_STATS(sock_net(sk),
6094 LINUX_MIB_PAWSACTIVEREJECTED);
6095 goto reset_and_undo;
6096 }
6097
		/* Now ACK is acceptable.
		 *
		 * "If the RST bit is set
		 *    If the ACK was acceptable then signal the user "error:
		 *    connection reset", drop the segment, enter CLOSED state,
		 *    delete TCB, and return."
		 */
6105
6106 if (th->rst) {
6107 tcp_reset(sk, skb);
6108 goto discard;
6109 }
6110
		/* rfc793:
		 *   "fifth, if neither of the SYN or RST bits is set then
		 *    drop the segment and return."
		 *
		 *    See note below!
		 *                                        --ANK(990513)
		 */
6118 if (!th->syn)
6119 goto discard_and_undo;
6120
		/* rfc793:
		 *   "If the SYN bit is on ...
		 *    are acceptable then ...
		 *    (our SYN has been ACKed), change the connection
		 *    state to ESTABLISHED..."
		 */
6127
6128 tcp_ecn_rcv_synack(tp, th);
6129
6130 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
6131 tcp_try_undo_spurious_syn(sk);
6132 tcp_ack(sk, skb, FLAG_SLOWPATH);
6133
		/* Ok.. it's good. Set up sequence numbers and
		 * move to established.
		 */
6137 WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
6138 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
6139
		/* RFC1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
6143 tp->snd_wnd = ntohs(th->window);
6144
6145 if (!tp->rx_opt.wscale_ok) {
6146 tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
6147 tp->window_clamp = min(tp->window_clamp, 65535U);
6148 }
6149
6150 if (tp->rx_opt.saw_tstamp) {
6151 tp->rx_opt.tstamp_ok = 1;
6152 tp->tcp_header_len =
6153 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
6154 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
6155 tcp_store_ts_recent(tp);
6156 } else {
6157 tp->tcp_header_len = sizeof(struct tcphdr);
6158 }
6159
6160 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
6161 tcp_initialize_rcv_mss(sk);
6162
		/* Remember, tcp_poll() does not lock socket!
		 * Change state from SYN-SENT only after copied_seq
		 * is initialized. */
6166 WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
6167
6168 smc_check_reset_syn(tp);
6169
6170 smp_mb();
6171
6172 tcp_finish_connect(sk, skb);
6173
6174 fastopen_fail = (tp->syn_fastopen || tp->syn_data) &&
6175 tcp_rcv_fastopen_synack(sk, skb, &foc);
6176
6177 if (!sock_flag(sk, SOCK_DEAD)) {
6178 sk->sk_state_change(sk);
6179 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
6180 }
6181 if (fastopen_fail)
6182 return -1;
6183 if (sk->sk_write_pending ||
6184 icsk->icsk_accept_queue.rskq_defer_accept ||
6185 inet_csk_in_pingpong_mode(sk)) {
			/* Save one ACK. Data will be ready after
			 * several ticks, if write_pending is set.
			 *
			 * It may be deleted, but with this feature tcpdumps
			 * look so _wonderfully_ clever, that I was not able
			 * to stand against the temptation 8)     --ANK
			 */
6193 inet_csk_schedule_ack(sk);
6194 tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
6195 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
6196 TCP_DELACK_MAX, TCP_RTO_MAX);
6197
6198discard:
6199 tcp_drop(sk, skb);
6200 return 0;
6201 } else {
6202 tcp_send_ack(sk);
6203 }
6204 return -1;
6205 }
6206
	/* No ACK in the segment */

6209 if (th->rst) {
		/*
		 * "If the RST bit is set
		 *
		 *      Otherwise (no ACK) drop the segment and return."
		 */
6216 goto discard_and_undo;
6217 }
6218
	/* PAWS check. */
6220 if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
6221 tcp_paws_reject(&tp->rx_opt, 0))
6222 goto discard_and_undo;
6223
6224 if (th->syn) {
		/* We see SYN without ACK. It is attempt of
		 * simultaneous connect with crossed SYNs.
		 * Particularly, it can be connect to self.
		 */
6229 tcp_set_state(sk, TCP_SYN_RECV);
6230
6231 if (tp->rx_opt.saw_tstamp) {
6232 tp->rx_opt.tstamp_ok = 1;
6233 tcp_store_ts_recent(tp);
6234 tp->tcp_header_len =
6235 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
6236 } else {
6237 tp->tcp_header_len = sizeof(struct tcphdr);
6238 }
6239
6240 WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
6241 WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
6242 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
6243
		/* RFC1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
6247 tp->snd_wnd = ntohs(th->window);
6248 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
6249 tp->max_window = tp->snd_wnd;
6250
6251 tcp_ecn_rcv_syn(tp, th);
6252
6253 tcp_mtup_init(sk);
6254 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
6255 tcp_initialize_rcv_mss(sk);
6256
6257 tcp_send_synack(sk);
6258#if 0
		/* Note, we could accept data and URG from this segment.
		 * There are no obstacles to make this (except that we must
		 * either change tcp_recvmsg() to prevent it from returning data
		 * before 3WHS completes per RFC793, or employ TCP Fast Open).
		 *
		 * However, if we ignore data in ACKless segments sometimes,
		 * we have no reasons to accept it sometimes.
		 * Also, seems the code doing it in step6 of tcp_rcv_state_process
		 * is not proper.
		 *                                           --ANK (990728)
		 */
6270 return -1;
6271#else
6272 goto discard;
6273#endif
6274 }
6275
	/* "fifth, if neither of the SYN or RST bits is set then
	 * drop the segment and return."
	 */
6279discard_and_undo:
6280 tcp_clear_options(&tp->rx_opt);
6281 tp->rx_opt.mss_clamp = saved_clamp;
6282 goto discard;
6283
6284reset_and_undo:
6285 tcp_clear_options(&tp->rx_opt);
6286 tp->rx_opt.mss_clamp = saved_clamp;
6287 return 1;
6288}
6289
6290static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
6291{
6292 struct request_sock *req;
6293
	/* If we are still handling the SYNACK RTO, see if timestamp ECR allows
	 * undo. If peer SACKs triggered fast recovery, we can't undo here.
	 */
6297 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
6298 tcp_try_undo_loss(sk, false);
6299
	/* Reset rtx states to prevent spurious retransmits_timed_out() */
6301 tcp_sk(sk)->retrans_stamp = 0;
6302 inet_csk(sk)->icsk_retransmits = 0;
6303
	/* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1,
	 * we no longer need req so release it.
	 */
6307 req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
6308 lockdep_sock_is_held(sk));
6309 reqsk_fastopen_remove(sk, req, false);
6310
	/* Re-arm the timer because data may have been sent out.
	 * This is similar to the regular data transmission case
	 * when new data has just been ack'ed.
	 *
	 * (TFO) - we could try to be more aggressive and
	 * retransmit any data sooner based on when it
	 * was sent out.
	 */
6319 tcp_rearm_rto(sk);
6320}
6321
6322
/*
 *	This function implements the receiving procedure of RFC 793 for
 *	all states except ESTABLISHED and TIME_WAIT.
 *	It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
 *	address independent.
 */
6329int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
6330{
6331 struct tcp_sock *tp = tcp_sk(sk);
6332 struct inet_connection_sock *icsk = inet_csk(sk);
6333 const struct tcphdr *th = tcp_hdr(skb);
6334 struct request_sock *req;
6335 int queued = 0;
6336 bool acceptable;
6337
6338 switch (sk->sk_state) {
6339 case TCP_CLOSE:
6340 goto discard;
6341
6342 case TCP_LISTEN:
6343 if (th->ack)
6344 return 1;
6345
6346 if (th->rst)
6347 goto discard;
6348
6349 if (th->syn) {
6350 if (th->fin)
6351 goto discard;
6352
			/* It is possible that we process SYN packets from backlog,
			 * so we need to make sure to disable BH and RCU right there.
			 */
6355 rcu_read_lock();
6356 local_bh_disable();
6357 acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
6358 local_bh_enable();
6359 rcu_read_unlock();
6360
6361 if (!acceptable)
6362 return 1;
6363 consume_skb(skb);
6364 return 0;
6365 }
6366 goto discard;
6367
6368 case TCP_SYN_SENT:
6369 tp->rx_opt.saw_tstamp = 0;
6370 tcp_mstamp_refresh(tp);
6371 queued = tcp_rcv_synsent_state_process(sk, skb, th);
6372 if (queued >= 0)
6373 return queued;
6374
		/* Do step6 onward by hand. */
6376 tcp_urg(sk, skb, th);
6377 __kfree_skb(skb);
6378 tcp_data_snd_check(sk);
6379 return 0;
6380 }
6381
6382 tcp_mstamp_refresh(tp);
6383 tp->rx_opt.saw_tstamp = 0;
6384 req = rcu_dereference_protected(tp->fastopen_rsk,
6385 lockdep_sock_is_held(sk));
6386 if (req) {
6387 bool req_stolen;
6388
6389 WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
6390 sk->sk_state != TCP_FIN_WAIT1);
6391
6392 if (!tcp_check_req(sk, skb, req, true, &req_stolen))
6393 goto discard;
6394 }
6395
6396 if (!th->ack && !th->rst && !th->syn)
6397 goto discard;
6398
6399 if (!tcp_validate_incoming(sk, skb, th, 0))
6400 return 0;
6401
	/* step 5: check the ACK field */
6403 acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
6404 FLAG_UPDATE_TS_RECENT |
6405 FLAG_NO_CHALLENGE_ACK) > 0;
6406
6407 if (!acceptable) {
6408 if (sk->sk_state == TCP_SYN_RECV)
6409 return 1;
6410 tcp_send_challenge_ack(sk, skb);
6411 goto discard;
6412 }
6413 switch (sk->sk_state) {
6414 case TCP_SYN_RECV:
6415 tp->delivered++;
6416 if (!tp->srtt_us)
6417 tcp_synack_rtt_meas(sk, req);
6418
6419 if (req) {
6420 tcp_rcv_synrecv_state_fastopen(sk);
6421 } else {
6422 tcp_try_undo_spurious_syn(sk);
6423 tp->retrans_stamp = 0;
6424 tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB,
6425 skb);
6426 WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
6427 }
6428 smp_mb();
6429 tcp_set_state(sk, TCP_ESTABLISHED);
6430 sk->sk_state_change(sk);
6431
		/* Note, that this wakeup is only for marginal crossed SYN case.
		 * Passively open sockets are not waked up, because
		 * sk->sk_sleep == NULL and sk->sk_socket == NULL.
		 */
6436 if (sk->sk_socket)
6437 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
6438
6439 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
6440 tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
6441 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
6442
6443 if (tp->rx_opt.tstamp_ok)
6444 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
6445
6446 if (!inet_csk(sk)->icsk_ca_ops->cong_control)
6447 tcp_update_pacing_rate(sk);
6448
		/* Prevent spurious tcp_cwnd_restart() on first data packet. */
6450 tp->lsndtime = tcp_jiffies32;
6451
6452 tcp_initialize_rcv_mss(sk);
6453 tcp_fast_path_on(tp);
6454 break;
6455
6456 case TCP_FIN_WAIT1: {
6457 int tmo;
6458
6459 if (req)
6460 tcp_rcv_synrecv_state_fastopen(sk);
6461
6462 if (tp->snd_una != tp->write_seq)
6463 break;
6464
6465 tcp_set_state(sk, TCP_FIN_WAIT2);
6466 sk->sk_shutdown |= SEND_SHUTDOWN;
6467
6468 sk_dst_confirm(sk);
6469
6470 if (!sock_flag(sk, SOCK_DEAD)) {
			/* Wake up lingering close() */
6472 sk->sk_state_change(sk);
6473 break;
6474 }
6475
6476 if (tp->linger2 < 0) {
6477 tcp_done(sk);
6478 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
6479 return 1;
6480 }
6481 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
6482 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
			/* Receive out of order FIN after close() */
6484 if (tp->syn_fastopen && th->fin)
6485 tcp_fastopen_active_disable(sk);
6486 tcp_done(sk);
6487 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
6488 return 1;
6489 }
6490
6491 tmo = tcp_fin_time(sk);
6492 if (tmo > TCP_TIMEWAIT_LEN) {
6493 inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
6494 } else if (th->fin || sock_owned_by_user(sk)) {
			/* Bad case. We could lose such FIN otherwise.
			 * It is not a big problem, but it looks confusing
			 * and not so rare event. We still can lose it now,
			 * if it spins in bh_lock_sock(), but it is really
			 * marginal case.
			 */
6501 inet_csk_reset_keepalive_timer(sk, tmo);
6502 } else {
6503 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
6504 goto discard;
6505 }
6506 break;
6507 }
6508
6509 case TCP_CLOSING:
6510 if (tp->snd_una == tp->write_seq) {
6511 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
6512 goto discard;
6513 }
6514 break;
6515
6516 case TCP_LAST_ACK:
6517 if (tp->snd_una == tp->write_seq) {
6518 tcp_update_metrics(sk);
6519 tcp_done(sk);
6520 goto discard;
6521 }
6522 break;
6523 }
6524
	/* step 6: check the URG bit */
6526 tcp_urg(sk, skb, th);
6527
	/* step 7: process the segment text */
6529 switch (sk->sk_state) {
6530 case TCP_CLOSE_WAIT:
6531 case TCP_CLOSING:
6532 case TCP_LAST_ACK:
6533 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
			/* If a subflow has been reset, the packet should not
			 * continue to be processed, drop the packet.
			 */
6537 if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb))
6538 goto discard;
6539 break;
6540 }
6541 fallthrough;
6542 case TCP_FIN_WAIT1:
6543 case TCP_FIN_WAIT2:
		/* RFC 793 says to queue data in these states,
		 * RFC 1122 says we MUST send a reset.
		 * BSD 4.4 also does reset.
		 */
6548 if (sk->sk_shutdown & RCV_SHUTDOWN) {
6549 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
6550 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
6551 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
6552 tcp_reset(sk, skb);
6553 return 1;
6554 }
6555 }
6556 fallthrough;
6557 case TCP_ESTABLISHED:
6558 tcp_data_queue(sk, skb);
6559 queued = 1;
6560 break;
6561 }
6562
	/* tcp_data could move socket to TIME-WAIT */
6564 if (sk->sk_state != TCP_CLOSE) {
6565 tcp_data_snd_check(sk);
6566 tcp_ack_snd_check(sk);
6567 }
6568
6569 if (!queued) {
6570discard:
6571 tcp_drop(sk, skb);
6572 }
6573 return 0;
6574}
6575EXPORT_SYMBOL(tcp_rcv_state_process);
6576
6577static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
6578{
6579 struct inet_request_sock *ireq = inet_rsk(req);
6580
6581 if (family == AF_INET)
6582 net_dbg_ratelimited("drop open request from %pI4/%u\n",
6583 &ireq->ir_rmt_addr, port);
6584#if IS_ENABLED(CONFIG_IPV6)
6585 else if (family == AF_INET6)
6586 net_dbg_ratelimited("drop open request from %pI6/%u\n",
6587 &ireq->ir_v6_rmt_addr, port);
6588#endif
6589}
6590
6591
/* RFC3168 : 6.1.1 SYN packets must not have ECT/ECN bits set
 *
 * If we receive a SYN packet with these bits set, it means a
 * network is playing bad games with TOS bits. In order to
 * avoid possible false congestion notifications, we disable
 * TCP ECN negotiation.
 *
 * Exception: tcp_ca wants ECN. This is required for DCTCP
 * congestion control: Linux DCTCP asserts ECT on all packets,
 * including SYN. An ECT SYN is therefore still accepted when
 * the listener's congestion control, a BPF program, or the
 * destination route requires ECN; th->res1 additionally lets
 * such setups through the (!ect || th->res1) test below.
 */
6608static void tcp_ecn_create_request(struct request_sock *req,
6609 const struct sk_buff *skb,
6610 const struct sock *listen_sk,
6611 const struct dst_entry *dst)
6612{
6613 const struct tcphdr *th = tcp_hdr(skb);
6614 const struct net *net = sock_net(listen_sk);
6615 bool th_ecn = th->ece && th->cwr;
6616 bool ect, ecn_ok;
6617 u32 ecn_ok_dst;
6618
6619 if (!th_ecn)
6620 return;
6621
6622 ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
6623 ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
6624 ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
6625
6626 if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
6627 (ecn_ok_dst & DST_FEATURE_ECN_CA) ||
6628 tcp_bpf_ca_needs_ecn((struct sock *)req))
6629 inet_rsk(req)->ecn_ok = 1;
6630}
6631
6632static void tcp_openreq_init(struct request_sock *req,
6633 const struct tcp_options_received *rx_opt,
6634 struct sk_buff *skb, const struct sock *sk)
6635{
6636 struct inet_request_sock *ireq = inet_rsk(req);
6637
6638 req->rsk_rcv_wnd = 0;
6639 tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
6640 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
6641 tcp_rsk(req)->snt_synack = 0;
6642 tcp_rsk(req)->last_oow_ack_time = 0;
6643 req->mss = rx_opt->mss_clamp;
6644 req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
6645 ireq->tstamp_ok = rx_opt->tstamp_ok;
6646 ireq->sack_ok = rx_opt->sack_ok;
6647 ireq->snd_wscale = rx_opt->snd_wscale;
6648 ireq->wscale_ok = rx_opt->wscale_ok;
6649 ireq->acked = 0;
6650 ireq->ecn_ok = 0;
6651 ireq->ir_rmt_port = tcp_hdr(skb)->source;
6652 ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
6653 ireq->ir_mark = inet_request_mark(sk, skb);
6654#if IS_ENABLED(CONFIG_SMC)
6655 ireq->smc_ok = rx_opt->smc_ok;
6656#endif
6657}
6658
6659struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
6660 struct sock *sk_listener,
6661 bool attach_listener)
6662{
6663 struct request_sock *req = reqsk_alloc(ops, sk_listener,
6664 attach_listener);
6665
6666 if (req) {
6667 struct inet_request_sock *ireq = inet_rsk(req);
6668
6669 ireq->ireq_opt = NULL;
6670#if IS_ENABLED(CONFIG_IPV6)
6671 ireq->pktopts = NULL;
6672#endif
6673 atomic64_set(&ireq->ir_cookie, 0);
6674 ireq->ireq_state = TCP_NEW_SYN_RECV;
6675 write_pnet(&ireq->ireq_net, sock_net(sk_listener));
6676 ireq->ireq_family = sk_listener->sk_family;
6677 }
6678
6679 return req;
6680}
6681EXPORT_SYMBOL(inet_reqsk_alloc);
6682
/*
 * Return true if a syncookie should be sent
 */
6686static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
6687{
6688 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
6689 const char *msg = "Dropping request";
6690 bool want_cookie = false;
6691 struct net *net = sock_net(sk);
6692
6693#ifdef CONFIG_SYN_COOKIES
6694 if (net->ipv4.sysctl_tcp_syncookies) {
6695 msg = "Sending cookies";
6696 want_cookie = true;
6697 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
6698 } else
6699#endif
6700 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
6701
6702 if (!queue->synflood_warned &&
6703 net->ipv4.sysctl_tcp_syncookies != 2 &&
6704 xchg(&queue->synflood_warned, 1) == 0)
6705 net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
6706 proto, sk->sk_num, msg);
6707
6708 return want_cookie;
6709}
6710
6711static void tcp_reqsk_record_syn(const struct sock *sk,
6712 struct request_sock *req,
6713 const struct sk_buff *skb)
6714{
6715 if (tcp_sk(sk)->save_syn) {
6716 u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb);
6717 struct saved_syn *saved_syn;
6718 u32 mac_hdrlen;
6719 void *base;
6720
6721 if (tcp_sk(sk)->save_syn == 2) {
6722 base = skb_mac_header(skb);
6723 mac_hdrlen = skb_mac_header_len(skb);
6724 len += mac_hdrlen;
6725 } else {
6726 base = skb_network_header(skb);
6727 mac_hdrlen = 0;
6728 }
6729
6730 saved_syn = kmalloc(struct_size(saved_syn, data, len),
6731 GFP_ATOMIC);
6732 if (saved_syn) {
6733 saved_syn->mac_hdrlen = mac_hdrlen;
6734 saved_syn->network_hdrlen = skb_network_header_len(skb);
6735 saved_syn->tcp_hdrlen = tcp_hdrlen(skb);
6736 memcpy(saved_syn->data, base, len);
6737 req->saved_syn = saved_syn;
6738 }
6739 }
6740}
6741
6742
/* If a SYN cookie is required and supported, returns a clamped MSS value to be
 * used for SYN cookie generation.
 */
6745u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
6746 const struct tcp_request_sock_ops *af_ops,
6747 struct sock *sk, struct tcphdr *th)
6748{
6749 struct tcp_sock *tp = tcp_sk(sk);
6750 u16 mss;
6751
6752 if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
6753 !inet_csk_reqsk_queue_is_full(sk))
6754 return 0;
6755
6756 if (!tcp_syn_flood_action(sk, rsk_ops->slab_name))
6757 return 0;
6758
6759 if (sk_acceptq_is_full(sk)) {
6760 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
6761 return 0;
6762 }
6763
6764 mss = tcp_parse_mss_option(th, tp->rx_opt.user_mss);
6765 if (!mss)
6766 mss = af_ops->mss_clamp;
6767
6768 return mss;
6769}
6770EXPORT_SYMBOL_GPL(tcp_get_syncookie_mss);
6771
6772int tcp_conn_request(struct request_sock_ops *rsk_ops,
6773 const struct tcp_request_sock_ops *af_ops,
6774 struct sock *sk, struct sk_buff *skb)
6775{
6776 struct tcp_fastopen_cookie foc = { .len = -1 };
6777 __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
6778 struct tcp_options_received tmp_opt;
6779 struct tcp_sock *tp = tcp_sk(sk);
6780 struct net *net = sock_net(sk);
6781 struct sock *fastopen_sk = NULL;
6782 struct request_sock *req;
6783 bool want_cookie = false;
6784 struct dst_entry *dst;
6785 struct flowi fl;
6786
	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
6791 if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
6792 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
6793 want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
6794 if (!want_cookie)
6795 goto drop;
6796 }
6797
6798 if (sk_acceptq_is_full(sk)) {
6799 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
6800 goto drop;
6801 }
6802
6803 req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
6804 if (!req)
6805 goto drop;
6806
6807 req->syncookie = want_cookie;
6808 tcp_rsk(req)->af_specific = af_ops;
6809 tcp_rsk(req)->ts_off = 0;
6810#if IS_ENABLED(CONFIG_MPTCP)
6811 tcp_rsk(req)->is_mptcp = 0;
6812#endif
6813
6814 tcp_clear_options(&tmp_opt);
6815 tmp_opt.mss_clamp = af_ops->mss_clamp;
6816 tmp_opt.user_mss = tp->rx_opt.user_mss;
6817 tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
6818 want_cookie ? NULL : &foc);
6819
6820 if (want_cookie && !tmp_opt.saw_tstamp)
6821 tcp_clear_options(&tmp_opt);
6822
6823 if (IS_ENABLED(CONFIG_SMC) && want_cookie)
6824 tmp_opt.smc_ok = 0;
6825
6826 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
6827 tcp_openreq_init(req, &tmp_opt, skb, sk);
6828 inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;
6829
	/* Note: tcp_v6_init_req() might override ir_iif for link locals */
6831 inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
6832
6833 dst = af_ops->route_req(sk, skb, &fl, req);
6834 if (!dst)
6835 goto drop_and_free;
6836
6837 if (tmp_opt.tstamp_ok)
6838 tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
6839
6840 if (!want_cookie && !isn) {
		/* Kill the following clause, if you dislike this way. */
6842 if (!net->ipv4.sysctl_tcp_syncookies &&
6843 (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
6844 (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
6845 !tcp_peer_is_proven(req, dst)) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
6853 pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
6854 rsk_ops->family);
6855 goto drop_and_release;
6856 }
6857
6858 isn = af_ops->init_seq(skb);
6859 }
6860
6861 tcp_ecn_create_request(req, skb, sk, dst);
6862
6863 if (want_cookie) {
6864 isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
6865 if (!tmp_opt.tstamp_ok)
6866 inet_rsk(req)->ecn_ok = 0;
6867 }
6868
6869 tcp_rsk(req)->snt_isn = isn;
6870 tcp_rsk(req)->txhash = net_tx_rndhash();
6871 tcp_rsk(req)->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
6872 tcp_openreq_init_rwin(req, sk, dst);
6873 sk_rx_queue_set(req_to_sk(req), skb);
6874 if (!want_cookie) {
6875 tcp_reqsk_record_syn(sk, req, skb);
6876 fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
6877 }
6878 if (fastopen_sk) {
6879 af_ops->send_synack(fastopen_sk, dst, &fl, req,
6880 &foc, TCP_SYNACK_FASTOPEN, skb);
		/* Add the child socket directly into the accept queue */
6882 if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) {
6883 reqsk_fastopen_remove(fastopen_sk, req, false);
6884 bh_unlock_sock(fastopen_sk);
6885 sock_put(fastopen_sk);
6886 goto drop_and_free;
6887 }
6888 sk->sk_data_ready(sk);
6889 bh_unlock_sock(fastopen_sk);
6890 sock_put(fastopen_sk);
6891 } else {
6892 tcp_rsk(req)->tfo_listener = false;
6893 if (!want_cookie)
6894 inet_csk_reqsk_queue_hash_add(sk, req,
6895 tcp_timeout_init((struct sock *)req));
6896 af_ops->send_synack(sk, dst, &fl, req, &foc,
6897 !want_cookie ? TCP_SYNACK_NORMAL :
6898 TCP_SYNACK_COOKIE,
6899 skb);
6900 if (want_cookie) {
6901 reqsk_free(req);
6902 return 0;
6903 }
6904 }
6905 reqsk_put(req);
6906 return 0;
6907
6908drop_and_release:
6909 dst_release(dst);
6910drop_and_free:
6911 __reqsk_free(req);
6912drop:
6913 tcp_listendrop(sk);
6914 return 0;
6915}
6916EXPORT_SYMBOL(tcp_conn_request);
6917