1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64#define pr_fmt(fmt) "TCP: " fmt
65
66#include <linux/mm.h>
67#include <linux/slab.h>
68#include <linux/module.h>
69#include <linux/sysctl.h>
70#include <linux/kernel.h>
71#include <net/dst.h>
72#include <net/tcp.h>
73#include <net/inet_common.h>
74#include <linux/ipsec.h>
75#include <asm/unaligned.h>
76#include <net/netdma.h>
77
/*
 * Global TCP tunables consulted by the input path. The sysctl_ prefix
 * suggests they are wired to sysctl entries elsewhere; that registration
 * is not visible in this part of the file.
 */
int sysctl_tcp_timestamps __read_mostly = 1;
int sysctl_tcp_window_scaling __read_mostly = 1;
int sysctl_tcp_sack __read_mostly = 1;
int sysctl_tcp_fack __read_mostly = 1;
int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
EXPORT_SYMBOL(sysctl_tcp_reordering);
int sysctl_tcp_ecn __read_mostly = 2;
EXPORT_SYMBOL(sysctl_tcp_ecn);
int sysctl_tcp_dsack __read_mostly = 1;
/* Used as a shift count in tcp_init_buffer_space(); 0 skips that adjustment. */
int sysctl_tcp_app_win __read_mostly = 31;
int sysctl_tcp_adv_win_scale __read_mostly = 1;
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);


int sysctl_tcp_challenge_ack_limit = 100;

int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
int sysctl_tcp_frto __read_mostly = 2;
int sysctl_tcp_frto_response __read_mostly;

int sysctl_tcp_thin_dupack __read_mostly;

/* Non-zero lets tcp_rcv_space_adjust() grow sk_rcvbuf and window_clamp. */
int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
int sysctl_tcp_abc __read_mostly;
int sysctl_tcp_early_retrans __read_mostly = 2;
105
/*
 * FLAG_* bits are OR-ed into an int while processing incoming segments
 * (e.g. tcp_sacktag_state.flag in tcp_sacktag_one()). The meaning of each
 * bit is implied by its name -- NOTE(review): confirm against the users
 * of each bit, most of which lie outside this part of the file.
 */
#define FLAG_DATA 0x01
#define FLAG_WIN_UPDATE 0x02
#define FLAG_DATA_ACKED 0x04
#define FLAG_RETRANS_DATA_ACKED 0x08
#define FLAG_SYN_ACKED 0x10
#define FLAG_DATA_SACKED 0x20
#define FLAG_ECE 0x40
#define FLAG_SLOWPATH 0x100
#define FLAG_ONLY_ORIG_SACKED 0x200
#define FLAG_SND_UNA_ADVANCED 0x400
#define FLAG_DSACKING_ACK 0x800
#define FLAG_NONHEAD_RETRANS_ACKED 0x1000
#define FLAG_SACK_RENEGING 0x2000

/* Composite masks built from the single-bit flags above. */
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)
#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
#define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)

/*
 * TCP_REMNANT: TCP header flag bits that, when set on a short segment,
 * keep tcp_measure_rcv_mss() from treating it as an MSS sample.
 * TCP_HP_BITS: every header-word bit except the reserved bits and PSH.
 * NOTE(review): TCP_HP_BITS is not used in the visible part of the file.
 */
#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
128
129
130
131
132static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
133{
134 struct inet_connection_sock *icsk = inet_csk(sk);
135 const unsigned int lss = icsk->icsk_ack.last_seg_size;
136 unsigned int len;
137
138 icsk->icsk_ack.last_seg_size = 0;
139
140
141
142
143 len = skb_shinfo(skb)->gso_size ? : skb->len;
144 if (len >= icsk->icsk_ack.rcv_mss) {
145 icsk->icsk_ack.rcv_mss = len;
146 } else {
147
148
149
150
151
152 len += skb->data - skb_transport_header(skb);
153 if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
154
155
156
157
158
159 (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
160 !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
161
162
163
164
165 len -= tcp_sk(sk)->tcp_header_len;
166 icsk->icsk_ack.last_seg_size = len;
167 if (len == lss) {
168 icsk->icsk_ack.rcv_mss = len;
169 return;
170 }
171 }
172 if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
173 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
174 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
175 }
176}
177
178static void tcp_incr_quickack(struct sock *sk)
179{
180 struct inet_connection_sock *icsk = inet_csk(sk);
181 unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
182
183 if (quickacks == 0)
184 quickacks = 2;
185 if (quickacks > icsk->icsk_ack.quick)
186 icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
187}
188
189static void tcp_enter_quickack_mode(struct sock *sk)
190{
191 struct inet_connection_sock *icsk = inet_csk(sk);
192 tcp_incr_quickack(sk);
193 icsk->icsk_ack.pingpong = 0;
194 icsk->icsk_ack.ato = TCP_ATO_MIN;
195}
196
197
198
199
200
201static inline bool tcp_in_quickack_mode(const struct sock *sk)
202{
203 const struct inet_connection_sock *icsk = inet_csk(sk);
204
205 return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
206}
207
208static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
209{
210 if (tp->ecn_flags & TCP_ECN_OK)
211 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
212}
213
214static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
215{
216 if (tcp_hdr(skb)->cwr)
217 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
218}
219
220static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
221{
222 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
223}
224
225static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
226{
227 if (!(tp->ecn_flags & TCP_ECN_OK))
228 return;
229
230 switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
231 case INET_ECN_NOT_ECT:
232
233
234
235
236 if (tp->ecn_flags & TCP_ECN_SEEN)
237 tcp_enter_quickack_mode((struct sock *)tp);
238 break;
239 case INET_ECN_CE:
240 if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
241
242 tcp_enter_quickack_mode((struct sock *)tp);
243 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
244 }
245
246 default:
247 tp->ecn_flags |= TCP_ECN_SEEN;
248 }
249}
250
251static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
252{
253 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
254 tp->ecn_flags &= ~TCP_ECN_OK;
255}
256
257static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
258{
259 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
260 tp->ecn_flags &= ~TCP_ECN_OK;
261}
262
263static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
264{
265 if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
266 return true;
267 return false;
268}
269
270
271
272
273
274
275static void tcp_fixup_sndbuf(struct sock *sk)
276{
277 int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
278
279 sndmem *= TCP_INIT_CWND;
280 if (sk->sk_sndbuf < sndmem)
281 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
282}
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
311{
312 struct tcp_sock *tp = tcp_sk(sk);
313
314 int truesize = tcp_win_from_space(skb->truesize) >> 1;
315 int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
316
317 while (tp->rcv_ssthresh <= window) {
318 if (truesize <= skb->len)
319 return 2 * inet_csk(sk)->icsk_ack.rcv_mss;
320
321 truesize >>= 1;
322 window >>= 1;
323 }
324 return 0;
325}
326
327static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
328{
329 struct tcp_sock *tp = tcp_sk(sk);
330
331
332 if (tp->rcv_ssthresh < tp->window_clamp &&
333 (int)tp->rcv_ssthresh < tcp_space(sk) &&
334 !sk_under_memory_pressure(sk)) {
335 int incr;
336
337
338
339
340 if (tcp_win_from_space(skb->truesize) <= skb->len)
341 incr = 2 * tp->advmss;
342 else
343 incr = __tcp_grow_window(sk, skb);
344
345 if (incr) {
346 incr = max_t(int, incr, 2 * skb->len);
347 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
348 tp->window_clamp);
349 inet_csk(sk)->icsk_ack.quick |= 1;
350 }
351 }
352}
353
354
355
356static void tcp_fixup_rcvbuf(struct sock *sk)
357{
358 u32 mss = tcp_sk(sk)->advmss;
359 u32 icwnd = TCP_DEFAULT_INIT_RCVWND;
360 int rcvmem;
361
362
363
364
365 if (mss > 1460)
366 icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
367
368 rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER);
369 while (tcp_win_from_space(rcvmem) < mss)
370 rcvmem += 128;
371
372 rcvmem *= icwnd;
373
374 if (sk->sk_rcvbuf < rcvmem)
375 sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
376}
377
378
379
380
381void tcp_init_buffer_space(struct sock *sk)
382{
383 struct tcp_sock *tp = tcp_sk(sk);
384 int maxwin;
385
386 if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
387 tcp_fixup_rcvbuf(sk);
388 if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
389 tcp_fixup_sndbuf(sk);
390
391 tp->rcvq_space.space = tp->rcv_wnd;
392
393 maxwin = tcp_full_space(sk);
394
395 if (tp->window_clamp >= maxwin) {
396 tp->window_clamp = maxwin;
397
398 if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
399 tp->window_clamp = max(maxwin -
400 (maxwin >> sysctl_tcp_app_win),
401 4 * tp->advmss);
402 }
403
404
405 if (sysctl_tcp_app_win &&
406 tp->window_clamp > 2 * tp->advmss &&
407 tp->window_clamp + tp->advmss > maxwin)
408 tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
409
410 tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
411 tp->snd_cwnd_stamp = tcp_time_stamp;
412}
413
414
415static void tcp_clamp_window(struct sock *sk)
416{
417 struct tcp_sock *tp = tcp_sk(sk);
418 struct inet_connection_sock *icsk = inet_csk(sk);
419
420 icsk->icsk_ack.quick = 0;
421
422 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
423 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
424 !sk_under_memory_pressure(sk) &&
425 sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
426 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
427 sysctl_tcp_rmem[2]);
428 }
429 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
430 tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
431}
432
433
434
435
436
437
438
439
440void tcp_initialize_rcv_mss(struct sock *sk)
441{
442 const struct tcp_sock *tp = tcp_sk(sk);
443 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
444
445 hint = min(hint, tp->rcv_wnd / 2);
446 hint = min(hint, TCP_MSS_DEFAULT);
447 hint = max(hint, TCP_MIN_MSS);
448
449 inet_csk(sk)->icsk_ack.rcv_mss = hint;
450}
451EXPORT_SYMBOL(tcp_initialize_rcv_mss);
452
453
454
455
456
457
458
459
460
461
462
463
464static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
465{
466 u32 new_sample = tp->rcv_rtt_est.rtt;
467 long m = sample;
468
469 if (m == 0)
470 m = 1;
471
472 if (new_sample != 0) {
473
474
475
476
477
478
479
480
481
482
483 if (!win_dep) {
484 m -= (new_sample >> 3);
485 new_sample += m;
486 } else {
487 m <<= 3;
488 if (m < new_sample)
489 new_sample = m;
490 }
491 } else {
492
493 new_sample = m << 3;
494 }
495
496 if (tp->rcv_rtt_est.rtt != new_sample)
497 tp->rcv_rtt_est.rtt = new_sample;
498}
499
500static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
501{
502 if (tp->rcv_rtt_est.time == 0)
503 goto new_measure;
504 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
505 return;
506 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1);
507
508new_measure:
509 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
510 tp->rcv_rtt_est.time = tcp_time_stamp;
511}
512
513static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
514 const struct sk_buff *skb)
515{
516 struct tcp_sock *tp = tcp_sk(sk);
517 if (tp->rx_opt.rcv_tsecr &&
518 (TCP_SKB_CB(skb)->end_seq -
519 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
520 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
521}
522
523
524
525
526
/*
 * Receive-buffer auto-tuning.
 *
 * Once per receiver-side RTT, compare twice the advance of copied_seq
 * since the last measurement with the recorded rcvq_space.space. When the
 * new figure is larger it is recorded and, if sysctl_tcp_moderate_rcvbuf
 * is set and the user has not locked the receive buffer, sk_rcvbuf and
 * window_clamp are raised to match.
 */
void tcp_rcv_space_adjust(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int time;
	int space;

	/* First call: nothing to compare against, just start measuring. */
	if (tp->rcvq_space.time == 0)
		goto new_measure;

	/* rcv_rtt_est.rtt is kept scaled by 8; wait for one full RTT, and
	 * do nothing at all while no RTT estimate exists.
	 */
	time = tcp_time_stamp - tp->rcvq_space.time;
	if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
		return;

	/* Twice the sequence space copied_seq covered in this interval. */
	space = 2 * (tp->copied_seq - tp->rcvq_space.seq);

	/* The recorded space never shrinks. */
	space = max(tp->rcvq_space.space, space);

	if (tp->rcvq_space.space != space) {
		int rcvmem;

		tp->rcvq_space.space = space;

		if (sysctl_tcp_moderate_rcvbuf &&
		    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
			/* Window clamp is in bytes of window, before the
			 * conversion to buffer memory below.
			 */
			int new_clamp = space;

			/* Convert window bytes to a segment count (>= 1)... */
			space /= tp->advmss;
			if (!space)
				space = 1;
			/* ...and to memory: per-segment truesize, padded in
			 * 128-byte steps until it yields a full advmss of
			 * window.
			 */
			rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
			while (tcp_win_from_space(rcvmem) < tp->advmss)
				rcvmem += 128;
			space *= rcvmem;
			space = min(space, sysctl_tcp_rmem[2]);
			if (space > sk->sk_rcvbuf) {
				sk->sk_rcvbuf = space;

				/* Let the advertised window grow as well. */
				tp->window_clamp = new_clamp;
			}
		}
	}

new_measure:
	tp->rcvq_space.seq = tp->copied_seq;
	tp->rcvq_space.time = tcp_time_stamp;
}
578
579
580
581
582
583
584
585
586
587
588
589static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
590{
591 struct tcp_sock *tp = tcp_sk(sk);
592 struct inet_connection_sock *icsk = inet_csk(sk);
593 u32 now;
594
595 inet_csk_schedule_ack(sk);
596
597 tcp_measure_rcv_mss(sk, skb);
598
599 tcp_rcv_rtt_measure(tp);
600
601 now = tcp_time_stamp;
602
603 if (!icsk->icsk_ack.ato) {
604
605
606
607 tcp_incr_quickack(sk);
608 icsk->icsk_ack.ato = TCP_ATO_MIN;
609 } else {
610 int m = now - icsk->icsk_ack.lrcvtime;
611
612 if (m <= TCP_ATO_MIN / 2) {
613
614 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
615 } else if (m < icsk->icsk_ack.ato) {
616 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
617 if (icsk->icsk_ack.ato > icsk->icsk_rto)
618 icsk->icsk_ack.ato = icsk->icsk_rto;
619 } else if (m > icsk->icsk_rto) {
620
621
622
623 tcp_incr_quickack(sk);
624 sk_mem_reclaim(sk);
625 }
626 }
627 icsk->icsk_ack.lrcvtime = now;
628
629 TCP_ECN_check_ce(tp, skb);
630
631 if (skb->len >= 128)
632 tcp_grow_window(sk, skb);
633}
634
635
636
637
638
639
640
641
642
643
/*
 * Fold one RTT measurement into the sender-side estimator.
 *
 * @mrtt: measured RTT; 0 is treated as 1.
 *
 * State is kept in fixed point: tp->srtt holds 8x the smoothed RTT and
 * tp->mdev holds 4x the mean deviation. tp->mdev_max tracks the largest
 * mdev seen since rtt_seq was last reset, and tp->rttvar is the smoothed
 * value derived from mdev_max.
 */
static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
{
	struct tcp_sock *tp = tcp_sk(sk);
	long m = mrtt;

	if (m == 0)
		m = 1;
	if (tp->srtt != 0) {
		/* m becomes the error; srtt = 7/8 srtt + 1/8 sample. */
		m -= (tp->srtt >> 3);
		tp->srtt += m;
		if (m < 0) {
			/* RTT went down: m = |error| - mdev/4 ... */
			m = -m;
			m -= (tp->mdev >> 2);
			/* ...and a resulting increase of mdev is damped by
			 * a further factor of 8, so a shrinking RTT inflates
			 * the deviation (and thus the RTO) much less.
			 */
			if (m > 0)
				m >>= 3;
		} else {
			m -= (tp->mdev >> 2);
		}
		/* mdev = 3/4 mdev + 1/4 |error| (modulo the damping above). */
		tp->mdev += m;
		if (tp->mdev > tp->mdev_max) {
			tp->mdev_max = tp->mdev;
			/* rttvar follows increases immediately. */
			if (tp->mdev_max > tp->rttvar)
				tp->rttvar = tp->mdev_max;
		}
		/* snd_una passed rtt_seq: let rttvar decay by 1/4 of its
		 * distance to mdev_max and start a new tracking period.
		 */
		if (after(tp->snd_una, tp->rtt_seq)) {
			if (tp->mdev_max < tp->rttvar)
				tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2;
			tp->rtt_seq = tp->snd_nxt;
			tp->mdev_max = tcp_rto_min(sk);
		}
	} else {
		/* First sample: srtt = sample, mdev = sample / 2. */
		tp->srtt = m << 3;
		tp->mdev = m << 1;
		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
		tp->rtt_seq = tp->snd_nxt;
	}
}
706
707
708
709
710void tcp_set_rto(struct sock *sk)
711{
712 const struct tcp_sock *tp = tcp_sk(sk);
713
714
715
716
717
718
719
720
721
722
723 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
724
725
726
727
728
729
730
731
732
733
734 tcp_bound_rto(sk);
735}
736
737__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
738{
739 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
740
741 if (!cwnd)
742 cwnd = TCP_INIT_CWND;
743 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
744}
745
746
747
748
749
750void tcp_disable_fack(struct tcp_sock *tp)
751{
752
753 if (tcp_is_fack(tp))
754 tp->lost_skb_hint = NULL;
755 tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
756}
757
758
759static void tcp_dsack_seen(struct tcp_sock *tp)
760{
761 tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
762}
763
764static void tcp_update_reordering(struct sock *sk, const int metric,
765 const int ts)
766{
767 struct tcp_sock *tp = tcp_sk(sk);
768 if (metric > tp->reordering) {
769 int mib_idx;
770
771 tp->reordering = min(TCP_MAX_REORDERING, metric);
772
773
774 if (ts)
775 mib_idx = LINUX_MIB_TCPTSREORDER;
776 else if (tcp_is_reno(tp))
777 mib_idx = LINUX_MIB_TCPRENOREORDER;
778 else if (tcp_is_fack(tp))
779 mib_idx = LINUX_MIB_TCPFACKREORDER;
780 else
781 mib_idx = LINUX_MIB_TCPSACKREORDER;
782
783 NET_INC_STATS_BH(sock_net(sk), mib_idx);
784#if FASTRETRANS_DEBUG > 1
785 pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
786 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
787 tp->reordering,
788 tp->fackets_out,
789 tp->sacked_out,
790 tp->undo_marker ? tp->undo_retrans : 0);
791#endif
792 tcp_disable_fack(tp);
793 }
794
795 if (metric > 0)
796 tcp_disable_early_retrans(tp);
797}
798
799
800static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
801{
802 if ((tp->retransmit_skb_hint == NULL) ||
803 before(TCP_SKB_CB(skb)->seq,
804 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
805 tp->retransmit_skb_hint = skb;
806
807 if (!tp->lost_out ||
808 after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high))
809 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
810}
811
812static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
813{
814 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
815 tcp_verify_retransmit_hint(tp, skb);
816
817 tp->lost_out += tcp_skb_pcount(skb);
818 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
819 }
820}
821
822static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
823 struct sk_buff *skb)
824{
825 tcp_verify_retransmit_hint(tp, skb);
826
827 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
828 tp->lost_out += tcp_skb_pcount(skb);
829 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
830 }
831}
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
/*
 * Sanity-check one SACK block against the sender's sequence state.
 *
 * @is_dsack:  the block was identified as a D-SACK.
 * @start_seq: first sequence number of the block.
 * @end_seq:   sequence number just past the block.
 *
 * Returns true when the block may be processed, false when it must be
 * ignored. The checks are order-dependent.
 */
static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
				   u32 start_seq, u32 end_seq)
{
	/* Reject blocks reaching beyond snd_nxt and empty/inverted blocks. */
	if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
		return false;

	/* The block must start below snd_nxt. */
	if (!before(start_seq, tp->snd_nxt))
		return false;

	/* Starts above snd_una: the block lies in the outstanding range. */
	if (after(start_seq, tp->snd_una))
		return true;

	/* At or below snd_una only a D-SACK can be meaningful, and only
	 * while an undo_marker is set.
	 */
	if (!is_dsack || !tp->undo_marker)
		return false;

	/* A D-SACK starting at/below snd_una must also end at/below it. */
	if (after(end_seq, tp->snd_una))
		return false;

	/* Entirely at or above undo_marker: accept. */
	if (!before(start_seq, tp->undo_marker))
		return true;

	/* Entirely at or below undo_marker: too old. */
	if (!after(end_seq, tp->undo_marker))
		return false;

	/* Straddles undo_marker: accept only if the block is no longer
	 * than max_window.
	 */
	return !before(start_seq, end_seq - tp->max_window);
}
963
964
965
966
967
968
969
970
971
972
/*
 * Detect retransmissions that were themselves lost.
 *
 * Only runs with FACK enabled, in TCP_CA_Recovery, with retransmissions
 * outstanding, and when the highest SACKed sequence has moved past
 * tp->lost_retrans_low. Every retransmitted skb whose recorded ack_seq
 * lies below the highest SACKed sequence is stripped of
 * TCPCB_SACKED_RETRANS and marked lost again.
 *
 * NOTE(review): this relies on TCP_SKB_CB(skb)->ack_seq holding a
 * send-side marker recorded when the skb was retransmitted; that is set
 * outside this part of the file -- confirm.
 */
static void tcp_mark_lost_retrans(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int cnt = 0;
	u32 new_low_seq = tp->snd_nxt;
	u32 received_upto = tcp_highest_sack_seq(tp);

	if (!tcp_is_fack(tp) || !tp->retrans_out ||
	    !after(received_upto, tp->lost_retrans_low) ||
	    icsk->icsk_ca_state != TCP_CA_Recovery)
		return;

	tcp_for_write_queue(skb, sk) {
		u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;

		if (skb == tcp_send_head(sk))
			break;
		/* All still-outstanding retransmissions have been visited. */
		if (cnt == tp->retrans_out)
			break;
		/* Already cumulatively acknowledged. */
		if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
			continue;

		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
			continue;

		if (after(received_upto, ack_seq)) {
			/* SACKs passed this skb's marker: retransmit is lost. */
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
			tp->retrans_out -= tcp_skb_pcount(skb);

			tcp_skb_mark_lost_uncond_verify(tp, skb);
			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
		} else {
			/* Still in flight: track the lowest marker seen. */
			if (before(ack_seq, new_low_seq))
				new_low_seq = ack_seq;
			cnt += tcp_skb_pcount(skb);
		}
	}

	/* Next run is pointless until SACKs pass the lowest marker. */
	if (tp->retrans_out)
		tp->lost_retrans_low = new_low_seq;
}
1027
1028static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1029 struct tcp_sack_block_wire *sp, int num_sacks,
1030 u32 prior_snd_una)
1031{
1032 struct tcp_sock *tp = tcp_sk(sk);
1033 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
1034 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
1035 bool dup_sack = false;
1036
1037 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1038 dup_sack = true;
1039 tcp_dsack_seen(tp);
1040 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
1041 } else if (num_sacks > 1) {
1042 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
1043 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
1044
1045 if (!after(end_seq_0, end_seq_1) &&
1046 !before(start_seq_0, start_seq_1)) {
1047 dup_sack = true;
1048 tcp_dsack_seen(tp);
1049 NET_INC_STATS_BH(sock_net(sk),
1050 LINUX_MIB_TCPDSACKOFORECV);
1051 }
1052 }
1053
1054
1055 if (dup_sack && tp->undo_marker && tp->undo_retrans &&
1056 !after(end_seq_0, prior_snd_una) &&
1057 after(end_seq_0, tp->undo_marker))
1058 tp->undo_retrans--;
1059
1060 return dup_sack;
1061}
1062
/* Scratch state threaded through the SACK-tagging helpers for one ACK. */
struct tcp_sacktag_state {
	/* Lowest fack_count at which reordering was detected; starts at
	 * tp->packets_out in tcp_sacktag_write_queue().
	 */
	int reord;
	/* Packets (pcount) walked so far in the write queue. */
	int fack_count;
	/* Accumulated FLAG_* bits. */
	int flag;
};
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1078 u32 start_seq, u32 end_seq)
1079{
1080 int err;
1081 bool in_sack;
1082 unsigned int pkt_len;
1083 unsigned int mss;
1084
1085 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1086 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1087
1088 if (tcp_skb_pcount(skb) > 1 && !in_sack &&
1089 after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
1090 mss = tcp_skb_mss(skb);
1091 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1092
1093 if (!in_sack) {
1094 pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
1095 if (pkt_len < mss)
1096 pkt_len = mss;
1097 } else {
1098 pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
1099 if (pkt_len < mss)
1100 return -EINVAL;
1101 }
1102
1103
1104
1105
1106 if (pkt_len > mss) {
1107 unsigned int new_len = (pkt_len / mss) * mss;
1108 if (!in_sack && new_len < pkt_len) {
1109 new_len += mss;
1110 if (new_len > skb->len)
1111 return 0;
1112 }
1113 pkt_len = new_len;
1114 }
1115 err = tcp_fragment(sk, skb, pkt_len, mss);
1116 if (err < 0)
1117 return err;
1118 }
1119
1120 return in_sack;
1121}
1122
1123
/*
 * Apply one SACK (or D-SACK) hit to a range of sequence space and update
 * the per-socket counters accordingly.
 *
 * @state:     accumulator for reordering, fack_count and FLAG_* bits.
 * @sacked:    current TCP_SKB_CB()->sacked bits of the affected skb.
 * @start_seq: start of the range being tagged.
 * @end_seq:   end of the range being tagged.
 * @dup_sack:  the range was reported by a D-SACK.
 * @pcount:    number of packets the range represents.
 *
 * Returns the updated sacked bits; the caller decides where to store them.
 */
static u8 tcp_sacktag_one(struct sock *sk,
			  struct tcp_sacktag_state *state, u8 sacked,
			  u32 start_seq, u32 end_seq,
			  bool dup_sack, int pcount)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int fack_count = state->fack_count;

	/* D-SACK for data that was retransmitted: one fewer retransmission
	 * left to be confirmed as spurious; if it was also SACKed already,
	 * this counts as reordering.
	 */
	if (dup_sack && (sacked & TCPCB_RETRANS)) {
		if (tp->undo_marker && tp->undo_retrans &&
		    after(end_seq, tp->undo_marker))
			tp->undo_retrans--;
		if (sacked & TCPCB_SACKED_ACKED)
			state->reord = min(fack_count, state->reord);
	}

	/* Range is already cumulatively acknowledged: nothing to tag. */
	if (!after(end_seq, tp->snd_una))
		return sacked;

	if (!(sacked & TCPCB_SACKED_ACKED)) {
		if (sacked & TCPCB_SACKED_RETRANS) {
			/* A retransmission is outstanding. If the segment
			 * was also marked lost, clear both marks and fix
			 * the counters; otherwise leave it as is.
			 */
			if (sacked & TCPCB_LOST) {
				sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
				tp->lost_out -= pcount;
				tp->retrans_out -= pcount;
			}
		} else {
			if (!(sacked & TCPCB_RETRANS)) {
				/* Never retransmitted, yet SACKed below the
				 * highest SACKed sequence: reordering.
				 */
				if (before(start_seq,
					   tcp_highest_sack_seq(tp)))
					state->reord = min(fack_count,
							   state->reord);

				/* Original data at or below frto_highmark. */
				if (!after(end_seq, tp->frto_highmark))
					state->flag |= FLAG_ONLY_ORIG_SACKED;
			}

			if (sacked & TCPCB_LOST) {
				sacked &= ~TCPCB_LOST;
				tp->lost_out -= pcount;
			}
		}

		sacked |= TCPCB_SACKED_ACKED;
		state->flag |= FLAG_DATA_SACKED;
		tp->sacked_out += pcount;

		fack_count += pcount;

		/* Without FACK, lost_cnt_hint counts SACKed packets that
		 * precede lost_skb_hint.
		 */
		if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
		    before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
			tp->lost_cnt_hint += pcount;

		if (fack_count > tp->fackets_out)
			tp->fackets_out = fack_count;
	}

	/* A D-SACK shows the retransmission arrived (as a duplicate), so
	 * it is no longer outstanding.
	 */
	if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
		sacked &= ~TCPCB_SACKED_RETRANS;
		tp->retrans_out -= pcount;
	}

	return sacked;
}
1203
1204
1205
1206
1207static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1208 struct tcp_sacktag_state *state,
1209 unsigned int pcount, int shifted, int mss,
1210 bool dup_sack)
1211{
1212 struct tcp_sock *tp = tcp_sk(sk);
1213 struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
1214 u32 start_seq = TCP_SKB_CB(skb)->seq;
1215 u32 end_seq = start_seq + shifted;
1216
1217 BUG_ON(!pcount);
1218
1219
1220
1221
1222
1223
1224
1225 tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
1226 start_seq, end_seq, dup_sack, pcount);
1227
1228 if (skb == tp->lost_skb_hint)
1229 tp->lost_cnt_hint += pcount;
1230
1231 TCP_SKB_CB(prev)->end_seq += shifted;
1232 TCP_SKB_CB(skb)->seq += shifted;
1233
1234 skb_shinfo(prev)->gso_segs += pcount;
1235 BUG_ON(skb_shinfo(skb)->gso_segs < pcount);
1236 skb_shinfo(skb)->gso_segs -= pcount;
1237
1238
1239
1240
1241
1242
1243 if (!skb_shinfo(prev)->gso_size) {
1244 skb_shinfo(prev)->gso_size = mss;
1245 skb_shinfo(prev)->gso_type = sk->sk_gso_type;
1246 }
1247
1248
1249 if (skb_shinfo(skb)->gso_segs <= 1) {
1250 skb_shinfo(skb)->gso_size = 0;
1251 skb_shinfo(skb)->gso_type = 0;
1252 }
1253
1254
1255 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
1256
1257 if (skb->len > 0) {
1258 BUG_ON(!tcp_skb_pcount(skb));
1259 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED);
1260 return false;
1261 }
1262
1263
1264
1265 if (skb == tp->retransmit_skb_hint)
1266 tp->retransmit_skb_hint = prev;
1267 if (skb == tp->scoreboard_skb_hint)
1268 tp->scoreboard_skb_hint = prev;
1269 if (skb == tp->lost_skb_hint) {
1270 tp->lost_skb_hint = prev;
1271 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
1272 }
1273
1274 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags;
1275 if (skb == tcp_highest_sack(sk))
1276 tcp_advance_highest_sack(sk, skb);
1277
1278 tcp_unlink_write_queue(skb, sk);
1279 sk_wmem_free_skb(sk, skb);
1280
1281 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED);
1282
1283 return true;
1284}
1285
1286
1287
1288
1289static int tcp_skb_seglen(const struct sk_buff *skb)
1290{
1291 return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
1292}
1293
1294
/*
 * An skb qualifies for SACK shifting only when it is nonlinear and has
 * no bytes in its linear head.
 */
static int skb_can_shift(const struct sk_buff *skb)
{
	if (skb_headlen(skb))
		return 0;

	return skb_is_nonlinear(skb) ? 1 : 0;
}
1299
1300
1301
1302
/*
 * Try to merge the SACKed part of @skb into the previous skb in the
 * write queue instead of tagging (and possibly fragmenting) @skb itself.
 *
 * Returns:
 *   the previous skb - data was shifted into it; the caller continues
 *                      the walk from there;
 *   @skb itself      - nothing to do for this skb ("noop");
 *   NULL             - shifting is not possible here; the caller must
 *                      fall back to tcp_match_skb_to_sack().
 */
static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
					  struct tcp_sacktag_state *state,
					  u32 start_seq, u32 end_seq,
					  bool dup_sack)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *prev;
	int mss;
	int pcount = 0;
	int len;
	int in_sack;

	if (!sk_can_gso(sk))
		goto fallback;

	/* A retransmission that is outstanding and not marked lost is
	 * left alone unless this is a D-SACK.
	 */
	if (!dup_sack &&
	    (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
		goto fallback;
	if (!skb_can_shift(skb))
		goto fallback;
	/* Already cumulatively acknowledged. */
	if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
		goto fallback;

	/* The queue head has no predecessor to shift into. */
	if (unlikely(skb == tcp_write_queue_head(sk)))
		goto fallback;
	prev = tcp_write_queue_prev(sk, skb);

	/* prev must be SACKed and carry no other tag bits. */
	if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
		goto fallback;

	in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
		  !before(end_seq, TCP_SKB_CB(skb)->end_seq);

	if (in_sack) {
		/* Whole skb is covered by the SACK block. */
		len = skb->len;
		pcount = tcp_skb_pcount(skb);
		mss = tcp_skb_seglen(skb);

		/* Segment sizes must match for the merged skb to stay
		 * a consistent GSO skb.
		 */
		if (mss != tcp_skb_seglen(prev))
			goto fallback;
	} else {
		/* skb lies entirely before the block. */
		if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
			goto noop;

		/* A single-packet skb cannot be covered partially in a
		 * way that is useful here.
		 */
		if (tcp_skb_pcount(skb) <= 1)
			goto noop;

		in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
		if (!in_sack) {
			/* The block starts in the middle of skb. Only the
			 * head of an skb can be shifted into prev, so this
			 * case is left to the fragmenting fallback.
			 */
			goto fallback;
		}

		/* The block covers the head of skb up to end_seq. */
		len = end_seq - TCP_SKB_CB(skb)->seq;
		BUG_ON(len < 0);
		BUG_ON(len > skb->len);

		mss = tcp_skb_mss(skb);

		if (mss != tcp_skb_seglen(prev))
			goto fallback;

		/* Shift whole segments only. */
		if (len == mss) {
			pcount = 1;
		} else if (len < mss) {
			goto noop;
		} else {
			pcount = len / mss;
			len = pcount * mss;
		}
	}

	/* The part to shift must reach beyond snd_una. */
	if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
		goto fallback;

	if (!skb_shift(prev, skb, len))
		goto fallback;
	/* false: part of skb remains, nothing more to merge. */
	if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
		goto out;

	/* skb was fully merged away. See whether the skb that now follows
	 * prev is a SACKed skb that can be merged in as well.
	 */
	if (prev == tcp_write_queue_tail(sk))
		goto out;
	skb = tcp_write_queue_next(sk, prev);

	if (!skb_can_shift(skb) ||
	    (skb == tcp_send_head(sk)) ||
	    ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
	    (mss != tcp_skb_seglen(skb)))
		goto out;

	len = skb->len;
	if (skb_shift(prev, skb, len)) {
		pcount += tcp_skb_pcount(skb);
		tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
	}

out:
	state->fack_count += pcount;
	return prev;

noop:
	return skb;

fallback:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
	return NULL;
}
1440
/*
 * Walk the write queue from @skb and tag every skb covered by the SACK
 * block [@start_seq, @end_seq).
 *
 * @next_dup:    optional D-SACK block to be checked against each skb
 *               first; may be NULL.
 * @dup_sack_in: the block itself is a D-SACK.
 *
 * The walk stops at the send head, at the first skb starting at or after
 * @end_seq, or when matching an skb fails. Returns the skb at which the
 * walk stopped.
 */
static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
					struct tcp_sack_block *next_dup,
					struct tcp_sacktag_state *state,
					u32 start_seq, u32 end_seq,
					bool dup_sack_in)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *tmp;

	tcp_for_write_queue_from(skb, sk) {
		int in_sack = 0;
		bool dup_sack = dup_sack_in;

		if (skb == tcp_send_head(sk))
			break;

		/* The queue is walked in sequence order: past the block. */
		if (!before(TCP_SKB_CB(skb)->seq, end_seq))
			break;

		/* Check the pending D-SACK block first. */
		if ((next_dup != NULL) &&
		    before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
			in_sack = tcp_match_skb_to_sack(sk, skb,
							next_dup->start_seq,
							next_dup->end_seq);
			if (in_sack > 0)
				dup_sack = true;
		}

		/* Not matched by the D-SACK (a match error there is ignored
		 * as well): try shifting into the previous skb, and fall
		 * back to plain matching when that is not possible.
		 */
		if (in_sack <= 0) {
			tmp = tcp_shift_skb_data(sk, skb, state,
						 start_seq, end_seq, dup_sack);
			if (tmp != NULL) {
				/* Data was shifted: continue from prev. */
				if (tmp != skb) {
					skb = tmp;
					continue;
				}

				/* "noop": skb is not covered. */
				in_sack = 0;
			} else {
				in_sack = tcp_match_skb_to_sack(sk, skb,
								start_seq,
								end_seq);
			}
		}

		if (unlikely(in_sack < 0))
			break;

		if (in_sack) {
			TCP_SKB_CB(skb)->sacked =
				tcp_sacktag_one(sk,
						state,
						TCP_SKB_CB(skb)->sacked,
						TCP_SKB_CB(skb)->seq,
						TCP_SKB_CB(skb)->end_seq,
						dup_sack,
						tcp_skb_pcount(skb));

			/* Keep the highest-SACK marker up to date. */
			if (!before(TCP_SKB_CB(skb)->seq,
				    tcp_highest_sack_seq(tp)))
				tcp_advance_highest_sack(sk, skb);
		}

		state->fack_count += tcp_skb_pcount(skb);
	}
	return skb;
}
1513
1514
1515
1516
1517static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
1518 struct tcp_sacktag_state *state,
1519 u32 skip_to_seq)
1520{
1521 tcp_for_write_queue_from(skb, sk) {
1522 if (skb == tcp_send_head(sk))
1523 break;
1524
1525 if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
1526 break;
1527
1528 state->fack_count += tcp_skb_pcount(skb);
1529 }
1530 return skb;
1531}
1532
1533static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1534 struct sock *sk,
1535 struct tcp_sack_block *next_dup,
1536 struct tcp_sacktag_state *state,
1537 u32 skip_to_seq)
1538{
1539 if (next_dup == NULL)
1540 return skb;
1541
1542 if (before(next_dup->start_seq, skip_to_seq)) {
1543 skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq);
1544 skb = tcp_sacktag_walk(skb, sk, NULL, state,
1545 next_dup->start_seq, next_dup->end_seq,
1546 1);
1547 }
1548
1549 return skb;
1550}
1551
1552static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
1553{
1554 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1555}
1556
/*
 * Apply the SACK option carried by @ack_skb to the write queue.
 *
 * @sk:            socket whose write queue is tagged
 * @ack_skb:       incoming ACK; TCP_SKB_CB(ack_skb)->sacked is used as the
 *                 offset of the SACK option from the transport header
 * @prior_snd_una: snd_una value supplied by the caller (presumably the value
 *                 before this ACK was processed -- confirm against caller)
 *
 * Returns the FLAG_* bits accumulated in state.flag, or 0 when the ACK is
 * considered too old to be processed at all.
 */
static int
tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
			u32 prior_snd_una)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	/* ptr[1] is read as the option length; the blocks start at ptr + 2 */
	const unsigned char *ptr = (skb_transport_header(ack_skb) +
				    TCP_SKB_CB(ack_skb)->sacked);
	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
	struct tcp_sack_block sp[TCP_NUM_SACKS];
	struct tcp_sack_block *cache;
	struct tcp_sacktag_state state;
	struct sk_buff *skb;
	/* Each block is 8 bytes on the wire; never use more than TCP_NUM_SACKS */
	int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
	int used_sacks;
	bool found_dup_sack = false;
	int i, j;
	int first_sack_index;

	state.flag = 0;
	state.reord = tp->packets_out;

	/* Nothing is SACKed yet: fackets_out must be zero, reset highest_sack */
	if (!tp->sacked_out) {
		if (WARN_ON(tp->fackets_out))
			tp->fackets_out = 0;
		tcp_highest_sack_reset(sk);
	}

	found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
					 num_sacks, prior_snd_una);
	if (found_dup_sack)
		state.flag |= FLAG_DSACKING_ACK;

	/*
	 * Drop ACKs whose ack_seq is more than max_window behind
	 * prior_snd_una. Note that this path returns 0, not state.flag.
	 */
	if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
		return 0;

	if (!tp->packets_out)
		goto out;

	/*
	 * Convert the wire blocks to host order into sp[], keeping only the
	 * valid ones that end after prior_snd_una. first_sack_index tracks
	 * where wire block 0 ends up in sp[] (-1 if it was rejected).
	 */
	used_sacks = 0;
	first_sack_index = 0;
	for (i = 0; i < num_sacks; i++) {
		/* Only the first wire block can be the D-SACK block */
		bool dup_sack = !i && found_dup_sack;

		sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
		sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);

		if (!tcp_is_sackblock_valid(tp, dup_sack,
					    sp[used_sacks].start_seq,
					    sp[used_sacks].end_seq)) {
			int mib_idx;

			if (dup_sack) {
				if (!tp->undo_marker)
					mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
				else
					mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
			} else {
				/*
				 * A block at or below snd_una on an ACK that
				 * does not acknowledge exactly snd_una is
				 * dropped silently, without a MIB count.
				 */
				if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
				    !after(sp[used_sacks].end_seq, tp->snd_una))
					continue;
				mib_idx = LINUX_MIB_TCPSACKDISCARD;
			}

			NET_INC_STATS_BH(sock_net(sk), mib_idx);
			if (i == 0)
				first_sack_index = -1;
			continue;
		}

		/* Ignore blocks that end at or before prior_snd_una */
		if (!after(sp[used_sacks].end_seq, prior_snd_una))
			continue;

		used_sacks++;
	}

	/*
	 * Bubble-sort sp[] by ascending start_seq so the queue can be walked
	 * in order, keeping first_sack_index pointing at the original first
	 * block as it moves.
	 */
	for (i = used_sacks - 1; i > 0; i--) {
		for (j = 0; j < i; j++) {
			if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
				swap(sp[j], sp[j + 1]);

				if (j == first_sack_index)
					first_sack_index = j + 1;
			}
		}
	}

	skb = tcp_write_queue_head(sk);
	state.fack_count = 0;
	i = 0;

	if (!tp->sacked_out) {
		/* Nothing SACKed: start past the end so the cache is unused */
		cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
	} else {
		cache = tp->recv_sack_cache;
		/* Skip the all-zero entries at the front of the cache */
		while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
		       !cache->end_seq)
			cache++;
	}

	while (i < used_sacks) {
		u32 start_seq = sp[i].start_seq;
		u32 end_seq = sp[i].end_seq;
		bool dup_sack = (found_dup_sack && (i == first_sack_index));
		struct tcp_sack_block *next_dup = NULL;

		/* The D-SACK block is the next one in sorted order */
		if (found_dup_sack && ((i + 1) == first_sack_index))
			next_dup = &sp[i + 1];

		/* Advance past cached blocks that end at or before start_seq */
		while (tcp_sack_cache_ok(tp, cache) &&
		       !before(start_seq, cache->end_seq))
			cache++;

		/* The current block overlaps the cached block (not for D-SACK) */
		if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
		    after(end_seq, cache->start_seq)) {

			/* Head: walk the part that precedes the cached block */
			if (before(start_seq, cache->start_seq)) {
				skb = tcp_sacktag_skip(skb, sk, &state,
						       start_seq);
				skb = tcp_sacktag_walk(skb, sk, next_dup,
						       &state,
						       start_seq,
						       cache->start_seq,
						       dup_sack);
			}

			/* Block ends inside the cached block: nothing left */
			if (!after(end_seq, cache->end_seq))
				goto advance_sp;

			skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
						       &state,
						       cache->end_seq);

			/* Tail: the part beyond the cached block remains */
			if (tcp_highest_sack_seq(tp) == cache->end_seq) {
				/* Resume directly from the highest-sack skb */
				skb = tcp_highest_sack(sk);
				if (skb == NULL)
					break;
				state.fack_count = tp->fackets_out;
				cache++;
				goto walk;
			}

			skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
			/* Re-check the same block against the next cache entry */
			cache++;
			continue;
		}

		/* Block starts at or beyond highest_sack: jump there first */
		if (!before(start_seq, tcp_highest_sack_seq(tp))) {
			skb = tcp_highest_sack(sk);
			if (skb == NULL)
				break;
			state.fack_count = tp->fackets_out;
		}
		skb = tcp_sacktag_skip(skb, sk, &state, start_seq);

walk:
		skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
				       start_seq, end_seq, dup_sack);

advance_sp:
		/* A block reaching beyond frto_highmark clears this flag */
		if (after(end_seq, tp->frto_highmark))
			state.flag &= ~FLAG_ONLY_ORIG_SACKED;

		i++;
	}

	/*
	 * Store this ACK's blocks in recv_sack_cache for the next call:
	 * zero-filled entries first, then the used blocks at the end.
	 */
	for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
		tp->recv_sack_cache[i].start_seq = 0;
		tp->recv_sack_cache[i].end_seq = 0;
	}
	for (j = 0; j < used_sacks; j++)
		tp->recv_sack_cache[i++] = sp[j];

	tcp_mark_lost_retrans(sk);

	tcp_verify_left_out(tp);

	/*
	 * Report reordering when the walk lowered state.reord below
	 * fackets_out, unless in Loss state without an undo marker, or while
	 * snd_una has not yet passed a non-zero frto_highmark.
	 */
	if ((state.reord < tp->fackets_out) &&
	    ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
	    (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
		tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);

out:

#if FASTRETRANS_DEBUG > 0
	/* Sanity checks: none of the counters may have wrapped negative */
	WARN_ON((int)tp->sacked_out < 0);
	WARN_ON((int)tp->lost_out < 0);
	WARN_ON((int)tp->retrans_out < 0);
	WARN_ON((int)tcp_packets_in_flight(tp) < 0);
#endif
	return state.flag;
}
1770
1771
1772
1773
1774static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
1775{
1776 u32 holes;
1777
1778 holes = max(tp->lost_out, 1U);
1779 holes = min(holes, tp->packets_out);
1780
1781 if ((tp->sacked_out + holes) > tp->packets_out) {
1782 tp->sacked_out = tp->packets_out - holes;
1783 return true;
1784 }
1785 return false;
1786}
1787
1788
1789
1790
1791
1792static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1793{
1794 struct tcp_sock *tp = tcp_sk(sk);
1795 if (tcp_limit_reno_sacked(tp))
1796 tcp_update_reordering(sk, tp->packets_out + addend, 0);
1797}
1798
1799
1800
1801static void tcp_add_reno_sack(struct sock *sk)
1802{
1803 struct tcp_sock *tp = tcp_sk(sk);
1804 tp->sacked_out++;
1805 tcp_check_reno_reordering(sk, 0);
1806 tcp_verify_left_out(tp);
1807}
1808
1809
1810
1811static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1812{
1813 struct tcp_sock *tp = tcp_sk(sk);
1814
1815 if (acked > 0) {
1816
1817 if (acked - 1 >= tp->sacked_out)
1818 tp->sacked_out = 0;
1819 else
1820 tp->sacked_out -= acked - 1;
1821 }
1822 tcp_check_reno_reordering(sk, acked);
1823 tcp_verify_left_out(tp);
1824}
1825
/* Forget all Reno-counted SACKs by zeroing tp->sacked_out. */
static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
{
	tp->sacked_out = 0;
}
1830
1831static int tcp_is_sackfrto(const struct tcp_sock *tp)
1832{
1833 return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp);
1834}
1835
1836
1837
1838
1839bool tcp_use_frto(struct sock *sk)
1840{
1841 const struct tcp_sock *tp = tcp_sk(sk);
1842 const struct inet_connection_sock *icsk = inet_csk(sk);
1843 struct sk_buff *skb;
1844
1845 if (!sysctl_tcp_frto)
1846 return false;
1847
1848
1849 if (icsk->icsk_mtup.probe_size)
1850 return false;
1851
1852 if (tcp_is_sackfrto(tp))
1853 return true;
1854
1855
1856 if (tp->retrans_out > 1)
1857 return false;
1858
1859 skb = tcp_write_queue_head(sk);
1860 if (tcp_skb_is_last(sk, skb))
1861 return true;
1862 skb = tcp_write_queue_next(sk, skb);
1863 tcp_for_write_queue_from(skb, sk) {
1864 if (skb == tcp_send_head(sk))
1865 break;
1866 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
1867 return false;
1868
1869 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
1870 break;
1871 }
1872 return true;
1873}
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
/*
 * Put the connection into the first F-RTO step (frto_counter = 1).
 *
 * Optionally recomputes ssthresh, sets up the undo marker, clears the
 * retransmission mark on the head segment, limits cwnd to in-flight + 1,
 * chooses frto_highmark and moves the CA state to TCP_CA_Disorder.
 *
 * NOTE(review): the head of the write queue is dereferenced without a NULL
 * check, so callers presumably guarantee a non-empty queue -- confirm.
 */
void tcp_enter_frto(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	/*
	 * Recompute ssthresh only when one of these holds: not in F-RTO and
	 * CA state is at most Disorder; snd_una has reached high_seq; or in
	 * Loss/F-RTO with no retransmit timeouts counted yet.
	 */
	if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
	    tp->snd_una == tp->high_seq ||
	    ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
	     !icsk->icsk_retransmits)) {
		tp->prior_ssthresh = tcp_current_ssthresh(sk);
		/*
		 * When F-RTO is already in progress, the ssthresh callback is
		 * invoked with snd_cwnd temporarily forced to 2 and the real
		 * value is restored afterwards.
		 */
		if (tp->frto_counter) {
			u32 stored_cwnd;
			stored_cwnd = tp->snd_cwnd;
			tp->snd_cwnd = 2;
			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
			tp->snd_cwnd = stored_cwnd;
		} else {
			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
		}
		tcp_ca_event(sk, CA_EVENT_FRTO);
	}

	tp->undo_marker = tp->snd_una;
	tp->undo_retrans = 0;

	skb = tcp_write_queue_head(sk);
	/* A head segment carrying TCPCB_RETRANS disables undo */
	if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
		tp->undo_marker = 0;
	/* Drop the head's SACKED_RETRANS mark and its share of retrans_out */
	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
		TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
		tp->retrans_out -= tcp_skb_pcount(skb);
	}
	tcp_verify_left_out(tp);

	/* Never allow more than one packet beyond what is in flight */
	tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);

	/*
	 * With SACK-based F-RTO and an earlier recovery still pending
	 * (high_seq after snd_una), keep the old high_seq as the highmark;
	 * otherwise use snd_nxt.
	 */
	if (tcp_is_sackfrto(tp) && (tp->frto_counter ||
	    ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
	    after(tp->high_seq, tp->snd_una)) {
		tp->frto_highmark = tp->high_seq;
	} else {
		tp->frto_highmark = tp->snd_nxt;
	}
	tcp_set_ca_state(sk, TCP_CA_Disorder);
	tp->high_seq = tp->snd_nxt;
	tp->frto_counter = 1;
}
1955
1956
1957
1958
1959
/*
 * Leave F-RTO and enter TCP_CA_Loss.
 *
 * @sk:               socket
 * @allowed_segments: number of segments added to in-flight to form the new
 *                    snd_cwnd
 * @flag:             FLAG_* bits; only FLAG_DATA_ACKED is examined here, and
 *                    the local copy is modified while walking the queue
 *
 * Rebuilds lost_out/retrans_out from scratch by walking every segment
 * before the send head.
 */
static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	tp->lost_out = 0;
	tp->retrans_out = 0;
	if (tcp_is_reno(tp))
		tcp_reset_reno_sack(tp);

	tcp_for_write_queue(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;

		TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
		/*
		 * In the first F-RTO step without acked data, the first
		 * segment keeps its retransmission mark and is counted in
		 * retrans_out.
		 */
		if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) {
			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
				tp->retrans_out += tcp_skb_pcount(skb);
			/* Ensures this branch is taken for one segment only */
			flag |= FLAG_DATA_ACKED;
		} else {
			/* Any TCPCB_RETRANS bit on a segment disables undo */
			if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
				tp->undo_marker = 0;
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
		}

		/* Every segment that is not SACKed is marked lost */
		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
			tp->lost_out += tcp_skb_pcount(skb);
			tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
		}
	}
	tcp_verify_left_out(tp);

	tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
	tp->snd_cwnd_cnt = 0;
	tp->snd_cwnd_stamp = tcp_time_stamp;
	tp->frto_counter = 0;
	tp->bytes_acked = 0;

	/* The reordering estimate is capped at the sysctl value */
	tp->reordering = min_t(unsigned int, tp->reordering,
			       sysctl_tcp_reordering);
	tcp_set_ca_state(sk, TCP_CA_Loss);
	tp->high_seq = tp->snd_nxt;
	TCP_ECN_queue_cwr(tp);

	tcp_clear_all_retrans_hints(tp);
}
2022
2023static void tcp_clear_retrans_partial(struct tcp_sock *tp)
2024{
2025 tp->retrans_out = 0;
2026 tp->lost_out = 0;
2027
2028 tp->undo_marker = 0;
2029 tp->undo_retrans = 0;
2030}
2031
2032void tcp_clear_retrans(struct tcp_sock *tp)
2033{
2034 tcp_clear_retrans_partial(tp);
2035
2036 tp->fackets_out = 0;
2037 tp->sacked_out = 0;
2038}
2039
2040
2041
2042
2043
/*
 * Enter TCP_CA_Loss: collapse cwnd to 1 and mark outstanding data lost.
 *
 * @sk:  socket
 * @how: when non-zero, SACK information is discarded as well (sacked_out
 *       and fackets_out are zeroed and every segment is marked lost);
 *       when zero, segments already SACKed keep that mark and undo is
 *       armed with snd_una.
 */
void tcp_enter_loss(struct sock *sk, int how)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	/*
	 * Recompute ssthresh only when coming from Open/Disorder, when
	 * snd_una has reached high_seq, or when already in Loss with no
	 * retransmit timeouts counted.
	 */
	if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
		tp->prior_ssthresh = tcp_current_ssthresh(sk);
		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
		tcp_ca_event(sk, CA_EVENT_LOSS);
	}
	tp->snd_cwnd = 1;
	tp->snd_cwnd_cnt = 0;
	tp->snd_cwnd_stamp = tcp_time_stamp;

	tp->bytes_acked = 0;
	tcp_clear_retrans_partial(tp);

	if (tcp_is_reno(tp))
		tcp_reset_reno_sack(tp);

	if (!how) {
		/* tcp_clear_retrans_partial() zeroed the marker; re-arm it */
		tp->undo_marker = tp->snd_una;
	} else {
		tp->sacked_out = 0;
		tp->fackets_out = 0;
	}
	tcp_clear_all_retrans_hints(tp);

	tcp_for_write_queue(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;

		/* Any TCPCB_RETRANS bit on a segment disables undo */
		if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
			tp->undo_marker = 0;
		/* Clear all tag bits except SACKED_ACKED */
		TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
		/* Mark lost if not SACKed, or unconditionally when @how is set */
		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
			tp->lost_out += tcp_skb_pcount(skb);
			tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
		}
	}
	tcp_verify_left_out(tp);

	/* The reordering estimate is capped at the sysctl value */
	tp->reordering = min_t(unsigned int, tp->reordering,
			       sysctl_tcp_reordering);
	tcp_set_ca_state(sk, TCP_CA_Loss);
	tp->high_seq = tp->snd_nxt;
	TCP_ECN_queue_cwr(tp);
	/* Entering Loss cancels any F-RTO in progress */
	tp->frto_counter = 0;
}
2101
2102
2103
2104
2105
2106
2107
2108static bool tcp_check_sack_reneging(struct sock *sk, int flag)
2109{
2110 if (flag & FLAG_SACK_RENEGING) {
2111 struct inet_connection_sock *icsk = inet_csk(sk);
2112 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
2113
2114 tcp_enter_loss(sk, 1);
2115 icsk->icsk_retransmits++;
2116 tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
2117 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2118 icsk->icsk_rto, TCP_RTO_MAX);
2119 return true;
2120 }
2121 return false;
2122}
2123
2124static inline int tcp_fackets_out(const struct tcp_sock *tp)
2125{
2126 return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
2127}
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
2145{
2146 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
2147}
2148
2149static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
2150{
2151 struct tcp_sock *tp = tcp_sk(sk);
2152 unsigned long delay;
2153
2154
2155
2156
2157
2158 if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt)
2159 return false;
2160
2161 delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2));
2162 if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
2163 return false;
2164
2165 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX);
2166 tp->early_retrans_delayed = 1;
2167 return true;
2168}
2169
/*
 * Non-zero when the time elapsed since the stamp stored in
 * TCP_SKB_CB(skb)->when exceeds the socket's current icsk_rto.
 */
static inline int tcp_skb_timedout(const struct sock *sk,
				   const struct sk_buff *skb)
{
	return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
}
2175
2176static inline int tcp_head_timedout(const struct sock *sk)
2177{
2178 const struct tcp_sock *tp = tcp_sk(sk);
2179
2180 return tp->packets_out &&
2181 tcp_skb_timedout(sk, tcp_write_queue_head(sk));
2182}
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
/*
 * Decide whether it is time to start loss recovery.
 *
 * @sk:   socket
 * @flag: FLAG_* bits; only passed on to tcp_pause_early_retransmit()
 *
 * The checks run in order and the first match decides. Note that the last
 * check may re-arm the retransmit timer as a side effect.
 */
static bool tcp_time_to_recover(struct sock *sk, int flag)
{
	struct tcp_sock *tp = tcp_sk(sk);
	__u32 packets_out;

	/* Never start recovery while F-RTO is in progress */
	if (tp->frto_counter)
		return false;

	/* Something is already marked lost */
	if (tp->lost_out)
		return true;

	/* The duplicate-ACK metric exceeds the reordering estimate */
	if (tcp_dupack_heuristics(tp) > tp->reordering)
		return true;

	/* With FACK, a timed-out queue head triggers recovery */
	if (tcp_is_fack(tp) && tcp_head_timedout(sk))
		return true;

	/*
	 * Small window: packets_out does not exceed the reordering estimate,
	 * at least max(packets_out / 2, sysctl_tcp_reordering) packets are
	 * SACKed, and nothing new can be sent right now.
	 */
	packets_out = tp->packets_out;
	if (packets_out <= tp->reordering &&
	    tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
	    !tcp_may_send_now(sk)) {
		return true;
	}

	/*
	 * Thin stream with thin_dupack enabled (per socket or via sysctl):
	 * recover once the dupack metric exceeds 1, provided SACK is in use
	 * and there is no unsent data queued.
	 */
	if ((tp->thin_dupack || sysctl_tcp_thin_dupack) &&
	    tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 &&
	    tcp_is_sack(tp) && !tcp_send_head(sk))
		return true;

	/*
	 * Early retransmit: fewer than 4 packets outstanding, all but one of
	 * them SACKed, nothing retransmitted and nothing new to send.
	 * Recovery starts unless the retransmit could be paused.
	 */
	if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
	    (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) &&
	    !tcp_may_send_now(sk))
		return !tcp_pause_early_retransmit(sk, flag);

	return false;
}
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348static void tcp_timeout_skbs(struct sock *sk)
2349{
2350 struct tcp_sock *tp = tcp_sk(sk);
2351 struct sk_buff *skb;
2352
2353 if (!tcp_is_fack(tp) || !tcp_head_timedout(sk))
2354 return;
2355
2356 skb = tp->scoreboard_skb_hint;
2357 if (tp->scoreboard_skb_hint == NULL)
2358 skb = tcp_write_queue_head(sk);
2359
2360 tcp_for_write_queue_from(skb, sk) {
2361 if (skb == tcp_send_head(sk))
2362 break;
2363 if (!tcp_skb_timedout(sk, skb))
2364 break;
2365
2366 tcp_skb_mark_lost(tp, skb);
2367 }
2368
2369 tp->scoreboard_skb_hint = skb;
2370
2371 tcp_verify_left_out(tp);
2372}
2373
2374
2375
2376
2377
2378
2379
/*
 * Mark segments at the head of the write queue as lost.
 *
 * @sk:        socket
 * @packets:   packet-count threshold up to which segments are marked
 * @mark_head: when non-zero, at most the head segment is marked
 *
 * The walk resumes from lost_skb_hint/lost_cnt_hint when a hint is set and
 * keeps the hint updated as it proceeds.
 */
static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int cnt, oldcnt;
	int err;
	unsigned int mss;
	/* Upper sequence bound for marking: snd_nxt with SACK, else high_seq */
	const u32 loss_high = tcp_is_sack(tp) ?  tp->snd_nxt : tp->high_seq;

	WARN_ON(packets > tp->packets_out);
	if (tp->lost_skb_hint) {
		skb = tp->lost_skb_hint;
		cnt = tp->lost_cnt_hint;
		/* Hint is already past the head: head-only marking is done */
		if (mark_head && skb != tcp_write_queue_head(sk))
			return;
	} else {
		skb = tcp_write_queue_head(sk);
		cnt = 0;
	}

	tcp_for_write_queue_from(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;
		/* Remember the position so a later call can resume here */
		tp->lost_skb_hint = skb;
		tp->lost_cnt_hint = cnt;

		if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
			break;

		oldcnt = cnt;
		/*
		 * With FACK or Reno every segment counts; otherwise only
		 * segments that are SACKed count.
		 */
		if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
		    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
			cnt += tcp_skb_pcount(skb);

		if (cnt > packets) {
			/*
			 * Stop unless this is an un-SACKed multi-packet skb
			 * (FACK or Reno) that straddles the threshold.
			 */
			if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
			    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
			    (oldcnt >= packets))
				break;

			/* Split so only the part below the threshold is marked */
			mss = skb_shinfo(skb)->gso_size;
			err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
			if (err < 0)
				break;
			cnt = packets;
		}

		tcp_skb_mark_lost(tp, skb);

		if (mark_head)
			break;
	}
	tcp_verify_left_out(tp);
}
2438
2439
2440
2441static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2442{
2443 struct tcp_sock *tp = tcp_sk(sk);
2444
2445 if (tcp_is_reno(tp)) {
2446 tcp_mark_head_lost(sk, 1, 1);
2447 } else if (tcp_is_fack(tp)) {
2448 int lost = tp->fackets_out - tp->reordering;
2449 if (lost <= 0)
2450 lost = 1;
2451 tcp_mark_head_lost(sk, lost, 0);
2452 } else {
2453 int sacked_upto = tp->sacked_out - tp->reordering;
2454 if (sacked_upto >= 0)
2455 tcp_mark_head_lost(sk, sacked_upto, 0);
2456 else if (fast_rexmit)
2457 tcp_mark_head_lost(sk, 1, 1);
2458 }
2459
2460 tcp_timeout_skbs(sk);
2461}
2462
2463
2464
2465
/*
 * Limit snd_cwnd to the packets currently in flight plus tcp_max_burst(),
 * and refresh snd_cwnd_stamp. cwnd is never raised by this function.
 */
static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
{
	tp->snd_cwnd = min(tp->snd_cwnd,
			   tcp_packets_in_flight(tp) + tcp_max_burst(tp));
	tp->snd_cwnd_stamp = tcp_time_stamp;
}
2472
2473
2474
2475
2476static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
2477{
2478 return !tp->retrans_stamp ||
2479 (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2480 before(tp->rx_opt.rcv_tsecr, tp->retrans_stamp));
2481}
2482
2483
2484
/*
 * DBGUNDO - debug trace for undo events.
 *
 * With FASTRETRANS_DEBUG > 1 this is a function that prints @msg together
 * with the peer address/port, snd_cwnd, tcp_left_out(), snd_ssthresh,
 * prior_ssthresh and packets_out, for AF_INET and (when IPv6 is enabled)
 * AF_INET6 sockets. Otherwise it is a macro that expands to nothing.
 */
#if FASTRETRANS_DEBUG > 1
static void DBGUNDO(struct sock *sk, const char *msg)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_sock *inet = inet_sk(sk);

	if (sk->sk_family == AF_INET) {
		pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
			 msg,
			 &inet->inet_daddr, ntohs(inet->inet_dport),
			 tp->snd_cwnd, tcp_left_out(tp),
			 tp->snd_ssthresh, tp->prior_ssthresh,
			 tp->packets_out);
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (sk->sk_family == AF_INET6) {
		struct ipv6_pinfo *np = inet6_sk(sk);
		pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
			 msg,
			 &np->daddr, ntohs(inet->inet_dport),
			 tp->snd_cwnd, tcp_left_out(tp),
			 tp->snd_ssthresh, tp->prior_ssthresh,
			 tp->packets_out);
	}
#endif
}
#else
#define DBGUNDO(x...) do { } while (0)
#endif
2514
2515static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
2516{
2517 struct tcp_sock *tp = tcp_sk(sk);
2518
2519 if (tp->prior_ssthresh) {
2520 const struct inet_connection_sock *icsk = inet_csk(sk);
2521
2522 if (icsk->icsk_ca_ops->undo_cwnd)
2523 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2524 else
2525 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
2526
2527 if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) {
2528 tp->snd_ssthresh = tp->prior_ssthresh;
2529 TCP_ECN_withdraw_cwr(tp);
2530 }
2531 } else {
2532 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
2533 }
2534 tp->snd_cwnd_stamp = tcp_time_stamp;
2535}
2536
2537static inline bool tcp_may_undo(const struct tcp_sock *tp)
2538{
2539 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2540}
2541
2542
2543static bool tcp_try_undo_recovery(struct sock *sk)
2544{
2545 struct tcp_sock *tp = tcp_sk(sk);
2546
2547 if (tcp_may_undo(tp)) {
2548 int mib_idx;
2549
2550
2551
2552
2553 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2554 tcp_undo_cwr(sk, true);
2555 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2556 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2557 else
2558 mib_idx = LINUX_MIB_TCPFULLUNDO;
2559
2560 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2561 tp->undo_marker = 0;
2562 }
2563 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
2564
2565
2566
2567 tcp_moderate_cwnd(tp);
2568 return true;
2569 }
2570 tcp_set_ca_state(sk, TCP_CA_Open);
2571 return false;
2572}
2573
2574
2575static void tcp_try_undo_dsack(struct sock *sk)
2576{
2577 struct tcp_sock *tp = tcp_sk(sk);
2578
2579 if (tp->undo_marker && !tp->undo_retrans) {
2580 DBGUNDO(sk, "D-SACK");
2581 tcp_undo_cwr(sk, true);
2582 tp->undo_marker = 0;
2583 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2584 }
2585}
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601static bool tcp_any_retrans_done(const struct sock *sk)
2602{
2603 const struct tcp_sock *tp = tcp_sk(sk);
2604 struct sk_buff *skb;
2605
2606 if (tp->retrans_out)
2607 return true;
2608
2609 skb = tcp_write_queue_head(sk);
2610 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2611 return true;
2612
2613 return false;
2614}
2615
2616
2617
2618static int tcp_try_undo_partial(struct sock *sk, int acked)
2619{
2620 struct tcp_sock *tp = tcp_sk(sk);
2621
2622 int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
2623
2624 if (tcp_may_undo(tp)) {
2625
2626
2627
2628 if (!tcp_any_retrans_done(sk))
2629 tp->retrans_stamp = 0;
2630
2631 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
2632
2633 DBGUNDO(sk, "Hoe");
2634 tcp_undo_cwr(sk, false);
2635 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2636
2637
2638
2639
2640
2641 failed = 0;
2642 }
2643 return failed;
2644}
2645
2646
2647static bool tcp_try_undo_loss(struct sock *sk)
2648{
2649 struct tcp_sock *tp = tcp_sk(sk);
2650
2651 if (tcp_may_undo(tp)) {
2652 struct sk_buff *skb;
2653 tcp_for_write_queue(skb, sk) {
2654 if (skb == tcp_send_head(sk))
2655 break;
2656 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2657 }
2658
2659 tcp_clear_all_retrans_hints(tp);
2660
2661 DBGUNDO(sk, "partial loss");
2662 tp->lost_out = 0;
2663 tcp_undo_cwr(sk, true);
2664 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2665 inet_csk(sk)->icsk_retransmits = 0;
2666 tp->undo_marker = 0;
2667 if (tcp_is_sack(tp))
2668 tcp_set_ca_state(sk, TCP_CA_Open);
2669 return true;
2670 }
2671 return false;
2672}
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
/*
 * Initialise the state used by tcp_cwnd_reduction().
 *
 * @sk:           socket
 * @set_ssthresh: when true, snd_ssthresh is recomputed through the
 *                congestion control's ssthresh hook
 *
 * Records snd_nxt as high_seq and the current snd_cwnd as prior_cwnd,
 * zeroes the prr_delivered/prr_out counters and queues a CWR.
 */
static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tp->high_seq = tp->snd_nxt;
	tp->bytes_acked = 0;
	tp->snd_cwnd_cnt = 0;
	tp->prior_cwnd = tp->snd_cwnd;
	tp->prr_delivered = 0;
	tp->prr_out = 0;
	if (set_ssthresh)
		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
	TCP_ECN_queue_cwr(tp);
}
2698
/*
 * Recompute snd_cwnd while a cwnd reduction is in progress.
 *
 * @sk:                 socket
 * @newly_acked_sacked: packets newly acked or SACKed, added to
 *                      prr_delivered
 * @fast_rexmit:        when non-zero, at least one packet may be sent
 *
 * NOTE(review): the prr_* fields suggest this is Proportional Rate
 * Reduction (RFC 6937) -- confirm. prior_cwnd is used as a divisor and is
 * assumed non-zero here.
 */
static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
			       int fast_rexmit)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int sndcnt = 0;
	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);

	tp->prr_delivered += newly_acked_sacked;
	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
		/*
		 * In flight above ssthresh: allow
		 * ceil(ssthresh * prr_delivered / prior_cwnd) - prr_out.
		 */
		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
			       tp->prior_cwnd - 1;
		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
	} else {
		/*
		 * At or below ssthresh: grow by at most the gap to ssthresh,
		 * and at most max(prr_delivered - prr_out,
		 * newly_acked_sacked) + 1.
		 */
		sndcnt = min_t(int, delta,
			       max_t(int, tp->prr_delivered - tp->prr_out,
				     newly_acked_sacked) + 1);
	}

	/* sndcnt is never negative, and at least 1 on a fast retransmit */
	sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
}
2720
2721static inline void tcp_end_cwnd_reduction(struct sock *sk)
2722{
2723 struct tcp_sock *tp = tcp_sk(sk);
2724
2725
2726 if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
2727 (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
2728 tp->snd_cwnd = tp->snd_ssthresh;
2729 tp->snd_cwnd_stamp = tcp_time_stamp;
2730 }
2731 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2732}
2733
2734
2735void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
2736{
2737 struct tcp_sock *tp = tcp_sk(sk);
2738
2739 tp->prior_ssthresh = 0;
2740 tp->bytes_acked = 0;
2741 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2742 tp->undo_marker = 0;
2743 tcp_init_cwnd_reduction(sk, set_ssthresh);
2744 tcp_set_ca_state(sk, TCP_CA_CWR);
2745 }
2746}
2747
2748static void tcp_try_keep_open(struct sock *sk)
2749{
2750 struct tcp_sock *tp = tcp_sk(sk);
2751 int state = TCP_CA_Open;
2752
2753 if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
2754 state = TCP_CA_Disorder;
2755
2756 if (inet_csk(sk)->icsk_ca_state != state) {
2757 tcp_set_ca_state(sk, state);
2758 tp->high_seq = tp->snd_nxt;
2759 }
2760}
2761
2762static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
2763{
2764 struct tcp_sock *tp = tcp_sk(sk);
2765
2766 tcp_verify_left_out(tp);
2767
2768 if (!tp->frto_counter && !tcp_any_retrans_done(sk))
2769 tp->retrans_stamp = 0;
2770
2771 if (flag & FLAG_ECE)
2772 tcp_enter_cwr(sk, 1);
2773
2774 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2775 tcp_try_keep_open(sk);
2776 if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
2777 tcp_moderate_cwnd(tp);
2778 } else {
2779 tcp_cwnd_reduction(sk, newly_acked_sacked, 0);
2780 }
2781}
2782
/*
 * Record a failed MTU probe: the upper search bound becomes one below the
 * probed size, and probe_size is cleared.
 */
static void tcp_mtup_probe_failed(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
	icsk->icsk_mtup.probe_size = 0;
}
2790
/*
 * Record a successful MTU probe.
 *
 * snd_cwnd is rescaled by (MTU for the current mss_cache) / probe_size,
 * the lower search bound is raised to the probed size, probe_size is
 * cleared and the MSS is re-synchronised from icsk_pmtu_cookie.
 */
static void tcp_mtup_probe_success(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	/* Saved before cwnd changes; snd_ssthresh is re-read afterwards */
	tp->prior_ssthresh = tcp_current_ssthresh(sk);
	tp->snd_cwnd = tp->snd_cwnd *
		       tcp_mss_to_mtu(sk, tp->mss_cache) /
		       icsk->icsk_mtup.probe_size;
	tp->snd_cwnd_cnt = 0;
	tp->snd_cwnd_stamp = tcp_time_stamp;
	tp->snd_ssthresh = tcp_current_ssthresh(sk);

	icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
	icsk->icsk_mtup.probe_size = 0;
	tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
2809
2810
2811
2812
2813
/*
 * Retransmit every outstanding segment that is larger than the current
 * MSS and has not been SACKed.
 *
 * Such segments are marked lost, after dropping any SACKED_RETRANS mark.
 * If nothing new was marked lost the function returns early. Otherwise,
 * unless already in Loss, the socket enters TCP_CA_Loss with undo disabled
 * and ssthresh kept at its current value, and the retransmit queue is
 * sent.
 */
void tcp_simple_retransmit(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	unsigned int mss = tcp_current_mss(sk);
	u32 prior_lost = tp->lost_out;

	tcp_for_write_queue(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;
		if (tcp_skb_seglen(skb) > mss &&
		    !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
				tp->retrans_out -= tcp_skb_pcount(skb);
			}
			tcp_skb_mark_lost_uncond_verify(tp, skb);
		}
	}

	tcp_clear_retrans_hints_partial(tp);

	/* Nothing new was marked lost */
	if (prior_lost == tp->lost_out)
		return;

	if (tcp_is_reno(tp))
		tcp_limit_reno_sacked(tp);

	tcp_verify_left_out(tp);

	/*
	 * Enter Loss without asking the congestion control for a new
	 * ssthresh, and with undo disabled.
	 */
	if (icsk->icsk_ca_state != TCP_CA_Loss) {
		tp->high_seq = tp->snd_nxt;
		tp->snd_ssthresh = tcp_current_ssthresh(sk);
		tp->prior_ssthresh = 0;
		tp->undo_marker = 0;
		tcp_set_ca_state(sk, TCP_CA_Loss);
	}
	tcp_xmit_retransmit_queue(sk);
}
EXPORT_SYMBOL(tcp_simple_retransmit);
2860
2861static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2862{
2863 struct tcp_sock *tp = tcp_sk(sk);
2864 int mib_idx;
2865
2866 if (tcp_is_reno(tp))
2867 mib_idx = LINUX_MIB_TCPRENORECOVERY;
2868 else
2869 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
2870
2871 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2872
2873 tp->prior_ssthresh = 0;
2874 tp->undo_marker = tp->snd_una;
2875 tp->undo_retrans = tp->retrans_out;
2876
2877 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2878 if (!ece_ack)
2879 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2880 tcp_init_cwnd_reduction(sk, true);
2881 }
2882 tcp_set_ca_state(sk, TCP_CA_Recovery);
2883}
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
/*
 * Congestion-state machine step, run for an incoming ACK.
 *
 * @sk:           socket
 * @pkts_acked:   packets acknowledged by this ACK
 * @prior_sacked: sacked_out value to compare against (presumably sampled
 *                before the ACK was processed -- confirm against caller)
 * @is_dupack:    true when the ACK is treated as a duplicate ACK
 * @flag:         FLAG_* bits describing the ACK
 *
 * First handles state exit when snd_una has reached high_seq, then
 * processes the (possibly new) state, and finally updates the scoreboard,
 * reduces cwnd and sends retransmissions.
 */
static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
				  int prior_sacked, bool is_dupack,
				  int flag)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	/* Mark losses on a dupack, or on new SACKs beyond the reorder limit */
	int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
				    (tcp_fackets_out(tp) > tp->reordering));
	int newly_acked_sacked = 0;
	int fast_rexmit = 0;

	/* Repair counters that are inconsistent with each other */
	if (WARN_ON(!tp->packets_out && tp->sacked_out))
		tp->sacked_out = 0;
	if (WARN_ON(!tp->sacked_out && tp->fackets_out))
		tp->fackets_out = 0;

	/* An ECE-flagged ACK forbids restoring the old ssthresh */
	if (flag & FLAG_ECE)
		tp->prior_ssthresh = 0;

	/* SACK reneging is handled by entering Loss; nothing more to do */
	if (tcp_check_sack_reneging(sk, flag))
		return;

	tcp_verify_left_out(tp);

	/* State exit: only considered once snd_una has reached high_seq */
	if (icsk->icsk_ca_state == TCP_CA_Open) {
		WARN_ON(tp->retrans_out != 0);
		tp->retrans_stamp = 0;
	} else if (!before(tp->snd_una, tp->high_seq)) {
		switch (icsk->icsk_ca_state) {
		case TCP_CA_Loss:
			icsk->icsk_retransmits = 0;
			if (tcp_try_undo_recovery(sk))
				return;
			break;

		case TCP_CA_CWR:
			/* CWR ends only once snd_una has moved past high_seq */
			if (tp->snd_una != tp->high_seq) {
				tcp_end_cwnd_reduction(sk);
				tcp_set_ca_state(sk, TCP_CA_Open);
			}
			break;

		case TCP_CA_Recovery:
			if (tcp_is_reno(tp))
				tcp_reset_reno_sack(tp);
			if (tcp_try_undo_recovery(sk))
				return;
			tcp_end_cwnd_reduction(sk);
			break;
		}
	}

	/* Process the current state */
	switch (icsk->icsk_ca_state) {
	case TCP_CA_Recovery:
		if (!(flag & FLAG_SND_UNA_ADVANCED)) {
			if (tcp_is_reno(tp) && is_dupack)
				tcp_add_reno_sack(sk);
		} else
			do_lost = tcp_try_undo_partial(sk, pkts_acked);
		newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
		break;
	case TCP_CA_Loss:
		if (flag & FLAG_DATA_ACKED)
			icsk->icsk_retransmits = 0;
		if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
			tcp_reset_reno_sack(tp);
		if (!tcp_try_undo_loss(sk)) {
			tcp_moderate_cwnd(tp);
			tcp_xmit_retransmit_queue(sk);
			return;
		}
		if (icsk->icsk_ca_state != TCP_CA_Open)
			return;
		/* Loss was undone and state is Open: fall through */
	default:
		if (tcp_is_reno(tp)) {
			if (flag & FLAG_SND_UNA_ADVANCED)
				tcp_reset_reno_sack(tp);
			if (is_dupack)
				tcp_add_reno_sack(sk);
		}
		newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;

		if (icsk->icsk_ca_state <= TCP_CA_Disorder)
			tcp_try_undo_dsack(sk);

		if (!tcp_time_to_recover(sk, flag)) {
			tcp_try_to_open(sk, flag, newly_acked_sacked);
			return;
		}

		/* The loss hit an outstanding MTU probe: fail the probe */
		if (icsk->icsk_ca_state < TCP_CA_CWR &&
		    icsk->icsk_mtup.probe_size &&
		    tp->snd_una == tp->mtu_probe.probe_seq_start) {
			tcp_mtup_probe_failed(sk);
			/* One extra cwnd slot before the retransmit */
			tp->snd_cwnd++;
			tcp_simple_retransmit(sk);
			return;
		}

		/* Otherwise start Recovery with a fast retransmit */
		tcp_enter_recovery(sk, (flag & FLAG_ECE));
		fast_rexmit = 1;
	}

	if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
		tcp_update_scoreboard(sk, fast_rexmit);
	tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit);
	tcp_xmit_retransmit_queue(sk);
}
3017
3018void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
3019{
3020 tcp_rtt_estimator(sk, seq_rtt);
3021 tcp_set_rto(sk);
3022 inet_csk(sk)->icsk_backoff = 0;
3023}
3024EXPORT_SYMBOL(tcp_valid_rtt_meas);
3025
3026
3027
3028
3029static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
3030{
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046 struct tcp_sock *tp = tcp_sk(sk);
3047
3048 tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
3049}
3050
3051static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
3052{
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062 if (flag & FLAG_RETRANS_DATA_ACKED)
3063 return;
3064
3065 tcp_valid_rtt_meas(sk, seq_rtt);
3066}
3067
3068static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
3069 const s32 seq_rtt)
3070{
3071 const struct tcp_sock *tp = tcp_sk(sk);
3072
3073 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
3074 tcp_ack_saw_tstamp(sk, flag);
3075 else if (seq_rtt >= 0)
3076 tcp_ack_no_tstamp(sk, seq_rtt, flag);
3077}
3078
3079static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
3080{
3081 const struct inet_connection_sock *icsk = inet_csk(sk);
3082 icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight);
3083 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
3084}
3085
3086
3087
3088
3089void tcp_rearm_rto(struct sock *sk)
3090{
3091 struct tcp_sock *tp = tcp_sk(sk);
3092
3093
3094
3095
3096 if (tp->fastopen_rsk)
3097 return;
3098
3099 if (!tp->packets_out) {
3100 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
3101 } else {
3102 u32 rto = inet_csk(sk)->icsk_rto;
3103
3104 if (tp->early_retrans_delayed) {
3105 struct sk_buff *skb = tcp_write_queue_head(sk);
3106 const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto;
3107 s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
3108
3109
3110
3111 if (delta > 0)
3112 rto = delta;
3113 }
3114 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
3115 TCP_RTO_MAX);
3116 }
3117 tp->early_retrans_delayed = 0;
3118}
3119
3120
3121
3122
3123void tcp_resume_early_retransmit(struct sock *sk)
3124{
3125 struct tcp_sock *tp = tcp_sk(sk);
3126
3127 tcp_rearm_rto(sk);
3128
3129
3130 if (!tp->do_early_retrans)
3131 return;
3132
3133 tcp_enter_recovery(sk, false);
3134 tcp_update_scoreboard(sk, 1);
3135 tcp_xmit_retransmit_queue(sk);
3136}
3137
3138
3139static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3140{
3141 struct tcp_sock *tp = tcp_sk(sk);
3142 u32 packets_acked;
3143
3144 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
3145
3146 packets_acked = tcp_skb_pcount(skb);
3147 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
3148 return 0;
3149 packets_acked -= tcp_skb_pcount(skb);
3150
3151 if (packets_acked) {
3152 BUG_ON(tcp_skb_pcount(skb) == 0);
3153 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
3154 }
3155
3156 return packets_acked;
3157}
3158
3159
3160
3161
3162
3163static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3164 u32 prior_snd_una)
3165{
3166 struct tcp_sock *tp = tcp_sk(sk);
3167 const struct inet_connection_sock *icsk = inet_csk(sk);
3168 struct sk_buff *skb;
3169 u32 now = tcp_time_stamp;
3170 int fully_acked = true;
3171 int flag = 0;
3172 u32 pkts_acked = 0;
3173 u32 reord = tp->packets_out;
3174 u32 prior_sacked = tp->sacked_out;
3175 s32 seq_rtt = -1;
3176 s32 ca_seq_rtt = -1;
3177 ktime_t last_ackt = net_invalid_timestamp();
3178
3179 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
3180 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3181 u32 acked_pcount;
3182 u8 sacked = scb->sacked;
3183
3184
3185 if (after(scb->end_seq, tp->snd_una)) {
3186 if (tcp_skb_pcount(skb) == 1 ||
3187 !after(tp->snd_una, scb->seq))
3188 break;
3189
3190 acked_pcount = tcp_tso_acked(sk, skb);
3191 if (!acked_pcount)
3192 break;
3193
3194 fully_acked = false;
3195 } else {
3196 acked_pcount = tcp_skb_pcount(skb);
3197 }
3198
3199 if (sacked & TCPCB_RETRANS) {
3200 if (sacked & TCPCB_SACKED_RETRANS)
3201 tp->retrans_out -= acked_pcount;
3202 flag |= FLAG_RETRANS_DATA_ACKED;
3203 ca_seq_rtt = -1;
3204 seq_rtt = -1;
3205 if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
3206 flag |= FLAG_NONHEAD_RETRANS_ACKED;
3207 } else {
3208 ca_seq_rtt = now - scb->when;
3209 last_ackt = skb->tstamp;
3210 if (seq_rtt < 0) {
3211 seq_rtt = ca_seq_rtt;
3212 }
3213 if (!(sacked & TCPCB_SACKED_ACKED))
3214 reord = min(pkts_acked, reord);
3215 }
3216
3217 if (sacked & TCPCB_SACKED_ACKED)
3218 tp->sacked_out -= acked_pcount;
3219 if (sacked & TCPCB_LOST)
3220 tp->lost_out -= acked_pcount;
3221
3222 tp->packets_out -= acked_pcount;
3223 pkts_acked += acked_pcount;
3224
3225
3226
3227
3228
3229
3230
3231
3232 if (!(scb->tcp_flags & TCPHDR_SYN)) {
3233 flag |= FLAG_DATA_ACKED;
3234 } else {
3235 flag |= FLAG_SYN_ACKED;
3236 tp->retrans_stamp = 0;
3237 }
3238
3239 if (!fully_acked)
3240 break;
3241
3242 tcp_unlink_write_queue(skb, sk);
3243 sk_wmem_free_skb(sk, skb);
3244 tp->scoreboard_skb_hint = NULL;
3245 if (skb == tp->retransmit_skb_hint)
3246 tp->retransmit_skb_hint = NULL;
3247 if (skb == tp->lost_skb_hint)
3248 tp->lost_skb_hint = NULL;
3249 }
3250
3251 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3252 tp->snd_up = tp->snd_una;
3253
3254 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
3255 flag |= FLAG_SACK_RENEGING;
3256
3257 if (flag & FLAG_ACKED) {
3258 const struct tcp_congestion_ops *ca_ops
3259 = inet_csk(sk)->icsk_ca_ops;
3260
3261 if (unlikely(icsk->icsk_mtup.probe_size &&
3262 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3263 tcp_mtup_probe_success(sk);
3264 }
3265
3266 tcp_ack_update_rtt(sk, flag, seq_rtt);
3267 tcp_rearm_rto(sk);
3268
3269 if (tcp_is_reno(tp)) {
3270 tcp_remove_reno_sacks(sk, pkts_acked);
3271 } else {
3272 int delta;
3273
3274
3275 if (reord < prior_fackets)
3276 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
3277
3278 delta = tcp_is_fack(tp) ? pkts_acked :
3279 prior_sacked - tp->sacked_out;
3280 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3281 }
3282
3283 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
3284
3285 if (ca_ops->pkts_acked) {
3286 s32 rtt_us = -1;
3287
3288
3289 if (!(flag & FLAG_RETRANS_DATA_ACKED)) {
3290
3291 if (ca_ops->flags & TCP_CONG_RTT_STAMP &&
3292 !ktime_equal(last_ackt,
3293 net_invalid_timestamp()))
3294 rtt_us = ktime_us_delta(ktime_get_real(),
3295 last_ackt);
3296 else if (ca_seq_rtt >= 0)
3297 rtt_us = jiffies_to_usecs(ca_seq_rtt);
3298 }
3299
3300 ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
3301 }
3302 }
3303
3304#if FASTRETRANS_DEBUG > 0
3305 WARN_ON((int)tp->sacked_out < 0);
3306 WARN_ON((int)tp->lost_out < 0);
3307 WARN_ON((int)tp->retrans_out < 0);
3308 if (!tp->packets_out && tcp_is_sack(tp)) {
3309 icsk = inet_csk(sk);
3310 if (tp->lost_out) {
3311 pr_debug("Leak l=%u %d\n",
3312 tp->lost_out, icsk->icsk_ca_state);
3313 tp->lost_out = 0;
3314 }
3315 if (tp->sacked_out) {
3316 pr_debug("Leak s=%u %d\n",
3317 tp->sacked_out, icsk->icsk_ca_state);
3318 tp->sacked_out = 0;
3319 }
3320 if (tp->retrans_out) {
3321 pr_debug("Leak r=%u %d\n",
3322 tp->retrans_out, icsk->icsk_ca_state);
3323 tp->retrans_out = 0;
3324 }
3325 }
3326#endif
3327 return flag;
3328}
3329
3330static void tcp_ack_probe(struct sock *sk)
3331{
3332 const struct tcp_sock *tp = tcp_sk(sk);
3333 struct inet_connection_sock *icsk = inet_csk(sk);
3334
3335
3336
3337 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
3338 icsk->icsk_backoff = 0;
3339 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
3340
3341
3342
3343 } else {
3344 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3345 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
3346 TCP_RTO_MAX);
3347 }
3348}
3349
3350static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
3351{
3352 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3353 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3354}
3355
3356static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3357{
3358 const struct tcp_sock *tp = tcp_sk(sk);
3359 return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
3360 !tcp_in_cwnd_reduction(sk);
3361}
3362
3363
3364
3365
3366static inline bool tcp_may_update_window(const struct tcp_sock *tp,
3367 const u32 ack, const u32 ack_seq,
3368 const u32 nwin)
3369{
3370 return after(ack, tp->snd_una) ||
3371 after(ack_seq, tp->snd_wl1) ||
3372 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
3373}
3374
3375
3376
3377
3378
3379
3380static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
3381 u32 ack_seq)
3382{
3383 struct tcp_sock *tp = tcp_sk(sk);
3384 int flag = 0;
3385 u32 nwin = ntohs(tcp_hdr(skb)->window);
3386
3387 if (likely(!tcp_hdr(skb)->syn))
3388 nwin <<= tp->rx_opt.snd_wscale;
3389
3390 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3391 flag |= FLAG_WIN_UPDATE;
3392 tcp_update_wl(tp, ack_seq);
3393
3394 if (tp->snd_wnd != nwin) {
3395 tp->snd_wnd = nwin;
3396
3397
3398
3399
3400 tp->pred_flags = 0;
3401 tcp_fast_path_check(sk);
3402
3403 if (nwin > tp->max_window) {
3404 tp->max_window = nwin;
3405 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3406 }
3407 }
3408 }
3409
3410 tp->snd_una = ack;
3411
3412 return flag;
3413}
3414
3415
3416
3417
3418static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
3419{
3420 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
3421 tp->snd_cwnd_cnt = 0;
3422 tp->bytes_acked = 0;
3423 TCP_ECN_queue_cwr(tp);
3424 tcp_moderate_cwnd(tp);
3425}
3426
3427
3428
3429
/* Spurious RTO response that enters CWR, calling tcp_enter_cwr() with a
 * zero second argument.
 */
static void tcp_cwr_spur_to_response(struct sock *sk)
{
	tcp_enter_cwr(sk, 0);
}
3434
3435static void tcp_undo_spur_to_response(struct sock *sk, int flag)
3436{
3437 if (flag & FLAG_ECE)
3438 tcp_cwr_spur_to_response(sk);
3439 else
3440 tcp_undo_cwr(sk, true);
3441}
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473static bool tcp_process_frto(struct sock *sk, int flag)
3474{
3475 struct tcp_sock *tp = tcp_sk(sk);
3476
3477 tcp_verify_left_out(tp);
3478
3479
3480 if (flag & FLAG_DATA_ACKED)
3481 inet_csk(sk)->icsk_retransmits = 0;
3482
3483 if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
3484 ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
3485 tp->undo_marker = 0;
3486
3487 if (!before(tp->snd_una, tp->frto_highmark)) {
3488 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
3489 return true;
3490 }
3491
3492 if (!tcp_is_sackfrto(tp)) {
3493
3494
3495
3496
3497 if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP))
3498 return true;
3499
3500 if (!(flag & FLAG_DATA_ACKED)) {
3501 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
3502 flag);
3503 return true;
3504 }
3505 } else {
3506 if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
3507
3508 tp->snd_cwnd = min(tp->snd_cwnd,
3509 tcp_packets_in_flight(tp));
3510 return true;
3511 }
3512
3513 if ((tp->frto_counter >= 2) &&
3514 (!(flag & FLAG_FORWARD_PROGRESS) ||
3515 ((flag & FLAG_DATA_SACKED) &&
3516 !(flag & FLAG_ONLY_ORIG_SACKED)))) {
3517
3518 if (!(flag & FLAG_FORWARD_PROGRESS) &&
3519 (flag & FLAG_NOT_DUP))
3520 return true;
3521
3522 tcp_enter_frto_loss(sk, 3, flag);
3523 return true;
3524 }
3525 }
3526
3527 if (tp->frto_counter == 1) {
3528
3529 tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
3530 tp->frto_counter = 2;
3531
3532 if (!tcp_may_send_now(sk))
3533 tcp_enter_frto_loss(sk, 2, flag);
3534
3535 return true;
3536 } else {
3537 switch (sysctl_tcp_frto_response) {
3538 case 2:
3539 tcp_undo_spur_to_response(sk, flag);
3540 break;
3541 case 1:
3542 tcp_conservative_spur_to_response(tp);
3543 break;
3544 default:
3545 tcp_cwr_spur_to_response(sk);
3546 break;
3547 }
3548 tp->frto_counter = 0;
3549 tp->undo_marker = 0;
3550 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS);
3551 }
3552 return false;
3553}
3554
3555
3556static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3557{
3558 struct inet_connection_sock *icsk = inet_csk(sk);
3559 struct tcp_sock *tp = tcp_sk(sk);
3560 u32 prior_snd_una = tp->snd_una;
3561 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3562 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3563 bool is_dupack = false;
3564 u32 prior_in_flight;
3565 u32 prior_fackets;
3566 int prior_packets;
3567 int prior_sacked = tp->sacked_out;
3568 int pkts_acked = 0;
3569 bool frto_cwnd = false;
3570
3571
3572
3573
3574 if (before(ack, prior_snd_una))
3575 goto old_ack;
3576
3577
3578
3579
3580 if (after(ack, tp->snd_nxt))
3581 goto invalid_ack;
3582
3583 if (tp->early_retrans_delayed)
3584 tcp_rearm_rto(sk);
3585
3586 if (after(ack, prior_snd_una))
3587 flag |= FLAG_SND_UNA_ADVANCED;
3588
3589 if (sysctl_tcp_abc) {
3590 if (icsk->icsk_ca_state < TCP_CA_CWR)
3591 tp->bytes_acked += ack - prior_snd_una;
3592 else if (icsk->icsk_ca_state == TCP_CA_Loss)
3593
3594 tp->bytes_acked += min(ack - prior_snd_una,
3595 tp->mss_cache);
3596 }
3597
3598 prior_fackets = tp->fackets_out;
3599 prior_in_flight = tcp_packets_in_flight(tp);
3600
3601 if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
3602
3603
3604
3605
3606 tcp_update_wl(tp, ack_seq);
3607 tp->snd_una = ack;
3608 flag |= FLAG_WIN_UPDATE;
3609
3610 tcp_ca_event(sk, CA_EVENT_FAST_ACK);
3611
3612 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
3613 } else {
3614 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3615 flag |= FLAG_DATA;
3616 else
3617 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3618
3619 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3620
3621 if (TCP_SKB_CB(skb)->sacked)
3622 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3623
3624 if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
3625 flag |= FLAG_ECE;
3626
3627 tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
3628 }
3629
3630
3631
3632
3633 sk->sk_err_soft = 0;
3634 icsk->icsk_probes_out = 0;
3635 tp->rcv_tstamp = tcp_time_stamp;
3636 prior_packets = tp->packets_out;
3637 if (!prior_packets)
3638 goto no_queue;
3639
3640
3641 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
3642
3643 pkts_acked = prior_packets - tp->packets_out;
3644
3645 if (tp->frto_counter)
3646 frto_cwnd = tcp_process_frto(sk, flag);
3647
3648 if (before(tp->frto_highmark, tp->snd_una))
3649 tp->frto_highmark = 0;
3650
3651 if (tcp_ack_is_dubious(sk, flag)) {
3652
3653 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
3654 tcp_may_raise_cwnd(sk, flag))
3655 tcp_cong_avoid(sk, ack, prior_in_flight);
3656 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3657 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
3658 is_dupack, flag);
3659 } else {
3660 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
3661 tcp_cong_avoid(sk, ack, prior_in_flight);
3662 }
3663
3664 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
3665 struct dst_entry *dst = __sk_dst_get(sk);
3666 if (dst)
3667 dst_confirm(dst);
3668 }
3669 return 1;
3670
3671no_queue:
3672
3673 if (flag & FLAG_DSACKING_ACK)
3674 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
3675 is_dupack, flag);
3676
3677
3678
3679
3680 if (tcp_send_head(sk))
3681 tcp_ack_probe(sk);
3682 return 1;
3683
3684invalid_ack:
3685 SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3686 return -1;
3687
3688old_ack:
3689
3690
3691
3692 if (TCP_SKB_CB(skb)->sacked) {
3693 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3694 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
3695 is_dupack, flag);
3696 }
3697
3698 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3699 return 0;
3700}
3701
3702
3703
3704
3705
3706void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx,
3707 const u8 **hvpp, int estab,
3708 struct tcp_fastopen_cookie *foc)
3709{
3710 const unsigned char *ptr;
3711 const struct tcphdr *th = tcp_hdr(skb);
3712 int length = (th->doff * 4) - sizeof(struct tcphdr);
3713
3714 ptr = (const unsigned char *)(th + 1);
3715 opt_rx->saw_tstamp = 0;
3716
3717 while (length > 0) {
3718 int opcode = *ptr++;
3719 int opsize;
3720
3721 switch (opcode) {
3722 case TCPOPT_EOL:
3723 return;
3724 case TCPOPT_NOP:
3725 length--;
3726 continue;
3727 default:
3728 opsize = *ptr++;
3729 if (opsize < 2)
3730 return;
3731 if (opsize > length)
3732 return;
3733 switch (opcode) {
3734 case TCPOPT_MSS:
3735 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
3736 u16 in_mss = get_unaligned_be16(ptr);
3737 if (in_mss) {
3738 if (opt_rx->user_mss &&
3739 opt_rx->user_mss < in_mss)
3740 in_mss = opt_rx->user_mss;
3741 opt_rx->mss_clamp = in_mss;
3742 }
3743 }
3744 break;
3745 case TCPOPT_WINDOW:
3746 if (opsize == TCPOLEN_WINDOW && th->syn &&
3747 !estab && sysctl_tcp_window_scaling) {
3748 __u8 snd_wscale = *(__u8 *)ptr;
3749 opt_rx->wscale_ok = 1;
3750 if (snd_wscale > 14) {
3751 net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n",
3752 __func__,
3753 snd_wscale);
3754 snd_wscale = 14;
3755 }
3756 opt_rx->snd_wscale = snd_wscale;
3757 }
3758 break;
3759 case TCPOPT_TIMESTAMP:
3760 if ((opsize == TCPOLEN_TIMESTAMP) &&
3761 ((estab && opt_rx->tstamp_ok) ||
3762 (!estab && sysctl_tcp_timestamps))) {
3763 opt_rx->saw_tstamp = 1;
3764 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3765 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
3766 }
3767 break;
3768 case TCPOPT_SACK_PERM:
3769 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3770 !estab && sysctl_tcp_sack) {
3771 opt_rx->sack_ok = TCP_SACK_SEEN;
3772 tcp_sack_reset(opt_rx);
3773 }
3774 break;
3775
3776 case TCPOPT_SACK:
3777 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3778 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3779 opt_rx->sack_ok) {
3780 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3781 }
3782 break;
3783#ifdef CONFIG_TCP_MD5SIG
3784 case TCPOPT_MD5SIG:
3785
3786
3787
3788
3789 break;
3790#endif
3791 case TCPOPT_COOKIE:
3792
3793
3794 switch (opsize) {
3795 case TCPOLEN_COOKIE_BASE:
3796
3797 break;
3798 case TCPOLEN_COOKIE_PAIR:
3799
3800 break;
3801 case TCPOLEN_COOKIE_MIN+0:
3802 case TCPOLEN_COOKIE_MIN+2:
3803 case TCPOLEN_COOKIE_MIN+4:
3804 case TCPOLEN_COOKIE_MIN+6:
3805 case TCPOLEN_COOKIE_MAX:
3806
3807 opt_rx->cookie_plus = opsize;
3808 *hvpp = ptr;
3809 break;
3810 default:
3811
3812 break;
3813 }
3814 break;
3815
3816 case TCPOPT_EXP:
3817
3818
3819
3820
3821 if (opsize < TCPOLEN_EXP_FASTOPEN_BASE ||
3822 get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC ||
3823 foc == NULL || !th->syn || (opsize & 1))
3824 break;
3825 foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE;
3826 if (foc->len >= TCP_FASTOPEN_COOKIE_MIN &&
3827 foc->len <= TCP_FASTOPEN_COOKIE_MAX)
3828 memcpy(foc->val, ptr + 2, foc->len);
3829 else if (foc->len != 0)
3830 foc->len = -1;
3831 break;
3832
3833 }
3834 ptr += opsize-2;
3835 length -= opsize;
3836 }
3837 }
3838}
3839EXPORT_SYMBOL(tcp_parse_options);
3840
3841static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
3842{
3843 const __be32 *ptr = (const __be32 *)(th + 1);
3844
3845 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
3846 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
3847 tp->rx_opt.saw_tstamp = 1;
3848 ++ptr;
3849 tp->rx_opt.rcv_tsval = ntohl(*ptr);
3850 ++ptr;
3851 tp->rx_opt.rcv_tsecr = ntohl(*ptr);
3852 return true;
3853 }
3854 return false;
3855}
3856
3857
3858
3859
3860static bool tcp_fast_parse_options(const struct sk_buff *skb,
3861 const struct tcphdr *th,
3862 struct tcp_sock *tp, const u8 **hvpp)
3863{
3864
3865
3866
3867 if (th->doff == (sizeof(*th) / 4)) {
3868 tp->rx_opt.saw_tstamp = 0;
3869 return false;
3870 } else if (tp->rx_opt.tstamp_ok &&
3871 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
3872 if (tcp_parse_aligned_timestamp(tp, th))
3873 return true;
3874 }
3875 tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL);
3876 return true;
3877}
3878
3879#ifdef CONFIG_TCP_MD5SIG
3880
3881
3882
3883const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
3884{
3885 int length = (th->doff << 2) - sizeof(*th);
3886 const u8 *ptr = (const u8 *)(th + 1);
3887
3888
3889 if (length < TCPOLEN_MD5SIG)
3890 return NULL;
3891
3892 while (length > 0) {
3893 int opcode = *ptr++;
3894 int opsize;
3895
3896 switch(opcode) {
3897 case TCPOPT_EOL:
3898 return NULL;
3899 case TCPOPT_NOP:
3900 length--;
3901 continue;
3902 default:
3903 opsize = *ptr++;
3904 if (opsize < 2 || opsize > length)
3905 return NULL;
3906 if (opcode == TCPOPT_MD5SIG)
3907 return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
3908 }
3909 ptr += opsize - 2;
3910 length -= opsize;
3911 }
3912 return NULL;
3913}
3914EXPORT_SYMBOL(tcp_parse_md5sig_option);
3915#endif
3916
3917static inline void tcp_store_ts_recent(struct tcp_sock *tp)
3918{
3919 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3920 tp->rx_opt.ts_recent_stamp = get_seconds();
3921}
3922
3923static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3924{
3925 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
3926
3927
3928
3929
3930
3931
3932
3933 if (tcp_paws_check(&tp->rx_opt, 0))
3934 tcp_store_ts_recent(tp);
3935 }
3936}
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
3962{
3963 const struct tcp_sock *tp = tcp_sk(sk);
3964 const struct tcphdr *th = tcp_hdr(skb);
3965 u32 seq = TCP_SKB_CB(skb)->seq;
3966 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3967
3968 return (
3969 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
3970
3971
3972 ack == tp->snd_una &&
3973
3974
3975 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
3976
3977
3978 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
3979}
3980
3981static inline bool tcp_paws_discard(const struct sock *sk,
3982 const struct sk_buff *skb)
3983{
3984 const struct tcp_sock *tp = tcp_sk(sk);
3985
3986 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
3987 !tcp_disordered_ack(sk, skb);
3988}
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
4004{
4005 return !before(end_seq, tp->rcv_wup) &&
4006 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
4007}
4008
4009
/* Handle a connection reset.
 *
 * The socket error is chosen from the current state: ECONNREFUSED in
 * SYN_SENT, EPIPE in CLOSE_WAIT, ECONNRESET otherwise.  A socket already
 * in CLOSE is left alone.  The error is reported unless the socket is
 * dead, and the connection is then torn down with tcp_done().
 */
void tcp_reset(struct sock *sk)
{
	int err;

	switch (sk->sk_state) {
	case TCP_CLOSE:
		return;
	case TCP_SYN_SENT:
		err = ECONNREFUSED;
		break;
	case TCP_CLOSE_WAIT:
		err = EPIPE;
		break;
	default:
		err = ECONNRESET;
		break;
	}
	sk->sk_err = err;

	/* Write barrier between storing sk_err and what follows. */
	smp_wmb();

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_error_report(sk);

	tcp_done(sk);
}
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048static void tcp_fin(struct sock *sk)
4049{
4050 struct tcp_sock *tp = tcp_sk(sk);
4051
4052 inet_csk_schedule_ack(sk);
4053
4054 sk->sk_shutdown |= RCV_SHUTDOWN;
4055 sock_set_flag(sk, SOCK_DONE);
4056
4057 switch (sk->sk_state) {
4058 case TCP_SYN_RECV:
4059 case TCP_ESTABLISHED:
4060
4061 tcp_set_state(sk, TCP_CLOSE_WAIT);
4062 inet_csk(sk)->icsk_ack.pingpong = 1;
4063 break;
4064
4065 case TCP_CLOSE_WAIT:
4066 case TCP_CLOSING:
4067
4068
4069
4070 break;
4071 case TCP_LAST_ACK:
4072
4073 break;
4074
4075 case TCP_FIN_WAIT1:
4076
4077
4078
4079
4080 tcp_send_ack(sk);
4081 tcp_set_state(sk, TCP_CLOSING);
4082 break;
4083 case TCP_FIN_WAIT2:
4084
4085 tcp_send_ack(sk);
4086 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
4087 break;
4088 default:
4089
4090
4091
4092 pr_err("%s: Impossible, sk->sk_state=%d\n",
4093 __func__, sk->sk_state);
4094 break;
4095 }
4096
4097
4098
4099
4100 __skb_queue_purge(&tp->out_of_order_queue);
4101 if (tcp_is_sack(tp))
4102 tcp_sack_reset(&tp->rx_opt);
4103 sk_mem_reclaim(sk);
4104
4105 if (!sock_flag(sk, SOCK_DEAD)) {
4106 sk->sk_state_change(sk);
4107
4108
4109 if (sk->sk_shutdown == SHUTDOWN_MASK ||
4110 sk->sk_state == TCP_CLOSE)
4111 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
4112 else
4113 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
4114 }
4115}
4116
4117static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4118 u32 end_seq)
4119{
4120 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
4121 if (before(seq, sp->start_seq))
4122 sp->start_seq = seq;
4123 if (after(end_seq, sp->end_seq))
4124 sp->end_seq = end_seq;
4125 return true;
4126 }
4127 return false;
4128}
4129
4130static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4131{
4132 struct tcp_sock *tp = tcp_sk(sk);
4133
4134 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4135 int mib_idx;
4136
4137 if (before(seq, tp->rcv_nxt))
4138 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
4139 else
4140 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
4141
4142 NET_INC_STATS_BH(sock_net(sk), mib_idx);
4143
4144 tp->rx_opt.dsack = 1;
4145 tp->duplicate_sack[0].start_seq = seq;
4146 tp->duplicate_sack[0].end_seq = end_seq;
4147 }
4148}
4149
4150static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4151{
4152 struct tcp_sock *tp = tcp_sk(sk);
4153
4154 if (!tp->rx_opt.dsack)
4155 tcp_dsack_set(sk, seq, end_seq);
4156 else
4157 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
4158}
4159
4160static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
4161{
4162 struct tcp_sock *tp = tcp_sk(sk);
4163
4164 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4165 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4166 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4167 tcp_enter_quickack_mode(sk);
4168
4169 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4170 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4171
4172 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
4173 end_seq = tp->rcv_nxt;
4174 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
4175 }
4176 }
4177
4178 tcp_send_ack(sk);
4179}
4180
4181
4182
4183
4184static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4185{
4186 int this_sack;
4187 struct tcp_sack_block *sp = &tp->selective_acks[0];
4188 struct tcp_sack_block *swalk = sp + 1;
4189
4190
4191
4192
4193 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
4194 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
4195 int i;
4196
4197
4198
4199
4200 tp->rx_opt.num_sacks--;
4201 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4202 sp[i] = sp[i + 1];
4203 continue;
4204 }
4205 this_sack++, swalk++;
4206 }
4207}
4208
4209static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4210{
4211 struct tcp_sock *tp = tcp_sk(sk);
4212 struct tcp_sack_block *sp = &tp->selective_acks[0];
4213 int cur_sacks = tp->rx_opt.num_sacks;
4214 int this_sack;
4215
4216 if (!cur_sacks)
4217 goto new_sack;
4218
4219 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
4220 if (tcp_sack_extend(sp, seq, end_seq)) {
4221
4222 for (; this_sack > 0; this_sack--, sp--)
4223 swap(*sp, *(sp - 1));
4224 if (cur_sacks > 1)
4225 tcp_sack_maybe_coalesce(tp);
4226 return;
4227 }
4228 }
4229
4230
4231
4232
4233
4234
4235
4236 if (this_sack >= TCP_NUM_SACKS) {
4237 this_sack--;
4238 tp->rx_opt.num_sacks--;
4239 sp--;
4240 }
4241 for (; this_sack > 0; this_sack--, sp--)
4242 *sp = *(sp - 1);
4243
4244new_sack:
4245
4246 sp->start_seq = seq;
4247 sp->end_seq = end_seq;
4248 tp->rx_opt.num_sacks++;
4249}
4250
4251
4252
4253static void tcp_sack_remove(struct tcp_sock *tp)
4254{
4255 struct tcp_sack_block *sp = &tp->selective_acks[0];
4256 int num_sacks = tp->rx_opt.num_sacks;
4257 int this_sack;
4258
4259
4260 if (skb_queue_empty(&tp->out_of_order_queue)) {
4261 tp->rx_opt.num_sacks = 0;
4262 return;
4263 }
4264
4265 for (this_sack = 0; this_sack < num_sacks;) {
4266
4267 if (!before(tp->rcv_nxt, sp->start_seq)) {
4268 int i;
4269
4270
4271 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4272
4273
4274 for (i=this_sack+1; i < num_sacks; i++)
4275 tp->selective_acks[i-1] = tp->selective_acks[i];
4276 num_sacks--;
4277 continue;
4278 }
4279 this_sack++;
4280 sp++;
4281 }
4282 tp->rx_opt.num_sacks = num_sacks;
4283}
4284
4285
4286
4287
4288static void tcp_ofo_queue(struct sock *sk)
4289{
4290 struct tcp_sock *tp = tcp_sk(sk);
4291 __u32 dsack_high = tp->rcv_nxt;
4292 struct sk_buff *skb;
4293
4294 while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
4295 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
4296 break;
4297
4298 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
4299 __u32 dsack = dsack_high;
4300 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
4301 dsack_high = TCP_SKB_CB(skb)->end_seq;
4302 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
4303 }
4304
4305 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4306 SOCK_DEBUG(sk, "ofo packet was already received\n");
4307 __skb_unlink(skb, &tp->out_of_order_queue);
4308 __kfree_skb(skb);
4309 continue;
4310 }
4311 SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
4312 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4313 TCP_SKB_CB(skb)->end_seq);
4314
4315 __skb_unlink(skb, &tp->out_of_order_queue);
4316 __skb_queue_tail(&sk->sk_receive_queue, skb);
4317 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4318 if (tcp_hdr(skb)->fin)
4319 tcp_fin(sk);
4320 }
4321}
4322
4323static bool tcp_prune_ofo_queue(struct sock *sk);
4324static int tcp_prune_queue(struct sock *sk);
4325
4326static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
4327 unsigned int size)
4328{
4329 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4330 !sk_rmem_schedule(sk, skb, size)) {
4331
4332 if (tcp_prune_queue(sk) < 0)
4333 return -1;
4334
4335 if (!sk_rmem_schedule(sk, skb, size)) {
4336 if (!tcp_prune_ofo_queue(sk))
4337 return -1;
4338
4339 if (!sk_rmem_schedule(sk, skb, size))
4340 return -1;
4341 }
4342 }
4343 return 0;
4344}
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359static bool tcp_try_coalesce(struct sock *sk,
4360 struct sk_buff *to,
4361 struct sk_buff *from,
4362 bool *fragstolen)
4363{
4364 int delta;
4365
4366 *fragstolen = false;
4367
4368 if (tcp_hdr(from)->fin)
4369 return false;
4370
4371
4372 if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
4373 return false;
4374
4375 if (!skb_try_coalesce(to, from, fragstolen, &delta))
4376 return false;
4377
4378 atomic_add(delta, &sk->sk_rmem_alloc);
4379 sk_mem_charge(sk, delta);
4380 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
4381 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
4382 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
4383 return true;
4384}
4385
/*
 * Place a segment on tp->out_of_order_queue, keeping the queue ordered by
 * sequence number, and update the SACK / D-SACK state to match.
 *
 * On return @skb has been linked into the queue, merged into the queue
 * tail, or freed; the caller must not use it afterwards.
 */
static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb1;
	u32 seq, end_seq;

	TCP_ECN_check_ce(tp, skb);

	/* No receive memory available for this segment: count and drop it. */
	if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP);
		__kfree_skb(skb);
		return;
	}

	/*
	 * Clear pred_flags and make sure an ACK gets scheduled.
	 * NOTE(review): clearing pred_flags presumably disables the
	 * header-prediction fast path - confirm.
	 */
	tp->pred_flags = 0;
	inet_csk_schedule_ack(sk);

	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
		   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);

	skb1 = skb_peek_tail(&tp->out_of_order_queue);
	if (!skb1) {
		/* Queue is empty: this segment becomes the only SACK block. */
		if (tcp_is_sack(tp)) {
			tp->rx_opt.num_sacks = 1;
			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
			tp->selective_acks[0].end_seq =
						TCP_SKB_CB(skb)->end_seq;
		}
		__skb_queue_head(&tp->out_of_order_queue, skb);
		goto end;
	}

	seq = TCP_SKB_CB(skb)->seq;
	end_seq = TCP_SKB_CB(skb)->end_seq;

	/* Segment starts exactly where the current queue tail ends. */
	if (seq == TCP_SKB_CB(skb1)->end_seq) {
		bool fragstolen;

		if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
		} else {
			/* Data now lives in skb1; skb itself is released. */
			kfree_skb_partial(skb, fragstolen);
			skb = NULL;
		}

		if (!tp->rx_opt.num_sacks ||
		    tp->selective_acks[0].end_seq != seq)
			goto add_sack;

		/* The first SACK block ended at seq: just extend it. */
		tp->selective_acks[0].end_seq = end_seq;
		goto end;
	}

	/*
	 * Walk backwards from the tail to find the last queued segment whose
	 * start is not after seq; skb1 becomes NULL if there is none.
	 */
	while (1) {
		if (!after(TCP_SKB_CB(skb1)->seq, seq))
			break;
		if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
			skb1 = NULL;
			break;
		}
		skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
	}

	/* Does the new segment overlap the one it would be inserted after? */
	if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
		if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
			/* Entirely covered by skb1: drop it and report a D-SACK. */
			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
			__kfree_skb(skb);
			skb = NULL;
			tcp_dsack_set(sk, seq, end_seq);
			goto add_sack;
		}
		if (after(seq, TCP_SKB_CB(skb1)->seq)) {
			/* Partial overlap with the tail of skb1. */
			tcp_dsack_set(sk, seq,
				      TCP_SKB_CB(skb1)->end_seq);
		} else {
			/*
			 * Same start as skb1 but extends past it: insert
			 * before skb1 so the loop below can remove skb1.
			 */
			if (skb_queue_is_first(&tp->out_of_order_queue,
					       skb1))
				skb1 = NULL;
			else
				skb1 = skb_queue_prev(
					&tp->out_of_order_queue,
					skb1);
		}
	}
	if (!skb1)
		__skb_queue_head(&tp->out_of_order_queue, skb);
	else
		__skb_queue_after(&tp->out_of_order_queue, skb1, skb);

	/* Remove following segments that the new one covers completely. */
	while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
		skb1 = skb_queue_next(&tp->out_of_order_queue, skb);

		/* No overlap with the next segment: done. */
		if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
			break;
		/* Next segment is only partly covered: keep it, note D-SACK. */
		if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
			tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
					 end_seq);
			break;
		}
		__skb_unlink(skb1, &tp->out_of_order_queue);
		tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
				 TCP_SKB_CB(skb1)->end_seq);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
		__kfree_skb(skb1);
	}

add_sack:
	if (tcp_is_sack(tp))
		tcp_sack_new_ofo_skb(sk, seq, end_seq);
end:
	/* skb is NULL when it was merged or dropped above. */
	if (skb)
		skb_set_owner_r(skb, sk);
}
4508
4509static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
4510 bool *fragstolen)
4511{
4512 int eaten;
4513 struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
4514
4515 __skb_pull(skb, hdrlen);
4516 eaten = (tail &&
4517 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
4518 tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4519 if (!eaten) {
4520 __skb_queue_tail(&sk->sk_receive_queue, skb);
4521 skb_set_owner_r(skb, sk);
4522 }
4523 return eaten;
4524}
4525
4526int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
4527{
4528 struct sk_buff *skb = NULL;
4529 struct tcphdr *th;
4530 bool fragstolen;
4531
4532 if (size == 0)
4533 return 0;
4534
4535 skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
4536 if (!skb)
4537 goto err;
4538
4539 if (tcp_try_rmem_schedule(sk, skb, size + sizeof(*th)))
4540 goto err_free;
4541
4542 th = (struct tcphdr *)skb_put(skb, sizeof(*th));
4543 skb_reset_transport_header(skb);
4544 memset(th, 0, sizeof(*th));
4545
4546 if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size))
4547 goto err_free;
4548
4549 TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
4550 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
4551 TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
4552
4553 if (tcp_queue_rcv(sk, skb, sizeof(*th), &fragstolen)) {
4554 WARN_ON_ONCE(fragstolen);
4555 __kfree_skb(skb);
4556 }
4557 return size;
4558
4559err_free:
4560 kfree_skb(skb);
4561err:
4562 return -ENOMEM;
4563}
4564
/*
 * Dispatch an incoming data segment:
 *  - in sequence (seq == rcv_nxt): copy to the waiting reader and/or put it
 *    on sk_receive_queue, then try to drain the out-of-order queue;
 *  - entirely old (end_seq <= rcv_nxt): record a D-SACK and drop;
 *  - beyond the receive window: schedule an ACK and drop;
 *  - partially old: record a D-SACK for the old head and queue it;
 *  - otherwise: hand it to tcp_data_queue_ofo().
 *
 * The skb is consumed on every path.
 */
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcp_sock *tp = tcp_sk(sk);
	/* -1: nothing copied yet, 0: partly copied to user, >0: fully consumed */
	int eaten = -1;
	bool fragstolen = false;

	/* Segment occupies no sequence space: nothing to queue. */
	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
		goto drop;

	skb_dst_drop(skb);
	/* Strip the TCP header (doff is in 32-bit words). */
	__skb_pull(skb, th->doff * 4);

	TCP_ECN_accept_cwr(tp, skb);

	tp->rx_opt.dsack = 0;

	/* Case 1: segment starts exactly at the next expected byte. */
	if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
		if (tcp_receive_window(tp) == 0)
			goto out_of_window;

		/*
		 * The socket owner is the current task, is waiting for data
		 * and has consumed everything up to rcv_nxt: copy straight
		 * into its iovec.
		 */
		if (tp->ucopy.task == current &&
		    tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
		    sock_owned_by_user(sk) && !tp->urg_data) {
			int chunk = min_t(unsigned int, skb->len,
					  tp->ucopy.len);

			__set_current_state(TASK_RUNNING);

			/* Bottom halves are re-enabled around the user copy. */
			local_bh_enable();
			if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
				tp->ucopy.len -= chunk;
				tp->copied_seq += chunk;
				eaten = (chunk == skb->len);
				tcp_rcv_space_adjust(sk);
			}
			local_bh_disable();
		}

		if (eaten <= 0) {
			/*
			 * Also entered by goto from the partial-overlap case
			 * below, with eaten still -1.
			 */
queue_and_out:
			/* Memory is only scheduled if nothing was copied. */
			if (eaten < 0 &&
			    tcp_try_rmem_schedule(sk, skb, skb->truesize))
				goto drop;

			eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
		}
		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		if (skb->len)
			tcp_event_data_recv(sk, skb);
		if (th->fin)
			tcp_fin(sk);

		if (!skb_queue_empty(&tp->out_of_order_queue)) {
			tcp_ofo_queue(sk);

			/*
			 * The out-of-order queue was fully drained: leave
			 * pingpong mode.
			 * NOTE(review): presumably so the gap-filling segment
			 * is ACKed immediately - confirm.
			 */
			if (skb_queue_empty(&tp->out_of_order_queue))
				inet_csk(sk)->icsk_ack.pingpong = 0;
		}

		if (tp->rx_opt.num_sacks)
			tcp_sack_remove(tp);

		tcp_fast_path_check(sk);

		/* Data was copied out or merged: release our reference. */
		if (eaten > 0)
			kfree_skb_partial(skb, fragstolen);
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_data_ready(sk, 0);
		return;
	}

	/* Case 2: everything in this segment was already received. */
	if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);

out_of_window:
		/* Make sure an ACK goes out promptly, then discard. */
		tcp_enter_quickack_mode(sk);
		inet_csk_schedule_ack(sk);
drop:
		__kfree_skb(skb);
		return;
	}

	/* Case 3: segment starts at or beyond the right window edge. */
	if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
		goto out_of_window;

	tcp_enter_quickack_mode(sk);

	/* Case 4: seq < rcv_nxt < end_seq, only the tail is new. */
	if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
		SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
			   tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
			   TCP_SKB_CB(skb)->end_seq);

		/* Report the duplicated head via D-SACK. */
		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);

		/*
		 * With a closed window the new tail cannot be accepted; the
		 * D-SACK recorded above is kept.
		 */
		if (!tcp_receive_window(tp))
			goto out_of_window;
		goto queue_and_out;
	}

	/* Case 5: starts after rcv_nxt, inside the window. */
	tcp_data_queue_ofo(sk, skb);
}
4682
4683static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
4684 struct sk_buff_head *list)
4685{
4686 struct sk_buff *next = NULL;
4687
4688 if (!skb_queue_is_last(list, skb))
4689 next = skb_queue_next(list, skb);
4690
4691 __skb_unlink(skb, list);
4692 __kfree_skb(skb);
4693 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
4694
4695 return next;
4696}
4697
4698
4699
4700
4701
4702
4703
4704
4705
/*
 * Collapse the run of skbs on @list from @head up to (not including) @tail,
 * covering sequence numbers @start..@end, by copying their payload into
 * freshly allocated, densely packed skbs and freeing the originals.
 *
 * Buffers carrying SYN or FIN are never collapsed.  The function gives up
 * silently if a replacement buffer cannot be allocated.
 */
static void
tcp_collapse(struct sock *sk, struct sk_buff_head *list,
	     struct sk_buff *head, struct sk_buff *tail,
	     u32 start, u32 end)
{
	struct sk_buff *skb, *n;
	bool end_of_skbs;

	/*
	 * Phase 1: find the first skb worth collapsing, freeing skbs that
	 * hold no data at or after @start along the way.
	 */
	skb = head;
restart:
	end_of_skbs = true;
	skb_queue_walk_from_safe(list, skb, n) {
		if (skb == tail)
			break;

		/* Everything in this skb lies before @start: discard it. */
		if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
			skb = tcp_collapse_one(sk, skb, list);
			if (!skb)
				break;
			goto restart;
		}

		/*
		 * Stop at the first skb that is not SYN/FIN and either uses
		 * more memory than its payload justifies or still contains
		 * data before @start.
		 */
		if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
		    (tcp_win_from_space(skb->truesize) > skb->len ||
		     before(TCP_SKB_CB(skb)->seq, start))) {
			end_of_skbs = false;
			break;
		}

		/* Also stop if this skb is not contiguous with the next one. */
		if (!skb_queue_is_last(list, skb)) {
			struct sk_buff *next = skb_queue_next(list, skb);
			if (next != tail &&
			    TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) {
				end_of_skbs = false;
				break;
			}
		}

		/* This skb is left as is; move @start past it. */
		start = TCP_SKB_CB(skb)->end_seq;
	}
	if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
		return;

	/* Phase 2: repack the data from @start to @end into new skbs. */
	while (before(start, end)) {
		struct sk_buff *nskb;
		unsigned int header = skb_headroom(skb);
		int copy = SKB_MAX_ORDER(header, 0);

		/* Headroom alone exceeds what SKB_MAX_ORDER allows. */
		if (copy < 0)
			return;
		if (end - start < copy)
			copy = end - start;
		nskb = alloc_skb(copy + header, GFP_ATOMIC);
		if (!nskb)
			return;

		/* Replicate header offsets, headroom contents and cb[]. */
		skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
		skb_set_network_header(nskb, (skb_network_header(skb) -
					      skb->head));
		skb_set_transport_header(nskb, (skb_transport_header(skb) -
						skb->head));
		skb_reserve(nskb, header);
		memcpy(nskb->head, skb->head, header);
		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
		/* New skb starts empty at @start; end_seq grows as data lands. */
		TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
		__skb_queue_before(list, skb, nskb);
		skb_set_owner_r(nskb, sk);

		/* Fill nskb from successive old skbs, freeing each once used up. */
		while (copy > 0) {
			int offset = start - TCP_SKB_CB(skb)->seq;
			int size = TCP_SKB_CB(skb)->end_seq - start;

			BUG_ON(offset < 0);
			if (size > 0) {
				size = min(copy, size);
				if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
					BUG();
				TCP_SKB_CB(nskb)->end_seq += size;
				copy -= size;
				start += size;
			}
			if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
				skb = tcp_collapse_one(sk, skb, list);
				/* Ran out of collapsible input: finished. */
				if (!skb ||
				    skb == tail ||
				    tcp_hdr(skb)->syn ||
				    tcp_hdr(skb)->fin)
					return;
			}
		}
	}
}
4808
4809
4810
4811
4812static void tcp_collapse_ofo_queue(struct sock *sk)
4813{
4814 struct tcp_sock *tp = tcp_sk(sk);
4815 struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
4816 struct sk_buff *head;
4817 u32 start, end;
4818
4819 if (skb == NULL)
4820 return;
4821
4822 start = TCP_SKB_CB(skb)->seq;
4823 end = TCP_SKB_CB(skb)->end_seq;
4824 head = skb;
4825
4826 for (;;) {
4827 struct sk_buff *next = NULL;
4828
4829 if (!skb_queue_is_last(&tp->out_of_order_queue, skb))
4830 next = skb_queue_next(&tp->out_of_order_queue, skb);
4831 skb = next;
4832
4833
4834
4835 if (!skb ||
4836 after(TCP_SKB_CB(skb)->seq, end) ||
4837 before(TCP_SKB_CB(skb)->end_seq, start)) {
4838 tcp_collapse(sk, &tp->out_of_order_queue,
4839 head, skb, start, end);
4840 head = skb;
4841 if (!skb)
4842 break;
4843
4844 start = TCP_SKB_CB(skb)->seq;
4845 end = TCP_SKB_CB(skb)->end_seq;
4846 } else {
4847 if (before(TCP_SKB_CB(skb)->seq, start))
4848 start = TCP_SKB_CB(skb)->seq;
4849 if (after(TCP_SKB_CB(skb)->end_seq, end))
4850 end = TCP_SKB_CB(skb)->end_seq;
4851 }
4852 }
4853}
4854
4855
4856
4857
4858
4859static bool tcp_prune_ofo_queue(struct sock *sk)
4860{
4861 struct tcp_sock *tp = tcp_sk(sk);
4862 bool res = false;
4863
4864 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4865 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
4866 __skb_queue_purge(&tp->out_of_order_queue);
4867
4868
4869
4870
4871
4872
4873 if (tp->rx_opt.sack_ok)
4874 tcp_sack_reset(&tp->rx_opt);
4875 sk_mem_reclaim(sk);
4876 res = true;
4877 }
4878 return res;
4879}
4880
4881
4882
4883
4884
4885
4886
4887
4888static int tcp_prune_queue(struct sock *sk)
4889{
4890 struct tcp_sock *tp = tcp_sk(sk);
4891
4892 SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
4893
4894 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED);
4895
4896 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
4897 tcp_clamp_window(sk);
4898 else if (sk_under_memory_pressure(sk))
4899 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
4900
4901 tcp_collapse_ofo_queue(sk);
4902 if (!skb_queue_empty(&sk->sk_receive_queue))
4903 tcp_collapse(sk, &sk->sk_receive_queue,
4904 skb_peek(&sk->sk_receive_queue),
4905 NULL,
4906 tp->copied_seq, tp->rcv_nxt);
4907 sk_mem_reclaim(sk);
4908
4909 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4910 return 0;
4911
4912
4913
4914
4915 tcp_prune_ofo_queue(sk);
4916
4917 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4918 return 0;
4919
4920
4921
4922
4923
4924 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED);
4925
4926
4927 tp->pred_flags = 0;
4928 return -1;
4929}
4930
4931
4932
4933
4934
4935void tcp_cwnd_application_limited(struct sock *sk)
4936{
4937 struct tcp_sock *tp = tcp_sk(sk);
4938
4939 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
4940 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
4941
4942 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
4943 u32 win_used = max(tp->snd_cwnd_used, init_win);
4944 if (win_used < tp->snd_cwnd) {
4945 tp->snd_ssthresh = tcp_current_ssthresh(sk);
4946 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
4947 }
4948 tp->snd_cwnd_used = 0;
4949 }
4950 tp->snd_cwnd_stamp = tcp_time_stamp;
4951}
4952
4953static bool tcp_should_expand_sndbuf(const struct sock *sk)
4954{
4955 const struct tcp_sock *tp = tcp_sk(sk);
4956
4957
4958
4959
4960 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
4961 return false;
4962
4963
4964 if (sk_under_memory_pressure(sk))
4965 return false;
4966
4967
4968 if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
4969 return false;
4970
4971
4972 if (tp->packets_out >= tp->snd_cwnd)
4973 return false;
4974
4975 return true;
4976}
4977
4978
4979
4980
4981
4982
4983
4984static void tcp_new_space(struct sock *sk)
4985{
4986 struct tcp_sock *tp = tcp_sk(sk);
4987
4988 if (tcp_should_expand_sndbuf(sk)) {
4989 int sndmem = SKB_TRUESIZE(max_t(u32,
4990 tp->rx_opt.mss_clamp,
4991 tp->mss_cache) +
4992 MAX_TCP_HEADER);
4993 int demanded = max_t(unsigned int, tp->snd_cwnd,
4994 tp->reordering + 1);
4995 sndmem *= 2 * demanded;
4996 if (sndmem > sk->sk_sndbuf)
4997 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
4998 tp->snd_cwnd_stamp = tcp_time_stamp;
4999 }
5000
5001 sk->sk_write_space(sk);
5002}
5003
5004static void tcp_check_space(struct sock *sk)
5005{
5006 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
5007 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
5008 if (sk->sk_socket &&
5009 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
5010 tcp_new_space(sk);
5011 }
5012}
5013
/*
 * Send-side housekeeping: push out any pending frames, then check whether
 * freed send-queue space should be announced to writers.
 */
static inline void tcp_data_snd_check(struct sock *sk)
{
	tcp_push_pending_frames(sk);
	tcp_check_space(sk);
}
5019
5020
5021
5022
5023static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
5024{
5025 struct tcp_sock *tp = tcp_sk(sk);
5026
5027
5028 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
5029
5030
5031
5032 __tcp_select_window(sk) >= tp->rcv_wnd) ||
5033
5034 tcp_in_quickack_mode(sk) ||
5035
5036 (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
5037
5038 tcp_send_ack(sk);
5039 } else {
5040
5041 tcp_send_delayed_ack(sk);
5042 }
5043}
5044
/*
 * Run the ACK decision logic, but only if an ACK has actually been
 * scheduled for this socket; otherwise there is nothing to do.
 */
static inline void tcp_ack_snd_check(struct sock *sk)
{
	if (inet_csk_ack_scheduled(sk))
		__tcp_ack_snd_check(sk, 1);
}
5053
5054
5055
5056
5057
5058
5059
5060
5061
5062
5063
5064static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
5065{
5066 struct tcp_sock *tp = tcp_sk(sk);
5067 u32 ptr = ntohs(th->urg_ptr);
5068
5069 if (ptr && !sysctl_tcp_stdurg)
5070 ptr--;
5071 ptr += ntohl(th->seq);
5072
5073
5074 if (after(tp->copied_seq, ptr))
5075 return;
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087 if (before(ptr, tp->rcv_nxt))
5088 return;
5089
5090
5091 if (tp->urg_data && !after(ptr, tp->urg_seq))
5092 return;
5093
5094
5095 sk_send_sigurg(sk);
5096
5097
5098
5099
5100
5101
5102
5103
5104
5105
5106
5107
5108
5109
5110
5111
5112 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
5113 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
5114 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
5115 tp->copied_seq++;
5116 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
5117 __skb_unlink(skb, &sk->sk_receive_queue);
5118 __kfree_skb(skb);
5119 }
5120 }
5121
5122 tp->urg_data = TCP_URG_NOTYET;
5123 tp->urg_seq = ptr;
5124
5125
5126 tp->pred_flags = 0;
5127}
5128
5129
5130static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
5131{
5132 struct tcp_sock *tp = tcp_sk(sk);
5133
5134
5135 if (th->urg)
5136 tcp_check_urg(sk, th);
5137
5138
5139 if (tp->urg_data == TCP_URG_NOTYET) {
5140 u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
5141 th->syn;
5142
5143
5144 if (ptr < skb->len) {
5145 u8 tmp;
5146 if (skb_copy_bits(skb, ptr, &tmp, 1))
5147 BUG();
5148 tp->urg_data = TCP_URG_VALID | tmp;
5149 if (!sock_flag(sk, SOCK_DEAD))
5150 sk->sk_data_ready(sk, 0);
5151 }
5152 }
5153}
5154
5155static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
5156{
5157 struct tcp_sock *tp = tcp_sk(sk);
5158 int chunk = skb->len - hlen;
5159 int err;
5160
5161 local_bh_enable();
5162 if (skb_csum_unnecessary(skb))
5163 err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
5164 else
5165 err = skb_copy_and_csum_datagram_iovec(skb, hlen,
5166 tp->ucopy.iov);
5167
5168 if (!err) {
5169 tp->ucopy.len -= chunk;
5170 tp->copied_seq += chunk;
5171 tcp_rcv_space_adjust(sk);
5172 }
5173
5174 local_bh_disable();
5175 return err;
5176}
5177
5178static __sum16 __tcp_checksum_complete_user(struct sock *sk,
5179 struct sk_buff *skb)
5180{
5181 __sum16 result;
5182
5183 if (sock_owned_by_user(sk)) {
5184 local_bh_enable();
5185 result = __tcp_checksum_complete(skb);
5186 local_bh_disable();
5187 } else {
5188 result = __tcp_checksum_complete(skb);
5189 }
5190 return result;
5191}
5192
5193static inline bool tcp_checksum_complete_user(struct sock *sk,
5194 struct sk_buff *skb)
5195{
5196 return !skb_csum_unnecessary(skb) &&
5197 __tcp_checksum_complete_user(sk, skb);
5198}
5199
5200#ifdef CONFIG_NET_DMA
5201static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
5202 int hlen)
5203{
5204 struct tcp_sock *tp = tcp_sk(sk);
5205 int chunk = skb->len - hlen;
5206 int dma_cookie;
5207 bool copied_early = false;
5208
5209 if (tp->ucopy.wakeup)
5210 return false;
5211
5212 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
5213 tp->ucopy.dma_chan = net_dma_find_channel();
5214
5215 if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
5216
5217 dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
5218 skb, hlen,
5219 tp->ucopy.iov, chunk,
5220 tp->ucopy.pinned_list);
5221
5222 if (dma_cookie < 0)
5223 goto out;
5224
5225 tp->ucopy.dma_cookie = dma_cookie;
5226 copied_early = true;
5227
5228 tp->ucopy.len -= chunk;
5229 tp->copied_seq += chunk;
5230 tcp_rcv_space_adjust(sk);
5231
5232 if ((tp->ucopy.len == 0) ||
5233 (tcp_flag_word(