1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37#define pr_fmt(fmt) "TCP: " fmt
38
39#include <net/tcp.h>
40
41#include <linux/compiler.h>
42#include <linux/gfp.h>
43#include <linux/module.h>
44
45
/* Default 1. Not referenced in the visible part of this file.
 * NOTE(review): presumably gates collapsing of segments on retransmit -
 * confirm against the retransmit path.
 */
int sysctl_tcp_retrans_collapse __read_mostly = 1;

/* When non-zero, tcp_select_initial_window() caps the initial window at
 * MAX_TCP_WINDOW, and tcp_select_window() does the same whenever no receive
 * window scaling is in use. Default 0.
 */
int sysctl_tcp_workaround_signed_windows __read_mostly = 0;

/* Default 131072. tcp_transmit_skb() installs tcp_wfree() as the skb
 * destructor only while this value is > 0; otherwise sock_wfree() is used.
 */
int sysctl_tcp_limit_output_bytes __read_mostly = 131072;

/* Default 3. Not referenced in the visible part of this file.
 * NOTE(review): presumably a divisor applied to the window when sizing TSO
 * bursts - confirm against its users.
 */
int sysctl_tcp_tso_win_divisor __read_mostly = 3;

/* tcp_mtup_init() enables MTU probing only when sysctl_tcp_mtu_probing > 1
 * and seeds the lower search bound from sysctl_tcp_base_mss.
 */
int sysctl_tcp_mtu_probing __read_mostly = 0;
int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;

/* When non-zero, tcp_event_data_sent() may call tcp_cwnd_restart() after an
 * idle period and tcp_cwnd_validate() may call
 * tcp_cwnd_application_limited(). Default 1.
 */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;

/* System-wide cookie size consulted by tcp_cookie_size_check() when the
 * socket itself did not request a size; values <= 0 yield no cookie.
 */
int sysctl_tcp_cookie_size __read_mostly = 0;
EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);

/* Forward declaration: tcp_tsq_handler() calls this before its definition. */
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
			   int push_one, gfp_t gfp);
73
74
75static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
76{
77 struct tcp_sock *tp = tcp_sk(sk);
78 unsigned int prior_packets = tp->packets_out;
79
80 tcp_advance_send_head(sk, skb);
81 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
82
83
84 if (tp->frto_counter == 2)
85 tp->frto_counter = 3;
86
87 tp->packets_out += tcp_skb_pcount(skb);
88 if (!prior_packets || tp->early_retrans_delayed)
89 tcp_rearm_rto(sk);
90}
91
92
93
94
95
96
97
98static inline __u32 tcp_acceptable_seq(const struct sock *sk)
99{
100 const struct tcp_sock *tp = tcp_sk(sk);
101
102 if (!before(tcp_wnd_end(tp), tp->snd_nxt))
103 return tp->snd_nxt;
104 else
105 return tcp_wnd_end(tp);
106}
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122static __u16 tcp_advertise_mss(struct sock *sk)
123{
124 struct tcp_sock *tp = tcp_sk(sk);
125 const struct dst_entry *dst = __sk_dst_get(sk);
126 int mss = tp->advmss;
127
128 if (dst) {
129 unsigned int metric = dst_metric_advmss(dst);
130
131 if (metric < mss) {
132 mss = metric;
133 tp->advmss = mss;
134 }
135 }
136
137 return (__u16)mss;
138}
139
140
141
142static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst)
143{
144 struct tcp_sock *tp = tcp_sk(sk);
145 s32 delta = tcp_time_stamp - tp->lsndtime;
146 u32 restart_cwnd = tcp_init_cwnd(tp, dst);
147 u32 cwnd = tp->snd_cwnd;
148
149 tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
150
151 tp->snd_ssthresh = tcp_current_ssthresh(sk);
152 restart_cwnd = min(restart_cwnd, cwnd);
153
154 while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
155 cwnd >>= 1;
156 tp->snd_cwnd = max(cwnd, restart_cwnd);
157 tp->snd_cwnd_stamp = tcp_time_stamp;
158 tp->snd_cwnd_used = 0;
159}
160
161
162static void tcp_event_data_sent(struct tcp_sock *tp,
163 struct sock *sk)
164{
165 struct inet_connection_sock *icsk = inet_csk(sk);
166 const u32 now = tcp_time_stamp;
167
168 if (sysctl_tcp_slow_start_after_idle &&
169 (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
170 tcp_cwnd_restart(sk, __sk_dst_get(sk));
171
172 tp->lsndtime = now;
173
174
175
176
177 if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
178 icsk->icsk_ack.pingpong = 1;
179}
180
181
/* Account for an outgoing ACK covering @pkts packets: update the quick-ACK
 * budget and cancel the pending delayed-ACK timer.
 */
static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
{
	tcp_dec_quickack_mode(sk, pkts);
	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
187
188
189
190
191
192
193
194
/* Choose the initial receive window, the window clamp and the window-scale
 * shift for a connection.
 *
 * @__space:      available receive space; negative values are treated as 0
 * @mss:          MSS used to round the window and to bound the initial window
 * @rcv_wnd:      out: initial receive window
 * @window_clamp: in/out: upper bound on the window; 0 means "not set" and is
 *                replaced by 65535 << 14
 * @wscale_ok:    non-zero if a window-scale shift may be selected
 * @rcv_wscale:   out: selected window-scale shift, 0..14
 * @init_rcv_wnd: when non-zero, initial window in units of @mss that is used
 *                instead of the built-in default
 */
void tcp_select_initial_window(int __space, __u32 mss,
			       __u32 *rcv_wnd, __u32 *window_clamp,
			       int wscale_ok, __u8 *rcv_wscale,
			       __u32 init_rcv_wnd)
{
	unsigned int space = (__space < 0 ? 0 : __space);

	/* If no clamp was set, use the largest window expressible with the
	 * maximum shift of 14.
	 */
	if (*window_clamp == 0)
		(*window_clamp) = (65535 << 14);
	space = min(*window_clamp, space);

	/* Round the space down to a whole number of segments. */
	if (space > mss)
		space = (space / mss) * mss;

	/* With the signed-windows workaround enabled the offered window is
	 * additionally capped at MAX_TCP_WINDOW.
	 */
	if (sysctl_tcp_workaround_signed_windows)
		(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
	else
		(*rcv_wnd) = space;

	(*rcv_wscale) = 0;
	if (wscale_ok) {
		/* Size the shift for the largest receive buffer that may be
		 * configured (bounded by the clamp), not for the current space.
		 */
		space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
		space = min_t(u32, space, *window_clamp);
		while (space > 65535 && (*rcv_wscale) < 14) {
			space >>= 1;
			(*rcv_wscale)++;
		}
	}

	/* Bound the initial window to a small number of segments. For an MSS
	 * above 1460 the default segment count is scaled down, but never
	 * below 2.
	 */
	if (mss > (1 << *rcv_wscale)) {
		int init_cwnd = TCP_DEFAULT_INIT_RCVWND;
		if (mss > 1460)
			init_cwnd =
			max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);

		/* A caller-supplied init_rcv_wnd overrides the default. */
		if (init_rcv_wnd)
			*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
		else
			*rcv_wnd = min(*rcv_wnd, init_cwnd * mss);
	}

	/* The clamp may not exceed what the selected shift can express. */
	(*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
}
EXPORT_SYMBOL(tcp_select_initial_window);
259
260
261
262
263
264
265static u16 tcp_select_window(struct sock *sk)
266{
267 struct tcp_sock *tp = tcp_sk(sk);
268 u32 cur_win = tcp_receive_window(tp);
269 u32 new_win = __tcp_select_window(sk);
270
271
272 if (new_win < cur_win) {
273
274
275
276
277
278
279
280 new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
281 }
282 tp->rcv_wnd = new_win;
283 tp->rcv_wup = tp->rcv_nxt;
284
285
286
287
288 if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
289 new_win = min(new_win, MAX_TCP_WINDOW);
290 else
291 new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
292
293
294 new_win >>= tp->rx_opt.rcv_wscale;
295
296
297 if (new_win == 0)
298 tp->pred_flags = 0;
299
300 return new_win;
301}
302
303
304static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb)
305{
306 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
307 if (!(tp->ecn_flags & TCP_ECN_OK))
308 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
309}
310
311
312static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
313{
314 struct tcp_sock *tp = tcp_sk(sk);
315
316 tp->ecn_flags = 0;
317 if (sysctl_tcp_ecn == 1) {
318 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
319 tp->ecn_flags = TCP_ECN_OK;
320 }
321}
322
323static __inline__ void
324TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th)
325{
326 if (inet_rsk(req)->ecn_ok)
327 th->ece = 1;
328}
329
330
331
332
333static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
334 int tcp_header_len)
335{
336 struct tcp_sock *tp = tcp_sk(sk);
337
338 if (tp->ecn_flags & TCP_ECN_OK) {
339
340 if (skb->len != tcp_header_len &&
341 !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
342 INET_ECN_xmit(sk);
343 if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
344 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
345 tcp_hdr(skb)->cwr = 1;
346 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
347 }
348 } else {
349
350 INET_ECN_dontxmit(sk);
351 }
352 if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
353 tcp_hdr(skb)->ece = 1;
354 }
355}
356
357
358
359
360static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
361{
362 skb->ip_summed = CHECKSUM_PARTIAL;
363 skb->csum = 0;
364
365 TCP_SKB_CB(skb)->tcp_flags = flags;
366 TCP_SKB_CB(skb)->sacked = 0;
367
368 skb_shinfo(skb)->gso_segs = 1;
369 skb_shinfo(skb)->gso_size = 0;
370 skb_shinfo(skb)->gso_type = 0;
371
372 TCP_SKB_CB(skb)->seq = seq;
373 if (flags & (TCPHDR_SYN | TCPHDR_FIN))
374 seq++;
375 TCP_SKB_CB(skb)->end_seq = seq;
376}
377
/* True while the urgent pointer (snd_up) differs from snd_una. */
static inline bool tcp_urg_mode(const struct tcp_sock *tp)
{
	return tp->snd_una != tp->snd_up;
}
382
/* Bits for tcp_out_options.options: which options tcp_options_write()
 * should emit.
 */
#define OPTION_SACK_ADVERTISE	(1 << 0)
#define OPTION_TS		(1 << 1)
#define OPTION_MD5		(1 << 2)
#define OPTION_WSCALE		(1 << 3)
#define OPTION_COOKIE_EXTENSION	(1 << 4)
#define OPTION_FAST_OPEN_COOKIE	(1 << 8)

/* Description of the TCP options to place in one outgoing segment; filled
 * by tcp_syn_options(), tcp_synack_options() or tcp_established_options()
 * and consumed by tcp_options_write().
 */
struct tcp_out_options {
	u16 options;		/* OPTION_* bits */
	u16 mss;		/* MSS option is written when non-zero */
	u8 ws;			/* window scale shift */
	u8 num_sack_blocks;	/* number of SACK blocks to write */
	u8 hash_size;		/* cookie payload size in bytes */
	__u8 *hash_location;	/* cookie source; overwritten with the MD5
				 * digest location when OPTION_MD5 is set
				 */
	__u32 tsval, tsecr;	/* timestamp option values, host byte order */
	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast Open cookie */
};
400
401
402
403static u8 tcp_cookie_size_check(u8 desired)
404{
405 int cookie_size;
406
407 if (desired > 0)
408
409 return desired;
410
411 cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
412 if (cookie_size <= 0)
413
414 return 0;
415
416 if (cookie_size <= TCP_COOKIE_MIN)
417
418 return TCP_COOKIE_MIN;
419
420 if (cookie_size >= TCP_COOKIE_MAX)
421
422 return TCP_COOKIE_MAX;
423
424 if (cookie_size & 1)
425
426 cookie_size++;
427
428 return (u8)cookie_size;
429}
430
431
432
433
434
435
436
437
438
439
440
441
442
443
/* Serialise the options described by @opts into the header area at @ptr.
 *
 * Every option is emitted as whole 32-bit words, in this order: MD5
 * signature placeholder, MSS, timestamps (optionally carrying SACK-permitted
 * in the padding), cookie extension, SACK-permitted, window scale, SACK
 * blocks, Fast Open cookie.
 *
 * Side effects: when OPTION_MD5 is set, opts->hash_location is redirected to
 * the 16 bytes reserved for the digest; after SACK blocks are written,
 * tp->rx_opt.dsack is cleared.
 */
static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
			      struct tcp_out_options *opts)
{
	u16 options = opts->options;	/* local copy; bits are cleared as consumed */

	if (unlikely(OPTION_MD5 & options)) {
		/* The two bytes before the MD5 option header hold either an
		 * empty cookie option or two NOPs.
		 */
		if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
			*ptr++ = htonl((TCPOPT_COOKIE << 24) |
				       (TCPOLEN_COOKIE_BASE << 16) |
				       (TCPOPT_MD5SIG << 8) |
				       TCPOLEN_MD5SIG);
		} else {
			*ptr++ = htonl((TCPOPT_NOP << 24) |
				       (TCPOPT_NOP << 16) |
				       (TCPOPT_MD5SIG << 8) |
				       TCPOLEN_MD5SIG);
		}
		options &= ~OPTION_COOKIE_EXTENSION;
		/* Reuse hash_location to report where the digest goes, then
		 * skip the four words reserved for it.
		 */
		opts->hash_location = (__u8 *)ptr;
		ptr += 4;
	}

	if (unlikely(opts->mss)) {
		*ptr++ = htonl((TCPOPT_MSS << 24) |
			       (TCPOLEN_MSS << 16) |
			       opts->mss);
	}

	if (likely(OPTION_TS & options)) {
		/* SACK-permitted, when requested, replaces the two NOPs that
		 * would otherwise pad the timestamp option.
		 */
		if (unlikely(OPTION_SACK_ADVERTISE & options)) {
			*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
				       (TCPOLEN_SACK_PERM << 16) |
				       (TCPOPT_TIMESTAMP << 8) |
				       TCPOLEN_TIMESTAMP);
			options &= ~OPTION_SACK_ADVERTISE;
		} else {
			*ptr++ = htonl((TCPOPT_NOP << 24) |
				       (TCPOPT_NOP << 16) |
				       (TCPOPT_TIMESTAMP << 8) |
				       TCPOLEN_TIMESTAMP);
		}
		*ptr++ = htonl(opts->tsval);
		*ptr++ = htonl(opts->tsecr);
	}

	/* Cookie extension. Not reached when MD5 is in use, because the bit
	 * was cleared above.
	 */
	if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
		__u8 *cookie_copy = opts->hash_location;
		u8 cookie_size = opts->hash_size;

		if (0x2 & cookie_size) {
			/* Size is 2 mod 4: the 2-byte option header and the
			 * first two cookie bytes fill one word exactly.
			 */
			__u8 *p = (__u8 *)ptr;

			/* 16-bit multiple */
			*p++ = TCPOPT_COOKIE;
			*p++ = TCPOLEN_COOKIE_BASE + cookie_size;
			*p++ = *cookie_copy++;
			*p++ = *cookie_copy++;
			ptr++;
			cookie_size -= 2;
		} else {
			/* Size is a multiple of 4: pad the header with two
			 * NOPs; the length byte is base + cookie_size.
			 */
			*ptr++ = htonl(((TCPOPT_NOP << 24) |
					(TCPOPT_NOP << 16) |
					(TCPOPT_COOKIE << 8) |
					TCPOLEN_COOKIE_BASE) +
				       cookie_size);
		}

		if (cookie_size > 0) {
			memcpy(ptr, cookie_copy, cookie_size);
			ptr += (cookie_size / 4);
		}
	}

	if (unlikely(OPTION_SACK_ADVERTISE & options)) {
		*ptr++ = htonl((TCPOPT_NOP << 24) |
			       (TCPOPT_NOP << 16) |
			       (TCPOPT_SACK_PERM << 8) |
			       TCPOLEN_SACK_PERM);
	}

	if (unlikely(OPTION_WSCALE & options)) {
		*ptr++ = htonl((TCPOPT_NOP << 24) |
			       (TCPOPT_WINDOW << 16) |
			       (TCPOLEN_WINDOW << 8) |
			       opts->ws);
	}

	if (unlikely(opts->num_sack_blocks)) {
		/* With a D-SACK pending, blocks are taken starting at
		 * duplicate_sack; otherwise from selective_acks.
		 */
		struct tcp_sack_block *sp = tp->rx_opt.dsack ?
			tp->duplicate_sack : tp->selective_acks;
		int this_sack;

		*ptr++ = htonl((TCPOPT_NOP  << 24) |
			       (TCPOPT_NOP  << 16) |
			       (TCPOPT_SACK <<  8) |
			       (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
						     TCPOLEN_SACK_PERBLOCK)));

		for (this_sack = 0; this_sack < opts->num_sack_blocks;
		     ++this_sack) {
			*ptr++ = htonl(sp[this_sack].start_seq);
			*ptr++ = htonl(sp[this_sack].end_seq);
		}

		tp->rx_opt.dsack = 0;
	}

	if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
		struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;

		/* Experimental option: kind, length, then the 16-bit magic. */
		*ptr++ = htonl((TCPOPT_EXP << 24) |
			       ((TCPOLEN_EXP_FASTOPEN_BASE + foc->len) << 16) |
			       TCPOPT_FASTOPEN_MAGIC);

		memcpy(ptr, foc->val, foc->len);
		/* A cookie length of 2 mod 4 leaves two bytes of the last
		 * word unused; fill them with NOPs.
		 */
		if ((foc->len & 3) == 2) {
			u8 *align = ((u8 *)ptr) + foc->len;
			align[0] = align[1] = TCPOPT_NOP;
		}
		ptr += (foc->len + 3) >> 2;
	}
}
585
586
587
588
/* Decide which options go into an outgoing SYN and fill @opts accordingly.
 *
 * @md5 receives the MD5 key for this socket, or NULL when there is none or
 * CONFIG_TCP_MD5SIG is disabled.
 *
 * Returns the number of option bytes that will be used, i.e.
 * MAX_TCP_OPTION_SPACE minus what remains.
 */
static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
				struct tcp_out_options *opts,
				struct tcp_md5sig_key **md5)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_cookie_values *cvp = tp->cookie_values;
	unsigned int remaining = MAX_TCP_OPTION_SPACE;
	/* Cookie size is 0 when cookies are disabled for this socket or no
	 * cookie state exists.
	 */
	u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ?
			 tcp_cookie_size_check(cvp->cookie_desired) :
			 0;
	struct tcp_fastopen_request *fastopen = tp->fastopen_req;

#ifdef CONFIG_TCP_MD5SIG
	*md5 = tp->af_specific->md5_lookup(sk, sk);
	if (*md5) {
		opts->options |= OPTION_MD5;
		remaining -= TCPOLEN_MD5SIG_ALIGNED;
	}
#else
	*md5 = NULL;
#endif

	/* MSS is always sent on a SYN. */
	opts->mss = tcp_advertise_mss(sk);
	remaining -= TCPOLEN_MSS_ALIGNED;

	/* Timestamps are only offered when no MD5 key is in use. */
	if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
		opts->options |= OPTION_TS;
		opts->tsval = TCP_SKB_CB(skb)->when;
		opts->tsecr = tp->rx_opt.ts_recent;
		remaining -= TCPOLEN_TSTAMP_ALIGNED;
	}
	if (likely(sysctl_tcp_window_scaling)) {
		opts->ws = tp->rx_opt.rcv_wscale;
		opts->options |= OPTION_WSCALE;
		remaining -= TCPOLEN_WSCALE_ALIGNED;
	}
	if (likely(sysctl_tcp_sack)) {
		opts->options |= OPTION_SACK_ADVERTISE;
		/* With timestamps present SACK-permitted shares their padding
		 * (see tcp_options_write()) and costs no extra space.
		 */
		if (unlikely(!(OPTION_TS & opts->options)))
			remaining -= TCPOLEN_SACKPERM_ALIGNED;
	}

	/* Fast Open: a cookie length of 0 is accepted too; only a negative
	 * length suppresses the option.
	 */
	if (fastopen && fastopen->cookie.len >= 0) {
		u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len;
		need = (need + 3) & ~3U;  /* Align to 32 bits */
		if (remaining >= need) {
			opts->options |= OPTION_FAST_OPEN_COOKIE;
			opts->fastopen_cookie = &fastopen->cookie;
			remaining -= need;
			tp->syn_fastopen = 1;
		}
	}

	/* Cookie extension: only without MD5 and with timestamps enabled.
	 * The cookie is shrunk, if necessary, until it fits into the
	 * remaining space or drops below TCP_COOKIE_MIN.
	 */
	if (*md5 == NULL &&
	    (OPTION_TS & opts->options) &&
	    cookie_size > 0) {
		int need = TCPOLEN_COOKIE_BASE + cookie_size;

		if (0x2 & need) {
			/* Not a 32-bit multiple: two NOPs of padding needed. */
			need += 2;

			if (need > remaining) {
				/* Shrink the cookie by 2 instead; this also
				 * makes the padding unnecessary.
				 */
				cookie_size -= 2;
				need -= 4;
			}
		}
		while (need > remaining && TCP_COOKIE_MIN <= cookie_size) {
			cookie_size -= 4;
			need -= 4;
		}
		if (TCP_COOKIE_MIN <= cookie_size) {
			opts->options |= OPTION_COOKIE_EXTENSION;
			opts->hash_location = (__u8 *)&cvp->cookie_pair[0];
			opts->hash_size = cookie_size;

			/* Remember the size actually used. */
			cvp->cookie_desired = cookie_size;

			if (cvp->cookie_desired != cvp->cookie_pair_size) {
				/* The stored cookie has a different size:
				 * generate a fresh random one.
				 */
				get_random_bytes(&cvp->cookie_pair[0],
						 cookie_size);
				cvp->cookie_pair_size = cookie_size;
			}

			remaining -= need;
		}
	}
	return MAX_TCP_OPTION_SPACE - remaining;
}
698
699
/* Decide which options go into a SYN-ACK for request @req and fill @opts.
 *
 * @mss is placed in the MSS option unchanged. @md5 receives the MD5 key for
 * the request, or NULL. @xvp may be NULL; when the cookie does not fit,
 * xvp->cookie_out_never is set.
 *
 * Returns the number of option bytes that will be used.
 */
static unsigned int tcp_synack_options(struct sock *sk,
				   struct request_sock *req,
				   unsigned int mss, struct sk_buff *skb,
				   struct tcp_out_options *opts,
				   struct tcp_md5sig_key **md5,
				   struct tcp_extend_values *xvp)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	unsigned int remaining = MAX_TCP_OPTION_SPACE;
	u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ?
			 xvp->cookie_plus :
			 0;

#ifdef CONFIG_TCP_MD5SIG
	*md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
	if (*md5) {
		opts->options |= OPTION_MD5;
		remaining -= TCPOLEN_MD5SIG_ALIGNED;

		/* With MD5 in use, timestamps are switched off for this
		 * request whenever SACK was negotiated.
		 * NOTE(review): presumably because MD5 + TS leaves no room
		 * for SACK blocks later - confirm.
		 */
		ireq->tstamp_ok &= !ireq->sack_ok;
	}
#else
	*md5 = NULL;
#endif

	/* MSS is always sent on a SYN-ACK. */
	opts->mss = mss;
	remaining -= TCPOLEN_MSS_ALIGNED;

	if (likely(ireq->wscale_ok)) {
		opts->ws = ireq->rcv_wscale;
		opts->options |= OPTION_WSCALE;
		remaining -= TCPOLEN_WSCALE_ALIGNED;
	}
	if (likely(ireq->tstamp_ok)) {
		opts->options |= OPTION_TS;
		opts->tsval = TCP_SKB_CB(skb)->when;
		opts->tsecr = req->ts_recent;
		remaining -= TCPOLEN_TSTAMP_ALIGNED;
	}
	if (likely(ireq->sack_ok)) {
		opts->options |= OPTION_SACK_ADVERTISE;
		/* SACK-permitted only costs space without timestamps. */
		if (unlikely(!ireq->tstamp_ok))
			remaining -= TCPOLEN_SACKPERM_ALIGNED;
	}

	/* Cookie extension: only without MD5, with timestamps, and when
	 * cookie_plus describes a non-empty cookie.
	 */
	if (*md5 == NULL &&
	    ireq->tstamp_ok &&
	    cookie_plus > TCPOLEN_COOKIE_BASE) {
		int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */

		if (0x2 & need) {
			/* Not a 32-bit multiple: two NOPs of padding needed. */
			need += 2;
		}
		if (need <= remaining) {
			opts->options |= OPTION_COOKIE_EXTENSION;
			opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE;
			remaining -= need;
		} else {
			/* Does not fit: give up on cookies for this peer. */
			xvp->cookie_out_never = 1;
			opts->hash_size = 0;
		}
	}
	return MAX_TCP_OPTION_SPACE - remaining;
}
775
776
777
778
/* Decide which options go into a segment on an established connection and
 * fill @opts. @skb may be NULL (tcp_current_mss() passes NULL just to learn
 * the size); the timestamp value is then 0.
 *
 * Returns the number of option bytes that will be used.
 */
static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
					struct tcp_out_options *opts,
					struct tcp_md5sig_key **md5)
{
	struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int size = 0;
	unsigned int eff_sacks;

#ifdef CONFIG_TCP_MD5SIG
	*md5 = tp->af_specific->md5_lookup(sk, sk);
	if (unlikely(*md5)) {
		opts->options |= OPTION_MD5;
		size += TCPOLEN_MD5SIG_ALIGNED;
	}
#else
	*md5 = NULL;
#endif

	if (likely(tp->rx_opt.tstamp_ok)) {
		opts->options |= OPTION_TS;
		opts->tsval = tcb ? tcb->when : 0;
		opts->tsecr = tp->rx_opt.ts_recent;
		size += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* SACK blocks: pending SACKs plus a D-SACK if flagged, limited to
	 * the number of blocks that fit into the space left.
	 */
	eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
	if (unlikely(eff_sacks)) {
		const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
		opts->num_sack_blocks =
			min_t(unsigned int, eff_sacks,
			      (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
			      TCPOLEN_SACK_PERBLOCK);
		size += TCPOLEN_SACK_BASE_ALIGNED +
			opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
	}

	return size;
}
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
/* Per-cpu state for deferred transmit work: a tasklet plus the list of
 * sockets (linked through tcp_sock.tsq_node) it has to service.
 * tcp_wfree() queues sockets here; tcp_tasklet_func() drains the list.
 */
struct tsq_tasklet {
	struct tasklet_struct	tasklet;
	struct list_head	head; /* queue of tcp sockets */
};
static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
839
840static void tcp_tsq_handler(struct sock *sk)
841{
842 if ((1 << sk->sk_state) &
843 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
844 TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
845 tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
846}
847
848
849
850
851
852
/* Tasklet body. @data is the struct tsq_tasklet registered in
 * tcp_tasklet_init(). Every socket queued on it is removed from the list
 * and either serviced right away or, if the socket is currently owned by
 * user context, flagged so that tcp_release_cb() does the work later.
 */
static void tcp_tasklet_func(unsigned long data)
{
	struct tsq_tasklet *tsq = (struct tsq_tasklet *)data;
	LIST_HEAD(list);
	unsigned long flags;
	struct list_head *q, *n;
	struct tcp_sock *tp;
	struct sock *sk;

	/* Move the pending sockets to a private list with interrupts off;
	 * tcp_wfree() adds to tsq->head under the same protection.
	 */
	local_irq_save(flags);
	list_splice_init(&tsq->head, &list);
	local_irq_restore(flags);

	list_for_each_safe(q, n, &list) {
		tp = list_entry(q, struct tcp_sock, tsq_node);
		list_del(&tp->tsq_node);

		sk = (struct sock *)tp;
		bh_lock_sock(sk);

		if (!sock_owned_by_user(sk)) {
			tcp_tsq_handler(sk);
		} else {
			/* defer the work to tcp_release_cb() */
			set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
		}
		bh_unlock_sock(sk);

		/* Allow tcp_wfree() to queue this socket again. */
		clear_bit(TSQ_QUEUED, &tp->tsq_flags);
		/* NOTE(review): presumably drops the single sk_wmem_alloc
		 * unit that tcp_wfree() left in place as a reference -
		 * confirm against sk_free().
		 */
		sk_free(sk);
	}
}
885
/* All tsq_flags bits that denote work deferred to tcp_release_cb(). */
#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) |		\
			  (1UL << TCP_WRITE_TIMER_DEFERRED) |	\
			  (1UL << TCP_DELACK_TIMER_DEFERRED) |	\
			  (1UL << TCP_MTU_REDUCED_DEFERRED))
890
891
892
893
894
895
896
/* Run the work that was deferred through tsq_flags: the TSQ handler, the
 * write timer handler, the delayed-ACK timer handler and the mtu_reduced
 * callback, in that order.
 * NOTE(review): presumably invoked when the socket lock is released by its
 * user-context owner - confirm against the caller.
 */
void tcp_release_cb(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned long flags, nflags;

	/* Atomically fetch and clear all deferred bits; return early when
	 * none is set.
	 */
	do {
		flags = tp->tsq_flags;
		if (!(flags & TCP_DEFERRED_ALL))
			return;
		nflags = flags & ~TCP_DEFERRED_ALL;
	} while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);

	if (flags & (1UL << TCP_TSQ_DEFERRED))
		tcp_tsq_handler(sk);

	/* NOTE(review): each __sock_put() below presumably balances a
	 * reference taken by whoever set the corresponding bit - confirm.
	 */
	if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
		tcp_write_timer_handler(sk);
		__sock_put(sk);
	}
	if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
		tcp_delack_timer_handler(sk);
		__sock_put(sk);
	}
	if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
		sk->sk_prot->mtu_reduced(sk);
		__sock_put(sk);
	}
}
EXPORT_SYMBOL(tcp_release_cb);
927
928void __init tcp_tasklet_init(void)
929{
930 int i;
931
932 for_each_possible_cpu(i) {
933 struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);
934
935 INIT_LIST_HEAD(&tsq->head);
936 tasklet_init(&tsq->tasklet,
937 tcp_tasklet_func,
938 (unsigned long)tsq);
939 }
940}
941
942
943
944
945
946
/* skb destructor installed by tcp_transmit_skb() while
 * sysctl_tcp_limit_output_bytes > 0. If the socket was throttled and is not
 * queued yet, it is put on this CPU's tsq_tasklet list and the tasklet is
 * scheduled; otherwise this behaves exactly like sock_wfree().
 */
static void tcp_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	struct tcp_sock *tp = tcp_sk(sk);

	if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
	    !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
		unsigned long flags;
		struct tsq_tasklet *tsq;

		/* Release all of the skb's truesize except one unit, which
		 * stays on sk_wmem_alloc while the socket sits on the list.
		 * NOTE(review): presumably this unit acts as a socket
		 * reference that tcp_tasklet_func() drops via sk_free().
		 */
		atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc);

		/* queue this socket to tasklet queue */
		local_irq_save(flags);
		tsq = &__get_cpu_var(tsq_tasklet);
		list_add(&tp->tsq_node, &tsq->head);
		tasklet_schedule(&tsq->tasklet);
		local_irq_restore(flags);
	} else {
		sock_wfree(skb);
	}
}
972
973
974
975
976
977
978
979
980
981
982
983
/* Build the TCP header for @skb and hand the segment to the network layer
 * through icsk_af_ops->queue_xmit().
 *
 * @clone_it: when non-zero, a clone (or a private copy, if @skb is already
 *            cloned) is transmitted and @skb itself is left untouched.
 * @gfp_mask: allocation flags for that clone/copy.
 *
 * Returns -ENOBUFS when the clone/copy cannot be allocated, the queue_xmit()
 * result when it is <= 0, and otherwise net_xmit_eval() of the positive
 * result after entering CWR.
 */
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
			    gfp_t gfp_mask)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet;
	struct tcp_sock *tp;
	struct tcp_skb_cb *tcb;
	struct tcp_out_options opts;
	unsigned int tcp_options_size, tcp_header_size;
	struct tcp_md5sig_key *md5;
	struct tcphdr *th;
	int err;

	BUG_ON(!skb || !tcp_skb_pcount(skb));

	/* Timestamp the skb before cloning when the congestion control
	 * module asks for RTT stamps.
	 */
	if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
		__net_timestamp(skb);

	if (likely(clone_it)) {
		if (unlikely(skb_cloned(skb)))
			skb = pskb_copy(skb, gfp_mask);
		else
			skb = skb_clone(skb, gfp_mask);
		if (unlikely(!skb))
			return -ENOBUFS;
	}

	inet = inet_sk(sk);
	tp = tcp_sk(sk);
	tcb = TCP_SKB_CB(skb);
	memset(&opts, 0, sizeof(opts));

	if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
		tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
	else
		tcp_options_size = tcp_established_options(sk, skb, &opts,
							   &md5);
	tcp_header_size = tcp_options_size + sizeof(struct tcphdr);

	/* With nothing in flight, notify congestion control of a transmit
	 * start and mark the skb ooo_okay.
	 */
	if (tcp_packets_in_flight(tp) == 0) {
		tcp_ca_event(sk, CA_EVENT_TX_START);
		skb->ooo_okay = 1;
	} else
		skb->ooo_okay = 0;

	skb_push(skb, tcp_header_size);
	skb_reset_transport_header(skb);

	/* Take ownership of the skb and charge its truesize to the socket's
	 * write memory; the destructor depends on whether output limiting
	 * is enabled.
	 */
	skb_orphan(skb);
	skb->sk = sk;
	skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
			  tcp_wfree : sock_wfree;
	atomic_add(skb->truesize, &sk->sk_wmem_alloc);

	/* Build TCP header and checksum it. */
	th = tcp_hdr(skb);
	th->source		= inet->inet_sport;
	th->dest		= inet->inet_dport;
	th->seq			= htonl(tcb->seq);
	th->ack_seq		= htonl(tp->rcv_nxt);
	/* 16-bit word at offset 12: data offset in the top four bits, flags
	 * in the low bits.
	 */
	*(((__be16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) |
					tcb->tcp_flags);

	if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
		/* The window on a SYN is sent unscaled, capped at 65535. */
		th->window	= htons(min(tp->rcv_wnd, 65535U));
	} else {
		th->window	= htons(tcp_select_window(sk));
	}
	th->check		= 0;
	th->urg_ptr		= 0;

	/* Urgent pointer: exact offset when it fits into 16 bits; otherwise
	 * 0xFFFF if the second test below holds.
	 */
	if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
		if (before(tp->snd_up, tcb->seq + 0x10000)) {
			th->urg_ptr = htons(tp->snd_up - tcb->seq);
			th->urg = 1;
		} else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
			th->urg_ptr = htons(0xFFFF);
			th->urg = 1;
		}
	}

	tcp_options_write((__be32 *)(th + 1), tp, &opts);
	if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
		TCP_ECN_send(sk, skb, tcp_header_size);

#ifdef CONFIG_TCP_MD5SIG
	/* Compute the MD5 digest into the slot tcp_options_write() reserved;
	 * GSO capabilities are masked off the socket when a key is in use.
	 */
	if (md5) {
		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		tp->af_specific->calc_md5_hash(opts.hash_location,
					       md5, sk, NULL, skb);
	}
#endif

	icsk->icsk_af_ops->send_check(sk, skb);

	if (likely(tcb->tcp_flags & TCPHDR_ACK))
		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));

	/* More than just a header: this segment carries data. */
	if (skb->len != tcp_header_size)
		tcp_event_data_sent(tp, sk);

	/* Count the segment unless it ends at or below snd_nxt while
	 * occupying sequence space.
	 */
	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
		TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
			      tcp_skb_pcount(skb));

	err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
	if (likely(err <= 0))
		return err;

	/* Positive result from the lower layer: enter CWR. */
	tcp_enter_cwr(sk, 1);

	return net_xmit_eval(err);
}
1105
1106
1107
1108
1109
1110
/* Append @skb to the tail of the socket's write queue, advance write_seq to
 * the skb's end sequence and charge its truesize to the socket.
 */
static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Advance write_seq and place onto the write_queue. */
	tp->write_seq = TCP_SKB_CB(skb)->end_seq;
	skb_header_release(skb);
	tcp_add_write_queue_tail(sk, skb);
	sk->sk_wmem_queued += skb->truesize;
	sk_mem_charge(sk, skb->truesize);
}
1122
1123
1124static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
1125 unsigned int mss_now)
1126{
1127 if (skb->len <= mss_now || !sk_can_gso(sk) ||
1128 skb->ip_summed == CHECKSUM_NONE) {
1129
1130
1131
1132 skb_shinfo(skb)->gso_segs = 1;
1133 skb_shinfo(skb)->gso_size = 0;
1134 skb_shinfo(skb)->gso_type = 0;
1135 } else {
1136 skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
1137 skb_shinfo(skb)->gso_size = mss_now;
1138 skb_shinfo(skb)->gso_type = sk->sk_gso_type;
1139 }
1140}
1141
1142
1143
1144
1145static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
1146 int decr)
1147{
1148 struct tcp_sock *tp = tcp_sk(sk);
1149
1150 if (!tp->sacked_out || tcp_is_reno(tp))
1151 return;
1152
1153 if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
1154 tp->fackets_out -= decr;
1155}
1156
1157
1158
1159
1160static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
1161{
1162 struct tcp_sock *tp = tcp_sk(sk);
1163
1164 tp->packets_out -= decr;
1165
1166 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1167 tp->sacked_out -= decr;
1168 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
1169 tp->retrans_out -= decr;
1170 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
1171 tp->lost_out -= decr;
1172
1173
1174 if (tcp_is_reno(tp) && decr > 0)
1175 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
1176
1177 tcp_adjust_fackets_out(sk, skb, decr);
1178
1179 if (tp->lost_skb_hint &&
1180 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
1181 (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
1182 tp->lost_cnt_hint -= decr;
1183
1184 tcp_verify_left_out(tp);
1185}
1186
1187
1188
1189
1190
1191
/* Split @skb on the write queue into two skbs: the original keeps the first
 * @len bytes, and a newly allocated skb, inserted right after it, receives
 * the rest.
 *
 * Returns 0 on success, -EINVAL if @len exceeds the skb length, and -ENOMEM
 * if the head cannot be un-cloned or the new skb cannot be allocated.
 */
int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
		 unsigned int mss_now)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *buff;
	int nsize, old_factor;
	int nlen;
	u8 flags;

	if (WARN_ON(len > skb->len))
		return -EINVAL;

	/* Linear bytes that must move to the new skb; 0 when the split point
	 * lies in the paged part.
	 */
	nsize = skb_headlen(skb) - len;
	if (nsize < 0)
		nsize = 0;

	/* A cloned, non-linear skb needs a private head first. */
	if (skb_cloned(skb) &&
	    skb_is_nonlinear(skb) &&
	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
		return -ENOMEM;

	/* Get a new skb... force flag on. */
	buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
	if (buff == NULL)
		return -ENOMEM; /* We'll just try again later. */

	sk->sk_wmem_queued += buff->truesize;
	sk_mem_charge(sk, buff->truesize);
	/* Bytes that move without being copied into buff's linear area:
	 * their truesize share is transferred from skb to buff.
	 */
	nlen = skb->len - len - nsize;
	buff->truesize += nlen;
	skb->truesize -= nlen;

	/* Correct the sequence numbers. */
	TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
	TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;

	/* FIN and PSH stay with the second half only. */
	flags = TCP_SKB_CB(skb)->tcp_flags;
	TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
	TCP_SKB_CB(buff)->tcp_flags = flags;
	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;

	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
		/* Linear skb with a software checksum: copy the tail while
		 * checksumming it, then subtract it from the original's
		 * checksum.
		 */
		buff->csum = csum_partial_copy_nocheck(skb->data + len,
						       skb_put(buff, nsize),
						       nsize, 0);

		skb_trim(skb, len);

		skb->csum = csum_block_sub(skb->csum, buff->csum, len);
	} else {
		skb->ip_summed = CHECKSUM_PARTIAL;
		skb_split(skb, buff, len);
	}

	buff->ip_summed = skb->ip_summed;

	/* The new skb inherits the timestamps of the original. */
	TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
	buff->tstamp = skb->tstamp;

	old_factor = tcp_skb_pcount(skb);

	/* Fix up tso_factor for both original and new SKB.  */
	tcp_set_skb_tso_segs(sk, skb, mss_now);
	tcp_set_skb_tso_segs(sk, buff, mss_now);

	/* If the data was already sent (it ends at or below snd_nxt), the
	 * packet counters must reflect any change in total packet count.
	 */
	if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
		int diff = old_factor - tcp_skb_pcount(skb) -
			tcp_skb_pcount(buff);

		if (diff)
			tcp_adjust_pcount(sk, skb, diff);
	}

	/* Link BUFF into the send queue. */
	skb_header_release(buff);
	tcp_insert_write_queue_after(skb, buff, sk);

	return 0;
}
1280
1281
1282
1283
1284
/* Remove @len bytes from the front of @skb: first from the linear area,
 * then from the page fragments. Fully consumed fragments are released and
 * the remaining ones are compacted to the start of the fragment array.
 */
static void __pskb_trim_head(struct sk_buff *skb, int len)
{
	int i, k, eat;

	/* Take as much as possible from the linear part. */
	eat = min_t(int, len, skb_headlen(skb));
	if (eat) {
		__skb_pull(skb, eat);
		skb->avail_size -= eat;
		len -= eat;
		if (!len)
			return;
	}
	/* The rest comes out of the fragments. k is the write index for the
	 * fragments that survive.
	 */
	eat = len;
	k = 0;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);

		if (size <= eat) {
			/* Fragment is consumed entirely. */
			skb_frag_unref(skb, i);
			eat -= size;
		} else {
			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
			if (eat) {
				/* Partially consumed: shift its start. */
				skb_shinfo(skb)->frags[k].page_offset += eat;
				skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
				eat = 0;
			}
			k++;
		}
	}
	skb_shinfo(skb)->nr_frags = k;

	/* Reaching this point means the linear part is empty, so the whole
	 * length is the paged length.
	 */
	skb_reset_tail_pointer(skb);
	skb->data_len -= len;
	skb->len = skb->data_len;
}
1321
1322
/* Drop @len bytes from the head of @skb and update sequence number and
 * memory accounting to match.
 *
 * Returns 0 on success or -ENOMEM when a cloned skb cannot be given a
 * private head.
 */
int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
{
	if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
		return -ENOMEM;

	__pskb_trim_head(skb, len);

	TCP_SKB_CB(skb)->seq += len;
	skb->ip_summed = CHECKSUM_PARTIAL;

	/* The trimmed bytes are no longer charged to the socket. */
	skb->truesize	     -= len;
	sk->sk_wmem_queued   -= len;
	sk_mem_uncharge(sk, len);
	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);

	/* Any change of skb->len requires recalculation of tso factor. */
	if (tcp_skb_pcount(skb) > 1)
		tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));

	return 0;
}
1344
1345
/* Convert a path MTU into the MSS usable for payload on this socket,
 * accounting for network and TCP headers, the peer's MSS clamp, extension
 * headers and the socket's fixed TCP option overhead.
 */
int tcp_mtu_to_mss(struct sock *sk, int pmtu)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	int mss_now;

	/* Base MSS: MTU minus the network header and the option-less TCP
	 * header.
	 */
	mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);

	/* Address families that define a fragment header also pay for it
	 * when the route has the allfrag feature set.
	 */
	if (icsk->icsk_af_ops->net_frag_header_len) {
		const struct dst_entry *dst = __sk_dst_get(sk);

		if (dst && dst_allfrag(dst))
			mss_now -= icsk->icsk_af_ops->net_frag_header_len;
	}

	/* Clamp it (mss_clamp is applied before TCP options are removed). */
	if (mss_now > tp->rx_opt.mss_clamp)
		mss_now = tp->rx_opt.mss_clamp;

	/* Now subtract optional transport overhead */
	mss_now -= icsk->icsk_ext_hdr_len;

	/* Enforce a floor of 48 bytes.
	 * NOTE(review): presumably 40 bytes of TCP options plus 8 bytes of
	 * data - confirm.
	 */
	if (mss_now < 48)
		mss_now = 48;

	/* Now subtract the TCP option bytes included in tcp_header_len. */
	mss_now -= tp->tcp_header_len - sizeof(struct tcphdr);

	return mss_now;
}
1381
1382
1383int tcp_mss_to_mtu(struct sock *sk, int mss)
1384{
1385 const struct tcp_sock *tp = tcp_sk(sk);
1386 const struct inet_connection_sock *icsk = inet_csk(sk);
1387 int mtu;
1388
1389 mtu = mss +
1390 tp->tcp_header_len +
1391 icsk->icsk_ext_hdr_len +
1392 icsk->icsk_af_ops->net_header_len;
1393
1394
1395 if (icsk->icsk_af_ops->net_frag_header_len) {
1396 const struct dst_entry *dst = __sk_dst_get(sk);
1397
1398 if (dst && dst_allfrag(dst))
1399 mtu += icsk->icsk_af_ops->net_frag_header_len;
1400 }
1401 return mtu;
1402}
1403
1404
/* Initialise the MTU probing state of a socket: probing is enabled only
 * when sysctl_tcp_mtu_probing > 1, the search range runs from the MTU that
 * corresponds to sysctl_tcp_base_mss up to the MTU implied by the MSS
 * clamp, and no probe is outstanding (probe_size 0).
 */
void tcp_mtup_init(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1;
	icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
			       icsk->icsk_af_ops->net_header_len;
	icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
	icsk->icsk_mtup.probe_size = 0;
}
EXPORT_SYMBOL(tcp_mtup_init);
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
1441{
1442 struct tcp_sock *tp = tcp_sk(sk);
1443 struct inet_connection_sock *icsk = inet_csk(sk);
1444 int mss_now;
1445
1446 if (icsk->icsk_mtup.search_high > pmtu)
1447 icsk->icsk_mtup.search_high = pmtu;
1448
1449 mss_now = tcp_mtu_to_mss(sk, pmtu);
1450 mss_now = tcp_bound_to_half_wnd(tp, mss_now);
1451
1452
1453 icsk->icsk_pmtu_cookie = pmtu;
1454 if (icsk->icsk_mtup.enabled)
1455 mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low));
1456 tp->mss_cache = mss_now;
1457
1458 return mss_now;
1459}
1460EXPORT_SYMBOL(tcp_sync_mss);
1461
1462
1463
1464
/* Return the MSS to use right now: the cached MSS, re-synchronised first if
 * the route's MTU no longer matches icsk_pmtu_cookie, and then corrected for
 * any difference between the option space currently needed and the fixed
 * tcp_header_len.
 */
unsigned int tcp_current_mss(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct dst_entry *dst = __sk_dst_get(sk);
	u32 mss_now;
	unsigned int header_len;
	struct tcp_out_options opts;
	struct tcp_md5sig_key *md5;

	mss_now = tp->mss_cache;

	if (dst) {
		u32 mtu = dst_mtu(dst);
		if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
			mss_now = tcp_sync_mss(sk, mtu);
	}

	/* Only the size is of interest here; opts and md5 are discarded.
	 * NOTE(review): opts is not zeroed before the call although
	 * tcp_established_options() ORs bits into opts->options; harmless
	 * as long as opts is never read afterwards - confirm this is
	 * intentional.
	 */
	header_len = tcp_established_options(sk, NULL, &opts, &md5) +
		     sizeof(struct tcphdr);

	/* Options such as SACK blocks or MD5 are not part of
	 * tcp_header_len; compensate for whatever differs right now.
	 */
	if (header_len != tp->tcp_header_len) {
		int delta = (int) header_len - tp->tcp_header_len;
		mss_now -= delta;
	}

	return mss_now;
}
1495
1496
1497static void tcp_cwnd_validate(struct sock *sk)
1498{
1499 struct tcp_sock *tp = tcp_sk(sk);
1500
1501 if (tp->packets_out >= tp->snd_cwnd) {
1502
1503 tp->snd_cwnd_used = 0;
1504 tp->snd_cwnd_stamp = tcp_time_stamp;
1505 } else {
1506
1507 if (tp->packets_out > tp->snd_cwnd_used)
1508 tp->snd_cwnd_used = tp->packets_out;
1509
1510 if (sysctl_tcp_slow_start_after_idle &&
1511 (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
1512 tcp_cwnd_application_limited(sk);
1513 }
1514}
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
1529 unsigned int mss_now, unsigned int max_segs)
1530{
1531 const struct tcp_sock *tp = tcp_sk(sk);
1532 u32 needed, window, max_len;
1533
1534 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1535 max_len = mss_now * max_segs;
1536
1537 if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
1538 return max_len;
1539
1540 needed = min(skb->len, window);
1541
1542 if (max_len <= needed)
1543 return max_len;
1544
1545 return needed - needed % mss_now;
1546}
1547
1548
1549
1550
1551static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
1552 const struct sk_buff *skb)
1553{
1554 u32 in_flight, cwnd;
1555
1556
1557 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
1558 tcp_skb_pcount(skb) == 1)
1559 return 1;
1560
1561 in_flight = tcp_packets_in_flight(tp);
1562 cwnd = tp->snd_cwnd;
1563 if (in_flight < cwnd)
1564 return (cwnd - in_flight);
1565
1566 return 0;
1567}
1568
1569
1570
1571
1572
1573static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
1574 unsigned int mss_now)
1575{
1576 int tso_segs = tcp_skb_pcount(skb);
1577
1578 if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
1579 tcp_set_skb_tso_segs(sk, skb, mss_now);
1580 tso_segs = tcp_skb_pcount(skb);
1581 }
1582 return tso_segs;
1583}
1584
1585
1586static inline bool tcp_minshall_check(const struct tcp_sock *tp)
1587{
1588 return after(tp->snd_sml, tp->snd_una) &&
1589 !after(tp->snd_sml, tp->snd_nxt);
1590}
1591
1592
1593
1594
1595
1596
1597
1598
1599static inline bool tcp_nagle_check(const struct tcp_sock *tp,
1600 const struct sk_buff *skb,
1601 unsigned int mss_now, int nonagle)
1602{
1603 return skb->len < mss_now &&
1604 ((nonagle & TCP_NAGLE_CORK) ||
1605 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1606}
1607
1608
1609
1610
1611static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
1612 unsigned int cur_mss, int nonagle)
1613{
1614
1615
1616
1617
1618
1619
1620 if (nonagle & TCP_NAGLE_PUSH)
1621 return true;
1622
1623
1624
1625
1626 if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
1627 (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1628 return true;
1629
1630 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
1631 return true;
1632
1633 return false;
1634}
1635
1636
1637static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
1638 const struct sk_buff *skb,
1639 unsigned int cur_mss)
1640{
1641 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
1642
1643 if (skb->len > cur_mss)
1644 end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
1645
1646 return !after(end_seq, tcp_wnd_end(tp));
1647}
1648
1649
1650
1651
1652
/*
 * Combined Nagle, congestion window and send window test for @skb.
 *
 * Returns the congestion window quota (in packets) if the skb may be sent
 * now, or 0 if any of the tests refuses it.  The skb's segment count is
 * initialised for @cur_mss as a side effect.
 */
static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
				 unsigned int cur_mss, int nonagle)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	unsigned int quota;

	tcp_init_tso_segs(sk, skb, cur_mss);

	if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
		return 0;

	quota = tcp_cwnd_test(tp, skb);
	if (!quota)
		return 0;

	return tcp_snd_wnd_test(tp, skb, cur_mss) ? quota : 0;
}
1670
1671
1672bool tcp_may_send_now(struct sock *sk)
1673{
1674 const struct tcp_sock *tp = tcp_sk(sk);
1675 struct sk_buff *skb = tcp_send_head(sk);
1676
1677 return skb &&
1678 tcp_snd_test(sk, skb, tcp_current_mss(sk),
1679 (tcp_skb_is_last(sk, skb) ?
1680 tp->nonagle : TCP_NAGLE_PUSH));
1681}
1682
1683
1684
1685
1686
1687
1688
1689
1690static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1691 unsigned int mss_now, gfp_t gfp)
1692{
1693 struct sk_buff *buff;
1694 int nlen = skb->len - len;
1695 u8 flags;
1696
1697
1698 if (skb->len != skb->data_len)
1699 return tcp_fragment(sk, skb, len, mss_now);
1700
1701 buff = sk_stream_alloc_skb(sk, 0, gfp);
1702 if (unlikely(buff == NULL))
1703 return -ENOMEM;
1704
1705 sk->sk_wmem_queued += buff->truesize;
1706 sk_mem_charge(sk, buff->truesize);
1707 buff->truesize += nlen;
1708 skb->truesize -= nlen;
1709
1710
1711 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1712 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1713 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1714
1715
1716 flags = TCP_SKB_CB(skb)->tcp_flags;
1717 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1718 TCP_SKB_CB(buff)->tcp_flags = flags;
1719
1720
1721 TCP_SKB_CB(buff)->sacked = 0;
1722
1723 buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
1724 skb_split(skb, buff, len);
1725
1726
1727 tcp_set_skb_tso_segs(sk, skb, mss_now);
1728 tcp_set_skb_tso_segs(sk, buff, mss_now);
1729
1730
1731 skb_header_release(buff);
1732 tcp_insert_write_queue_after(skb, buff, sk);
1733
1734 return 0;
1735}
1736
1737
1738
1739
1740
1741
1742static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1743{
1744 struct tcp_sock *tp = tcp_sk(sk);
1745 const struct inet_connection_sock *icsk = inet_csk(sk);
1746 u32 send_win, cong_win, limit, in_flight;
1747 int win_divisor;
1748
1749 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1750 goto send_now;
1751
1752 if (icsk->icsk_ca_state != TCP_CA_Open)
1753 goto send_now;
1754
1755
1756 if (tp->tso_deferred &&
1757 (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
1758 goto send_now;
1759
1760 in_flight = tcp_packets_in_flight(tp);
1761
1762 BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));
1763
1764 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1765
1766
1767 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
1768
1769 limit = min(send_win, cong_win);
1770
1771
1772 if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
1773 sk->sk_gso_max_segs * tp->mss_cache))
1774 goto send_now;
1775
1776
1777 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
1778 goto send_now;
1779
1780 win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
1781 if (win_divisor) {
1782 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1783
1784
1785
1786
1787 chunk /= win_divisor;
1788 if (limit >= chunk)
1789 goto send_now;
1790 } else {
1791
1792
1793
1794
1795
1796 if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache)
1797 goto send_now;
1798 }
1799
1800
1801 tp->tso_deferred = 1 | (jiffies << 1);
1802
1803 return true;
1804
1805send_now:
1806 tp->tso_deferred = 0;
1807 return false;
1808}
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
/*
 * Try to build and transmit one MTU probe segment out of the data queued at
 * the send head.
 *
 * Returns 1 if a probe was transmitted, 0 if the send window or congestion
 * window has no room for it right now (tcp_write_xmit() then returns without
 * sending anything), and -1 if no probe was sent for any other reason.
 */
1819static int tcp_mtu_probe(struct sock *sk)
1820{
1821 struct tcp_sock *tp = tcp_sk(sk);
1822 struct inet_connection_sock *icsk = inet_csk(sk);
1823 struct sk_buff *skb, *nskb, *next;
1824 int len;
1825 int probe_size;
1826 int size_needed;
1827 int copy;
1828 int mss_now;
1829
1830
1831
1832
1833
	/* Probe only when probing is enabled, no probe is already outstanding
	 * (probe_size == 0), the congestion state is Open, cwnd is at least 11
	 * and both rx_opt.num_sacks and rx_opt.dsack are zero.
	 */
1834 if (!icsk->icsk_mtup.enabled ||
1835 icsk->icsk_mtup.probe_size ||
1836 inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
1837 tp->snd_cwnd < 11 ||
1838 tp->rx_opt.num_sacks || tp->rx_opt.dsack)
1839 return -1;
1840
1841
	/* The probe carries twice the cached MSS; size_needed additionally
	 * demands (reordering + 1) further MSS worth of data.
	 */
1842 mss_now = tcp_current_mss(sk);
1843 probe_size = 2 * tp->mss_cache;
1844 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
1845 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
1846
	/* The probe would exceed the upper bound of the search range. */
1847 return -1;
1848 }
1849
1850
	/* Not enough unsent data queued (write_seq - snd_nxt). */
1851 if (tp->write_seq - tp->snd_nxt < size_needed)
1852 return -1;
1853
	/* Send window smaller than size_needed: give up; merely not enough
	 * room left before the window end: wait.
	 */
1854 if (tp->snd_wnd < size_needed)
1855 return -1;
1856 if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
1857 return 0;
1858
1859
	/* Need congestion window room for two more packets.  With nothing in
	 * flight there is nothing to wait for, so give up instead of waiting.
	 */
1860 if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
1861 if (!tcp_packets_in_flight(tp))
1862 return -1;
1863 else
1864 return 0;
1865 }
1866
1867
	/* Allocate the probe skb and charge it to the socket. */
1868 if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
1869 return -1;
1870 sk->sk_wmem_queued += nskb->truesize;
1871 sk_mem_charge(sk, nskb->truesize);
1872
1873 skb = tcp_send_head(sk);
1874
	/* The probe covers probe_size bytes starting at the send head's
	 * sequence number and is queued in front of the send head.
	 */
1875 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
1876 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
1877 TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
1878 TCP_SKB_CB(nskb)->sacked = 0;
1879 nskb->csum = 0;
1880 nskb->ip_summed = skb->ip_summed;
1881
1882 tcp_insert_write_queue_before(nskb, skb, sk);
1883
	/* Gather probe_size bytes from the queued skbs into the probe. */
1884 len = 0;
1885 tcp_for_write_queue_from_safe(skb, next, sk) {
1886 copy = min_t(int, skb->len, probe_size - len);
	/* Plain copy when ip_summed is non-zero, otherwise accumulate the
	 * checksum while copying.
	 */
1887 if (nskb->ip_summed)
1888 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
1889 else
1890 nskb->csum = skb_copy_and_csum_bits(skb, 0,
1891 skb_put(nskb, copy),
1892 copy, nskb->csum);
1893
1894 if (skb->len <= copy) {
1895
1896
	/* Source skb fully consumed: inherit all its flags, then unlink
	 * and free it.
	 */
1897 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1898 tcp_unlink_write_queue(skb, sk);
1899 sk_wmem_free_skb(sk, skb);
1900 } else {
	/* Source skb only partly consumed: FIN and PSH stay with the
	 * remainder, the copied head is removed and seq advanced by copy.
	 */
1901 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
1902 ~(TCPHDR_FIN|TCPHDR_PSH);
1903 if (!skb_shinfo(skb)->nr_frags) {
1904 skb_pull(skb, copy);
1905 if (skb->ip_summed != CHECKSUM_PARTIAL)
1906 skb->csum = csum_partial(skb->data,
1907 skb->len, 0);
1908 } else {
1909 __pskb_trim_head(skb, copy);
1910 tcp_set_skb_tso_segs(sk, skb, mss_now);
1911 }
1912 TCP_SKB_CB(skb)->seq += copy;
1913 }
1914
1915 len += copy;
1916
1917 if (len >= probe_size)
1918 break;
1919 }
	/* Passing the probe's own length as the MSS makes it one segment. */
1920 tcp_init_tso_segs(sk, nskb, nskb->len);
1921
1922
1923
1924 TCP_SKB_CB(nskb)->when = tcp_time_stamp;
1925 if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
1926
1927
	/* Probe sent: decrement cwnd by one, account for the new data and
	 * record the probe's MTU and sequence range.
	 */
1928 tp->snd_cwnd--;
1929 tcp_event_new_data_sent(sk, nskb);
1930
1931 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
1932 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
1933 tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq;
1934
1935 return 1;
1936 }
1937
1938 return -1;
1939}
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
/*
 * Transmit queued, not yet sent skbs starting at the send head for as long
 * as the congestion window, the send window, the Nagle / TSO deferral logic
 * and the sysctl_tcp_limit_output_bytes limit allow.
 *
 * @mss_now:  MSS used to size and split the segments
 * @nonagle:  Nagle mode applied to the last skb in the queue
 * @push_one: non-zero to send at most one skb; MTU probing and TSO deferral
 *            are skipped in that case
 * @gfp:      allocation flags for fragmenting and transmitting
 *
 * Returns false if anything was sent, or if tcp_mtu_probe() asked to wait.
 * Otherwise returns true only when no packets are outstanding and data is
 * still waiting at the send head.
 */
1952static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1953 int push_one, gfp_t gfp)
1954{
1955 struct tcp_sock *tp = tcp_sk(sk);
1956 struct sk_buff *skb;
1957 unsigned int tso_segs, sent_pkts;
1958 int cwnd_quota;
1959 int result;
1960
1961 sent_pkts = 0;
1962
1963 if (!push_one) {
1964
	/* tcp_mtu_probe(): 0 means wait, > 0 means one probe was sent. */
1965 result = tcp_mtu_probe(sk);
1966 if (!result) {
1967 return false;
1968 } else if (result > 0) {
1969 sent_pkts = 1;
1970 }
1971 }
1972
1973 while ((skb = tcp_send_head(sk))) {
1974 unsigned int limit;
1975
1976
1977 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
1978 BUG_ON(!tso_segs);
1979
	/* Repair mode on the send queue: skip every test and the actual
	 * transmission, only do the "sent" bookkeeping.
	 */
1980 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE)
1981 goto repair;
1982
1983 cwnd_quota = tcp_cwnd_test(tp, skb);
1984 if (!cwnd_quota)
1985 break;
1986
1987 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
1988 break;
1989
	/* Single-segment skbs go through the Nagle test (the caller's
	 * nonagle applies to the last skb only); multi-segment skbs may be
	 * deferred unless push_one is set.
	 */
1990 if (tso_segs == 1) {
1991 if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
1992 (tcp_skb_is_last(sk, skb) ?
1993 nonagle : TCP_NAGLE_PUSH))))
1994 break;
1995 } else {
1996 if (!push_one && tcp_tso_should_defer(sk, skb))
1997 break;
1998 }
1999
2000
2001
2002
	/* Stop and flag TSQ_THROTTLED once sk_wmem_alloc has reached
	 * sysctl_tcp_limit_output_bytes.
	 */
2003 if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) {
2004 set_bit(TSQ_THROTTLED, &tp->tsq_flags);
2005 break;
2006 }
	/* Bytes to send from this skb: one MSS, or the split point for a
	 * multi-segment skb when not in urgent mode.
	 */
2007 limit = mss_now;
2008 if (tso_segs > 1 && !tcp_urg_mode(tp))
2009 limit = tcp_mss_split_point(sk, skb, mss_now,
2010 min_t(unsigned int,
2011 cwnd_quota,
2012 sk->sk_gso_max_segs));
2013
2014 if (skb->len > limit &&
2015 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
2016 break;
2017
2018 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2019
2020 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
2021 break;
2022
2023repair:
2024
2025
2026
	/* Advance the send head and account for the skb as sent. */
2027 tcp_event_new_data_sent(sk, skb);
2028
2029 tcp_minshall_update(tp, mss_now, skb);
2030 sent_pkts += tcp_skb_pcount(skb);
2031
2032 if (push_one)
2033 break;
2034 }
	/* In Recovery state, add what was sent to prr_out. */
2035 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
2036 tp->prr_out += sent_pkts;
2037
2038 if (likely(sent_pkts)) {
2039 tcp_cwnd_validate(sk);
2040 return false;
2041 }
	/* Nothing sent: true only if nothing is in flight yet data is pending;
	 * __tcp_push_pending_frames() then calls tcp_check_probe_timer().
	 */
2042 return !tp->packets_out && tcp_send_head(sk);
2043}
2044
2045
2046
2047
2048
2049void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
2050 int nonagle)
2051{
2052
2053
2054
2055
2056 if (unlikely(sk->sk_state == TCP_CLOSE))
2057 return;
2058
2059 if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
2060 sk_gfp_atomic(sk, GFP_ATOMIC)))
2061 tcp_check_probe_timer(sk);
2062}
2063
2064
2065
2066
2067void tcp_push_one(struct sock *sk, unsigned int mss_now)
2068{
2069 struct sk_buff *skb = tcp_send_head(sk);
2070
2071 BUG_ON(!skb || skb->len < mss_now);
2072
2073 tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
2074}
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128u32 __tcp_select_window(struct sock *sk)
2129{
2130 struct inet_connection_sock *icsk = inet_csk(sk);
2131 struct tcp_sock *tp = tcp_sk(sk);
2132
2133
2134
2135
2136
2137
2138 int mss = icsk->icsk_ack.rcv_mss;
2139 int free_space = tcp_space(sk);
2140 int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
2141 int window;
2142
2143 if (mss > full_space)
2144 mss = full_space;
2145
2146 if (free_space < (full_space >> 1)) {
2147 icsk->icsk_ack.quick = 0;
2148
2149 if (sk_under_memory_pressure(sk))
2150 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
2151 4U * tp->advmss);
2152
2153 if (free_space < mss)
2154 return 0;
2155 }
2156
2157 if (free_space > tp->rcv_ssthresh)
2158 free_space = tp->rcv_ssthresh;
2159
2160
2161
2162
2163 window = tp->rcv_wnd;
2164 if (tp->rx_opt.rcv_wscale) {
2165 window = free_space;
2166
2167
2168
2169
2170
2171 if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
2172 window = (((window >> tp->rx_opt.rcv_wscale) + 1)
2173 << tp->rx_opt.rcv_wscale);
2174 } else {
2175
2176
2177
2178
2179
2180
2181
2182
2183 if (window <= free_space - mss || window > free_space)
2184 window = (free_space / mss) * mss;
2185 else if (mss == full_space &&
2186 free_space > window + (full_space >> 1))
2187 window = free_space;
2188 }
2189
2190 return window;
2191}
2192
2193
2194static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2195{
2196 struct tcp_sock *tp = tcp_sk(sk);
2197 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
2198 int skb_size, next_skb_size;
2199
2200 skb_size = skb->len;
2201 next_skb_size = next_skb->len;
2202
2203 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
2204
2205 tcp_highest_sack_combine(sk, next_skb, skb);
2206
2207 tcp_unlink_write_queue(next_skb, sk);
2208
2209 skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
2210 next_skb_size);
2211
2212 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
2213 skb->ip_summed = CHECKSUM_PARTIAL;
2214
2215 if (skb->ip_summed != CHECKSUM_PARTIAL)
2216 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
2217
2218
2219 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
2220
2221
2222 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
2223
2224
2225
2226
2227 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
2228
2229
2230 tcp_clear_retrans_hints_partial(tp);
2231 if (next_skb == tp->retransmit_skb_hint)
2232 tp->retransmit_skb_hint = skb;
2233
2234 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
2235
2236 sk_wmem_free_skb(sk, next_skb);
2237}
2238
2239
2240static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
2241{
2242 if (tcp_skb_pcount(skb) > 1)
2243 return false;
2244
2245 if (skb_shinfo(skb)->nr_frags != 0)
2246 return false;
2247 if (skb_cloned(skb))
2248 return false;
2249 if (skb == tcp_send_head(sk))
2250 return false;
2251
2252 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2253 return false;
2254
2255 return true;
2256}
2257
2258
2259
2260
2261static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2262 int space)
2263{
2264 struct tcp_sock *tp = tcp_sk(sk);
2265 struct sk_buff *skb = to, *tmp;
2266 bool first = true;
2267
2268 if (!sysctl_tcp_retrans_collapse)
2269 return;
2270 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2271 return;
2272
2273 tcp_for_write_queue_from_safe(skb, tmp, sk) {
2274 if (!tcp_can_collapse(sk, skb))
2275 break;
2276
2277 space -= skb->len;
2278
2279 if (first) {
2280 first = false;
2281 continue;
2282 }
2283
2284 if (space < 0)
2285 break;
2286
2287
2288
2289 if (skb->len > skb_availroom(to))
2290 break;
2291
2292 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
2293 break;
2294
2295 tcp_collapse_retrans(sk, to);
2296 }
2297}
2298
2299
2300
2301
2302
/*
 * Retransmit the single skb @skb.
 *
 * Returns 0 on success.  Failure codes produced here are -EAGAIN (too much
 * memory already queued for output, or the skb lies beyond the send
 * window), -ENOMEM (trimming or fragmenting failed), -EHOSTUNREACH (header
 * rebuild failed) and -ENOBUFS (the aligned copy could not be allocated).
 * Any error from tcp_transmit_skb() is passed through.
 */
2303int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2304{
2305 struct tcp_sock *tp = tcp_sk(sk);
2306 struct inet_connection_sock *icsk = inet_csk(sk);
2307 unsigned int cur_mss;
2308 int err;
2309
2310
	/* Clear any outstanding MTU probe record before retransmitting. */
2311 if (icsk->icsk_mtup.probe_size) {
2312 icsk->icsk_mtup.probe_size = 0;
2313 }
2314
2315
2316
2317
	/* Refuse when sk_wmem_alloc exceeds the smaller of 1.25 times
	 * sk_wmem_queued and sk_sndbuf.
	 */
2318 if (atomic_read(&sk->sk_wmem_alloc) >
2319 min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
2320 return -EAGAIN;
2321
	/* The skb starts before snd_una: trim off the part before snd_una.
	 * An skb that ends before snd_una is a bug.
	 */
2322 if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
2323 if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
2324 BUG();
2325 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
2326 return -ENOMEM;
2327 }
2328
2329 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
2330 return -EHOSTUNREACH;
2331
2332 cur_mss = tcp_current_mss(sk);
2333
2334
2335
2336
2337
2338
	/* Do not retransmit data that starts at or beyond the end of the
	 * send window, unless it starts exactly at snd_una.
	 */
2339 if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
2340 TCP_SKB_CB(skb)->seq != tp->snd_una)
2341 return -EAGAIN;
2342
	/* Larger than the current MSS: split off the first cur_mss bytes.
	 * Otherwise, if the skb was counted as several segments, recompute
	 * its segment count and adjust the packet counters by the difference.
	 */
2343 if (skb->len > cur_mss) {
2344 if (tcp_fragment(sk, skb, cur_mss, cur_mss))
2345 return -ENOMEM;
2346 } else {
2347 int oldpcount = tcp_skb_pcount(skb);
2348
2349 if (unlikely(oldpcount > 1)) {
2350 tcp_init_tso_segs(sk, skb, cur_mss);
2351 tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
2352 }
2353 }
2354
2355 tcp_retrans_try_collapse(sk, skb, cur_mss);
2356
2357
2358
2359
2360
	/* Data + FIN where only the FIN sequence number is still above
	 * snd_una: drop the payload and turn the skb into a bare ACK|FIN.
	 */
2361 if (skb->len > 0 &&
2362 (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
2363 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
2364 if (!pskb_trim(skb, 0)) {
2365
2366 tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
2367 TCP_SKB_CB(skb)->tcp_flags);
2368 skb->ip_summed = CHECKSUM_NONE;
2369 }
2370 }
2371
2372
2373
2374
2375 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2376
2377
	/* When NET_IP_ALIGN is non-zero and the data pointer is not 4-byte
	 * aligned, transmit a fresh copy instead of a clone of the skb.
	 */
2378 if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) {
2379 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
2380 GFP_ATOMIC);
2381 err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2382 -ENOBUFS;
2383 } else {
2384 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2385 }
2386
2387 if (err == 0) {
2388
	/* Success: update the retransmission statistics and counters. */
2389 TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
2390
2391 tp->total_retrans++;
2392
2393#if FASTRETRANS_DEBUG > 0
2394 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2395 net_dbg_ratelimited("retrans_out leaked\n");
2396 }
2397#endif
	/* First outstanding retransmission: remember snd_nxt. */
2398 if (!tp->retrans_out)
2399 tp->lost_retrans_low = tp->snd_nxt;
2400 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
2401 tp->retrans_out += tcp_skb_pcount(skb);
2402
2403
	/* Record the time of the first retransmission only. */
2404 if (!tp->retrans_stamp)
2405 tp->retrans_stamp = TCP_SKB_CB(skb)->when;
2406
2407 tp->undo_retrans += tcp_skb_pcount(skb);
2408
2409
2410
2411
	/* Remember snd_nxt at the time of this retransmission. */
2412 TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
2413 }
2414 return err;
2415}
2416
2417
2418
2419
2420static bool tcp_can_forward_retransmit(struct sock *sk)
2421{
2422 const struct inet_connection_sock *icsk = inet_csk(sk);
2423 const struct tcp_sock *tp = tcp_sk(sk);
2424
2425
2426 if (icsk->icsk_ca_state != TCP_CA_Recovery)
2427 return false;
2428
2429
2430 if (tcp_is_reno(tp))
2431 return false;
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441 if (tcp_may_send_now(sk))
2442 return false;
2443
2444 return true;
2445}
2446
2447
2448
2449
2450
2451
2452
2453
2454
/*
 * Walk the write queue, from the retransmit hint if set or else from the
 * queue head, up to the send head, and retransmit segments as far as the
 * congestion window allows.
 *
 * Segments marked TCPCB_LOST below retransmit_high are retransmitted first.
 * Once the walk reaches retransmit_high, and if
 * tcp_can_forward_retransmit() permits, the walk restarts at the first
 * "hole" and forward-retransmits segments located below the highest SACKed
 * sequence.  Segments already SACKed or already retransmitted are skipped.
 */
2455void tcp_xmit_retransmit_queue(struct sock *sk)
2456{
2457 const struct inet_connection_sock *icsk = inet_csk(sk);
2458 struct tcp_sock *tp = tcp_sk(sk);
2459 struct sk_buff *skb;
2460 struct sk_buff *hole = NULL;
2461 u32 last_lost;
2462 int mib_idx;
2463 int fwd_rexmitting = 0;
2464
2465 if (!tp->packets_out)
2466 return;
2467
2468 if (!tp->lost_out)
2469 tp->retransmit_high = tp->snd_una;
2470
	/* Resume from the hint when there is one; last_lost is then the
	 * hint's end sequence capped at retransmit_high.
	 */
2471 if (tp->retransmit_skb_hint) {
2472 skb = tp->retransmit_skb_hint;
2473 last_lost = TCP_SKB_CB(skb)->end_seq;
2474 if (after(last_lost, tp->retransmit_high))
2475 last_lost = tp->retransmit_high;
2476 } else {
2477 skb = tcp_write_queue_head(sk);
2478 last_lost = tp->snd_una;
2479 }
2480
2481 tcp_for_write_queue_from(skb, sk) {
2482 __u8 sacked = TCP_SKB_CB(skb)->sacked;
2483
2484 if (skb == tcp_send_head(sk))
2485 break;
2486
	/* Keep advancing the hint only while no hole has been seen. */
2487 if (hole == NULL)
2488 tp->retransmit_skb_hint = skb;
2489
2490
2491
2492
2493
2494
2495
2496
	/* Congestion window exhausted: stop for now. */
2497 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
2498 return;
2499
2500 if (fwd_rexmitting) {
	/* Forward retransmission: stop at the highest SACKed sequence. */
2501begin_fwd:
2502 if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
2503 break;
2504 mib_idx = LINUX_MIB_TCPFORWARDRETRANS;
2505
2506 } else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) {
	/* Reached retransmit_high: switch to forward retransmission if
	 * allowed, restarting from the recorded hole when there is one.
	 */
2507 tp->retransmit_high = last_lost;
2508 if (!tcp_can_forward_retransmit(sk))
2509 break;
2510
2511 if (hole != NULL) {
2512 skb = hole;
2513 hole = NULL;
2514 }
2515 fwd_rexmitting = 1;
2516 goto begin_fwd;
2517
2518 } else if (!(sacked & TCPCB_LOST)) {
	/* Not marked lost: record the first skb that is neither SACKed nor
	 * already retransmitted as the hole, then move on.
	 */
2519 if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
2520 hole = skb;
2521 continue;
2522
2523 } else {
	/* Marked lost: pick the statistic according to the CA state. */
2524 last_lost = TCP_SKB_CB(skb)->end_seq;
2525 if (icsk->icsk_ca_state != TCP_CA_Loss)
2526 mib_idx = LINUX_MIB_TCPFASTRETRANS;
2527 else
2528 mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
2529 }
2530
	/* Skip anything already SACKed or already retransmitted. */
2531 if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
2532 continue;
2533
2534 if (tcp_retransmit_skb(sk, skb)) {
2535 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
2536 return;
2537 }
2538 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2539
2540 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
2541 tp->prr_out += tcp_skb_pcount(skb);
2542
	/* Retransmitting the queue head re-arms the retransmit timer. */
2543 if (skb == tcp_write_queue_head(sk))
2544 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2545 inet_csk(sk)->icsk_rto,
2546 TCP_RTO_MAX);
2547 }
2548}
2549
2550
2551
2552
2553void tcp_send_fin(struct sock *sk)
2554{
2555 struct tcp_sock *tp = tcp_sk(sk);
2556 struct sk_buff *skb = tcp_write_queue_tail(sk);
2557 int mss_now;
2558
2559
2560
2561
2562
2563 mss_now = tcp_current_mss(sk);
2564
2565 if (tcp_send_head(sk) != NULL) {
2566 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
2567 TCP_SKB_CB(skb)->end_seq++;
2568 tp->write_seq++;
2569 } else {
2570
2571 for (;;) {
2572 skb = alloc_skb_fclone(MAX_TCP_HEADER,
2573 sk->sk_allocation);
2574 if (skb)
2575 break;
2576 yield();
2577 }
2578
2579
2580 skb_reserve(skb, MAX_TCP_HEADER);
2581
2582 tcp_init_nondata_skb(skb, tp->write_seq,
2583 TCPHDR_ACK | TCPHDR_FIN);
2584 tcp_queue_skb(sk, skb);
2585 }
2586 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
2587}
2588
2589
2590
2591
2592
2593
2594void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2595{
2596 struct sk_buff *skb;
2597
2598
2599 skb = alloc_skb(MAX_TCP_HEADER, priority);
2600 if (!skb) {
2601 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2602 return;
2603 }
2604
2605
2606 skb_reserve(skb, MAX_TCP_HEADER);
2607 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
2608 TCPHDR_ACK | TCPHDR_RST);
2609
2610 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2611 if (tcp_transmit_skb(sk, skb, 0, priority))
2612 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2613
2614 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
2615}
2616
2617
2618
2619
2620
2621
2622
2623int tcp_send_synack(struct sock *sk)
2624{
2625 struct sk_buff *skb;
2626
2627 skb = tcp_write_queue_head(sk);
2628 if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
2629 pr_debug("%s: wrong queue state\n", __func__);
2630 return -EFAULT;
2631 }
2632 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
2633 if (skb_cloned(skb)) {
2634 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2635 if (nskb == NULL)
2636 return -ENOMEM;
2637 tcp_unlink_write_queue(skb, sk);
2638 skb_header_release(nskb);
2639 __tcp_add_write_queue_head(sk, nskb);
2640 sk_wmem_free_skb(sk, skb);
2641 sk->sk_wmem_queued += nskb->truesize;
2642 sk_mem_charge(sk, nskb->truesize);
2643 skb = nskb;
2644 }
2645
2646 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
2647 TCP_ECN_send_synack(tcp_sk(sk), skb);
2648 }
2649 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2650 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2651}
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
/*
 * Build a SYN-ACK segment answering the connection request @req.
 *
 * @sk:  listening socket the request arrived on
 * @dst: route to the peer; attached to the new skb, or released here when
 *       the skb cannot be allocated
 * @req: the pending connection request
 * @rvp: extended request values (converted with tcp_xv())
 *
 * Returns the fully built skb (TCP header and options written, not yet
 * transmitted), or NULL if the allocation fails.
 */
2663struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2664 struct request_sock *req,
2665 struct request_values *rvp)
2666{
2667 struct tcp_out_options opts;
2668 struct tcp_extend_values *xvp = tcp_xv(rvp);
2669 struct inet_request_sock *ireq = inet_rsk(req);
2670 struct tcp_sock *tp = tcp_sk(sk);
2671 const struct tcp_cookie_values *cvp = tp->cookie_values;
2672 struct tcphdr *th;
2673 struct sk_buff *skb;
2674 struct tcp_md5sig_key *md5;
2675 int tcp_header_size;
2676 int mss;
2677 int s_data_desired = 0;
2678
	/* Reserve room for constant SYN-ACK payload when one is configured. */
2679 if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
2680 s_data_desired = cvp->s_data_desired;
2681 skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired,
2682 sk_gfp_atomic(sk, GFP_ATOMIC));
2683 if (unlikely(!skb)) {
2684 dst_release(dst);
2685 return NULL;
2686 }
2687
2688 skb_reserve(skb, MAX_TCP_HEADER);
2689
2690 skb_dst_set(skb, dst);
2691
	/* MSS: the route's advmss metric, lowered to user_mss if that is set
	 * and smaller.
	 */
2692 mss = dst_metric_advmss(dst);
2693 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
2694 mss = tp->rx_opt.user_mss;
2695
	/* The receive window of the request has not been chosen yet. */
2696 if (req->rcv_wnd == 0) {
2697 __u8 rcv_wscale;
2698
2699 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
2700
2701
	/* With a user-locked receive buffer, limit the clamp to the full
	 * buffer space.
	 */
2702 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
2703 (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
2704 req->window_clamp = tcp_full_space(sk);
2705
2706
2707 tcp_select_initial_window(tcp_full_space(sk),
2708 mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
2709 &req->rcv_wnd,
2710 &req->window_clamp,
2711 ireq->wscale_ok,
2712 &rcv_wscale,
2713 dst_metric(dst, RTAX_INITRWND));
2714 ireq->rcv_wscale = rcv_wscale;
2715 }
2716
2717 memset(&opts, 0, sizeof(opts));
	/* Note: with CONFIG_SYN_COOKIES the assignment following #endif is
	 * the else-branch of the if inside the #ifdef.
	 */
2718#ifdef CONFIG_SYN_COOKIES
2719 if (unlikely(req->cookie_ts))
2720 TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
2721 else
2722#endif
2723 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2724 tcp_header_size = tcp_synack_options(sk, req, mss,
2725 skb, &opts, &md5, xvp)
2726 + sizeof(*th);
2727
2728 skb_push(skb, tcp_header_size);
2729 skb_reset_transport_header(skb);
2730
	/* Fill in the fixed part of the TCP header. */
2731 th = tcp_hdr(skb);
2732 memset(th, 0, sizeof(struct tcphdr));
2733 th->syn = 1;
2734 th->ack = 1;
2735 TCP_ECN_make_synack(req, th);
2736 th->source = ireq->loc_port;
2737 th->dest = ireq->rmt_port;
2738
2739
2740
2741 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
2742 TCPHDR_SYN | TCPHDR_ACK);
2743
2744 if (OPTION_COOKIE_EXTENSION & opts.options) {
2745 if (s_data_desired) {
2746 u8 *buf = skb_put(skb, s_data_desired);
2747
2748
	/* Append the constant payload and extend the sequence range. */
2749 memcpy(buf, cvp->s_data_payload, s_data_desired);
2750 TCP_SKB_CB(skb)->end_seq += s_data_desired;
2751 }
2752
2753 if (opts.hash_size > 0) {
2754 __u32 workspace[SHA_WORKSPACE_WORDS];
2755 u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS];
2756 u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1];
2757
2758
2759
2760
2761
	/* XOR connection-specific values into the last five words of the
	 * message (timestamp value, both initial sequence numbers + 1, the
	 * port pair and the cvp pointer value), then hash it with
	 * sha_transform().
	 */
2762 *tail-- ^= opts.tsval;
2763 *tail-- ^= tcp_rsk(req)->rcv_isn + 1;
2764 *tail-- ^= TCP_SKB_CB(skb)->seq + 1;
2765
2766
2767 *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source);
2768 *tail-- ^= (u32)(unsigned long)cvp;
2769
2770 sha_transform((__u32 *)&xvp->cookie_bakery[0],
2771 (char *)mess,
2772 &workspace[0]);
2773 opts.hash_location =
2774 (__u8 *)&xvp->cookie_bakery[0];
2775 }
2776 }
2777
2778 th->seq = htonl(TCP_SKB_CB(skb)->seq);
2779 th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
2780
2781
	/* The window field is capped at 65535. */
2782 th->window = htons(min(req->rcv_wnd, 65535U));
2783 tcp_options_write((__be32 *)(th + 1), tp, &opts);
2784 th->doff = (tcp_header_size >> 2);
2785 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb));
2786
2787#ifdef CONFIG_TCP_MD5SIG
2788
	/* NOTE(review): md5 is presumably set by tcp_synack_options() above;
	 * confirm it is always initialised on this path.
	 */
2789 if (md5) {
2790 tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
2791 md5, NULL, req, skb);
2792 }
2793#endif
2794
2795 return skb;
2796}
2797EXPORT_SYMBOL(tcp_make_synack);
2798
2799
2800void tcp_connect_init(struct sock *sk)
2801{
2802 const struct dst_entry *dst = __sk_dst_get(sk);
2803 struct tcp_sock *tp = tcp_sk(sk);
2804 __u8 rcv_wscale;
2805
2806
2807
2808
2809 tp->tcp_header_len = sizeof(struct tcphdr) +
2810 (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
2811
2812#ifdef CONFIG_TCP_MD5SIG
2813 if (tp->af_specific->md5_lookup(sk, sk) != NULL)
2814 tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
2815#endif
2816
2817
2818 if (tp->rx_opt.user_mss)
2819 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
2820 tp->max_window = 0;
2821 tcp_mtup_init(sk);
2822 tcp_sync_mss(sk, dst_mtu(dst));
2823
2824 if (!tp->window_clamp)
2825 tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
2826 tp->advmss = dst_metric_advmss(dst);
2827 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
2828 tp->advmss = tp->rx_opt.user_mss;
2829
2830 tcp_initialize_rcv_mss(sk);
2831
2832
2833 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
2834 (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
2835 tp->window_clamp = tcp_full_space(sk);
2836
2837 tcp_select_initial_window(tcp_full_space(sk),
2838 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
2839 &tp->rcv_wnd,
2840 &tp->window_clamp,
2841 sysctl_tcp_window_scaling,
2842 &rcv_wscale,
2843 dst_metric(dst, RTAX_INITRWND));
2844
2845 tp->rx_opt.rcv_wscale = rcv_wscale;
2846 tp->rcv_ssthresh = tp->rcv_wnd;
2847
2848 sk->sk_err = 0;
2849 sock_reset_flag(sk, SOCK_DONE);
2850 tp->snd_wnd = 0;
2851 tcp_init_wl(tp, 0);
2852 tp->snd_una = tp->write_seq;
2853 tp->snd_sml = tp->write_seq;
2854 tp->snd_up = tp->write_seq;
2855 tp->snd_nxt = tp->write_seq;
2856
2857 if (likely(!tp->repair))
2858 tp->rcv_nxt = 0;
2859 tp->rcv_wup = tp->rcv_nxt;
2860 tp->copied_seq = tp->rcv_nxt;
2861
2862 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
2863 inet_csk(sk)->icsk_retransmits = 0;
2864 tcp_clear_retrans(tp);
2865}
2866
2867static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
2868{
2869 struct tcp_sock *tp = tcp_sk(sk);
2870 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
2871
2872 tcb->end_seq += skb->len;
2873 skb_header_release(skb);
2874 __tcp_add_write_queue_tail(sk, skb);
2875 sk->sk_wmem_queued += skb->truesize;
2876 sk_mem_charge(sk, skb->truesize);
2877 tp->write_seq = tcb->end_seq;
2878 tp->packets_out += tcp_skb_pcount(skb);
2879}
2880
2881
2882
2883
2884
2885
2886
2887
/*
 * Send a SYN that carries data from the pending Fast Open request
 * (tp->fastopen_req), falling back to the plain SYN @syn if that is not
 * possible.
 *
 * On the data path a copy of @syn is expanded, filled from the user's
 * iovec and transmitted.  A second copy of the data, without the SYN flag,
 * is queued for (re)transmission.  On the fallback path @syn itself is
 * transmitted.
 *
 * Returns 0 when the SYN with data was transmitted; otherwise the result of
 * transmitting the plain SYN.
 */
2888static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
2889{
2890 struct tcp_sock *tp = tcp_sk(sk);
2891 struct tcp_fastopen_request *fo = tp->fastopen_req;
2892 int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen;
2893 struct sk_buff *syn_data = NULL, *data;
2894 unsigned long last_syn_loss = 0;
2895
	/* Fetch the cached MSS, cookie and SYN-loss history for this peer. */
2896 tp->rx_opt.mss_clamp = tp->advmss;
2897 tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
2898 &syn_loss, &last_syn_loss);
2899
	/* After more than one recorded SYN loss, hold off for a period that
	 * doubles with each further loss (60*HZ << syn_loss jiffies).
	 */
2900 if (syn_loss > 1 &&
2901 time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
2902 fo->cookie.len = -1;
2903 goto fallback;
2904 }
2905
	/* Without TFO_CLIENT_NO_COOKIE a valid cookie is required. */
2906 if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE)
2907 fo->cookie.len = -1;
2908 else if (fo->cookie.len <= 0)
2909 goto fallback;
2910
2911
2912
2913
2914
	/* Payload room: the MSS for the current path MTU minus the maximum
	 * TCP option space.
	 */
2915 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp)
2916 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
2917 space = tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
2918 MAX_TCP_OPTION_SPACE;
2919
2920 syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
2921 sk->sk_allocation);
2922 if (syn_data == NULL)
2923 goto fallback;
2924
	/* Copy user data into the SYN until it is full or the iovec ends. */
2925 for (i = 0; i < iovlen && syn_data->len < space; ++i) {
2926 struct iovec *iov = &fo->data->msg_iov[i];
2927 unsigned char __user *from = iov->iov_base;
2928 int len = iov->iov_len;
2929
2930 if (syn_data->len + len > space)
2931 len = space - syn_data->len;
2932 else if (i + 1 == iovlen)
2933
	/* The last iovec fits entirely: no user data is left over. */
2934 fo->data = NULL;
2935
2936 if (skb_add_data(syn_data, from, len))
2937 goto fallback;
2938 }
2939
2940
	/* Queue a second copy of the data, without SYN, one sequence number
	 * after the SYN.
	 * NOTE(review): the "&= ~TCPHDR_SYN" below is immediately overwritten
	 * by the plain assignment that follows it, so it has no effect.
	 */
2941 data = pskb_copy(syn_data, sk->sk_allocation);
2942 if (data == NULL)
2943 goto fallback;
2944 TCP_SKB_CB(data)->seq++;
2945 TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN;
2946 TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH);
2947 tcp_connect_queue_skb(sk, data);
2948 fo->copied = data->len;
2949
2950 if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {
2951 tp->syn_data = (fo->copied > 0);
2952 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
2953 goto done;
2954 }
	/* Transmission failed: clear the pointer so the kfree_skb() below
	 * does not touch it.
	 * NOTE(review): presumably tcp_transmit_skb() consumed the skb; confirm.
	 */
2955 syn_data = NULL;
2956
2957fallback:
2958
	/* Send the plain SYN.  A positive cookie length is reset to 0 first. */
2959 if (fo->cookie.len > 0)
2960 fo->cookie.len = 0;
2961 err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
2962 if (err)
2963 tp->syn_fastopen = 0;
2964 kfree_skb(syn_data);
2965done:
	/* Both paths leave the cookie length at -1. */
2966 fo->cookie.len = -1;
2967 return err;
2968}
2969
2970
/*
 * Start an active open on @sk: initialise connection state, build a SYN,
 * queue it on the write queue and transmit it (with data when a Fast Open
 * request is pending), then arm the retransmit timer.
 *
 * Returns -ENOBUFS if the SYN skb cannot be allocated, -ECONNREFUSED if the
 * transmit path reports it, and 0 otherwise. Any other transmit error is
 * not propagated; the retransmit timer is armed regardless.
 */
int tcp_connect(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *syn;
	int rc;

	tcp_connect_init(sk);

	syn = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
	if (unlikely(syn == NULL))
		return -ENOBUFS;

	/* Leave headroom for the protocol headers. */
	skb_reserve(syn, MAX_TCP_HEADER);

	/* The SYN takes the current write_seq, which is then advanced. */
	tcp_init_nondata_skb(syn, tp->write_seq++, TCPHDR_SYN);
	TCP_SKB_CB(syn)->when = tcp_time_stamp;
	tp->retrans_stamp = TCP_SKB_CB(syn)->when;
	tcp_connect_queue_skb(sk, syn);
	TCP_ECN_send_syn(sk, syn);

	/* Transmit: the Fast Open path if a request is pending, else a
	 * clone of the queued SYN.
	 */
	if (tp->fastopen_req)
		rc = tcp_send_syn_data(sk, syn);
	else
		rc = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
	if (rc == -ECONNREFUSED)
		return rc;

	/* snd_nxt and pushed_seq are only moved forward after the transmit
	 * attempt above.
	 */
	tp->snd_nxt = tp->write_seq;
	tp->pushed_seq = tp->write_seq;
	TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);

	/* Arm the retransmit timer with the current RTO. */
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
	return 0;
}
EXPORT_SYMBOL(tcp_connect);
3010
3011
3012
3013
3014
3015void tcp_send_delayed_ack(struct sock *sk)
3016{
3017 struct inet_connection_sock *icsk = inet_csk(sk);
3018 int ato = icsk->icsk_ack.ato;
3019 unsigned long timeout;
3020
3021 if (ato > TCP_DELACK_MIN) {
3022 const struct tcp_sock *tp = tcp_sk(sk);
3023 int max_ato = HZ / 2;
3024
3025 if (icsk->icsk_ack.pingpong ||
3026 (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
3027 max_ato = TCP_DELACK_MAX;
3028
3029
3030
3031
3032
3033
3034
3035 if (tp->srtt) {
3036 int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
3037
3038 if (rtt < max_ato)
3039 max_ato = rtt;
3040 }
3041
3042 ato = min(ato, max_ato);
3043 }
3044
3045
3046 timeout = jiffies + ato;
3047
3048
3049 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
3050
3051
3052
3053 if (icsk->icsk_ack.blocked ||
3054 time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
3055 tcp_send_ack(sk);
3056 return;
3057 }
3058
3059 if (!time_before(timeout, icsk->icsk_ack.timeout))
3060 timeout = icsk->icsk_ack.timeout;
3061 }
3062 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
3063 icsk->icsk_ack.timeout = timeout;
3064 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
3065}
3066
3067
3068void tcp_send_ack(struct sock *sk)
3069{
3070 struct sk_buff *buff;
3071
3072
3073 if (sk->sk_state == TCP_CLOSE)
3074 return;
3075
3076
3077
3078
3079
3080 buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
3081 if (buff == NULL) {
3082 inet_csk_schedule_ack(sk);
3083 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
3084 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
3085 TCP_DELACK_MAX, TCP_RTO_MAX);
3086 return;
3087 }
3088
3089
3090 skb_reserve(buff, MAX_TCP_HEADER);
3091 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
3092
3093
3094 TCP_SKB_CB(buff)->when = tcp_time_stamp;
3095 tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
3096}
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
3110{
3111 struct tcp_sock *tp = tcp_sk(sk);
3112 struct sk_buff *skb;
3113
3114
3115 skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
3116 if (skb == NULL)
3117 return -1;
3118
3119
3120 skb_reserve(skb, MAX_TCP_HEADER);
3121
3122
3123
3124
3125 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
3126 TCP_SKB_CB(skb)->when = tcp_time_stamp;
3127 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
3128}
3129
3130void tcp_send_window_probe(struct sock *sk)
3131{
3132 if (sk->sk_state == TCP_ESTABLISHED) {
3133 tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
3134 tcp_sk(sk)->snd_nxt = tcp_sk(sk)->write_seq;
3135 tcp_xmit_probe_skb(sk, 0);
3136 }
3137}
3138
3139
3140int tcp_write_wakeup(struct sock *sk)
3141{
3142 struct tcp_sock *tp = tcp_sk(sk);
3143 struct sk_buff *skb;
3144
3145 if (sk->sk_state == TCP_CLOSE)
3146 return -1;
3147
3148 if ((skb = tcp_send_head(sk)) != NULL &&
3149 before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
3150 int err;
3151 unsigned int mss = tcp_current_mss(sk);
3152 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
3153
3154 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
3155 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
3156
3157
3158
3159
3160
3161 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
3162 skb->len > mss) {
3163 seg_size = min(seg_size, mss);
3164 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3165 if (tcp_fragment(sk, skb, seg_size, mss))
3166 return -1;
3167 } else if (!tcp_skb_pcount(skb))
3168 tcp_set_skb_tso_segs(sk, skb, mss);
3169
3170 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3171 TCP_SKB_CB(skb)->when = tcp_time_stamp;
3172 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
3173 if (!err)
3174 tcp_event_new_data_sent(sk, skb);
3175 return err;
3176 } else {
3177 if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
3178 tcp_xmit_probe_skb(sk, 1);
3179 return tcp_xmit_probe_skb(sk, 0);
3180 }
3181}
3182
3183
3184
3185
3186void tcp_send_probe0(struct sock *sk)
3187{
3188 struct inet_connection_sock *icsk = inet_csk(sk);
3189 struct tcp_sock *tp = tcp_sk(sk);
3190 int err;
3191
3192 err = tcp_write_wakeup(sk);
3193
3194 if (tp->packets_out || !tcp_send_head(sk)) {
3195
3196 icsk->icsk_probes_out = 0;
3197 icsk->icsk_backoff = 0;
3198 return;
3199 }
3200
3201 if (err <= 0) {
3202 if (icsk->icsk_backoff < sysctl_tcp_retries2)
3203 icsk->icsk_backoff++;
3204 icsk->icsk_probes_out++;
3205 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3206 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
3207 TCP_RTO_MAX);
3208 } else {
3209
3210
3211
3212
3213
3214
3215 if (!icsk->icsk_probes_out)
3216 icsk->icsk_probes_out = 1;
3217 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3218 min(icsk->icsk_rto << icsk->icsk_backoff,
3219 TCP_RESOURCE_PROBE_INTERVAL),
3220 TCP_RTO_MAX);
3221 }
3222}
3223