1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37#define pr_fmt(fmt) "TCP: " fmt
38
39#include <net/tcp.h>
40
41#include <linux/compiler.h>
42#include <linux/gfp.h>
43#include <linux/module.h>
44
45
46int sysctl_tcp_retrans_collapse __read_mostly = 1;
47
48
49
50
51int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
52
53
54int sysctl_tcp_limit_output_bytes __read_mostly = 131072;
55
56
57
58
59
60int sysctl_tcp_tso_win_divisor __read_mostly = 3;
61
62int sysctl_tcp_mtu_probing __read_mostly = 0;
63int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
64
65
66int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
67
68int sysctl_tcp_cookie_size __read_mostly = 0;
69EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);
70
71static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
72 int push_one, gfp_t gfp);
73
74
75static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
76{
77 struct tcp_sock *tp = tcp_sk(sk);
78 unsigned int prior_packets = tp->packets_out;
79
80 tcp_advance_send_head(sk, skb);
81 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
82
83
84 if (tp->frto_counter == 2)
85 tp->frto_counter = 3;
86
87 tp->packets_out += tcp_skb_pcount(skb);
88 if (!prior_packets || tp->early_retrans_delayed)
89 tcp_rearm_rto(sk);
90}
91
92
93
94
95
96
97
98static inline __u32 tcp_acceptable_seq(const struct sock *sk)
99{
100 const struct tcp_sock *tp = tcp_sk(sk);
101
102 if (!before(tcp_wnd_end(tp), tp->snd_nxt))
103 return tp->snd_nxt;
104 else
105 return tcp_wnd_end(tp);
106}
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122static __u16 tcp_advertise_mss(struct sock *sk)
123{
124 struct tcp_sock *tp = tcp_sk(sk);
125 const struct dst_entry *dst = __sk_dst_get(sk);
126 int mss = tp->advmss;
127
128 if (dst) {
129 unsigned int metric = dst_metric_advmss(dst);
130
131 if (metric < mss) {
132 mss = metric;
133 tp->advmss = mss;
134 }
135 }
136
137 return (__u16)mss;
138}
139
140
141
142static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst)
143{
144 struct tcp_sock *tp = tcp_sk(sk);
145 s32 delta = tcp_time_stamp - tp->lsndtime;
146 u32 restart_cwnd = tcp_init_cwnd(tp, dst);
147 u32 cwnd = tp->snd_cwnd;
148
149 tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
150
151 tp->snd_ssthresh = tcp_current_ssthresh(sk);
152 restart_cwnd = min(restart_cwnd, cwnd);
153
154 while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
155 cwnd >>= 1;
156 tp->snd_cwnd = max(cwnd, restart_cwnd);
157 tp->snd_cwnd_stamp = tcp_time_stamp;
158 tp->snd_cwnd_used = 0;
159}
160
161
162static void tcp_event_data_sent(struct tcp_sock *tp,
163 struct sock *sk)
164{
165 struct inet_connection_sock *icsk = inet_csk(sk);
166 const u32 now = tcp_time_stamp;
167
168 if (sysctl_tcp_slow_start_after_idle &&
169 (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
170 tcp_cwnd_restart(sk, __sk_dst_get(sk));
171
172 tp->lsndtime = now;
173
174
175
176
177 if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
178 icsk->icsk_ack.pingpong = 1;
179}
180
181
182static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
183{
184 tcp_dec_quickack_mode(sk, pkts);
185 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
186}
187
188
189
190
191
192
193
194
195void tcp_select_initial_window(int __space, __u32 mss,
196 __u32 *rcv_wnd, __u32 *window_clamp,
197 int wscale_ok, __u8 *rcv_wscale,
198 __u32 init_rcv_wnd)
199{
200 unsigned int space = (__space < 0 ? 0 : __space);
201
202
203 if (*window_clamp == 0)
204 (*window_clamp) = (65535 << 14);
205 space = min(*window_clamp, space);
206
207
208 if (space > mss)
209 space = (space / mss) * mss;
210
211
212
213
214
215
216
217
218
219 if (sysctl_tcp_workaround_signed_windows)
220 (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
221 else
222 (*rcv_wnd) = space;
223
224 (*rcv_wscale) = 0;
225 if (wscale_ok) {
226
227
228
229 space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
230 space = min_t(u32, space, *window_clamp);
231 while (space > 65535 && (*rcv_wscale) < 14) {
232 space >>= 1;
233 (*rcv_wscale)++;
234 }
235 }
236
237
238
239
240
241 if (mss > (1 << *rcv_wscale)) {
242 int init_cwnd = TCP_DEFAULT_INIT_RCVWND;
243 if (mss > 1460)
244 init_cwnd =
245 max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
246
247
248
249 if (init_rcv_wnd)
250 *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
251 else
252 *rcv_wnd = min(*rcv_wnd, init_cwnd * mss);
253 }
254
255
256 (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
257}
258EXPORT_SYMBOL(tcp_select_initial_window);
259
260
261
262
263
264
265static u16 tcp_select_window(struct sock *sk)
266{
267 struct tcp_sock *tp = tcp_sk(sk);
268 u32 cur_win = tcp_receive_window(tp);
269 u32 new_win = __tcp_select_window(sk);
270
271
272 if (new_win < cur_win) {
273
274
275
276
277
278
279
280 new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
281 }
282 tp->rcv_wnd = new_win;
283 tp->rcv_wup = tp->rcv_nxt;
284
285
286
287
288 if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
289 new_win = min(new_win, MAX_TCP_WINDOW);
290 else
291 new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
292
293
294 new_win >>= tp->rx_opt.rcv_wscale;
295
296
297 if (new_win == 0)
298 tp->pred_flags = 0;
299
300 return new_win;
301}
302
303
304static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb)
305{
306 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
307 if (!(tp->ecn_flags & TCP_ECN_OK))
308 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
309}
310
311
312static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
313{
314 struct tcp_sock *tp = tcp_sk(sk);
315
316 tp->ecn_flags = 0;
317 if (sysctl_tcp_ecn == 1) {
318 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
319 tp->ecn_flags = TCP_ECN_OK;
320 }
321}
322
323static __inline__ void
324TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th)
325{
326 if (inet_rsk(req)->ecn_ok)
327 th->ece = 1;
328}
329
330
331
332
333static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
334 int tcp_header_len)
335{
336 struct tcp_sock *tp = tcp_sk(sk);
337
338 if (tp->ecn_flags & TCP_ECN_OK) {
339
340 if (skb->len != tcp_header_len &&
341 !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
342 INET_ECN_xmit(sk);
343 if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
344 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
345 tcp_hdr(skb)->cwr = 1;
346 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
347 }
348 } else {
349
350 INET_ECN_dontxmit(sk);
351 }
352 if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
353 tcp_hdr(skb)->ece = 1;
354 }
355}
356
357
358
359
360static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
361{
362 skb->ip_summed = CHECKSUM_PARTIAL;
363 skb->csum = 0;
364
365 TCP_SKB_CB(skb)->tcp_flags = flags;
366 TCP_SKB_CB(skb)->sacked = 0;
367
368 skb_shinfo(skb)->gso_segs = 1;
369 skb_shinfo(skb)->gso_size = 0;
370 skb_shinfo(skb)->gso_type = 0;
371
372 TCP_SKB_CB(skb)->seq = seq;
373 if (flags & (TCPHDR_SYN | TCPHDR_FIN))
374 seq++;
375 TCP_SKB_CB(skb)->end_seq = seq;
376}
377
378static inline bool tcp_urg_mode(const struct tcp_sock *tp)
379{
380 return tp->snd_una != tp->snd_up;
381}
382
383#define OPTION_SACK_ADVERTISE (1 << 0)
384#define OPTION_TS (1 << 1)
385#define OPTION_MD5 (1 << 2)
386#define OPTION_WSCALE (1 << 3)
387#define OPTION_COOKIE_EXTENSION (1 << 4)
388#define OPTION_FAST_OPEN_COOKIE (1 << 8)
389
390struct tcp_out_options {
391 u16 options;
392 u16 mss;
393 u8 ws;
394 u8 num_sack_blocks;
395 u8 hash_size;
396 __u8 *hash_location;
397 __u32 tsval, tsecr;
398 struct tcp_fastopen_cookie *fastopen_cookie;
399};
400
401
402
403static u8 tcp_cookie_size_check(u8 desired)
404{
405 int cookie_size;
406
407 if (desired > 0)
408
409 return desired;
410
411 cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
412 if (cookie_size <= 0)
413
414 return 0;
415
416 if (cookie_size <= TCP_COOKIE_MIN)
417
418 return TCP_COOKIE_MIN;
419
420 if (cookie_size >= TCP_COOKIE_MAX)
421
422 return TCP_COOKIE_MAX;
423
424 if (cookie_size & 1)
425
426 cookie_size++;
427
428 return (u8)cookie_size;
429}
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
445 struct tcp_out_options *opts)
446{
447 u16 options = opts->options;
448
449
450
451
452
453
454
455
456
457 if (unlikely(OPTION_MD5 & options)) {
458 if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
459 *ptr++ = htonl((TCPOPT_COOKIE << 24) |
460 (TCPOLEN_COOKIE_BASE << 16) |
461 (TCPOPT_MD5SIG << 8) |
462 TCPOLEN_MD5SIG);
463 } else {
464 *ptr++ = htonl((TCPOPT_NOP << 24) |
465 (TCPOPT_NOP << 16) |
466 (TCPOPT_MD5SIG << 8) |
467 TCPOLEN_MD5SIG);
468 }
469 options &= ~OPTION_COOKIE_EXTENSION;
470
471 opts->hash_location = (__u8 *)ptr;
472 ptr += 4;
473 }
474
475 if (unlikely(opts->mss)) {
476 *ptr++ = htonl((TCPOPT_MSS << 24) |
477 (TCPOLEN_MSS << 16) |
478 opts->mss);
479 }
480
481 if (likely(OPTION_TS & options)) {
482 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
483 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
484 (TCPOLEN_SACK_PERM << 16) |
485 (TCPOPT_TIMESTAMP << 8) |
486 TCPOLEN_TIMESTAMP);
487 options &= ~OPTION_SACK_ADVERTISE;
488 } else {
489 *ptr++ = htonl((TCPOPT_NOP << 24) |
490 (TCPOPT_NOP << 16) |
491 (TCPOPT_TIMESTAMP << 8) |
492 TCPOLEN_TIMESTAMP);
493 }
494 *ptr++ = htonl(opts->tsval);
495 *ptr++ = htonl(opts->tsecr);
496 }
497
498
499
500
501
502
503
504 if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
505 __u8 *cookie_copy = opts->hash_location;
506 u8 cookie_size = opts->hash_size;
507
508
509
510
511 if (0x2 & cookie_size) {
512 __u8 *p = (__u8 *)ptr;
513
514
515 *p++ = TCPOPT_COOKIE;
516 *p++ = TCPOLEN_COOKIE_BASE + cookie_size;
517 *p++ = *cookie_copy++;
518 *p++ = *cookie_copy++;
519 ptr++;
520 cookie_size -= 2;
521 } else {
522
523 *ptr++ = htonl(((TCPOPT_NOP << 24) |
524 (TCPOPT_NOP << 16) |
525 (TCPOPT_COOKIE << 8) |
526 TCPOLEN_COOKIE_BASE) +
527 cookie_size);
528 }
529
530 if (cookie_size > 0) {
531 memcpy(ptr, cookie_copy, cookie_size);
532 ptr += (cookie_size / 4);
533 }
534 }
535
536 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
537 *ptr++ = htonl((TCPOPT_NOP << 24) |
538 (TCPOPT_NOP << 16) |
539 (TCPOPT_SACK_PERM << 8) |
540 TCPOLEN_SACK_PERM);
541 }
542
543 if (unlikely(OPTION_WSCALE & options)) {
544 *ptr++ = htonl((TCPOPT_NOP << 24) |
545 (TCPOPT_WINDOW << 16) |
546 (TCPOLEN_WINDOW << 8) |
547 opts->ws);
548 }
549
550 if (unlikely(opts->num_sack_blocks)) {
551 struct tcp_sack_block *sp = tp->rx_opt.dsack ?
552 tp->duplicate_sack : tp->selective_acks;
553 int this_sack;
554
555 *ptr++ = htonl((TCPOPT_NOP << 24) |
556 (TCPOPT_NOP << 16) |
557 (TCPOPT_SACK << 8) |
558 (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
559 TCPOLEN_SACK_PERBLOCK)));
560
561 for (this_sack = 0; this_sack < opts->num_sack_blocks;
562 ++this_sack) {
563 *ptr++ = htonl(sp[this_sack].start_seq);
564 *ptr++ = htonl(sp[this_sack].end_seq);
565 }
566
567 tp->rx_opt.dsack = 0;
568 }
569
570 if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
571 struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
572
573 *ptr++ = htonl((TCPOPT_EXP << 24) |
574 ((TCPOLEN_EXP_FASTOPEN_BASE + foc->len) << 16) |
575 TCPOPT_FASTOPEN_MAGIC);
576
577 memcpy(ptr, foc->val, foc->len);
578 if ((foc->len & 3) == 2) {
579 u8 *align = ((u8 *)ptr) + foc->len;
580 align[0] = align[1] = TCPOPT_NOP;
581 }
582 ptr += (foc->len + 3) >> 2;
583 }
584}
585
586
587
588
589static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
590 struct tcp_out_options *opts,
591 struct tcp_md5sig_key **md5)
592{
593 struct tcp_sock *tp = tcp_sk(sk);
594 struct tcp_cookie_values *cvp = tp->cookie_values;
595 unsigned int remaining = MAX_TCP_OPTION_SPACE;
596 u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ?
597 tcp_cookie_size_check(cvp->cookie_desired) :
598 0;
599 struct tcp_fastopen_request *fastopen = tp->fastopen_req;
600
601#ifdef CONFIG_TCP_MD5SIG
602 *md5 = tp->af_specific->md5_lookup(sk, sk);
603 if (*md5) {
604 opts->options |= OPTION_MD5;
605 remaining -= TCPOLEN_MD5SIG_ALIGNED;
606 }
607#else
608 *md5 = NULL;
609#endif
610
611
612
613
614
615
616
617
618
619
620 opts->mss = tcp_advertise_mss(sk);
621 remaining -= TCPOLEN_MSS_ALIGNED;
622
623 if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
624 opts->options |= OPTION_TS;
625 opts->tsval = TCP_SKB_CB(skb)->when;
626 opts->tsecr = tp->rx_opt.ts_recent;
627 remaining -= TCPOLEN_TSTAMP_ALIGNED;
628 }
629 if (likely(sysctl_tcp_window_scaling)) {
630 opts->ws = tp->rx_opt.rcv_wscale;
631 opts->options |= OPTION_WSCALE;
632 remaining -= TCPOLEN_WSCALE_ALIGNED;
633 }
634 if (likely(sysctl_tcp_sack)) {
635 opts->options |= OPTION_SACK_ADVERTISE;
636 if (unlikely(!(OPTION_TS & opts->options)))
637 remaining -= TCPOLEN_SACKPERM_ALIGNED;
638 }
639
640 if (fastopen && fastopen->cookie.len >= 0) {
641 u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len;
642 need = (need + 3) & ~3U;
643 if (remaining >= need) {
644 opts->options |= OPTION_FAST_OPEN_COOKIE;
645 opts->fastopen_cookie = &fastopen->cookie;
646 remaining -= need;
647 tp->syn_fastopen = 1;
648 }
649 }
650
651
652
653
654
655
656 if (*md5 == NULL &&
657 (OPTION_TS & opts->options) &&
658 cookie_size > 0) {
659 int need = TCPOLEN_COOKIE_BASE + cookie_size;
660
661 if (0x2 & need) {
662
663 need += 2;
664
665 if (need > remaining) {
666
667 cookie_size -= 2;
668 need -= 4;
669 }
670 }
671 while (need > remaining && TCP_COOKIE_MIN <= cookie_size) {
672 cookie_size -= 4;
673 need -= 4;
674 }
675 if (TCP_COOKIE_MIN <= cookie_size) {
676 opts->options |= OPTION_COOKIE_EXTENSION;
677 opts->hash_location = (__u8 *)&cvp->cookie_pair[0];
678 opts->hash_size = cookie_size;
679
680
681 cvp->cookie_desired = cookie_size;
682
683 if (cvp->cookie_desired != cvp->cookie_pair_size) {
684
685
686
687
688 get_random_bytes(&cvp->cookie_pair[0],
689 cookie_size);
690 cvp->cookie_pair_size = cookie_size;
691 }
692
693 remaining -= need;
694 }
695 }
696 return MAX_TCP_OPTION_SPACE - remaining;
697}
698
699
700static unsigned int tcp_synack_options(struct sock *sk,
701 struct request_sock *req,
702 unsigned int mss, struct sk_buff *skb,
703 struct tcp_out_options *opts,
704 struct tcp_md5sig_key **md5,
705 struct tcp_extend_values *xvp,
706 struct tcp_fastopen_cookie *foc)
707{
708 struct inet_request_sock *ireq = inet_rsk(req);
709 unsigned int remaining = MAX_TCP_OPTION_SPACE;
710 u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ?
711 xvp->cookie_plus :
712 0;
713
714#ifdef CONFIG_TCP_MD5SIG
715 *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
716 if (*md5) {
717 opts->options |= OPTION_MD5;
718 remaining -= TCPOLEN_MD5SIG_ALIGNED;
719
720
721
722
723
724
725 ireq->tstamp_ok &= !ireq->sack_ok;
726 }
727#else
728 *md5 = NULL;
729#endif
730
731
732 opts->mss = mss;
733 remaining -= TCPOLEN_MSS_ALIGNED;
734
735 if (likely(ireq->wscale_ok)) {
736 opts->ws = ireq->rcv_wscale;
737 opts->options |= OPTION_WSCALE;
738 remaining -= TCPOLEN_WSCALE_ALIGNED;
739 }
740 if (likely(ireq->tstamp_ok)) {
741 opts->options |= OPTION_TS;
742 opts->tsval = TCP_SKB_CB(skb)->when;
743 opts->tsecr = req->ts_recent;
744 remaining -= TCPOLEN_TSTAMP_ALIGNED;
745 }
746 if (likely(ireq->sack_ok)) {
747 opts->options |= OPTION_SACK_ADVERTISE;
748 if (unlikely(!ireq->tstamp_ok))
749 remaining -= TCPOLEN_SACKPERM_ALIGNED;
750 }
751 if (foc != NULL) {
752 u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
753 need = (need + 3) & ~3U;
754 if (remaining >= need) {
755 opts->options |= OPTION_FAST_OPEN_COOKIE;
756 opts->fastopen_cookie = foc;
757 remaining -= need;
758 }
759 }
760
761
762
763 if (*md5 == NULL &&
764 ireq->tstamp_ok &&
765 cookie_plus > TCPOLEN_COOKIE_BASE) {
766 int need = cookie_plus;
767
768 if (0x2 & need) {
769
770 need += 2;
771 }
772 if (need <= remaining) {
773 opts->options |= OPTION_COOKIE_EXTENSION;
774 opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE;
775 remaining -= need;
776 } else {
777
778 xvp->cookie_out_never = 1;
779 opts->hash_size = 0;
780 }
781 }
782 return MAX_TCP_OPTION_SPACE - remaining;
783}
784
785
786
787
788static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
789 struct tcp_out_options *opts,
790 struct tcp_md5sig_key **md5)
791{
792 struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
793 struct tcp_sock *tp = tcp_sk(sk);
794 unsigned int size = 0;
795 unsigned int eff_sacks;
796
797#ifdef CONFIG_TCP_MD5SIG
798 *md5 = tp->af_specific->md5_lookup(sk, sk);
799 if (unlikely(*md5)) {
800 opts->options |= OPTION_MD5;
801 size += TCPOLEN_MD5SIG_ALIGNED;
802 }
803#else
804 *md5 = NULL;
805#endif
806
807 if (likely(tp->rx_opt.tstamp_ok)) {
808 opts->options |= OPTION_TS;
809 opts->tsval = tcb ? tcb->when : 0;
810 opts->tsecr = tp->rx_opt.ts_recent;
811 size += TCPOLEN_TSTAMP_ALIGNED;
812 }
813
814 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
815 if (unlikely(eff_sacks)) {
816 const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
817 opts->num_sack_blocks =
818 min_t(unsigned int, eff_sacks,
819 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
820 TCPOLEN_SACK_PERBLOCK);
821 size += TCPOLEN_SACK_BASE_ALIGNED +
822 opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
823 }
824
825 return size;
826}
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843struct tsq_tasklet {
844 struct tasklet_struct tasklet;
845 struct list_head head;
846};
847static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
848
849static void tcp_tsq_handler(struct sock *sk)
850{
851 if ((1 << sk->sk_state) &
852 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
853 TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
854 tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
855}
856
857
858
859
860
861
862static void tcp_tasklet_func(unsigned long data)
863{
864 struct tsq_tasklet *tsq = (struct tsq_tasklet *)data;
865 LIST_HEAD(list);
866 unsigned long flags;
867 struct list_head *q, *n;
868 struct tcp_sock *tp;
869 struct sock *sk;
870
871 local_irq_save(flags);
872 list_splice_init(&tsq->head, &list);
873 local_irq_restore(flags);
874
875 list_for_each_safe(q, n, &list) {
876 tp = list_entry(q, struct tcp_sock, tsq_node);
877 list_del(&tp->tsq_node);
878
879 sk = (struct sock *)tp;
880 bh_lock_sock(sk);
881
882 if (!sock_owned_by_user(sk)) {
883 tcp_tsq_handler(sk);
884 } else {
885
886 set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
887 }
888 bh_unlock_sock(sk);
889
890 clear_bit(TSQ_QUEUED, &tp->tsq_flags);
891 sk_free(sk);
892 }
893}
894
895#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \
896 (1UL << TCP_WRITE_TIMER_DEFERRED) | \
897 (1UL << TCP_DELACK_TIMER_DEFERRED) | \
898 (1UL << TCP_MTU_REDUCED_DEFERRED))
899
900
901
902
903
904
905
906void tcp_release_cb(struct sock *sk)
907{
908 struct tcp_sock *tp = tcp_sk(sk);
909 unsigned long flags, nflags;
910
911
912 do {
913 flags = tp->tsq_flags;
914 if (!(flags & TCP_DEFERRED_ALL))
915 return;
916 nflags = flags & ~TCP_DEFERRED_ALL;
917 } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
918
919 if (flags & (1UL << TCP_TSQ_DEFERRED))
920 tcp_tsq_handler(sk);
921
922 if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
923 tcp_write_timer_handler(sk);
924 __sock_put(sk);
925 }
926 if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
927 tcp_delack_timer_handler(sk);
928 __sock_put(sk);
929 }
930 if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
931 sk->sk_prot->mtu_reduced(sk);
932 __sock_put(sk);
933 }
934}
935EXPORT_SYMBOL(tcp_release_cb);
936
937void __init tcp_tasklet_init(void)
938{
939 int i;
940
941 for_each_possible_cpu(i) {
942 struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);
943
944 INIT_LIST_HEAD(&tsq->head);
945 tasklet_init(&tsq->tasklet,
946 tcp_tasklet_func,
947 (unsigned long)tsq);
948 }
949}
950
951
952
953
954
955
956static void tcp_wfree(struct sk_buff *skb)
957{
958 struct sock *sk = skb->sk;
959 struct tcp_sock *tp = tcp_sk(sk);
960
961 if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
962 !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
963 unsigned long flags;
964 struct tsq_tasklet *tsq;
965
966
967
968
969 atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc);
970
971
972 local_irq_save(flags);
973 tsq = &__get_cpu_var(tsq_tasklet);
974 list_add(&tp->tsq_node, &tsq->head);
975 tasklet_schedule(&tsq->tasklet);
976 local_irq_restore(flags);
977 } else {
978 sock_wfree(skb);
979 }
980}
981
982
983
984
985
986
987
988
989
990
991
992
993static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
994 gfp_t gfp_mask)
995{
996 const struct inet_connection_sock *icsk = inet_csk(sk);
997 struct inet_sock *inet;
998 struct tcp_sock *tp;
999 struct tcp_skb_cb *tcb;
1000 struct tcp_out_options opts;
1001 unsigned int tcp_options_size, tcp_header_size;
1002 struct tcp_md5sig_key *md5;
1003 struct tcphdr *th;
1004 int err;
1005
1006 BUG_ON(!skb || !tcp_skb_pcount(skb));
1007
1008
1009
1010
1011 if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
1012 __net_timestamp(skb);
1013
1014 if (likely(clone_it)) {
1015 if (unlikely(skb_cloned(skb)))
1016 skb = pskb_copy(skb, gfp_mask);
1017 else
1018 skb = skb_clone(skb, gfp_mask);
1019 if (unlikely(!skb))
1020 return -ENOBUFS;
1021 }
1022
1023 inet = inet_sk(sk);
1024 tp = tcp_sk(sk);
1025 tcb = TCP_SKB_CB(skb);
1026 memset(&opts, 0, sizeof(opts));
1027
1028 if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
1029 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
1030 else
1031 tcp_options_size = tcp_established_options(sk, skb, &opts,
1032 &md5);
1033 tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
1034
1035 if (tcp_packets_in_flight(tp) == 0) {
1036 tcp_ca_event(sk, CA_EVENT_TX_START);
1037 skb->ooo_okay = 1;
1038 } else
1039 skb->ooo_okay = 0;
1040
1041 skb_push(skb, tcp_header_size);
1042 skb_reset_transport_header(skb);
1043
1044 skb_orphan(skb);
1045 skb->sk = sk;
1046 skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
1047 tcp_wfree : sock_wfree;
1048 atomic_add(skb->truesize, &sk->sk_wmem_alloc);
1049
1050
1051 th = tcp_hdr(skb);
1052 th->source = inet->inet_sport;
1053 th->dest = inet->inet_dport;
1054 th->seq = htonl(tcb->seq);
1055 th->ack_seq = htonl(tp->rcv_nxt);
1056 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
1057 tcb->tcp_flags);
1058
1059 if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
1060
1061
1062
1063 th->window = htons(min(tp->rcv_wnd, 65535U));
1064 } else {
1065 th->window = htons(tcp_select_window(sk));
1066 }
1067 th->check = 0;
1068 th->urg_ptr = 0;
1069
1070
1071 if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
1072 if (before(tp->snd_up, tcb->seq + 0x10000)) {
1073 th->urg_ptr = htons(tp->snd_up - tcb->seq);
1074 th->urg = 1;
1075 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
1076 th->urg_ptr = htons(0xFFFF);
1077 th->urg = 1;
1078 }
1079 }
1080
1081 tcp_options_write((__be32 *)(th + 1), tp, &opts);
1082 if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
1083 TCP_ECN_send(sk, skb, tcp_header_size);
1084
1085#ifdef CONFIG_TCP_MD5SIG
1086
1087 if (md5) {
1088 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1089 tp->af_specific->calc_md5_hash(opts.hash_location,
1090 md5, sk, NULL, skb);
1091 }
1092#endif
1093
1094 icsk->icsk_af_ops->send_check(sk, skb);
1095
1096 if (likely(tcb->tcp_flags & TCPHDR_ACK))
1097 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
1098
1099 if (skb->len != tcp_header_size)
1100 tcp_event_data_sent(tp, sk);
1101
1102 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
1103 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
1104 tcp_skb_pcount(skb));
1105
1106 err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
1107 if (likely(err <= 0))
1108 return err;
1109
1110 tcp_enter_cwr(sk, 1);
1111
1112 return net_xmit_eval(err);
1113}
1114
1115
1116
1117
1118
1119
1120static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
1121{
1122 struct tcp_sock *tp = tcp_sk(sk);
1123
1124
1125 tp->write_seq = TCP_SKB_CB(skb)->end_seq;
1126 skb_header_release(skb);
1127 tcp_add_write_queue_tail(sk, skb);
1128 sk->sk_wmem_queued += skb->truesize;
1129 sk_mem_charge(sk, skb->truesize);
1130}
1131
1132
1133static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
1134 unsigned int mss_now)
1135{
1136 if (skb->len <= mss_now || !sk_can_gso(sk) ||
1137 skb->ip_summed == CHECKSUM_NONE) {
1138
1139
1140
1141 skb_shinfo(skb)->gso_segs = 1;
1142 skb_shinfo(skb)->gso_size = 0;
1143 skb_shinfo(skb)->gso_type = 0;
1144 } else {
1145 skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
1146 skb_shinfo(skb)->gso_size = mss_now;
1147 skb_shinfo(skb)->gso_type = sk->sk_gso_type;
1148 }
1149}
1150
1151
1152
1153
1154static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
1155 int decr)
1156{
1157 struct tcp_sock *tp = tcp_sk(sk);
1158
1159 if (!tp->sacked_out || tcp_is_reno(tp))
1160 return;
1161
1162 if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
1163 tp->fackets_out -= decr;
1164}
1165
1166
1167
1168
1169static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
1170{
1171 struct tcp_sock *tp = tcp_sk(sk);
1172
1173 tp->packets_out -= decr;
1174
1175 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1176 tp->sacked_out -= decr;
1177 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
1178 tp->retrans_out -= decr;
1179 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
1180 tp->lost_out -= decr;
1181
1182
1183 if (tcp_is_reno(tp) && decr > 0)
1184 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
1185
1186 tcp_adjust_fackets_out(sk, skb, decr);
1187
1188 if (tp->lost_skb_hint &&
1189 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
1190 (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
1191 tp->lost_cnt_hint -= decr;
1192
1193 tcp_verify_left_out(tp);
1194}
1195
1196
1197
1198
1199
1200
1201int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1202 unsigned int mss_now)
1203{
1204 struct tcp_sock *tp = tcp_sk(sk);
1205 struct sk_buff *buff;
1206 int nsize, old_factor;
1207 int nlen;
1208 u8 flags;
1209
1210 if (WARN_ON(len > skb->len))
1211 return -EINVAL;
1212
1213 nsize = skb_headlen(skb) - len;
1214 if (nsize < 0)
1215 nsize = 0;
1216
1217 if (skb_cloned(skb) &&
1218 skb_is_nonlinear(skb) &&
1219 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
1220 return -ENOMEM;
1221
1222
1223 buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
1224 if (buff == NULL)
1225 return -ENOMEM;
1226
1227 sk->sk_wmem_queued += buff->truesize;
1228 sk_mem_charge(sk, buff->truesize);
1229 nlen = skb->len - len - nsize;
1230 buff->truesize += nlen;
1231 skb->truesize -= nlen;
1232
1233
1234 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1235 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1236 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1237
1238
1239 flags = TCP_SKB_CB(skb)->tcp_flags;
1240 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1241 TCP_SKB_CB(buff)->tcp_flags = flags;
1242 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1243
1244 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
1245
1246 buff->csum = csum_partial_copy_nocheck(skb->data + len,
1247 skb_put(buff, nsize),
1248 nsize, 0);
1249
1250 skb_trim(skb, len);
1251
1252 skb->csum = csum_block_sub(skb->csum, buff->csum, len);
1253 } else {
1254 skb->ip_summed = CHECKSUM_PARTIAL;
1255 skb_split(skb, buff, len);
1256 }
1257
1258 buff->ip_summed = skb->ip_summed;
1259
1260
1261
1262
1263 TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
1264 buff->tstamp = skb->tstamp;
1265
1266 old_factor = tcp_skb_pcount(skb);
1267
1268
1269 tcp_set_skb_tso_segs(sk, skb, mss_now);
1270 tcp_set_skb_tso_segs(sk, buff, mss_now);
1271
1272
1273
1274
1275 if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
1276 int diff = old_factor - tcp_skb_pcount(skb) -
1277 tcp_skb_pcount(buff);
1278
1279 if (diff)
1280 tcp_adjust_pcount(sk, skb, diff);
1281 }
1282
1283
1284 skb_header_release(buff);
1285 tcp_insert_write_queue_after(skb, buff, sk);
1286
1287 return 0;
1288}
1289
1290
1291
1292
1293
1294static void __pskb_trim_head(struct sk_buff *skb, int len)
1295{
1296 int i, k, eat;
1297
1298 eat = min_t(int, len, skb_headlen(skb));
1299 if (eat) {
1300 __skb_pull(skb, eat);
1301 skb->avail_size -= eat;
1302 len -= eat;
1303 if (!len)
1304 return;
1305 }
1306 eat = len;
1307 k = 0;
1308 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1309 int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
1310
1311 if (size <= eat) {
1312 skb_frag_unref(skb, i);
1313 eat -= size;
1314 } else {
1315 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
1316 if (eat) {
1317 skb_shinfo(skb)->frags[k].page_offset += eat;
1318 skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
1319 eat = 0;
1320 }
1321 k++;
1322 }
1323 }
1324 skb_shinfo(skb)->nr_frags = k;
1325
1326 skb_reset_tail_pointer(skb);
1327 skb->data_len -= len;
1328 skb->len = skb->data_len;
1329}
1330
1331
1332int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1333{
1334 if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
1335 return -ENOMEM;
1336
1337 __pskb_trim_head(skb, len);
1338
1339 TCP_SKB_CB(skb)->seq += len;
1340 skb->ip_summed = CHECKSUM_PARTIAL;
1341
1342 skb->truesize -= len;
1343 sk->sk_wmem_queued -= len;
1344 sk_mem_uncharge(sk, len);
1345 sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
1346
1347
1348 if (tcp_skb_pcount(skb) > 1)
1349 tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
1350
1351 return 0;
1352}
1353
1354
1355int tcp_mtu_to_mss(struct sock *sk, int pmtu)
1356{
1357 const struct tcp_sock *tp = tcp_sk(sk);
1358 const struct inet_connection_sock *icsk = inet_csk(sk);
1359 int mss_now;
1360
1361
1362
1363
1364 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);
1365
1366
1367 if (icsk->icsk_af_ops->net_frag_header_len) {
1368 const struct dst_entry *dst = __sk_dst_get(sk);
1369
1370 if (dst && dst_allfrag(dst))
1371 mss_now -= icsk->icsk_af_ops->net_frag_header_len;
1372 }
1373
1374
1375 if (mss_now > tp->rx_opt.mss_clamp)
1376 mss_now = tp->rx_opt.mss_clamp;
1377
1378
1379 mss_now -= icsk->icsk_ext_hdr_len;
1380
1381
1382 if (mss_now < 48)
1383 mss_now = 48;
1384
1385
1386 mss_now -= tp->tcp_header_len - sizeof(struct tcphdr);
1387
1388 return mss_now;
1389}
1390
1391
1392int tcp_mss_to_mtu(struct sock *sk, int mss)
1393{
1394 const struct tcp_sock *tp = tcp_sk(sk);
1395 const struct inet_connection_sock *icsk = inet_csk(sk);
1396 int mtu;
1397
1398 mtu = mss +
1399 tp->tcp_header_len +
1400 icsk->icsk_ext_hdr_len +
1401 icsk->icsk_af_ops->net_header_len;
1402
1403
1404 if (icsk->icsk_af_ops->net_frag_header_len) {
1405 const struct dst_entry *dst = __sk_dst_get(sk);
1406
1407 if (dst && dst_allfrag(dst))
1408 mtu += icsk->icsk_af_ops->net_frag_header_len;
1409 }
1410 return mtu;
1411}
1412
1413
1414void tcp_mtup_init(struct sock *sk)
1415{
1416 struct tcp_sock *tp = tcp_sk(sk);
1417 struct inet_connection_sock *icsk = inet_csk(sk);
1418
1419 icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1;
1420 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
1421 icsk->icsk_af_ops->net_header_len;
1422 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
1423 icsk->icsk_mtup.probe_size = 0;
1424}
1425EXPORT_SYMBOL(tcp_mtup_init);
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
1450{
1451 struct tcp_sock *tp = tcp_sk(sk);
1452 struct inet_connection_sock *icsk = inet_csk(sk);
1453 int mss_now;
1454
1455 if (icsk->icsk_mtup.search_high > pmtu)
1456 icsk->icsk_mtup.search_high = pmtu;
1457
1458 mss_now = tcp_mtu_to_mss(sk, pmtu);
1459 mss_now = tcp_bound_to_half_wnd(tp, mss_now);
1460
1461
1462 icsk->icsk_pmtu_cookie = pmtu;
1463 if (icsk->icsk_mtup.enabled)
1464 mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low));
1465 tp->mss_cache = mss_now;
1466
1467 return mss_now;
1468}
1469EXPORT_SYMBOL(tcp_sync_mss);
1470
1471
1472
1473
1474unsigned int tcp_current_mss(struct sock *sk)
1475{
1476 const struct tcp_sock *tp = tcp_sk(sk);
1477 const struct dst_entry *dst = __sk_dst_get(sk);
1478 u32 mss_now;
1479 unsigned int header_len;
1480 struct tcp_out_options opts;
1481 struct tcp_md5sig_key *md5;
1482
1483 mss_now = tp->mss_cache;
1484
1485 if (dst) {
1486 u32 mtu = dst_mtu(dst);
1487 if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
1488 mss_now = tcp_sync_mss(sk, mtu);
1489 }
1490
1491 header_len = tcp_established_options(sk, NULL, &opts, &md5) +
1492 sizeof(struct tcphdr);
1493
1494
1495
1496
1497 if (header_len != tp->tcp_header_len) {
1498 int delta = (int) header_len - tp->tcp_header_len;
1499 mss_now -= delta;
1500 }
1501
1502 return mss_now;
1503}
1504
1505
1506static void tcp_cwnd_validate(struct sock *sk)
1507{
1508 struct tcp_sock *tp = tcp_sk(sk);
1509
1510 if (tp->packets_out >= tp->snd_cwnd) {
1511
1512 tp->snd_cwnd_used = 0;
1513 tp->snd_cwnd_stamp = tcp_time_stamp;
1514 } else {
1515
1516 if (tp->packets_out > tp->snd_cwnd_used)
1517 tp->snd_cwnd_used = tp->packets_out;
1518
1519 if (sysctl_tcp_slow_start_after_idle &&
1520 (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
1521 tcp_cwnd_application_limited(sk);
1522 }
1523}
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
1538 unsigned int mss_now, unsigned int max_segs)
1539{
1540 const struct tcp_sock *tp = tcp_sk(sk);
1541 u32 needed, window, max_len;
1542
1543 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1544 max_len = mss_now * max_segs;
1545
1546 if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
1547 return max_len;
1548
1549 needed = min(skb->len, window);
1550
1551 if (max_len <= needed)
1552 return max_len;
1553
1554 return needed - needed % mss_now;
1555}
1556
1557
1558
1559
1560static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
1561 const struct sk_buff *skb)
1562{
1563 u32 in_flight, cwnd;
1564
1565
1566 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
1567 tcp_skb_pcount(skb) == 1)
1568 return 1;
1569
1570 in_flight = tcp_packets_in_flight(tp);
1571 cwnd = tp->snd_cwnd;
1572 if (in_flight < cwnd)
1573 return (cwnd - in_flight);
1574
1575 return 0;
1576}
1577
1578
1579
1580
1581
1582static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
1583 unsigned int mss_now)
1584{
1585 int tso_segs = tcp_skb_pcount(skb);
1586
1587 if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
1588 tcp_set_skb_tso_segs(sk, skb, mss_now);
1589 tso_segs = tcp_skb_pcount(skb);
1590 }
1591 return tso_segs;
1592}
1593
1594
1595static inline bool tcp_minshall_check(const struct tcp_sock *tp)
1596{
1597 return after(tp->snd_sml, tp->snd_una) &&
1598 !after(tp->snd_sml, tp->snd_nxt);
1599}
1600
1601
1602
1603
1604
1605
1606
1607
1608static inline bool tcp_nagle_check(const struct tcp_sock *tp,
1609 const struct sk_buff *skb,
1610 unsigned int mss_now, int nonagle)
1611{
1612 return skb->len < mss_now &&
1613 ((nonagle & TCP_NAGLE_CORK) ||
1614 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1615}
1616
1617
1618
1619
1620static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
1621 unsigned int cur_mss, int nonagle)
1622{
1623
1624
1625
1626
1627
1628
1629 if (nonagle & TCP_NAGLE_PUSH)
1630 return true;
1631
1632
1633
1634
1635 if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
1636 (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1637 return true;
1638
1639 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
1640 return true;
1641
1642 return false;
1643}
1644
1645
1646static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
1647 const struct sk_buff *skb,
1648 unsigned int cur_mss)
1649{
1650 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
1651
1652 if (skb->len > cur_mss)
1653 end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
1654
1655 return !after(end_seq, tcp_wnd_end(tp));
1656}
1657
1658
1659
1660
1661
/* Combined send test for @skb.
 *
 * Initialises the segment count, then returns 0 if the Nagle test, the
 * congestion window test or the send window test forbids sending.
 * Otherwise returns the congestion window quota from tcp_cwnd_test().
 */
static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
				 unsigned int cur_mss, int nonagle)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	unsigned int quota;

	tcp_init_tso_segs(sk, skb, cur_mss);

	if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
		return 0;

	quota = tcp_cwnd_test(tp, skb);
	if (!quota)
		return 0;

	return tcp_snd_wnd_test(tp, skb, cur_mss) ? quota : 0;
}
1679
1680
1681bool tcp_may_send_now(struct sock *sk)
1682{
1683 const struct tcp_sock *tp = tcp_sk(sk);
1684 struct sk_buff *skb = tcp_send_head(sk);
1685
1686 return skb &&
1687 tcp_snd_test(sk, skb, tcp_current_mss(sk),
1688 (tcp_skb_is_last(sk, skb) ?
1689 tp->nonagle : TCP_NAGLE_PUSH));
1690}
1691
1692
1693
1694
1695
1696
1697
1698
1699static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1700 unsigned int mss_now, gfp_t gfp)
1701{
1702 struct sk_buff *buff;
1703 int nlen = skb->len - len;
1704 u8 flags;
1705
1706
1707 if (skb->len != skb->data_len)
1708 return tcp_fragment(sk, skb, len, mss_now);
1709
1710 buff = sk_stream_alloc_skb(sk, 0, gfp);
1711 if (unlikely(buff == NULL))
1712 return -ENOMEM;
1713
1714 sk->sk_wmem_queued += buff->truesize;
1715 sk_mem_charge(sk, buff->truesize);
1716 buff->truesize += nlen;
1717 skb->truesize -= nlen;
1718
1719
1720 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1721 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1722 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1723
1724
1725 flags = TCP_SKB_CB(skb)->tcp_flags;
1726 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1727 TCP_SKB_CB(buff)->tcp_flags = flags;
1728
1729
1730 TCP_SKB_CB(buff)->sacked = 0;
1731
1732 buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
1733 skb_split(skb, buff, len);
1734
1735
1736 tcp_set_skb_tso_segs(sk, skb, mss_now);
1737 tcp_set_skb_tso_segs(sk, buff, mss_now);
1738
1739
1740 skb_header_release(buff);
1741 tcp_insert_write_queue_after(skb, buff, sk);
1742
1743 return 0;
1744}
1745
1746
1747
1748
1749
1750
1751static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1752{
1753 struct tcp_sock *tp = tcp_sk(sk);
1754 const struct inet_connection_sock *icsk = inet_csk(sk);
1755 u32 send_win, cong_win, limit, in_flight;
1756 int win_divisor;
1757
1758 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1759 goto send_now;
1760
1761 if (icsk->icsk_ca_state != TCP_CA_Open)
1762 goto send_now;
1763
1764
1765 if (tp->tso_deferred &&
1766 (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
1767 goto send_now;
1768
1769 in_flight = tcp_packets_in_flight(tp);
1770
1771 BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));
1772
1773 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1774
1775
1776 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
1777
1778 limit = min(send_win, cong_win);
1779
1780
1781 if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
1782 sk->sk_gso_max_segs * tp->mss_cache))
1783 goto send_now;
1784
1785
1786 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
1787 goto send_now;
1788
1789 win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
1790 if (win_divisor) {
1791 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1792
1793
1794
1795
1796 chunk /= win_divisor;
1797 if (limit >= chunk)
1798 goto send_now;
1799 } else {
1800
1801
1802
1803
1804
1805 if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache)
1806 goto send_now;
1807 }
1808
1809
1810 tp->tso_deferred = 1 | (jiffies << 1);
1811
1812 return true;
1813
1814send_now:
1815 tp->tso_deferred = 0;
1816 return false;
1817}
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
/* Try to build one MTU probe segment from the data at the send head and
 * transmit it.
 *
 * Returns 1 when a probe was transmitted, 0 when the send window or the
 * congestion window currently has no room for it (tcp_write_xmit() stops
 * sending on 0), and -1 when no probe is attempted or it could not be
 * sent (tcp_write_xmit() then carries on with normal transmission).
 */
static int tcp_mtu_probe(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct sk_buff *skb, *nskb, *next;
	int len;
	int probe_size;
	int size_needed;
	int copy;
	int mss_now;

	/* No probe unless probing is enabled, no probe is already
	 * outstanding, the congestion state is Open, cwnd is at least 11
	 * and there is no SACK/D-SACK information pending.
	 */
	if (!icsk->icsk_mtup.enabled ||
	    icsk->icsk_mtup.probe_size ||
	    inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
	    tp->snd_cwnd < 11 ||
	    tp->rx_opt.num_sacks || tp->rx_opt.dsack)
		return -1;

	/* The probe carries twice the cached MSS.  size_needed additionally
	 * requires (reordering + 1) more MSS of data behind it.
	 */
	mss_now = tcp_current_mss(sk);
	probe_size = 2 * tp->mss_cache;
	size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
	if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
		/* The probe would exceed the current search upper bound. */
		return -1;
	}

	/* Not enough unsent data queued. */
	if (tp->write_seq - tp->snd_nxt < size_needed)
		return -1;

	/* The send window can never hold it (-1), or does not hold it
	 * right now (0).
	 */
	if (tp->snd_wnd < size_needed)
		return -1;
	if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
		return 0;

	/* Need two free cwnd slots; with nothing in flight give up (-1)
	 * instead of asking the caller to wait (0).
	 */
	if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
		if (!tcp_packets_in_flight(tp))
			return -1;
		else
			return 0;
	}

	/* Allocate the probe skb and charge it to the socket. */
	if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
		return -1;
	sk->sk_wmem_queued += nskb->truesize;
	sk_mem_charge(sk, nskb->truesize);

	skb = tcp_send_head(sk);

	/* The probe starts at the send head's sequence number and spans
	 * probe_size bytes.
	 */
	TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
	TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
	TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
	TCP_SKB_CB(nskb)->sacked = 0;
	nskb->csum = 0;
	nskb->ip_summed = skb->ip_summed;

	tcp_insert_write_queue_before(nskb, skb, sk);

	/* Copy probe_size bytes out of the queued skbs into the probe. */
	len = 0;
	tcp_for_write_queue_from_safe(skb, next, sk) {
		copy = min_t(int, skb->len, probe_size - len);
		if (nskb->ip_summed)
			skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
		else
			nskb->csum = skb_copy_and_csum_bits(skb, 0,
							    skb_put(nskb, copy),
							    copy, nskb->csum);

		if (skb->len <= copy) {
			/* This skb was consumed entirely: inherit all its
			 * flags, then unlink and free it.
			 */
			TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
			tcp_unlink_write_queue(skb, sk);
			sk_wmem_free_skb(sk, skb);
		} else {
			/* Partially consumed: FIN/PSH stay with the remainder,
			 * whose head is trimmed by the copied amount.
			 */
			TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
						   ~(TCPHDR_FIN|TCPHDR_PSH);
			if (!skb_shinfo(skb)->nr_frags) {
				skb_pull(skb, copy);
				if (skb->ip_summed != CHECKSUM_PARTIAL)
					skb->csum = csum_partial(skb->data,
								 skb->len, 0);
			} else {
				__pskb_trim_head(skb, copy);
				tcp_set_skb_tso_segs(sk, skb, mss_now);
			}
			TCP_SKB_CB(skb)->seq += copy;
		}

		len += copy;

		if (len >= probe_size)
			break;
	}
	/* Using the probe's own length as MSS makes it a single segment. */
	tcp_init_tso_segs(sk, nskb, nskb->len);

	/* Transmit the probe.  On success cwnd is decremented by one and
	 * the probed MTU and sequence range are recorded.
	 */
	TCP_SKB_CB(nskb)->when = tcp_time_stamp;
	if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
		tp->snd_cwnd--;
		tcp_event_new_data_sent(sk, nskb);

		icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
		tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
		tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq;

		return 1;
	}

	return -1;
}
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
/* Transmit as many skbs from the send head as the congestion window, the
 * send window, Nagle/TSO deferral and the output byte limit allow.
 *
 * @mss_now:  MSS used for segmentation and the send tests.
 * @nonagle:  Nagle flags, applied only to the last skb in the queue.
 * @push_one: when non-zero, skip MTU probing and TSO deferral and stop
 *            after one skb.
 * @gfp:      allocation flags for fragmenting and transmitting.
 *
 * Returns false when at least one packet was sent or when tcp_mtu_probe()
 * returned 0.  Otherwise returns true only if no packets are outstanding
 * while data is still waiting at the send head.
 */
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
			   int push_one, gfp_t gfp)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	unsigned int tso_segs, sent_pkts;
	int cwnd_quota;
	int result;

	sent_pkts = 0;

	if (!push_one) {
		/* Try an MTU probe first: 0 means stop here, a positive
		 * result counts as one packet sent.
		 */
		result = tcp_mtu_probe(sk);
		if (!result) {
			return false;
		} else if (result > 0) {
			sent_pkts = 1;
		}
	}

	while ((skb = tcp_send_head(sk))) {
		unsigned int limit;

		tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
		BUG_ON(!tso_segs);

		/* In repair mode on the send queue, skip the tests and the
		 * actual transmission and go straight to the bookkeeping.
		 */
		if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE)
			goto repair;

		cwnd_quota = tcp_cwnd_test(tp, skb);
		if (!cwnd_quota)
			break;

		if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
			break;

		/* Single-segment skbs go through the Nagle test; larger ones
		 * may be deferred by the TSO logic unless pushing one.
		 */
		if (tso_segs == 1) {
			if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
						     (tcp_skb_is_last(sk, skb) ?
						      nonagle : TCP_NAGLE_PUSH))))
				break;
		} else {
			if (!push_one && tcp_tso_should_defer(sk, skb))
				break;
		}

		/* Stop and flag the socket as throttled once sk_wmem_alloc
		 * has reached sysctl_tcp_limit_output_bytes.
		 */
		if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) {
			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
			break;
		}
		/* Send one MSS, or for a multi-segment skb outside urgent
		 * mode, up to the computed split point.
		 */
		limit = mss_now;
		if (tso_segs > 1 && !tcp_urg_mode(tp))
			limit = tcp_mss_split_point(sk, skb, mss_now,
						    min_t(unsigned int,
							  cwnd_quota,
							  sk->sk_gso_max_segs));

		/* Split off whatever exceeds the limit; stop on failure. */
		if (skb->len > limit &&
		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
			break;

		TCP_SKB_CB(skb)->when = tcp_time_stamp;

		if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
			break;

repair:
		/* Advance the send head and account for the skb as sent. */
		tcp_event_new_data_sent(sk, skb);

		tcp_minshall_update(tp, mss_now, skb);
		sent_pkts += tcp_skb_pcount(skb);

		if (push_one)
			break;
	}

	if (likely(sent_pkts)) {
		if (tcp_in_cwnd_reduction(sk))
			tp->prr_out += sent_pkts;
		tcp_cwnd_validate(sk);
		return false;
	}
	/* Nothing sent: report whether data is stuck with nothing in flight. */
	return !tp->packets_out && tcp_send_head(sk);
}
2053
2054
2055
2056
2057
2058void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
2059 int nonagle)
2060{
2061
2062
2063
2064
2065 if (unlikely(sk->sk_state == TCP_CLOSE))
2066 return;
2067
2068 if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
2069 sk_gfp_atomic(sk, GFP_ATOMIC)))
2070 tcp_check_probe_timer(sk);
2071}
2072
2073
2074
2075
2076void tcp_push_one(struct sock *sk, unsigned int mss_now)
2077{
2078 struct sk_buff *skb = tcp_send_head(sk);
2079
2080 BUG_ON(!skb || skb->len < mss_now);
2081
2082 tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
2083}
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137u32 __tcp_select_window(struct sock *sk)
2138{
2139 struct inet_connection_sock *icsk = inet_csk(sk);
2140 struct tcp_sock *tp = tcp_sk(sk);
2141
2142
2143
2144
2145
2146
2147 int mss = icsk->icsk_ack.rcv_mss;
2148 int free_space = tcp_space(sk);
2149 int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
2150 int window;
2151
2152 if (mss > full_space)
2153 mss = full_space;
2154
2155 if (free_space < (full_space >> 1)) {
2156 icsk->icsk_ack.quick = 0;
2157
2158 if (sk_under_memory_pressure(sk))
2159 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
2160 4U * tp->advmss);
2161
2162 if (free_space < mss)
2163 return 0;
2164 }
2165
2166 if (free_space > tp->rcv_ssthresh)
2167 free_space = tp->rcv_ssthresh;
2168
2169
2170
2171
2172 window = tp->rcv_wnd;
2173 if (tp->rx_opt.rcv_wscale) {
2174 window = free_space;
2175
2176
2177
2178
2179
2180 if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
2181 window = (((window >> tp->rx_opt.rcv_wscale) + 1)
2182 << tp->rx_opt.rcv_wscale);
2183 } else {
2184
2185
2186
2187
2188
2189
2190
2191
2192 if (window <= free_space - mss || window > free_space)
2193 window = (free_space / mss) * mss;
2194 else if (mss == full_space &&
2195 free_space > window + (full_space >> 1))
2196 window = free_space;
2197 }
2198
2199 return window;
2200}
2201
2202
2203static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2204{
2205 struct tcp_sock *tp = tcp_sk(sk);
2206 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
2207 int skb_size, next_skb_size;
2208
2209 skb_size = skb->len;
2210 next_skb_size = next_skb->len;
2211
2212 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
2213
2214 tcp_highest_sack_combine(sk, next_skb, skb);
2215
2216 tcp_unlink_write_queue(next_skb, sk);
2217
2218 skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
2219 next_skb_size);
2220
2221 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
2222 skb->ip_summed = CHECKSUM_PARTIAL;
2223
2224 if (skb->ip_summed != CHECKSUM_PARTIAL)
2225 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
2226
2227
2228 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
2229
2230
2231 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
2232
2233
2234
2235
2236 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
2237
2238
2239 tcp_clear_retrans_hints_partial(tp);
2240 if (next_skb == tp->retransmit_skb_hint)
2241 tp->retransmit_skb_hint = skb;
2242
2243 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
2244
2245 sk_wmem_free_skb(sk, next_skb);
2246}
2247
2248
2249static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
2250{
2251 if (tcp_skb_pcount(skb) > 1)
2252 return false;
2253
2254 if (skb_shinfo(skb)->nr_frags != 0)
2255 return false;
2256 if (skb_cloned(skb))
2257 return false;
2258 if (skb == tcp_send_head(sk))
2259 return false;
2260
2261 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2262 return false;
2263
2264 return true;
2265}
2266
2267
2268
2269
2270static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2271 int space)
2272{
2273 struct tcp_sock *tp = tcp_sk(sk);
2274 struct sk_buff *skb = to, *tmp;
2275 bool first = true;
2276
2277 if (!sysctl_tcp_retrans_collapse)
2278 return;
2279 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2280 return;
2281
2282 tcp_for_write_queue_from_safe(skb, tmp, sk) {
2283 if (!tcp_can_collapse(sk, skb))
2284 break;
2285
2286 space -= skb->len;
2287
2288 if (first) {
2289 first = false;
2290 continue;
2291 }
2292
2293 if (space < 0)
2294 break;
2295
2296
2297
2298 if (skb->len > skb_availroom(to))
2299 break;
2300
2301 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
2302 break;
2303
2304 tcp_collapse_retrans(sk, to);
2305 }
2306}
2307
2308
2309
2310
2311
2312int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2313{
2314 struct tcp_sock *tp = tcp_sk(sk);
2315 struct inet_connection_sock *icsk = inet_csk(sk);
2316 unsigned int cur_mss;
2317
2318
2319 if (icsk->icsk_mtup.probe_size) {
2320 icsk->icsk_mtup.probe_size = 0;
2321 }
2322
2323
2324
2325
2326 if (atomic_read(&sk->sk_wmem_alloc) >
2327 min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
2328 return -EAGAIN;
2329
2330 if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
2331 if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
2332 BUG();
2333 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
2334 return -ENOMEM;
2335 }
2336
2337 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
2338 return -EHOSTUNREACH;
2339
2340 cur_mss = tcp_current_mss(sk);
2341
2342
2343
2344
2345
2346
2347 if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
2348 TCP_SKB_CB(skb)->seq != tp->snd_una)
2349 return -EAGAIN;
2350
2351 if (skb->len > cur_mss) {
2352 if (tcp_fragment(sk, skb, cur_mss, cur_mss))
2353 return -ENOMEM;
2354 } else {
2355 int oldpcount = tcp_skb_pcount(skb);
2356
2357 if (unlikely(oldpcount > 1)) {
2358 tcp_init_tso_segs(sk, skb, cur_mss);
2359 tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
2360 }
2361 }
2362
2363 tcp_retrans_try_collapse(sk, skb, cur_mss);
2364
2365
2366
2367
2368
2369 if (skb->len > 0 &&
2370 (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
2371 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
2372 if (!pskb_trim(skb, 0)) {
2373
2374 tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
2375 TCP_SKB_CB(skb)->tcp_flags);
2376 skb->ip_summed = CHECKSUM_NONE;
2377 }
2378 }
2379
2380
2381
2382
2383 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2384
2385
2386 if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) {
2387 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
2388 GFP_ATOMIC);
2389 return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2390 -ENOBUFS;
2391 } else {
2392 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2393 }
2394}
2395
2396int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2397{
2398 struct tcp_sock *tp = tcp_sk(sk);
2399 int err = __tcp_retransmit_skb(sk, skb);
2400
2401 if (err == 0) {
2402
2403 TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
2404
2405 tp->total_retrans++;
2406
2407#if FASTRETRANS_DEBUG > 0
2408 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2409 net_dbg_ratelimited("retrans_out leaked\n");
2410 }
2411#endif
2412 if (!tp->retrans_out)
2413 tp->lost_retrans_low = tp->snd_nxt;
2414 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
2415 tp->retrans_out += tcp_skb_pcount(skb);
2416
2417
2418 if (!tp->retrans_stamp)
2419 tp->retrans_stamp = TCP_SKB_CB(skb)->when;
2420
2421 tp->undo_retrans += tcp_skb_pcount(skb);
2422
2423
2424
2425
2426 TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
2427 }
2428 return err;
2429}
2430
2431
2432
2433
2434static bool tcp_can_forward_retransmit(struct sock *sk)
2435{
2436 const struct inet_connection_sock *icsk = inet_csk(sk);
2437 const struct tcp_sock *tp = tcp_sk(sk);
2438
2439
2440 if (icsk->icsk_ca_state != TCP_CA_Recovery)
2441 return false;
2442
2443
2444 if (tcp_is_reno(tp))
2445 return false;
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455 if (tcp_may_send_now(sk))
2456 return false;
2457
2458 return true;
2459}
2460
2461
2462
2463
2464
2465
2466
2467
2468
/* Walk the write queue up to the send head and retransmit skbs for as
 * long as the congestion window allows.
 *
 * Skbs marked lost below retransmit_high are retransmitted first.  Once
 * the walk reaches retransmit_high it may switch to forward
 * retransmission, restarting from the first skb seen that was neither
 * lost, SACKed nor already retransmitted (the "hole").
 */
void tcp_xmit_retransmit_queue(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	struct sk_buff *hole = NULL;
	u32 last_lost;
	int mib_idx;
	int fwd_rexmitting = 0;

	/* Nothing outstanding, nothing to retransmit. */
	if (!tp->packets_out)
		return;

	if (!tp->lost_out)
		tp->retransmit_high = tp->snd_una;

	/* Resume from the hint if there is one, otherwise start at the
	 * queue head.  last_lost never starts beyond retransmit_high.
	 */
	if (tp->retransmit_skb_hint) {
		skb = tp->retransmit_skb_hint;
		last_lost = TCP_SKB_CB(skb)->end_seq;
		if (after(last_lost, tp->retransmit_high))
			last_lost = tp->retransmit_high;
	} else {
		skb = tcp_write_queue_head(sk);
		last_lost = tp->snd_una;
	}

	tcp_for_write_queue_from(skb, sk) {
		__u8 sacked = TCP_SKB_CB(skb)->sacked;

		/* Data from the send head on has never been sent. */
		if (skb == tcp_send_head(sk))
			break;

		/* The hint follows the walk until a hole has been noted. */
		if (hole == NULL)
			tp->retransmit_skb_hint = skb;

		/* Congestion window is full. */
		if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
			return;

		if (fwd_rexmitting) {
begin_fwd:
			/* Forward retransmission stops at the highest SACKed
			 * sequence.
			 */
			if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
				break;
			mib_idx = LINUX_MIB_TCPFORWARDRETRANS;

		} else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) {
			/* Reached retransmit_high: pull it back to the end of
			 * the last lost skb seen and, if permitted, switch to
			 * forward retransmission from the recorded hole.
			 * NOTE(review): after skb is replaced by hole, 'sacked'
			 * still holds the value read from the previous skb.
			 */
			tp->retransmit_high = last_lost;
			if (!tcp_can_forward_retransmit(sk))
				break;

			if (hole != NULL) {
				skb = hole;
				hole = NULL;
			}
			fwd_rexmitting = 1;
			goto begin_fwd;

		} else if (!(sacked & TCPCB_LOST)) {
			/* Not lost: remember the first skb that is neither
			 * retransmitted nor SACKed, then move on.
			 */
			if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
				hole = skb;
			continue;

		} else {
			/* Lost skb: pick the counter by congestion state. */
			last_lost = TCP_SKB_CB(skb)->end_seq;
			if (icsk->icsk_ca_state != TCP_CA_Loss)
				mib_idx = LINUX_MIB_TCPFASTRETRANS;
			else
				mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
		}

		/* Already SACKed or already retransmitted: skip. */
		if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
			continue;

		if (tcp_retransmit_skb(sk, skb)) {
			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
			return;
		}
		NET_INC_STATS_BH(sock_net(sk), mib_idx);

		if (tcp_in_cwnd_reduction(sk))
			tp->prr_out += tcp_skb_pcount(skb);

		/* Retransmitting the queue head restarts the RTO timer. */
		if (skb == tcp_write_queue_head(sk))
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  inet_csk(sk)->icsk_rto,
						  TCP_RTO_MAX);
	}
}
2563
2564
2565
2566
2567void tcp_send_fin(struct sock *sk)
2568{
2569 struct tcp_sock *tp = tcp_sk(sk);
2570 struct sk_buff *skb = tcp_write_queue_tail(sk);
2571 int mss_now;
2572
2573
2574
2575
2576
2577 mss_now = tcp_current_mss(sk);
2578
2579 if (tcp_send_head(sk) != NULL) {
2580 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
2581 TCP_SKB_CB(skb)->end_seq++;
2582 tp->write_seq++;
2583 } else {
2584
2585 for (;;) {
2586 skb = alloc_skb_fclone(MAX_TCP_HEADER,
2587 sk->sk_allocation);
2588 if (skb)
2589 break;
2590 yield();
2591 }
2592
2593
2594 skb_reserve(skb, MAX_TCP_HEADER);
2595
2596 tcp_init_nondata_skb(skb, tp->write_seq,
2597 TCPHDR_ACK | TCPHDR_FIN);
2598 tcp_queue_skb(sk, skb);
2599 }
2600 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
2601}
2602
2603
2604
2605
2606
2607
2608void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2609{
2610 struct sk_buff *skb;
2611
2612
2613 skb = alloc_skb(MAX_TCP_HEADER, priority);
2614 if (!skb) {
2615 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2616 return;
2617 }
2618
2619
2620 skb_reserve(skb, MAX_TCP_HEADER);
2621 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
2622 TCPHDR_ACK | TCPHDR_RST);
2623
2624 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2625 if (tcp_transmit_skb(sk, skb, 0, priority))
2626 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2627
2628 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
2629}
2630
2631
2632
2633
2634
2635
2636
2637int tcp_send_synack(struct sock *sk)
2638{
2639 struct sk_buff *skb;
2640
2641 skb = tcp_write_queue_head(sk);
2642 if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
2643 pr_debug("%s: wrong queue state\n", __func__);
2644 return -EFAULT;
2645 }
2646 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
2647 if (skb_cloned(skb)) {
2648 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2649 if (nskb == NULL)
2650 return -ENOMEM;
2651 tcp_unlink_write_queue(skb, sk);
2652 skb_header_release(nskb);
2653 __tcp_add_write_queue_head(sk, nskb);
2654 sk_wmem_free_skb(sk, skb);
2655 sk->sk_wmem_queued += nskb->truesize;
2656 sk_mem_charge(sk, nskb->truesize);
2657 skb = nskb;
2658 }
2659
2660 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
2661 TCP_ECN_send_synack(tcp_sk(sk), skb);
2662 }
2663 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2664 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2665}
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
/* Allocate and fill in a SYN-ACK skb for the connection request @req.
 *
 * @sk:  listening socket.
 * @dst: route for the reply; attached to the skb on success and released
 *       here when the skb cannot be allocated.
 * @req: the request being answered.
 * @rvp: request values; accessed through tcp_xv() for the cookie
 *       extension.
 * @foc: fast open cookie, passed on to tcp_synack_options().
 *
 * Returns the finished skb, or NULL when allocation fails.
 */
struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
				struct request_sock *req,
				struct request_values *rvp,
				struct tcp_fastopen_cookie *foc)
{
	struct tcp_out_options opts;
	struct tcp_extend_values *xvp = tcp_xv(rvp);
	struct inet_request_sock *ireq = inet_rsk(req);
	struct tcp_sock *tp = tcp_sk(sk);
	const struct tcp_cookie_values *cvp = tp->cookie_values;
	struct tcphdr *th;
	struct sk_buff *skb;
	struct tcp_md5sig_key *md5;
	int tcp_header_size;
	int mss;
	int s_data_desired = 0;

	/* Reserve room for constant SYN-ACK payload if one is configured. */
	if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
		s_data_desired = cvp->s_data_desired;
	skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired,
			sk_gfp_atomic(sk, GFP_ATOMIC));
	if (unlikely(!skb)) {
		dst_release(dst);
		return NULL;
	}
	/* Leave headroom for the headers. */
	skb_reserve(skb, MAX_TCP_HEADER);

	skb_dst_set(skb, dst);

	/* Advertised MSS: the route's value, lowered by a smaller user MSS. */
	mss = dst_metric_advmss(dst);
	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
		mss = tp->rx_opt.user_mss;

	/* The initial window is chosen only once per request. */
	if (req->rcv_wnd == 0) {
		__u8 rcv_wscale;

		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);

		/* With a locked receive buffer the clamp is limited to the
		 * full space.
		 */
		if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
		    (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
			req->window_clamp = tcp_full_space(sk);

		/* The MSS passed in is reduced by the timestamp option size
		 * when timestamps are in use.
		 */
		tcp_select_initial_window(tcp_full_space(sk),
			mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
			&req->rcv_wnd,
			&req->window_clamp,
			ireq->wscale_ok,
			&rcv_wscale,
			dst_metric(dst, RTAX_INITRWND));
		ireq->rcv_wscale = rcv_wscale;
	}

	memset(&opts, 0, sizeof(opts));
	/* With syncookies built in, the 'else' below pairs with this 'if';
	 * without them the plain timestamp assignment is unconditional.
	 */
#ifdef CONFIG_SYN_COOKIES
	if (unlikely(req->cookie_ts))
		TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
	else
#endif
	TCP_SKB_CB(skb)->when = tcp_time_stamp;
	tcp_header_size = tcp_synack_options(sk, req, mss,
					     skb, &opts, &md5, xvp, foc)
			+ sizeof(*th);

	skb_push(skb, tcp_header_size);
	skb_reset_transport_header(skb);

	/* Fill in the fixed part of the TCP header. */
	th = tcp_hdr(skb);
	memset(th, 0, sizeof(struct tcphdr));
	th->syn = 1;
	th->ack = 1;
	TCP_ECN_make_synack(req, th);
	th->source = ireq->loc_port;
	th->dest = ireq->rmt_port;

	/* Initialise the control block with the ISN sent for this request. */
	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
			     TCPHDR_SYN | TCPHDR_ACK);

	if (OPTION_COOKIE_EXTENSION & opts.options) {
		if (s_data_desired) {
			u8 *buf = skb_put(skb, s_data_desired);

			/* Append the constant payload and extend end_seq. */
			memcpy(buf, cvp->s_data_payload, s_data_desired);
			TCP_SKB_CB(skb)->end_seq += s_data_desired;
		}

		if (opts.hash_size > 0) {
			__u32 workspace[SHA_WORKSPACE_WORDS];
			u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS];
			u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1];

			/* XOR connection-specific values into the end of the
			 * message, last word first: timestamp value, peer
			 * ISN + 1, our sequence + 1, both ports, and the
			 * cookie values pointer.
			 */
			*tail-- ^= opts.tsval;
			*tail-- ^= tcp_rsk(req)->rcv_isn + 1;
			*tail-- ^= TCP_SKB_CB(skb)->seq + 1;

			*tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source);
			*tail-- ^= (u32)(unsigned long)cvp;

			/* Hash the message; the digest at the start of the
			 * bakery becomes the hash location.
			 */
			sha_transform((__u32 *)&xvp->cookie_bakery[0],
				      (char *)mess,
				      &workspace[0]);
			opts.hash_location =
				(__u8 *)&xvp->cookie_bakery[0];
		}
	}

	th->seq = htonl(TCP_SKB_CB(skb)->seq);
	th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);

	/* The window field of a SYN-ACK is capped at 65535. */
	th->window = htons(min(req->rcv_wnd, 65535U));
	tcp_options_write((__be32 *)(th + 1), tp, &opts);
	th->doff = (tcp_header_size >> 2);
	TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb));

#ifdef CONFIG_TCP_MD5SIG
	/* Compute the MD5 signature when a key was found for this peer. */
	if (md5) {
		tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
					       md5, NULL, req, skb);
	}
#endif

	return skb;
}
EXPORT_SYMBOL(tcp_make_synack);
2814
2815
2816void tcp_connect_init(struct sock *sk)
2817{
2818 const struct dst_entry *dst = __sk_dst_get(sk);
2819 struct tcp_sock *tp = tcp_sk(sk);
2820 __u8 rcv_wscale;
2821
2822
2823
2824
2825 tp->tcp_header_len = sizeof(struct tcphdr) +
2826 (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
2827
2828#ifdef CONFIG_TCP_MD5SIG
2829 if (tp->af_specific->md5_lookup(sk, sk) != NULL)
2830 tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
2831#endif
2832
2833
2834 if (tp->rx_opt.user_mss)
2835 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
2836 tp->max_window = 0;
2837 tcp_mtup_init(sk);
2838 tcp_sync_mss(sk, dst_mtu(dst));
2839
2840 if (!tp->window_clamp)
2841 tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
2842 tp->advmss = dst_metric_advmss(dst);
2843 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
2844 tp->advmss = tp->rx_opt.user_mss;
2845
2846 tcp_initialize_rcv_mss(sk);
2847
2848
2849 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
2850 (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
2851 tp->window_clamp = tcp_full_space(sk);
2852
2853 tcp_select_initial_window(tcp_full_space(sk),
2854 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
2855 &tp->rcv_wnd,
2856 &tp->window_clamp,
2857 sysctl_tcp_window_scaling,
2858 &rcv_wscale,
2859 dst_metric(dst, RTAX_INITRWND));
2860
2861 tp->rx_opt.rcv_wscale = rcv_wscale;
2862 tp->rcv_ssthresh = tp->rcv_wnd;
2863
2864 sk->sk_err = 0;
2865 sock_reset_flag(sk, SOCK_DONE);
2866 tp->snd_wnd = 0;
2867 tcp_init_wl(tp, 0);
2868 tp->snd_una = tp->write_seq;
2869 tp->snd_sml = tp->write_seq;
2870 tp->snd_up = tp->write_seq;
2871 tp->snd_nxt = tp->write_seq;
2872
2873 if (likely(!tp->repair))
2874 tp->rcv_nxt = 0;
2875 tp->rcv_wup = tp->rcv_nxt;
2876 tp->copied_seq = tp->rcv_nxt;
2877
2878 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
2879 inet_csk(sk)->icsk_retransmits = 0;
2880 tcp_clear_retrans(tp);
2881}
2882
2883static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
2884{
2885 struct tcp_sock *tp = tcp_sk(sk);
2886 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
2887
2888 tcb->end_seq += skb->len;
2889 skb_header_release(skb);
2890 __tcp_add_write_queue_tail(sk, skb);
2891 sk->sk_wmem_queued += skb->truesize;
2892 sk_mem_charge(sk, skb->truesize);
2893 tp->write_seq = tcb->end_seq;
2894 tp->packets_out += tcp_skb_pcount(skb);
2895}
2896
2897
2898
2899
2900
2901
2902
2903
2904static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
2905{
2906 struct tcp_sock *tp = tcp_sk(sk);
2907 struct tcp_fastopen_request *fo = tp->fastopen_req;
2908 int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen;
2909 struct sk_buff *syn_data = NULL, *data;
2910 unsigned long last_syn_loss = 0;
2911
2912 tp->rx_opt.mss_clamp = tp->advmss;
2913 tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
2914 &syn_loss, &last_syn_loss);
2915
2916 if (syn_loss > 1 &&
2917 time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
2918 fo->cookie.len = -1;
2919 goto fallback;
2920 }
2921
2922 if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE)
2923 fo->cookie.len = -1;
2924 else if (fo->cookie.len <= 0)
2925 goto fallback;
2926
2927
2928
2929
2930
2931 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp)
2932 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
2933 space = tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
2934 MAX_TCP_OPTION_SPACE;
2935
2936 syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
2937 sk->sk_allocation);
2938 if (syn_data == NULL)
2939 goto fallback;
2940
2941 for (i = 0; i < iovlen && syn_data->len < space; ++i) {
2942 struct iovec *iov = &fo->data->msg_iov[i];
2943 unsigned char __user *from = iov->iov_base;
2944 int len = iov->iov_len;
2945
2946 if (syn_data->len + len > space)
2947 len = space - syn_data->len;
2948 else if (i + 1 == iovlen)
2949
2950 fo->data = NULL;
2951
2952 if (skb_add_data(syn_data, from, len))
2953 goto fallback;
2954 }
2955
2956
2957 data = pskb_copy(syn_data, sk->sk_allocation);
2958 if (data == NULL)
2959 goto fallback;
2960 TCP_SKB_CB(data)->seq++;
2961 TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN;
2962 TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH);
2963 tcp_connect_queue_skb(sk, data);
2964 fo->copied = data->len;
2965
2966 if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {
2967 tp->syn_data = (fo->copied > 0);
2968 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
2969 goto done;
2970 }
2971 syn_data = NULL;
2972
2973fallback:
2974
2975 if (fo->cookie.len > 0)
2976 fo->cookie.len = 0;
2977 err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
2978 if (err)
2979 tp->syn_fastopen = 0;
2980 kfree_skb(syn_data);
2981done:
2982 fo->cookie.len = -1;
2983 return err;
2984}
2985
2986
/* Start an active open: build a SYN, queue it and send it.
 *
 * @sk: socket to connect.
 *
 * Returns -ENOBUFS if the SYN skb cannot be allocated and -ECONNREFUSED if
 * the transmit path reports it.  Any other transmit result is ignored: the
 * retransmit timer is armed and 0 is returned.
 */
int tcp_connect(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *syn;
	u32 syn_seq;
	int rc;

	tcp_connect_init(sk);

	syn = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
	if (unlikely(!syn))
		return -ENOBUFS;

	/* Leave headroom for the protocol headers. */
	skb_reserve(syn, MAX_TCP_HEADER);

	/* The SYN takes the current write_seq and consumes one sequence
	 * number.
	 */
	syn_seq = tp->write_seq++;
	tcp_init_nondata_skb(syn, syn_seq, TCPHDR_SYN);

	/* Stamp the skb and remember the same value in retrans_stamp. */
	TCP_SKB_CB(syn)->when = tcp_time_stamp;
	tp->retrans_stamp = TCP_SKB_CB(syn)->when;

	tcp_connect_queue_skb(sk, syn);
	TCP_ECN_send_syn(sk, syn);

	/* With a pending Fast Open request the SYN may carry data. */
	if (tp->fastopen_req)
		rc = tcp_send_syn_data(sk, syn);
	else
		rc = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);

	if (rc == -ECONNREFUSED)
		return rc;

	/* snd_nxt is only moved after the transmit attempt above. */
	tp->snd_nxt = tp->write_seq;
	tp->pushed_seq = tp->write_seq;
	TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);

	/* Arm the retransmit timer with the current RTO. */
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
	return 0;
}
EXPORT_SYMBOL(tcp_connect);
3026
3027
3028
3029
3030
3031void tcp_send_delayed_ack(struct sock *sk)
3032{
3033 struct inet_connection_sock *icsk = inet_csk(sk);
3034 int ato = icsk->icsk_ack.ato;
3035 unsigned long timeout;
3036
3037 if (ato > TCP_DELACK_MIN) {
3038 const struct tcp_sock *tp = tcp_sk(sk);
3039 int max_ato = HZ / 2;
3040
3041 if (icsk->icsk_ack.pingpong ||
3042 (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
3043 max_ato = TCP_DELACK_MAX;
3044
3045
3046
3047
3048
3049
3050
3051 if (tp->srtt) {
3052 int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
3053
3054 if (rtt < max_ato)
3055 max_ato = rtt;
3056 }
3057
3058 ato = min(ato, max_ato);
3059 }
3060
3061
3062 timeout = jiffies + ato;
3063
3064
3065 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
3066
3067
3068
3069 if (icsk->icsk_ack.blocked ||
3070 time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
3071 tcp_send_ack(sk);
3072 return;
3073 }
3074
3075 if (!time_before(timeout, icsk->icsk_ack.timeout))
3076 timeout = icsk->icsk_ack.timeout;
3077 }
3078 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
3079 icsk->icsk_ack.timeout = timeout;
3080 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
3081}
3082
3083
3084void tcp_send_ack(struct sock *sk)
3085{
3086 struct sk_buff *buff;
3087
3088
3089 if (sk->sk_state == TCP_CLOSE)
3090 return;
3091
3092
3093
3094
3095
3096 buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
3097 if (buff == NULL) {
3098 inet_csk_schedule_ack(sk);
3099 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
3100 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
3101 TCP_DELACK_MAX, TCP_RTO_MAX);
3102 return;
3103 }
3104
3105
3106 skb_reserve(buff, MAX_TCP_HEADER);
3107 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
3108
3109
3110 TCP_SKB_CB(buff)->when = tcp_time_stamp;
3111 tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
3112}
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
3126{
3127 struct tcp_sock *tp = tcp_sk(sk);
3128 struct sk_buff *skb;
3129
3130
3131 skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
3132 if (skb == NULL)
3133 return -1;
3134
3135
3136 skb_reserve(skb, MAX_TCP_HEADER);
3137
3138
3139
3140
3141 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
3142 TCP_SKB_CB(skb)->when = tcp_time_stamp;
3143 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
3144}
3145
3146void tcp_send_window_probe(struct sock *sk)
3147{
3148 if (sk->sk_state == TCP_ESTABLISHED) {
3149 tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
3150 tcp_sk(sk)->snd_nxt = tcp_sk(sk)->write_seq;
3151 tcp_xmit_probe_skb(sk, 0);
3152 }
3153}
3154
3155
3156int tcp_write_wakeup(struct sock *sk)
3157{
3158 struct tcp_sock *tp = tcp_sk(sk);
3159 struct sk_buff *skb;
3160
3161 if (sk->sk_state == TCP_CLOSE)
3162 return -1;
3163
3164 if ((skb = tcp_send_head(sk)) != NULL &&
3165 before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
3166 int err;
3167 unsigned int mss = tcp_current_mss(sk);
3168 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
3169
3170 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
3171 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
3172
3173
3174
3175
3176
3177 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
3178 skb->len > mss) {
3179 seg_size = min(seg_size, mss);
3180 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3181 if (tcp_fragment(sk, skb, seg_size, mss))
3182 return -1;
3183 } else if (!tcp_skb_pcount(skb))
3184 tcp_set_skb_tso_segs(sk, skb, mss);
3185
3186 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3187 TCP_SKB_CB(skb)->when = tcp_time_stamp;
3188 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
3189 if (!err)
3190 tcp_event_new_data_sent(sk, skb);
3191 return err;
3192 } else {
3193 if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
3194 tcp_xmit_probe_skb(sk, 1);
3195 return tcp_xmit_probe_skb(sk, 0);
3196 }
3197}
3198
3199
3200
3201
3202void tcp_send_probe0(struct sock *sk)
3203{
3204 struct inet_connection_sock *icsk = inet_csk(sk);
3205 struct tcp_sock *tp = tcp_sk(sk);
3206 int err;
3207
3208 err = tcp_write_wakeup(sk);
3209
3210 if (tp->packets_out || !tcp_send_head(sk)) {
3211
3212 icsk->icsk_probes_out = 0;
3213 icsk->icsk_backoff = 0;
3214 return;
3215 }
3216
3217 if (err <= 0) {
3218 if (icsk->icsk_backoff < sysctl_tcp_retries2)
3219 icsk->icsk_backoff++;
3220 icsk->icsk_probes_out++;
3221 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3222 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
3223 TCP_RTO_MAX);
3224 } else {
3225
3226
3227
3228
3229
3230
3231 if (!icsk->icsk_probes_out)
3232 icsk->icsk_probes_out = 1;
3233 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3234 min(icsk->icsk_rto << icsk->icsk_backoff,
3235 TCP_RESOURCE_PROBE_INTERVAL),
3236 TCP_RTO_MAX);
3237 }
3238}
3239