1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37#define pr_fmt(fmt) "TCP: " fmt
38
39#include <net/tcp.h>
40
41#include <linux/compiler.h>
42#include <linux/gfp.h>
43#include <linux/module.h>
44
45
/* NOTE(review): not referenced in this part of the file; presumably consulted
 * by the retransmit path - confirm.
 */
int sysctl_tcp_retrans_collapse __read_mostly = 1;

/* When non-zero, tcp_select_initial_window() caps the initial window at
 * MAX_TCP_WINDOW, and tcp_select_window() does the same when no receive
 * window scaling is in effect.
 */
int sysctl_tcp_workaround_signed_windows __read_mostly = 0;

/* When greater than zero, tcp_transmit_skb() installs tcp_wfree() instead of
 * sock_wfree() as the skb destructor.
 * NOTE(review): the byte limit itself is presumably enforced in the write
 * path outside this excerpt - confirm.
 */
int sysctl_tcp_limit_output_bytes __read_mostly = 131072;

/* NOTE(review): not referenced in this part of the file. */
int sysctl_tcp_tso_win_divisor __read_mostly = 3;

/* tcp_mtup_init() enables MTU probing on a socket when this is > 1. */
int sysctl_tcp_mtu_probing __read_mostly = 0;
/* MSS that tcp_mtup_init() converts into the initial search_low MTU. */
int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;

/* Gates the idle restart in tcp_event_data_sent() and the
 * application-limited check in tcp_cwnd_validate().
 */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;

/* Default consulted by tcp_cookie_size_check() when no cookie size was
 * previously specified; values <= 0 yield a size of 0.
 */
int sysctl_tcp_cookie_size __read_mostly = 0;
EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);

/* Forward declaration: tcp_tsq_handler() calls this before its definition. */
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
			   int push_one, gfp_t gfp);
73
74
75static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
76{
77 struct tcp_sock *tp = tcp_sk(sk);
78 unsigned int prior_packets = tp->packets_out;
79
80 tcp_advance_send_head(sk, skb);
81 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
82
83
84 if (tp->frto_counter == 2)
85 tp->frto_counter = 3;
86
87 tp->packets_out += tcp_skb_pcount(skb);
88 if (!prior_packets || tp->early_retrans_delayed)
89 tcp_rearm_rto(sk);
90}
91
92
93
94
95
96
97
98static inline __u32 tcp_acceptable_seq(const struct sock *sk)
99{
100 const struct tcp_sock *tp = tcp_sk(sk);
101
102 if (!before(tcp_wnd_end(tp), tp->snd_nxt))
103 return tp->snd_nxt;
104 else
105 return tcp_wnd_end(tp);
106}
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122static __u16 tcp_advertise_mss(struct sock *sk)
123{
124 struct tcp_sock *tp = tcp_sk(sk);
125 const struct dst_entry *dst = __sk_dst_get(sk);
126 int mss = tp->advmss;
127
128 if (dst) {
129 unsigned int metric = dst_metric_advmss(dst);
130
131 if (metric < mss) {
132 mss = metric;
133 tp->advmss = mss;
134 }
135 }
136
137 return (__u16)mss;
138}
139
140
141
142static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst)
143{
144 struct tcp_sock *tp = tcp_sk(sk);
145 s32 delta = tcp_time_stamp - tp->lsndtime;
146 u32 restart_cwnd = tcp_init_cwnd(tp, dst);
147 u32 cwnd = tp->snd_cwnd;
148
149 tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
150
151 tp->snd_ssthresh = tcp_current_ssthresh(sk);
152 restart_cwnd = min(restart_cwnd, cwnd);
153
154 while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
155 cwnd >>= 1;
156 tp->snd_cwnd = max(cwnd, restart_cwnd);
157 tp->snd_cwnd_stamp = tcp_time_stamp;
158 tp->snd_cwnd_used = 0;
159}
160
161
162static void tcp_event_data_sent(struct tcp_sock *tp,
163 struct sock *sk)
164{
165 struct inet_connection_sock *icsk = inet_csk(sk);
166 const u32 now = tcp_time_stamp;
167
168 if (sysctl_tcp_slow_start_after_idle &&
169 (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
170 tcp_cwnd_restart(sk, __sk_dst_get(sk));
171
172 tp->lsndtime = now;
173
174
175
176
177 if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
178 icsk->icsk_ack.pingpong = 1;
179}
180
181
182static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
183{
184 tcp_dec_quickack_mode(sk, pkts);
185 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
186}
187
188
189
190
191
192
193
194
195void tcp_select_initial_window(int __space, __u32 mss,
196 __u32 *rcv_wnd, __u32 *window_clamp,
197 int wscale_ok, __u8 *rcv_wscale,
198 __u32 init_rcv_wnd)
199{
200 unsigned int space = (__space < 0 ? 0 : __space);
201
202
203 if (*window_clamp == 0)
204 (*window_clamp) = (65535 << 14);
205 space = min(*window_clamp, space);
206
207
208 if (space > mss)
209 space = (space / mss) * mss;
210
211
212
213
214
215
216
217
218
219 if (sysctl_tcp_workaround_signed_windows)
220 (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
221 else
222 (*rcv_wnd) = space;
223
224 (*rcv_wscale) = 0;
225 if (wscale_ok) {
226
227
228
229 space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
230 space = min_t(u32, space, *window_clamp);
231 while (space > 65535 && (*rcv_wscale) < 14) {
232 space >>= 1;
233 (*rcv_wscale)++;
234 }
235 }
236
237
238
239
240
241 if (mss > (1 << *rcv_wscale)) {
242 int init_cwnd = TCP_DEFAULT_INIT_RCVWND;
243 if (mss > 1460)
244 init_cwnd =
245 max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
246
247
248
249 if (init_rcv_wnd)
250 *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
251 else
252 *rcv_wnd = min(*rcv_wnd, init_cwnd * mss);
253 }
254
255
256 (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
257}
258EXPORT_SYMBOL(tcp_select_initial_window);
259
260
261
262
263
264
265static u16 tcp_select_window(struct sock *sk)
266{
267 struct tcp_sock *tp = tcp_sk(sk);
268 u32 cur_win = tcp_receive_window(tp);
269 u32 new_win = __tcp_select_window(sk);
270
271
272 if (new_win < cur_win) {
273
274
275
276
277
278
279
280 new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
281 }
282 tp->rcv_wnd = new_win;
283 tp->rcv_wup = tp->rcv_nxt;
284
285
286
287
288 if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
289 new_win = min(new_win, MAX_TCP_WINDOW);
290 else
291 new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
292
293
294 new_win >>= tp->rx_opt.rcv_wscale;
295
296
297 if (new_win == 0)
298 tp->pred_flags = 0;
299
300 return new_win;
301}
302
303
304static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb)
305{
306 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
307 if (!(tp->ecn_flags & TCP_ECN_OK))
308 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
309}
310
311
312static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
313{
314 struct tcp_sock *tp = tcp_sk(sk);
315
316 tp->ecn_flags = 0;
317 if (sysctl_tcp_ecn == 1) {
318 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
319 tp->ecn_flags = TCP_ECN_OK;
320 }
321}
322
323static __inline__ void
324TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th)
325{
326 if (inet_rsk(req)->ecn_ok)
327 th->ece = 1;
328}
329
330
331
332
333static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
334 int tcp_header_len)
335{
336 struct tcp_sock *tp = tcp_sk(sk);
337
338 if (tp->ecn_flags & TCP_ECN_OK) {
339
340 if (skb->len != tcp_header_len &&
341 !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
342 INET_ECN_xmit(sk);
343 if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
344 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
345 tcp_hdr(skb)->cwr = 1;
346 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
347 }
348 } else {
349
350 INET_ECN_dontxmit(sk);
351 }
352 if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
353 tcp_hdr(skb)->ece = 1;
354 }
355}
356
357
358
359
360static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
361{
362 skb->ip_summed = CHECKSUM_PARTIAL;
363 skb->csum = 0;
364
365 TCP_SKB_CB(skb)->tcp_flags = flags;
366 TCP_SKB_CB(skb)->sacked = 0;
367
368 skb_shinfo(skb)->gso_segs = 1;
369 skb_shinfo(skb)->gso_size = 0;
370 skb_shinfo(skb)->gso_type = 0;
371
372 TCP_SKB_CB(skb)->seq = seq;
373 if (flags & (TCPHDR_SYN | TCPHDR_FIN))
374 seq++;
375 TCP_SKB_CB(skb)->end_seq = seq;
376}
377
378static inline bool tcp_urg_mode(const struct tcp_sock *tp)
379{
380 return tp->snd_una != tp->snd_up;
381}
382
/* Bits for tcp_out_options.options: each selects one option for
 * tcp_options_write() to emit.
 */
#define OPTION_SACK_ADVERTISE (1 << 0)
#define OPTION_TS (1 << 1)
#define OPTION_MD5 (1 << 2)
#define OPTION_WSCALE (1 << 3)
#define OPTION_COOKIE_EXTENSION (1 << 4)
#define OPTION_FAST_OPEN_COOKIE (1 << 8)

/* Describes the TCP options of one outgoing header. Filled in by
 * tcp_syn_options(), tcp_synack_options() or tcp_established_options() and
 * consumed by tcp_options_write().
 */
struct tcp_out_options {
	u16 options;		/* bitmask of OPTION_* */
	u16 mss;		/* MSS option is written when non-zero */
	u8 ws;			/* window scale written for OPTION_WSCALE */
	u8 num_sack_blocks;	/* SACK blocks to write; 0 = no SACK option */
	u8 hash_size;		/* cookie payload bytes (OPTION_COOKIE_EXTENSION) */
	__u8 *hash_location;	/* cookie source bytes; with OPTION_MD5 it is
				 * overwritten by tcp_options_write() with the
				 * location reserved for the signature */
	__u32 tsval, tsecr;	/* timestamp value and echo (OPTION_TS) */
	struct tcp_fastopen_cookie *fastopen_cookie;	/* for OPTION_FAST_OPEN_COOKIE */
};
400
401
402
403static u8 tcp_cookie_size_check(u8 desired)
404{
405 int cookie_size;
406
407 if (desired > 0)
408
409 return desired;
410
411 cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
412 if (cookie_size <= 0)
413
414 return 0;
415
416 if (cookie_size <= TCP_COOKIE_MIN)
417
418 return TCP_COOKIE_MIN;
419
420 if (cookie_size >= TCP_COOKIE_MAX)
421
422 return TCP_COOKIE_MAX;
423
424 if (cookie_size & 1)
425
426 cookie_size++;
427
428 return (u8)cookie_size;
429}
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
445 struct tcp_out_options *opts)
446{
447 u16 options = opts->options;
448
449
450
451
452
453
454
455
456
457 if (unlikely(OPTION_MD5 & options)) {
458 if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
459 *ptr++ = htonl((TCPOPT_COOKIE << 24) |
460 (TCPOLEN_COOKIE_BASE << 16) |
461 (TCPOPT_MD5SIG << 8) |
462 TCPOLEN_MD5SIG);
463 } else {
464 *ptr++ = htonl((TCPOPT_NOP << 24) |
465 (TCPOPT_NOP << 16) |
466 (TCPOPT_MD5SIG << 8) |
467 TCPOLEN_MD5SIG);
468 }
469 options &= ~OPTION_COOKIE_EXTENSION;
470
471 opts->hash_location = (__u8 *)ptr;
472 ptr += 4;
473 }
474
475 if (unlikely(opts->mss)) {
476 *ptr++ = htonl((TCPOPT_MSS << 24) |
477 (TCPOLEN_MSS << 16) |
478 opts->mss);
479 }
480
481 if (likely(OPTION_TS & options)) {
482 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
483 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
484 (TCPOLEN_SACK_PERM << 16) |
485 (TCPOPT_TIMESTAMP << 8) |
486 TCPOLEN_TIMESTAMP);
487 options &= ~OPTION_SACK_ADVERTISE;
488 } else {
489 *ptr++ = htonl((TCPOPT_NOP << 24) |
490 (TCPOPT_NOP << 16) |
491 (TCPOPT_TIMESTAMP << 8) |
492 TCPOLEN_TIMESTAMP);
493 }
494 *ptr++ = htonl(opts->tsval);
495 *ptr++ = htonl(opts->tsecr);
496 }
497
498
499
500
501
502
503
504 if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
505 __u8 *cookie_copy = opts->hash_location;
506 u8 cookie_size = opts->hash_size;
507
508
509
510
511 if (0x2 & cookie_size) {
512 __u8 *p = (__u8 *)ptr;
513
514
515 *p++ = TCPOPT_COOKIE;
516 *p++ = TCPOLEN_COOKIE_BASE + cookie_size;
517 *p++ = *cookie_copy++;
518 *p++ = *cookie_copy++;
519 ptr++;
520 cookie_size -= 2;
521 } else {
522
523 *ptr++ = htonl(((TCPOPT_NOP << 24) |
524 (TCPOPT_NOP << 16) |
525 (TCPOPT_COOKIE << 8) |
526 TCPOLEN_COOKIE_BASE) +
527 cookie_size);
528 }
529
530 if (cookie_size > 0) {
531 memcpy(ptr, cookie_copy, cookie_size);
532 ptr += (cookie_size / 4);
533 }
534 }
535
536 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
537 *ptr++ = htonl((TCPOPT_NOP << 24) |
538 (TCPOPT_NOP << 16) |
539 (TCPOPT_SACK_PERM << 8) |
540 TCPOLEN_SACK_PERM);
541 }
542
543 if (unlikely(OPTION_WSCALE & options)) {
544 *ptr++ = htonl((TCPOPT_NOP << 24) |
545 (TCPOPT_WINDOW << 16) |
546 (TCPOLEN_WINDOW << 8) |
547 opts->ws);
548 }
549
550 if (unlikely(opts->num_sack_blocks)) {
551 struct tcp_sack_block *sp = tp->rx_opt.dsack ?
552 tp->duplicate_sack : tp->selective_acks;
553 int this_sack;
554
555 *ptr++ = htonl((TCPOPT_NOP << 24) |
556 (TCPOPT_NOP << 16) |
557 (TCPOPT_SACK << 8) |
558 (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
559 TCPOLEN_SACK_PERBLOCK)));
560
561 for (this_sack = 0; this_sack < opts->num_sack_blocks;
562 ++this_sack) {
563 *ptr++ = htonl(sp[this_sack].start_seq);
564 *ptr++ = htonl(sp[this_sack].end_seq);
565 }
566
567 tp->rx_opt.dsack = 0;
568 }
569
570 if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
571 struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
572
573 *ptr++ = htonl((TCPOPT_EXP << 24) |
574 ((TCPOLEN_EXP_FASTOPEN_BASE + foc->len) << 16) |
575 TCPOPT_FASTOPEN_MAGIC);
576
577 memcpy(ptr, foc->val, foc->len);
578 if ((foc->len & 3) == 2) {
579 u8 *align = ((u8 *)ptr) + foc->len;
580 align[0] = align[1] = TCPOPT_NOP;
581 }
582 ptr += (foc->len + 3) >> 2;
583 }
584}
585
586
587
588
589static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
590 struct tcp_out_options *opts,
591 struct tcp_md5sig_key **md5)
592{
593 struct tcp_sock *tp = tcp_sk(sk);
594 struct tcp_cookie_values *cvp = tp->cookie_values;
595 unsigned int remaining = MAX_TCP_OPTION_SPACE;
596 u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ?
597 tcp_cookie_size_check(cvp->cookie_desired) :
598 0;
599 struct tcp_fastopen_request *fastopen = tp->fastopen_req;
600
601#ifdef CONFIG_TCP_MD5SIG
602 *md5 = tp->af_specific->md5_lookup(sk, sk);
603 if (*md5) {
604 opts->options |= OPTION_MD5;
605 remaining -= TCPOLEN_MD5SIG_ALIGNED;
606 }
607#else
608 *md5 = NULL;
609#endif
610
611
612
613
614
615
616
617
618
619
620 opts->mss = tcp_advertise_mss(sk);
621 remaining -= TCPOLEN_MSS_ALIGNED;
622
623 if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
624 opts->options |= OPTION_TS;
625 opts->tsval = TCP_SKB_CB(skb)->when;
626 opts->tsecr = tp->rx_opt.ts_recent;
627 remaining -= TCPOLEN_TSTAMP_ALIGNED;
628 }
629 if (likely(sysctl_tcp_window_scaling)) {
630 opts->ws = tp->rx_opt.rcv_wscale;
631 opts->options |= OPTION_WSCALE;
632 remaining -= TCPOLEN_WSCALE_ALIGNED;
633 }
634 if (likely(sysctl_tcp_sack)) {
635 opts->options |= OPTION_SACK_ADVERTISE;
636 if (unlikely(!(OPTION_TS & opts->options)))
637 remaining -= TCPOLEN_SACKPERM_ALIGNED;
638 }
639
640 if (fastopen && fastopen->cookie.len >= 0) {
641 u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len;
642 need = (need + 3) & ~3U;
643 if (remaining >= need) {
644 opts->options |= OPTION_FAST_OPEN_COOKIE;
645 opts->fastopen_cookie = &fastopen->cookie;
646 remaining -= need;
647 tp->syn_fastopen = 1;
648 }
649 }
650
651
652
653
654
655
656 if (*md5 == NULL &&
657 (OPTION_TS & opts->options) &&
658 cookie_size > 0) {
659 int need = TCPOLEN_COOKIE_BASE + cookie_size;
660
661 if (0x2 & need) {
662
663 need += 2;
664
665 if (need > remaining) {
666
667 cookie_size -= 2;
668 need -= 4;
669 }
670 }
671 while (need > remaining && TCP_COOKIE_MIN <= cookie_size) {
672 cookie_size -= 4;
673 need -= 4;
674 }
675 if (TCP_COOKIE_MIN <= cookie_size) {
676 opts->options |= OPTION_COOKIE_EXTENSION;
677 opts->hash_location = (__u8 *)&cvp->cookie_pair[0];
678 opts->hash_size = cookie_size;
679
680
681 cvp->cookie_desired = cookie_size;
682
683 if (cvp->cookie_desired != cvp->cookie_pair_size) {
684
685
686
687
688 get_random_bytes(&cvp->cookie_pair[0],
689 cookie_size);
690 cvp->cookie_pair_size = cookie_size;
691 }
692
693 remaining -= need;
694 }
695 }
696 return MAX_TCP_OPTION_SPACE - remaining;
697}
698
699
700static unsigned int tcp_synack_options(struct sock *sk,
701 struct request_sock *req,
702 unsigned int mss, struct sk_buff *skb,
703 struct tcp_out_options *opts,
704 struct tcp_md5sig_key **md5,
705 struct tcp_extend_values *xvp)
706{
707 struct inet_request_sock *ireq = inet_rsk(req);
708 unsigned int remaining = MAX_TCP_OPTION_SPACE;
709 u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ?
710 xvp->cookie_plus :
711 0;
712
713#ifdef CONFIG_TCP_MD5SIG
714 *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
715 if (*md5) {
716 opts->options |= OPTION_MD5;
717 remaining -= TCPOLEN_MD5SIG_ALIGNED;
718
719
720
721
722
723
724 ireq->tstamp_ok &= !ireq->sack_ok;
725 }
726#else
727 *md5 = NULL;
728#endif
729
730
731 opts->mss = mss;
732 remaining -= TCPOLEN_MSS_ALIGNED;
733
734 if (likely(ireq->wscale_ok)) {
735 opts->ws = ireq->rcv_wscale;
736 opts->options |= OPTION_WSCALE;
737 remaining -= TCPOLEN_WSCALE_ALIGNED;
738 }
739 if (likely(ireq->tstamp_ok)) {
740 opts->options |= OPTION_TS;
741 opts->tsval = TCP_SKB_CB(skb)->when;
742 opts->tsecr = req->ts_recent;
743 remaining -= TCPOLEN_TSTAMP_ALIGNED;
744 }
745 if (likely(ireq->sack_ok)) {
746 opts->options |= OPTION_SACK_ADVERTISE;
747 if (unlikely(!ireq->tstamp_ok))
748 remaining -= TCPOLEN_SACKPERM_ALIGNED;
749 }
750
751
752
753
754 if (*md5 == NULL &&
755 ireq->tstamp_ok &&
756 cookie_plus > TCPOLEN_COOKIE_BASE) {
757 int need = cookie_plus;
758
759 if (0x2 & need) {
760
761 need += 2;
762 }
763 if (need <= remaining) {
764 opts->options |= OPTION_COOKIE_EXTENSION;
765 opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE;
766 remaining -= need;
767 } else {
768
769 xvp->cookie_out_never = 1;
770 opts->hash_size = 0;
771 }
772 }
773 return MAX_TCP_OPTION_SPACE - remaining;
774}
775
776
777
778
779static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
780 struct tcp_out_options *opts,
781 struct tcp_md5sig_key **md5)
782{
783 struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
784 struct tcp_sock *tp = tcp_sk(sk);
785 unsigned int size = 0;
786 unsigned int eff_sacks;
787
788#ifdef CONFIG_TCP_MD5SIG
789 *md5 = tp->af_specific->md5_lookup(sk, sk);
790 if (unlikely(*md5)) {
791 opts->options |= OPTION_MD5;
792 size += TCPOLEN_MD5SIG_ALIGNED;
793 }
794#else
795 *md5 = NULL;
796#endif
797
798 if (likely(tp->rx_opt.tstamp_ok)) {
799 opts->options |= OPTION_TS;
800 opts->tsval = tcb ? tcb->when : 0;
801 opts->tsecr = tp->rx_opt.ts_recent;
802 size += TCPOLEN_TSTAMP_ALIGNED;
803 }
804
805 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
806 if (unlikely(eff_sacks)) {
807 const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
808 opts->num_sack_blocks =
809 min_t(unsigned int, eff_sacks,
810 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
811 TCPOLEN_SACK_PERBLOCK);
812 size += TCPOLEN_SACK_BASE_ALIGNED +
813 opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
814 }
815
816 return size;
817}
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
/* Per-cpu deferred-transmit state: tcp_wfree() links sockets onto @head
 * (via tcp_sock.tsq_node) and schedules @tasklet; tcp_tasklet_func() then
 * drains the list.
 */
struct tsq_tasklet {
	struct tasklet_struct tasklet;	/* runs tcp_tasklet_func() */
	struct list_head head;		/* sockets queued for service */
};
static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
839
840static void tcp_tsq_handler(struct sock *sk)
841{
842 if ((1 << sk->sk_state) &
843 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
844 TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
845 tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
846}
847
848
849
850
851
852
853static void tcp_tasklet_func(unsigned long data)
854{
855 struct tsq_tasklet *tsq = (struct tsq_tasklet *)data;
856 LIST_HEAD(list);
857 unsigned long flags;
858 struct list_head *q, *n;
859 struct tcp_sock *tp;
860 struct sock *sk;
861
862 local_irq_save(flags);
863 list_splice_init(&tsq->head, &list);
864 local_irq_restore(flags);
865
866 list_for_each_safe(q, n, &list) {
867 tp = list_entry(q, struct tcp_sock, tsq_node);
868 list_del(&tp->tsq_node);
869
870 sk = (struct sock *)tp;
871 bh_lock_sock(sk);
872
873 if (!sock_owned_by_user(sk)) {
874 tcp_tsq_handler(sk);
875 } else {
876
877 set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
878 }
879 bh_unlock_sock(sk);
880
881 clear_bit(TSQ_QUEUED, &tp->tsq_flags);
882 sk_free(sk);
883 }
884}
885
886#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \
887 (1UL << TCP_WRITE_TIMER_DEFERRED) | \
888 (1UL << TCP_DELACK_TIMER_DEFERRED) | \
889 (1UL << TCP_MTU_REDUCED_DEFERRED))
890
891
892
893
894
895
896
897void tcp_release_cb(struct sock *sk)
898{
899 struct tcp_sock *tp = tcp_sk(sk);
900 unsigned long flags, nflags;
901
902
903 do {
904 flags = tp->tsq_flags;
905 if (!(flags & TCP_DEFERRED_ALL))
906 return;
907 nflags = flags & ~TCP_DEFERRED_ALL;
908 } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
909
910 if (flags & (1UL << TCP_TSQ_DEFERRED))
911 tcp_tsq_handler(sk);
912
913 if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
914 tcp_write_timer_handler(sk);
915 __sock_put(sk);
916 }
917 if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
918 tcp_delack_timer_handler(sk);
919 __sock_put(sk);
920 }
921 if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
922 sk->sk_prot->mtu_reduced(sk);
923 __sock_put(sk);
924 }
925}
926EXPORT_SYMBOL(tcp_release_cb);
927
928void __init tcp_tasklet_init(void)
929{
930 int i;
931
932 for_each_possible_cpu(i) {
933 struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);
934
935 INIT_LIST_HEAD(&tsq->head);
936 tasklet_init(&tsq->tasklet,
937 tcp_tasklet_func,
938 (unsigned long)tsq);
939 }
940}
941
942
943
944
945
946
947static void tcp_wfree(struct sk_buff *skb)
948{
949 struct sock *sk = skb->sk;
950 struct tcp_sock *tp = tcp_sk(sk);
951
952 if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
953 !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
954 unsigned long flags;
955 struct tsq_tasklet *tsq;
956
957
958
959
960 atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc);
961
962
963 local_irq_save(flags);
964 tsq = &__get_cpu_var(tsq_tasklet);
965 list_add(&tp->tsq_node, &tsq->head);
966 tasklet_schedule(&tsq->tasklet);
967 local_irq_restore(flags);
968 } else {
969 sock_wfree(skb);
970 }
971}
972
973
974
975
976
977
978
979
980
981
982
983
/* Build the TCP header for @skb and hand the segment to the network layer.
 *
 * @clone_it: when non-zero, the header is built on a clone (or on a private
 *            copy if @skb is already cloned) and the original is left alone.
 * @gfp_mask: allocation flags for that clone/copy.
 *
 * Returns -ENOBUFS if the clone/copy cannot be allocated. Otherwise returns
 * the queue_xmit() result when it is <= 0; a positive result triggers
 * tcp_enter_cwr() and is mapped through net_xmit_eval().
 */
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
			    gfp_t gfp_mask)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet;
	struct tcp_sock *tp;
	struct tcp_skb_cb *tcb;
	struct tcp_out_options opts;
	unsigned int tcp_options_size, tcp_header_size;
	struct tcp_md5sig_key *md5;
	struct tcphdr *th;
	int err;

	BUG_ON(!skb || !tcp_skb_pcount(skb));

	/* Timestamp the skb when the congestion control module asks for it
	 * (TCP_CONG_RTT_STAMP); this is done before cloning.
	 */
	if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
		__net_timestamp(skb);

	if (likely(clone_it)) {
		if (unlikely(skb_cloned(skb)))
			skb = pskb_copy(skb, gfp_mask);
		else
			skb = skb_clone(skb, gfp_mask);
		if (unlikely(!skb))
			return -ENOBUFS;
	}

	inet = inet_sk(sk);
	tp = tcp_sk(sk);
	tcb = TCP_SKB_CB(skb);
	/* The option helpers only OR bits into opts, so start from zero. */
	memset(&opts, 0, sizeof(opts));

	if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
		tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
	else
		tcp_options_size = tcp_established_options(sk, skb, &opts,
							   &md5);
	tcp_header_size = tcp_options_size + sizeof(struct tcphdr);

	/* With nothing in flight, signal TX_START to congestion control and
	 * set ooo_okay on the skb; otherwise clear it.
	 */
	if (tcp_packets_in_flight(tp) == 0) {
		tcp_ca_event(sk, CA_EVENT_TX_START);
		skb->ooo_okay = 1;
	} else
		skb->ooo_okay = 0;

	skb_push(skb, tcp_header_size);
	skb_reset_transport_header(skb);

	/* Take ownership of the skb and charge it to the socket. tcp_wfree
	 * is the destructor when sysctl_tcp_limit_output_bytes > 0.
	 */
	skb_orphan(skb);
	skb->sk = sk;
	skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
			  tcp_wfree : sock_wfree;
	atomic_add(skb->truesize, &sk->sk_wmem_alloc);

	/* Fill in the fixed part of the TCP header. */
	th = tcp_hdr(skb);
	th->source = inet->inet_sport;
	th->dest = inet->inet_dport;
	th->seq = htonl(tcb->seq);
	th->ack_seq = htonl(tp->rcv_nxt);
	/* Seventh 16-bit word of the header: data offset (in 32-bit words)
	 * in the top four bits, flags below.
	 */
	*(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
					tcb->tcp_flags);

	if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
		/* The window on a SYN is sent unscaled, capped at 16 bits. */
		th->window = htons(min(tp->rcv_wnd, 65535U));
	} else {
		th->window = htons(tcp_select_window(sk));
	}
	th->check = 0;
	th->urg_ptr = 0;

	/* Set the urgent pointer when in urgent mode and this segment
	 * starts before snd_up. If snd_up is too far ahead for a 16-bit
	 * offset, 0xFFFF is used, but only when seq + 0xFFFF is beyond
	 * snd_nxt.
	 */
	if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
		if (before(tp->snd_up, tcb->seq + 0x10000)) {
			th->urg_ptr = htons(tp->snd_up - tcb->seq);
			th->urg = 1;
		} else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
			th->urg_ptr = htons(0xFFFF);
			th->urg = 1;
		}
	}

	tcp_options_write((__be32 *)(th + 1), tp, &opts);
	if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
		TCP_ECN_send(sk, skb, tcp_header_size);

#ifdef CONFIG_TCP_MD5SIG
	/* Compute the signature into the slot tcp_options_write() reserved;
	 * GSO capabilities are removed from the socket when MD5 is in use.
	 */
	if (md5) {
		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		tp->af_specific->calc_md5_hash(opts.hash_location,
					       md5, sk, NULL, skb);
	}
#endif

	icsk->icsk_af_ops->send_check(sk, skb);

	if (likely(tcb->tcp_flags & TCPHDR_ACK))
		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));

	/* Anything longer than the bare header carries data. */
	if (skb->len != tcp_header_size)
		tcp_event_data_sent(tp, sk);

	/* Count OUTSEGS for segments extending beyond snd_nxt and for
	 * segments that occupy no sequence space.
	 */
	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
		TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
			      tcp_skb_pcount(skb));

	err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
	if (likely(err <= 0))
		return err;

	/* A positive return from the lower layer is treated as a
	 * congestion signal.
	 */
	tcp_enter_cwr(sk, 1);

	return net_xmit_eval(err);
}
1105
1106
1107
1108
1109
1110
1111static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
1112{
1113 struct tcp_sock *tp = tcp_sk(sk);
1114
1115
1116 tp->write_seq = TCP_SKB_CB(skb)->end_seq;
1117 skb_header_release(skb);
1118 tcp_add_write_queue_tail(sk, skb);
1119 sk->sk_wmem_queued += skb->truesize;
1120 sk_mem_charge(sk, skb->truesize);
1121}
1122
1123
1124static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
1125 unsigned int mss_now)
1126{
1127 if (skb->len <= mss_now || !sk_can_gso(sk) ||
1128 skb->ip_summed == CHECKSUM_NONE) {
1129
1130
1131
1132 skb_shinfo(skb)->gso_segs = 1;
1133 skb_shinfo(skb)->gso_size = 0;
1134 skb_shinfo(skb)->gso_type = 0;
1135 } else {
1136 skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
1137 skb_shinfo(skb)->gso_size = mss_now;
1138 skb_shinfo(skb)->gso_type = sk->sk_gso_type;
1139 }
1140}
1141
1142
1143
1144
1145static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
1146 int decr)
1147{
1148 struct tcp_sock *tp = tcp_sk(sk);
1149
1150 if (!tp->sacked_out || tcp_is_reno(tp))
1151 return;
1152
1153 if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
1154 tp->fackets_out -= decr;
1155}
1156
1157
1158
1159
1160static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
1161{
1162 struct tcp_sock *tp = tcp_sk(sk);
1163
1164 tp->packets_out -= decr;
1165
1166 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1167 tp->sacked_out -= decr;
1168 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
1169 tp->retrans_out -= decr;
1170 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
1171 tp->lost_out -= decr;
1172
1173
1174 if (tcp_is_reno(tp) && decr > 0)
1175 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
1176
1177 tcp_adjust_fackets_out(sk, skb, decr);
1178
1179 if (tp->lost_skb_hint &&
1180 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
1181 (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
1182 tp->lost_cnt_hint -= decr;
1183
1184 tcp_verify_left_out(tp);
1185}
1186
1187
1188
1189
1190
1191
/* Split @skb on the write queue into two: @skb keeps the first @len bytes
 * and a newly allocated skb, inserted right after it, receives the rest.
 *
 * @mss_now: MSS used to recompute the segment counts of both halves.
 *
 * Returns 0 on success, -EINVAL if @len exceeds the skb length, -ENOMEM if
 * the un-cloning or the allocation of the second skb fails.
 */
int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
		 unsigned int mss_now)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *buff;
	int nsize, old_factor;
	int nlen;
	u8 flags;

	if (WARN_ON(len > skb->len))
		return -EINVAL;

	/* Bytes of the linear area that move to the new skb. */
	nsize = skb_headlen(skb) - len;
	if (nsize < 0)
		nsize = 0;

	/* A cloned, non-linear skb gets a private head before being split. */
	if (skb_cloned(skb) &&
	    skb_is_nonlinear(skb) &&
	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
		return -ENOMEM;

	/* Allocate the skb that will hold the tail. */
	buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
	if (buff == NULL)
		return -ENOMEM;

	/* Charge the new skb, then move the truesize of the paged data
	 * that changes hands from skb to buff.
	 */
	sk->sk_wmem_queued += buff->truesize;
	sk_mem_charge(sk, buff->truesize);
	nlen = skb->len - len - nsize;
	buff->truesize += nlen;
	skb->truesize -= nlen;

	/* Split the sequence space at seq + len. */
	TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
	TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;

	/* FIN and PSH stay with the tail only; the tail inherits all flags
	 * and the SACK tag bits.
	 */
	flags = TCP_SKB_CB(skb)->tcp_flags;
	TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
	TCP_SKB_CB(buff)->tcp_flags = flags;
	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;

	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
		/* Linear skb with a software checksum: copy the tail while
		 * checksumming it, trim the head, and take the tail's
		 * checksum out of the head's.
		 */
		buff->csum = csum_partial_copy_nocheck(skb->data + len,
						       skb_put(buff, nsize),
						       nsize, 0);

		skb_trim(skb, len);

		skb->csum = csum_block_sub(skb->csum, buff->csum, len);
	} else {
		skb->ip_summed = CHECKSUM_PARTIAL;
		skb_split(skb, buff, len);
	}

	buff->ip_summed = skb->ip_summed;

	/* Both halves keep the original's timestamps. */
	TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
	buff->tstamp = skb->tstamp;

	old_factor = tcp_skb_pcount(skb);

	/* Recompute the segment counts of both halves. */
	tcp_set_skb_tso_segs(sk, skb, mss_now);
	tcp_set_skb_tso_segs(sk, buff, mss_now);

	/* If the whole original range is at or below snd_nxt (already
	 * sent), fix the packet counters for any change in the total
	 * number of segments.
	 */
	if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
		int diff = old_factor - tcp_skb_pcount(skb) -
			tcp_skb_pcount(buff);

		if (diff)
			tcp_adjust_pcount(sk, skb, diff);
	}

	/* Link the tail into the write queue right after the head. */
	skb_header_release(buff);
	tcp_insert_write_queue_after(skb, buff, sk);

	return 0;
}
1280
1281
1282
1283
1284
1285static void __pskb_trim_head(struct sk_buff *skb, int len)
1286{
1287 int i, k, eat;
1288
1289 eat = min_t(int, len, skb_headlen(skb));
1290 if (eat) {
1291 __skb_pull(skb, eat);
1292 skb->avail_size -= eat;
1293 len -= eat;
1294 if (!len)
1295 return;
1296 }
1297 eat = len;
1298 k = 0;
1299 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1300 int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
1301
1302 if (size <= eat) {
1303 skb_frag_unref(skb, i);
1304 eat -= size;
1305 } else {
1306 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
1307 if (eat) {
1308 skb_shinfo(skb)->frags[k].page_offset += eat;
1309 skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
1310 eat = 0;
1311 }
1312 k++;
1313 }
1314 }
1315 skb_shinfo(skb)->nr_frags = k;
1316
1317 skb_reset_tail_pointer(skb);
1318 skb->data_len -= len;
1319 skb->len = skb->data_len;
1320}
1321
1322
1323int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1324{
1325 if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
1326 return -ENOMEM;
1327
1328 __pskb_trim_head(skb, len);
1329
1330 TCP_SKB_CB(skb)->seq += len;
1331 skb->ip_summed = CHECKSUM_PARTIAL;
1332
1333 skb->truesize -= len;
1334 sk->sk_wmem_queued -= len;
1335 sk_mem_uncharge(sk, len);
1336 sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
1337
1338
1339 if (tcp_skb_pcount(skb) > 1)
1340 tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
1341
1342 return 0;
1343}
1344
1345
1346int tcp_mtu_to_mss(struct sock *sk, int pmtu)
1347{
1348 const struct tcp_sock *tp = tcp_sk(sk);
1349 const struct inet_connection_sock *icsk = inet_csk(sk);
1350 int mss_now;
1351
1352
1353
1354
1355 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);
1356
1357
1358 if (icsk->icsk_af_ops->net_frag_header_len) {
1359 const struct dst_entry *dst = __sk_dst_get(sk);
1360
1361 if (dst && dst_allfrag(dst))
1362 mss_now -= icsk->icsk_af_ops->net_frag_header_len;
1363 }
1364
1365
1366 if (mss_now > tp->rx_opt.mss_clamp)
1367 mss_now = tp->rx_opt.mss_clamp;
1368
1369
1370 mss_now -= icsk->icsk_ext_hdr_len;
1371
1372
1373 if (mss_now < 48)
1374 mss_now = 48;
1375
1376
1377 mss_now -= tp->tcp_header_len - sizeof(struct tcphdr);
1378
1379 return mss_now;
1380}
1381
1382
1383int tcp_mss_to_mtu(struct sock *sk, int mss)
1384{
1385 const struct tcp_sock *tp = tcp_sk(sk);
1386 const struct inet_connection_sock *icsk = inet_csk(sk);
1387 int mtu;
1388
1389 mtu = mss +
1390 tp->tcp_header_len +
1391 icsk->icsk_ext_hdr_len +
1392 icsk->icsk_af_ops->net_header_len;
1393
1394
1395 if (icsk->icsk_af_ops->net_frag_header_len) {
1396 const struct dst_entry *dst = __sk_dst_get(sk);
1397
1398 if (dst && dst_allfrag(dst))
1399 mtu += icsk->icsk_af_ops->net_frag_header_len;
1400 }
1401 return mtu;
1402}
1403
1404
1405void tcp_mtup_init(struct sock *sk)
1406{
1407 struct tcp_sock *tp = tcp_sk(sk);
1408 struct inet_connection_sock *icsk = inet_csk(sk);
1409
1410 icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1;
1411 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
1412 icsk->icsk_af_ops->net_header_len;
1413 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
1414 icsk->icsk_mtup.probe_size = 0;
1415}
1416EXPORT_SYMBOL(tcp_mtup_init);
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
1441{
1442 struct tcp_sock *tp = tcp_sk(sk);
1443 struct inet_connection_sock *icsk = inet_csk(sk);
1444 int mss_now;
1445
1446 if (icsk->icsk_mtup.search_high > pmtu)
1447 icsk->icsk_mtup.search_high = pmtu;
1448
1449 mss_now = tcp_mtu_to_mss(sk, pmtu);
1450 mss_now = tcp_bound_to_half_wnd(tp, mss_now);
1451
1452
1453 icsk->icsk_pmtu_cookie = pmtu;
1454 if (icsk->icsk_mtup.enabled)
1455 mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low));
1456 tp->mss_cache = mss_now;
1457
1458 return mss_now;
1459}
1460EXPORT_SYMBOL(tcp_sync_mss);
1461
1462
1463
1464
1465unsigned int tcp_current_mss(struct sock *sk)
1466{
1467 const struct tcp_sock *tp = tcp_sk(sk);
1468 const struct dst_entry *dst = __sk_dst_get(sk);
1469 u32 mss_now;
1470 unsigned int header_len;
1471 struct tcp_out_options opts;
1472 struct tcp_md5sig_key *md5;
1473
1474 mss_now = tp->mss_cache;
1475
1476 if (dst) {
1477 u32 mtu = dst_mtu(dst);
1478 if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
1479 mss_now = tcp_sync_mss(sk, mtu);
1480 }
1481
1482 header_len = tcp_established_options(sk, NULL, &opts, &md5) +
1483 sizeof(struct tcphdr);
1484
1485
1486
1487
1488 if (header_len != tp->tcp_header_len) {
1489 int delta = (int) header_len - tp->tcp_header_len;
1490 mss_now -= delta;
1491 }
1492
1493 return mss_now;
1494}
1495
1496
1497static void tcp_cwnd_validate(struct sock *sk)
1498{
1499 struct tcp_sock *tp = tcp_sk(sk);
1500
1501 if (tp->packets_out >= tp->snd_cwnd) {
1502
1503 tp->snd_cwnd_used = 0;
1504 tp->snd_cwnd_stamp = tcp_time_stamp;
1505 } else {
1506
1507 if (tp->packets_out > tp->snd_cwnd_used)
1508 tp->snd_cwnd_used = tp->packets_out;
1509
1510 if (sysctl_tcp_slow_start_after_idle &&
1511 (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
1512 tcp_cwnd_application_limited(sk);
1513 }
1514}
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
1529 unsigned int mss_now, unsigned int max_segs)
1530{
1531 const struct tcp_sock *tp = tcp_sk(sk);
1532 u32 needed, window, max_len;
1533
1534 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1535 max_len = mss_now * max_segs;
1536
1537 if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
1538 return max_len;
1539
1540 needed = min(skb->len, window);
1541
1542 if (max_len <= needed)
1543 return max_len;
1544
1545 return needed - needed % mss_now;
1546}
1547
1548
1549
1550
1551static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
1552 const struct sk_buff *skb)
1553{
1554 u32 in_flight, cwnd;
1555
1556
1557 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
1558 tcp_skb_pcount(skb) == 1)
1559 return 1;
1560
1561 in_flight = tcp_packets_in_flight(tp);
1562 cwnd = tp->snd_cwnd;
1563 if (in_flight < cwnd)
1564 return (cwnd - in_flight);
1565
1566 return 0;
1567}
1568
1569
1570
1571
1572
/*
 * Make sure the segment count of @skb is set up for @mss_now and
 * return it.
 *
 * The count is (re)computed via tcp_set_skb_tso_segs() when it has not
 * been set yet (zero), or when the skb spans several segments that were
 * sized for a different MSS.
 */
static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
			     unsigned int mss_now)
{
	int tso_segs = tcp_skb_pcount(skb);

	/* Already initialised and still valid for this MSS: nothing to do. */
	if (tso_segs && (tso_segs <= 1 || tcp_skb_mss(skb) == mss_now))
		return tso_segs;

	tcp_set_skb_tso_segs(sk, skb, mss_now);
	return tcp_skb_pcount(skb);
}
1584
1585
1586static inline bool tcp_minshall_check(const struct tcp_sock *tp)
1587{
1588 return after(tp->snd_sml, tp->snd_una) &&
1589 !after(tp->snd_sml, tp->snd_nxt);
1590}
1591
1592
1593
1594
1595
1596
1597
1598
1599static inline bool tcp_nagle_check(const struct tcp_sock *tp,
1600 const struct sk_buff *skb,
1601 unsigned int mss_now, int nonagle)
1602{
1603 return skb->len < mss_now &&
1604 ((nonagle & TCP_NAGLE_CORK) ||
1605 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1606}
1607
1608
1609
1610
1611static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
1612 unsigned int cur_mss, int nonagle)
1613{
1614
1615
1616
1617
1618
1619
1620 if (nonagle & TCP_NAGLE_PUSH)
1621 return true;
1622
1623
1624
1625
1626 if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
1627 (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1628 return true;
1629
1630 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
1631 return true;
1632
1633 return false;
1634}
1635
1636
1637static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
1638 const struct sk_buff *skb,
1639 unsigned int cur_mss)
1640{
1641 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
1642
1643 if (skb->len > cur_mss)
1644 end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
1645
1646 return !after(end_seq, tcp_wnd_end(tp));
1647}
1648
1649
1650
1651
1652
/*
 * Combined send test for @skb: Nagle, congestion window and send window.
 *
 * The segment count of @skb is initialised first.  Returns the
 * congestion window quota if the skb may be sent, 0 if any of the three
 * tests forbids it.
 */
static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
				 unsigned int cur_mss, int nonagle)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	unsigned int quota;

	tcp_init_tso_segs(sk, skb, cur_mss);

	if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
		return 0;

	quota = tcp_cwnd_test(tp, skb);
	if (!quota)
		return 0;

	return tcp_snd_wnd_test(tp, skb, cur_mss) ? quota : 0;
}
1670
1671
1672bool tcp_may_send_now(struct sock *sk)
1673{
1674 const struct tcp_sock *tp = tcp_sk(sk);
1675 struct sk_buff *skb = tcp_send_head(sk);
1676
1677 return skb &&
1678 tcp_snd_test(sk, skb, tcp_current_mss(sk),
1679 (tcp_skb_is_last(sk, skb) ?
1680 tp->nonagle : TCP_NAGLE_PUSH));
1681}
1682
1683
1684
1685
1686
1687
1688
1689
/*
 * Split @skb at @len bytes: the first @len bytes stay in @skb, the rest
 * moves to a newly allocated skb that is queued directly after it.
 *
 * skbs whose data is not entirely paged (skb->len != skb->data_len) are
 * handed to tcp_fragment() instead.
 *
 * Returns 0 on success, -ENOMEM if the new skb cannot be allocated, or
 * the result of tcp_fragment() when the split is delegated.
 */
static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
			unsigned int mss_now, gfp_t gfp)
{
	struct sk_buff *buff;
	int nlen = skb->len - len;	/* bytes that move to the new skb */
	u8 flags;

	/* Some data lives in the linear area: use the generic splitter. */
	if (skb->len != skb->data_len)
		return tcp_fragment(sk, skb, len, mss_now);

	buff = sk_stream_alloc_skb(sk, 0, gfp);
	if (unlikely(buff == NULL))
		return -ENOMEM;

	/* Charge the new skb to the socket, then shift nlen of truesize over. */
	sk->sk_wmem_queued += buff->truesize;
	sk_mem_charge(sk, buff->truesize);
	buff->truesize += nlen;
	skb->truesize -= nlen;

	/* Sequence ranges: skb keeps [seq, seq+len), buff gets the remainder. */
	TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
	TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;

	/* FIN and PSH belong only on the trailing part. */
	flags = TCP_SKB_CB(skb)->tcp_flags;
	TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
	TCP_SKB_CB(buff)->tcp_flags = flags;

	/* The new skb starts with a clean SACK state. */
	TCP_SKB_CB(buff)->sacked = 0;

	buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
	skb_split(skb, buff, len);

	/* Both halves changed size: recompute their segment counts. */
	tcp_set_skb_tso_segs(sk, skb, mss_now);
	tcp_set_skb_tso_segs(sk, buff, mss_now);

	/* Link the new skb into the write queue right behind the original. */
	skb_header_release(buff);
	tcp_insert_write_queue_after(skb, buff, sk);

	return 0;
}
1736
1737
1738
1739
1740
1741
/*
 * Decide whether sending the multi-segment @skb should be postponed.
 *
 * Returns true to defer, in which case tp->tso_deferred records the
 * current jiffies (shifted left by one, with bit 0 set as an "armed"
 * marker).  Returns false to send now, clearing tp->tso_deferred.
 *
 * The caller must pass an skb with more than one segment and there must
 * be congestion window room left; both are enforced with BUG_ON().
 */
static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	u32 send_win, cong_win, limit, in_flight;
	int win_divisor;

	/* Never hold back a FIN. */
	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
		goto send_now;

	/* Only defer while the congestion state is Open. */
	if (icsk->icsk_ca_state != TCP_CA_Open)
		goto send_now;

	/*
	 * A deferral is already armed: stop deferring once more than one
	 * jiffy has passed since it was recorded.
	 */
	if (tp->tso_deferred &&
	    (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
		goto send_now;

	in_flight = tcp_packets_in_flight(tp);

	BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));

	/* Bytes the peer's window still allows, starting at this skb. */
	send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;

	/* Bytes the congestion window still allows. */
	cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;

	limit = min(send_win, cong_win);

	/* Enough room for a maximally sized GSO burst: send. */
	if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
			   sk->sk_gso_max_segs * tp->mss_cache))
		goto send_now;

	/* Not the tail of the queue and the whole skb fits: send. */
	if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
		goto send_now;

	/* Read the sysctl once so the test and the division agree. */
	win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
	if (win_divisor) {
		u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);

		/*
		 * Send once the available room reaches 1/win_divisor of the
		 * smaller of the send window and the congestion window.
		 */
		chunk /= win_divisor;
		if (limit >= chunk)
			goto send_now;
	} else {
		/*
		 * No divisor configured: send once the available room exceeds
		 * tcp_max_tso_deferred_mss() full-sized segments.
		 */
		if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache)
			goto send_now;
	}

	/* Arm (or re-arm) the deferral with the current time; bit 0 = armed. */
	tp->tso_deferred = 1 | (jiffies << 1);

	return true;

send_now:
	tp->tso_deferred = 0;
	return false;
}
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
/*
 * Try to build and send one MTU probe segment of twice the cached MSS by
 * coalescing data from the head of the send queue.
 *
 * Returns:
 *    1  a probe was transmitted,
 *    0  a probe is wanted but window or congestion window room is
 *       currently missing (tcp_write_xmit() stops transmitting on this),
 *   -1  no probe was sent (probing not applicable, not enough data,
 *       allocation failure or transmit failure).
 */
static int tcp_mtu_probe(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct sk_buff *skb, *nskb, *next;
	int len;
	int probe_size;
	int size_needed;
	int copy;
	int mss_now;

	/*
	 * Preconditions: probing enabled, no probe outstanding, congestion
	 * state Open, cwnd of at least 11 and no SACK/D-SACK blocks pending.
	 */
	if (!icsk->icsk_mtup.enabled ||
	    icsk->icsk_mtup.probe_size ||
	    inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
	    tp->snd_cwnd < 11 ||
	    tp->rx_opt.num_sacks || tp->rx_opt.dsack)
		return -1;

	/*
	 * The probe is two cached MSS in size; on top of that we want
	 * (reordering + 1) further full segments of data queued behind it.
	 */
	mss_now = tcp_current_mss(sk);
	probe_size = 2 * tp->mss_cache;
	size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
	if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
		/* The probe would exceed the upper search bound. */
		return -1;
	}

	/* Not enough unsent data queued. */
	if (tp->write_seq - tp->snd_nxt < size_needed)
		return -1;

	/* The peer's window can never hold it / cannot hold it right now. */
	if (tp->snd_wnd < size_needed)
		return -1;
	if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
		return 0;

	/* Need congestion window room for two more packets. */
	if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
		if (!tcp_packets_in_flight(tp))
			return -1;
		else
			return 0;
	}

	/* Allocate the probe skb and charge it to the socket. */
	if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
		return -1;
	sk->sk_wmem_queued += nskb->truesize;
	sk_mem_charge(sk, nskb->truesize);

	skb = tcp_send_head(sk);

	/* The probe covers probe_size bytes starting at the send head. */
	TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
	TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
	TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
	TCP_SKB_CB(nskb)->sacked = 0;
	nskb->csum = 0;
	nskb->ip_summed = skb->ip_summed;

	tcp_insert_write_queue_before(nskb, skb, sk);

	/* Pull probe_size bytes out of the following skbs into nskb. */
	len = 0;
	tcp_for_write_queue_from_safe(skb, next, sk) {
		copy = min_t(int, skb->len, probe_size - len);
		if (nskb->ip_summed)
			skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
		else
			nskb->csum = skb_copy_and_csum_bits(skb, 0,
							    skb_put(nskb, copy),
							    copy, nskb->csum);

		if (skb->len <= copy) {
			/*
			 * The whole skb was consumed: inherit all its flags
			 * and drop it from the queue.
			 */
			TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
			tcp_unlink_write_queue(skb, sk);
			sk_wmem_free_skb(sk, skb);
		} else {
			/*
			 * Partially consumed: FIN/PSH stay with the remainder,
			 * which is trimmed at the front and advanced in
			 * sequence space.
			 */
			TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
						   ~(TCPHDR_FIN|TCPHDR_PSH);
			if (!skb_shinfo(skb)->nr_frags) {
				skb_pull(skb, copy);
				if (skb->ip_summed != CHECKSUM_PARTIAL)
					skb->csum = csum_partial(skb->data,
								 skb->len, 0);
			} else {
				__pskb_trim_head(skb, copy);
				tcp_set_skb_tso_segs(sk, skb, mss_now);
			}
			TCP_SKB_CB(skb)->seq += copy;
		}

		len += copy;

		if (len >= probe_size)
			break;
	}
	/* Using the probe's own length as MSS makes it a single segment. */
	tcp_init_tso_segs(sk, nskb, nskb->len);

	/* Transmit the probe; on success record it as outstanding. */
	TCP_SKB_CB(nskb)->when = tcp_time_stamp;
	if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
		/* cwnd is decremented by one for the in-flight probe. */
		tp->snd_cwnd--;
		tcp_event_new_data_sent(sk, nskb);

		icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
		tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
		tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq;

		return 1;
	}

	return -1;
}
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
/*
 * Transmit as much queued data as the Nagle, congestion window, send
 * window, TSO deferral and output byte limit checks allow.
 *
 * @mss_now:  current MSS used to segment the data
 * @nonagle:  Nagle flags applied to the last skb in the queue
 * @push_one: when non-zero, send at most one skb and skip both MTU
 *            probing and TSO deferral
 * @gfp:      allocation flags for fragmenting and transmitting
 *
 * Returns false if anything was sent, or if tcp_mtu_probe() returned 0.
 * Otherwise returns true exactly when nothing is in flight while data
 * is still waiting at the send head.
 */
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
			   int push_one, gfp_t gfp)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	unsigned int tso_segs, sent_pkts;
	int cwnd_quota;
	int result;

	sent_pkts = 0;

	if (!push_one) {
		/* Give MTU probing a chance before regular data. */
		result = tcp_mtu_probe(sk);
		if (!result) {
			return false;
		} else if (result > 0) {
			sent_pkts = 1;
		}
	}

	while ((skb = tcp_send_head(sk))) {
		unsigned int limit;

		tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
		BUG_ON(!tso_segs);

		/* Stop when the congestion window is exhausted ... */
		cwnd_quota = tcp_cwnd_test(tp, skb);
		if (!cwnd_quota)
			break;

		/* ... or the first segment does not fit the send window. */
		if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
			break;

		if (tso_segs == 1) {
			/* Single segment: subject to Nagle (only for the last skb). */
			if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
						     (tcp_skb_is_last(sk, skb) ?
						      nonagle : TCP_NAGLE_PUSH))))
				break;
		} else {
			/* Multi-segment: may be deferred to build a larger burst. */
			if (!push_one && tcp_tso_should_defer(sk, skb))
				break;
		}

		/*
		 * Too many bytes already handed down for this socket: mark the
		 * socket as throttled and stop.
		 */
		if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) {
			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
			break;
		}
		/* Work out how many bytes of this skb to send in one go. */
		limit = mss_now;
		if (tso_segs > 1 && !tcp_urg_mode(tp))
			limit = tcp_mss_split_point(sk, skb, mss_now,
						    min_t(unsigned int,
							  cwnd_quota,
							  sk->sk_gso_max_segs));

		/* Split off the part that exceeds the limit. */
		if (skb->len > limit &&
		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
			break;

		TCP_SKB_CB(skb)->when = tcp_time_stamp;

		if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
			break;

		/* Sent: advance the send head and update the counters. */
		tcp_event_new_data_sent(sk, skb);

		tcp_minshall_update(tp, mss_now, skb);
		sent_pkts += tcp_skb_pcount(skb);

		if (push_one)
			break;
	}
	/* During Recovery, account what was sent in prr_out. */
	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
		tp->prr_out += sent_pkts;

	if (likely(sent_pkts)) {
		tcp_cwnd_validate(sk);
		return false;
	}
	return !tp->packets_out && tcp_send_head(sk);
}
2040
2041
2042
2043
2044
2045void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
2046 int nonagle)
2047{
2048
2049
2050
2051
2052 if (unlikely(sk->sk_state == TCP_CLOSE))
2053 return;
2054
2055 if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
2056 sk_gfp_atomic(sk, GFP_ATOMIC)))
2057 tcp_check_probe_timer(sk);
2058}
2059
2060
2061
2062
2063void tcp_push_one(struct sock *sk, unsigned int mss_now)
2064{
2065 struct sk_buff *skb = tcp_send_head(sk);
2066
2067 BUG_ON(!skb || skb->len < mss_now);
2068
2069 tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
2070}
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124u32 __tcp_select_window(struct sock *sk)
2125{
2126 struct inet_connection_sock *icsk = inet_csk(sk);
2127 struct tcp_sock *tp = tcp_sk(sk);
2128
2129
2130
2131
2132
2133
2134 int mss = icsk->icsk_ack.rcv_mss;
2135 int free_space = tcp_space(sk);
2136 int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
2137 int window;
2138
2139 if (mss > full_space)
2140 mss = full_space;
2141
2142 if (free_space < (full_space >> 1)) {
2143 icsk->icsk_ack.quick = 0;
2144
2145 if (sk_under_memory_pressure(sk))
2146 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
2147 4U * tp->advmss);
2148
2149 if (free_space < mss)
2150 return 0;
2151 }
2152
2153 if (free_space > tp->rcv_ssthresh)
2154 free_space = tp->rcv_ssthresh;
2155
2156
2157
2158
2159 window = tp->rcv_wnd;
2160 if (tp->rx_opt.rcv_wscale) {
2161 window = free_space;
2162
2163
2164
2165
2166
2167 if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
2168 window = (((window >> tp->rx_opt.rcv_wscale) + 1)
2169 << tp->rx_opt.rcv_wscale);
2170 } else {
2171
2172
2173
2174
2175
2176
2177
2178
2179 if (window <= free_space - mss || window > free_space)
2180 window = (free_space / mss) * mss;
2181 else if (mss == full_space &&
2182 free_space > window + (full_space >> 1))
2183 window = free_space;
2184 }
2185
2186 return window;
2187}
2188
2189
/*
 * Merge the skb that follows @skb in the write queue into @skb.
 *
 * The next skb's linear data is appended to @skb, its sequence range,
 * flags and "ever retransmitted" marking are folded in, and it is then
 * unlinked and freed.  Both skbs must consist of exactly one segment.
 */
static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
	int skb_size, next_skb_size;

	skb_size = skb->len;
	next_skb_size = next_skb->len;

	BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);

	tcp_highest_sack_combine(sk, next_skb, skb);

	tcp_unlink_write_queue(next_skb, sk);

	/* Append the next skb's payload to the tail of skb. */
	skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
				  next_skb_size);

	/* If either side uses CHECKSUM_PARTIAL, the merged skb does too. */
	if (next_skb->ip_summed == CHECKSUM_PARTIAL)
		skb->ip_summed = CHECKSUM_PARTIAL;

	/* Otherwise combine the two software checksums. */
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);

	/* The merged skb now ends where the next one did. */
	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;

	/* Carry over all TCP flags of the absorbed skb. */
	TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;

	/* Only the "ever retransmitted" bit of the SACK state is inherited. */
	TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;

	/* Cached hints must not keep pointing at the skb being freed. */
	tcp_clear_retrans_hints_partial(tp);
	if (next_skb == tp->retransmit_skb_hint)
		tp->retransmit_skb_hint = skb;

	tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));

	sk_wmem_free_skb(sk, next_skb);
}
2234
2235
2236static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
2237{
2238 if (tcp_skb_pcount(skb) > 1)
2239 return false;
2240
2241 if (skb_shinfo(skb)->nr_frags != 0)
2242 return false;
2243 if (skb_cloned(skb))
2244 return false;
2245 if (skb == tcp_send_head(sk))
2246 return false;
2247
2248 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2249 return false;
2250
2251 return true;
2252}
2253
2254
2255
2256
2257static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2258 int space)
2259{
2260 struct tcp_sock *tp = tcp_sk(sk);
2261 struct sk_buff *skb = to, *tmp;
2262 bool first = true;
2263
2264 if (!sysctl_tcp_retrans_collapse)
2265 return;
2266 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2267 return;
2268
2269 tcp_for_write_queue_from_safe(skb, tmp, sk) {
2270 if (!tcp_can_collapse(sk, skb))
2271 break;
2272
2273 space -= skb->len;
2274
2275 if (first) {
2276 first = false;
2277 continue;
2278 }
2279
2280 if (space < 0)
2281 break;
2282
2283
2284
2285 if (skb->len > skb_availroom(to))
2286 break;
2287
2288 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
2289 break;
2290
2291 tcp_collapse_retrans(sk, to);
2292 }
2293}
2294
2295
2296
2297
2298
/*
 * Retransmit @skb, which must already be on the write queue.
 *
 * Returns 0 on success.  Errors produced here are -EAGAIN (too much
 * data already queued below, or segment outside the send window),
 * -ENOMEM (trimming or fragmenting failed), -EHOSTUNREACH (header
 * rebuild failed) and -ENOBUFS (copy for realignment failed); otherwise
 * the result of tcp_transmit_skb() is returned.
 */
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	unsigned int cur_mss;
	int err;

	/* A retransmission cancels any outstanding MTU probe. */
	if (icsk->icsk_mtup.probe_size) {
		icsk->icsk_mtup.probe_size = 0;
	}

	/*
	 * Back off while the bytes still allocated for transmission exceed
	 * the smaller of 1.25 * sk_wmem_queued and the send buffer size.
	 */
	if (atomic_read(&sk->sk_wmem_alloc) >
	    min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
		return -EAGAIN;

	/* Drop the part of the skb that has already been acknowledged. */
	if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
		if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
			BUG();
		if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
			return -ENOMEM;
	}

	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
		return -EHOSTUNREACH;

	cur_mss = tcp_current_mss(sk);

	/*
	 * Do not retransmit a segment that starts at or beyond the end of
	 * the send window, unless it is the very first unacknowledged one.
	 */
	if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
	    TCP_SKB_CB(skb)->seq != tp->snd_una)
		return -EAGAIN;

	if (skb->len > cur_mss) {
		/* Too large for the current MSS: retransmit only the first part. */
		if (tcp_fragment(sk, skb, cur_mss, cur_mss))
			return -ENOMEM;
	} else {
		int oldpcount = tcp_skb_pcount(skb);

		/* Fits in one MSS now but was counted as several segments. */
		if (unlikely(oldpcount > 1)) {
			tcp_init_tso_segs(sk, skb, cur_mss);
			tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
		}
	}

	tcp_retrans_try_collapse(sk, skb, cur_mss);

	/*
	 * Data + FIN where only the FIN is still unacknowledged: strip the
	 * data and resend a bare FIN.
	 */
	if (skb->len > 0 &&
	    (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
	    tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
		if (!pskb_trim(skb, 0)) {
			/* Reinitialise as a data-less skb at the FIN's sequence. */
			tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
					     TCP_SKB_CB(skb)->tcp_flags);
			skb->ip_summed = CHECKSUM_NONE;
		}
	}

	/* Stamp the (re)transmission time. */
	TCP_SKB_CB(skb)->when = tcp_time_stamp;

	/*
	 * With NET_IP_ALIGN, data that is not 4-byte aligned is copied into
	 * a fresh skb, which is sent instead of a clone.
	 */
	if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) {
		struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
						   GFP_ATOMIC);
		err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
			     -ENOBUFS;
	} else {
		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
	}

	if (err == 0) {
		/* Update global and per-socket retransmit counters. */
		TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);

		tp->total_retrans++;

#if FASTRETRANS_DEBUG > 0
		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
			net_dbg_ratelimited("retrans_out leaked\n");
		}
#endif
		if (!tp->retrans_out)
			tp->lost_retrans_low = tp->snd_nxt;
		TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
		tp->retrans_out += tcp_skb_pcount(skb);

		/* Remember when the first retransmission happened. */
		if (!tp->retrans_stamp)
			tp->retrans_stamp = TCP_SKB_CB(skb)->when;

		tp->undo_retrans += tcp_skb_pcount(skb);

		/* Record snd_nxt at the time of this retransmission. */
		TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
	}
	return err;
}
2412
2413
2414
2415
2416static bool tcp_can_forward_retransmit(struct sock *sk)
2417{
2418 const struct inet_connection_sock *icsk = inet_csk(sk);
2419 const struct tcp_sock *tp = tcp_sk(sk);
2420
2421
2422 if (icsk->icsk_ca_state != TCP_CA_Recovery)
2423 return false;
2424
2425
2426 if (tcp_is_reno(tp))
2427 return false;
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437 if (tcp_may_send_now(sk))
2438 return false;
2439
2440 return true;
2441}
2442
2443
2444
2445
2446
2447
2448
2449
2450
/*
 * Walk the write queue and retransmit segments as far as the congestion
 * window allows.
 *
 * Segments marked lost are retransmitted first.  Once the walk passes
 * retransmit_high and tcp_can_forward_retransmit() agrees, it switches
 * to forward retransmission of segments below the highest SACKed
 * sequence that are neither SACKed nor already retransmitted, restarting
 * from the first such "hole" seen.
 */
void tcp_xmit_retransmit_queue(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	struct sk_buff *hole = NULL;	/* first forward-retransmit candidate */
	u32 last_lost;			/* end_seq of the last lost skb seen */
	int mib_idx;
	int fwd_rexmitting = 0;

	if (!tp->packets_out)
		return;

	if (!tp->lost_out)
		tp->retransmit_high = tp->snd_una;

	/* Resume from the cached hint if there is one, else from the head. */
	if (tp->retransmit_skb_hint) {
		skb = tp->retransmit_skb_hint;
		last_lost = TCP_SKB_CB(skb)->end_seq;
		if (after(last_lost, tp->retransmit_high))
			last_lost = tp->retransmit_high;
	} else {
		skb = tcp_write_queue_head(sk);
		last_lost = tp->snd_una;
	}

	tcp_for_write_queue_from(skb, sk) {
		__u8 sacked = TCP_SKB_CB(skb)->sacked;

		/* Never go past data that has not been sent yet. */
		if (skb == tcp_send_head(sk))
			break;

		/* Until a hole is found, keep the hint at the current skb. */
		if (hole == NULL)
			tp->retransmit_skb_hint = skb;

		/* Congestion window is full: stop entirely. */
		if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
			return;

		if (fwd_rexmitting) {
begin_fwd:
			/* Forward mode: only below the highest SACKed sequence. */
			if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
				break;
			mib_idx = LINUX_MIB_TCPFORWARDRETRANS;

		} else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) {
			/* Past the lost region: consider forward retransmits. */
			tp->retransmit_high = last_lost;
			if (!tcp_can_forward_retransmit(sk))
				break;

			/* Restart from the first hole recorded on the way here. */
			if (hole != NULL) {
				skb = hole;
				hole = NULL;
			}
			fwd_rexmitting = 1;
			goto begin_fwd;

		} else if (!(sacked & TCPCB_LOST)) {
			/* Not lost: remember the first un-SACKed, un-retransmitted skb. */
			if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
				hole = skb;
			continue;

		} else {
			/* Lost segment: pick the statistic matching the CA state. */
			last_lost = TCP_SKB_CB(skb)->end_seq;
			if (icsk->icsk_ca_state != TCP_CA_Loss)
				mib_idx = LINUX_MIB_TCPFASTRETRANS;
			else
				mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
		}

		/* Skip what is already SACKed or already retransmitted. */
		if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
			continue;

		if (tcp_retransmit_skb(sk, skb)) {
			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
			return;
		}
		NET_INC_STATS_BH(sock_net(sk), mib_idx);

		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
			tp->prr_out += tcp_skb_pcount(skb);

		/* Retransmitting the queue head restarts the retransmit timer. */
		if (skb == tcp_write_queue_head(sk))
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  inet_csk(sk)->icsk_rto,
						  TCP_RTO_MAX);
	}
}
2545
2546
2547
2548
2549void tcp_send_fin(struct sock *sk)
2550{
2551 struct tcp_sock *tp = tcp_sk(sk);
2552 struct sk_buff *skb = tcp_write_queue_tail(sk);
2553 int mss_now;
2554
2555
2556
2557
2558
2559 mss_now = tcp_current_mss(sk);
2560
2561 if (tcp_send_head(sk) != NULL) {
2562 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
2563 TCP_SKB_CB(skb)->end_seq++;
2564 tp->write_seq++;
2565 } else {
2566
2567 for (;;) {
2568 skb = alloc_skb_fclone(MAX_TCP_HEADER,
2569 sk->sk_allocation);
2570 if (skb)
2571 break;
2572 yield();
2573 }
2574
2575
2576 skb_reserve(skb, MAX_TCP_HEADER);
2577
2578 tcp_init_nondata_skb(skb, tp->write_seq,
2579 TCPHDR_ACK | TCPHDR_FIN);
2580 tcp_queue_skb(sk, skb);
2581 }
2582 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
2583}
2584
2585
2586
2587
2588
2589
2590void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2591{
2592 struct sk_buff *skb;
2593
2594
2595 skb = alloc_skb(MAX_TCP_HEADER, priority);
2596 if (!skb) {
2597 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2598 return;
2599 }
2600
2601
2602 skb_reserve(skb, MAX_TCP_HEADER);
2603 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
2604 TCPHDR_ACK | TCPHDR_RST);
2605
2606 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2607 if (tcp_transmit_skb(sk, skb, 0, priority))
2608 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2609
2610 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
2611}
2612
2613
2614
2615
2616
2617
2618
2619int tcp_send_synack(struct sock *sk)
2620{
2621 struct sk_buff *skb;
2622
2623 skb = tcp_write_queue_head(sk);
2624 if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
2625 pr_debug("%s: wrong queue state\n", __func__);
2626 return -EFAULT;
2627 }
2628 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
2629 if (skb_cloned(skb)) {
2630 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2631 if (nskb == NULL)
2632 return -ENOMEM;
2633 tcp_unlink_write_queue(skb, sk);
2634 skb_header_release(nskb);
2635 __tcp_add_write_queue_head(sk, nskb);
2636 sk_wmem_free_skb(sk, skb);
2637 sk->sk_wmem_queued += nskb->truesize;
2638 sk_mem_charge(sk, nskb->truesize);
2639 skb = nskb;
2640 }
2641
2642 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
2643 TCP_ECN_send_synack(tcp_sk(sk), skb);
2644 }
2645 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2646 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2647}
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
/*
 * Build a SYN-ACK skb for the connection request @req.
 *
 * @sk:  listening socket
 * @dst: route for the reply; attached to the skb on success and
 *       released here if the skb cannot be allocated
 * @req: the connection request being answered
 * @rvp: extra request values, accessed through tcp_xv()
 *
 * Returns the fully built skb (TCP header and options written), or NULL
 * on allocation failure.
 */
struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
				struct request_sock *req,
				struct request_values *rvp)
{
	struct tcp_out_options opts;
	struct tcp_extend_values *xvp = tcp_xv(rvp);
	struct inet_request_sock *ireq = inet_rsk(req);
	struct tcp_sock *tp = tcp_sk(sk);
	const struct tcp_cookie_values *cvp = tp->cookie_values;
	struct tcphdr *th;
	struct sk_buff *skb;
	struct tcp_md5sig_key *md5;
	int tcp_header_size;
	int mss;
	int s_data_desired = 0;

	/* Optional constant payload to carry in the SYN-ACK. */
	if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
		s_data_desired = cvp->s_data_desired;
	skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired,
			sk_gfp_atomic(sk, GFP_ATOMIC));
	if (unlikely(!skb)) {
		dst_release(dst);
		return NULL;
	}

	/* Leave room for the headers. */
	skb_reserve(skb, MAX_TCP_HEADER);

	skb_dst_set(skb, dst);

	/* MSS to announce: route metric, lowered by a smaller user setting. */
	mss = dst_metric_advmss(dst);
	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
		mss = tp->rx_opt.user_mss;

	/* First SYN-ACK for this request: choose the initial window. */
	if (req->rcv_wnd == 0) {
		__u8 rcv_wscale;

		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);

		/* With a locked receive buffer, clamp to what it can hold. */
		if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
		    (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
			req->window_clamp = tcp_full_space(sk);

		/* The timestamp option, when negotiated, reduces the usable MSS. */
		tcp_select_initial_window(tcp_full_space(sk),
			mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
			&req->rcv_wnd,
			&req->window_clamp,
			ireq->wscale_ok,
			&rcv_wscale,
			dst_metric(dst, RTAX_INITRWND));
		ireq->rcv_wscale = rcv_wscale;
	}

	memset(&opts, 0, sizeof(opts));
	/*
	 * Timestamp for the skb: cookie_init_timestamp() when the request
	 * has cookie_ts set (SYN cookies configured), else the current time.
	 */
#ifdef CONFIG_SYN_COOKIES
	if (unlikely(req->cookie_ts))
		TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
	else
#endif
	TCP_SKB_CB(skb)->when = tcp_time_stamp;
	tcp_header_size = tcp_synack_options(sk, req, mss,
					     skb, &opts, &md5, xvp)
			+ sizeof(*th);

	skb_push(skb, tcp_header_size);
	skb_reset_transport_header(skb);

	th = tcp_hdr(skb);
	memset(th, 0, sizeof(struct tcphdr));
	th->syn = 1;
	th->ack = 1;
	TCP_ECN_make_synack(req, th);
	th->source = ireq->loc_port;
	th->dest = ireq->rmt_port;

	/* Set up the control block for a SYN|ACK at the request's sent ISN. */
	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
			     TCPHDR_SYN | TCPHDR_ACK);

	if (OPTION_COOKIE_EXTENSION & opts.options) {
		if (s_data_desired) {
			u8 *buf = skb_put(skb, s_data_desired);

			/* Append the constant payload and account for it. */
			memcpy(buf, cvp->s_data_payload, s_data_desired);
			TCP_SKB_CB(skb)->end_seq += s_data_desired;
		}

		if (opts.hash_size > 0) {
			__u32 workspace[SHA_WORKSPACE_WORDS];
			u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS];
			u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1];

			/*
			 * Mix per-connection values into the last words of the
			 * message, back to front: timestamp value, peer ISN + 1,
			 * our ISN + 1, the port pair and the cookie values
			 * pointer.
			 */
			*tail-- ^= opts.tsval;
			*tail-- ^= tcp_rsk(req)->rcv_isn + 1;
			*tail-- ^= TCP_SKB_CB(skb)->seq + 1;

			/* Both ports packed into one word. */
			*tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source);
			*tail-- ^= (u32)(unsigned long)cvp;

			/* Hash the message; the digest becomes the option's hash. */
			sha_transform((__u32 *)&xvp->cookie_bakery[0],
				      (char *)mess,
				      &workspace[0]);
			opts.hash_location =
				(__u8 *)&xvp->cookie_bakery[0];
		}
	}

	th->seq = htonl(TCP_SKB_CB(skb)->seq);
	th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);

	/* The window field of the SYN-ACK is capped at 65535. */
	th->window = htons(min(req->rcv_wnd, 65535U));
	tcp_options_write((__be32 *)(th + 1), tp, &opts);
	th->doff = (tcp_header_size >> 2);
	TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb));

#ifdef CONFIG_TCP_MD5SIG
	/* Sign the segment when an MD5 key was found for it. */
	if (md5) {
		tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
					       md5, NULL, req, skb);
	}
#endif

	return skb;
}
EXPORT_SYMBOL(tcp_make_synack);
2794
2795
/*
 * Initialise the TCP state of a socket that is about to send a SYN:
 * header length, MSS and MTU probing state, advertised MSS, initial
 * receive window and window scale, sequence variables and the
 * retransmission state.
 */
void tcp_connect_init(struct sock *sk)
{
	const struct dst_entry *dst = __sk_dst_get(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__u8 rcv_wscale;

	/*
	 * Expected header length: the base header plus the timestamp option
	 * when timestamps are enabled by sysctl.
	 */
	tp->tcp_header_len = sizeof(struct tcphdr) +
		(sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);

#ifdef CONFIG_TCP_MD5SIG
	/* Reserve option space when an MD5 key exists for this peer. */
	if (tp->af_specific->md5_lookup(sk, sk) != NULL)
		tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	/* A user-specified MSS becomes the clamp. */
	if (tp->rx_opt.user_mss)
		tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
	tp->max_window = 0;
	tcp_mtup_init(sk);
	tcp_sync_mss(sk, dst_mtu(dst));

	if (!tp->window_clamp)
		tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
	/* Advertised MSS: route metric, lowered by a smaller user setting. */
	tp->advmss = dst_metric_advmss(dst);
	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
		tp->advmss = tp->rx_opt.user_mss;

	tcp_initialize_rcv_mss(sk);

	/* With a locked receive buffer, clamp the window to what it can hold. */
	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
	    (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
		tp->window_clamp = tcp_full_space(sk);

	/*
	 * Pick the initial receive window and scale.  If ts_recent_stamp is
	 * set, the option bytes in tcp_header_len are subtracted from the
	 * advertised MSS passed in.
	 */
	tcp_select_initial_window(tcp_full_space(sk),
				  tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
				  &tp->rcv_wnd,
				  &tp->window_clamp,
				  sysctl_tcp_window_scaling,
				  &rcv_wscale,
				  dst_metric(dst, RTAX_INITRWND));

	tp->rx_opt.rcv_wscale = rcv_wscale;
	tp->rcv_ssthresh = tp->rcv_wnd;

	/* Reset error state and all send-side sequence variables. */
	sk->sk_err = 0;
	sock_reset_flag(sk, SOCK_DONE);
	tp->snd_wnd = 0;
	tcp_init_wl(tp, 0);
	tp->snd_una = tp->write_seq;
	tp->snd_sml = tp->write_seq;
	tp->snd_up = tp->write_seq;
	tp->snd_nxt = tp->write_seq;

	/* In repair mode the existing rcv_nxt is kept. */
	if (likely(!tp->repair))
		tp->rcv_nxt = 0;
	tp->rcv_wup = tp->rcv_nxt;
	tp->copied_seq = tp->rcv_nxt;

	inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
	inet_csk(sk)->icsk_retransmits = 0;
	tcp_clear_retrans(tp);
}
2862
2863static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
2864{
2865 struct tcp_sock *tp = tcp_sk(sk);
2866 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
2867
2868 tcb->end_seq += skb->len;
2869 skb_header_release(skb);
2870 __tcp_add_write_queue_tail(sk, skb);
2871 sk->sk_wmem_queued += skb->truesize;
2872 sk_mem_charge(sk, skb->truesize);
2873 tp->write_seq = tcb->end_seq;
2874 tp->packets_out += tcp_skb_pcount(skb);
2875}
2876
2877
2878
2879
2880
2881
2882
2883
2884static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
2885{
2886 struct tcp_sock *tp = tcp_sk(sk);
2887 struct tcp_fastopen_request *fo = tp->fastopen_req;
2888 int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen;
2889 struct sk_buff *syn_data = NULL, *data;
2890 unsigned long last_syn_loss = 0;
2891
2892 tp->rx_opt.mss_clamp = tp->advmss;
2893 tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
2894 &syn_loss, &last_syn_loss);
2895
2896 if (syn_loss > 1 &&
2897 time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
2898 fo->cookie.len = -1;
2899 goto fallback;
2900 }
2901
2902 if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE)
2903 fo->cookie.len = -1;
2904 else if (fo->cookie.len <= 0)
2905 goto fallback;
2906
2907
2908
2909
2910
2911 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp)
2912 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
2913 space = tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
2914 MAX_TCP_OPTION_SPACE;
2915
2916 syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
2917 sk->sk_allocation);
2918 if (syn_data == NULL)
2919 goto fallback;
2920
2921 for (i = 0; i < iovlen && syn_data->len < space; ++i) {
2922 struct iovec *iov = &fo->data->msg_iov[i];
2923 unsigned char __user *from = iov->iov_base;
2924 int len = iov->iov_len;
2925
2926 if (syn_data->len + len > space)
2927 len = space - syn_data->len;
2928 else if (i + 1 == iovlen)
2929
2930 fo->data = NULL;
2931
2932 if (skb_add_data(syn_data, from, len))
2933 goto fallback;
2934 }
2935
2936
2937 data = pskb_copy(syn_data, sk->sk_allocation);
2938 if (data == NULL)
2939 goto fallback;
2940 TCP_SKB_CB(data)->seq++;
2941 TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN;
2942 TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH);
2943 tcp_connect_queue_skb(sk, data);
2944 fo->copied = data->len;
2945
2946 if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {
2947 tp->syn_data = (fo->copied > 0);
2948 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
2949 goto done;
2950 }
2951 syn_data = NULL;
2952
2953fallback:
2954
2955 if (fo->cookie.len > 0)
2956 fo->cookie.len = 0;
2957 err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
2958 if (err)
2959 tp->syn_fastopen = 0;
2960 kfree_skb(syn_data);
2961done:
2962 fo->cookie.len = -1;
2963 return err;
2964}
2965
2966
/* Build a SYN and send it off, starting an active open.
 *
 * Returns 0 on success (including the repair-mode shortcut), -ENOBUFS if
 * the SYN skb cannot be allocated, or -ECONNREFUSED if the transmit path
 * reports it. Any other transmit error is deliberately not returned: the
 * SYN stays queued and the retransmit timer armed below will retry it.
 */
int tcp_connect(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *buff;
	int err;

	tcp_connect_init(sk);

	/* A socket in repair mode is being restored, not opened:
	 * tcp_connect_init() has kept its rcv_nxt, and no SYN may go on the
	 * wire. Complete the connection locally instead.
	 */
	if (unlikely(tp->repair)) {
		tcp_finish_connect(sk, NULL);
		return 0;
	}

	buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
	if (unlikely(buff == NULL))
		return -ENOBUFS;

	/* Reserve space for headers. */
	skb_reserve(buff, MAX_TCP_HEADER);

	/* The SYN consumes one sequence number. */
	tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
	tp->retrans_stamp = TCP_SKB_CB(buff)->when = tcp_time_stamp;
	tcp_connect_queue_skb(sk, buff);
	TCP_ECN_send_syn(sk, buff);

	/* Send off the SYN; with a Fast Open request, try to include data. */
	err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
	      tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
	if (err == -ECONNREFUSED)
		return err;

	/* snd_nxt is advanced only after the transmit call above. */
	tp->snd_nxt = tp->write_seq;
	tp->pushed_seq = tp->write_seq;
	TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);

	/* Timer for repeating the SYN until an answer. */
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
	return 0;
}
EXPORT_SYMBOL(tcp_connect);
3006
3007
3008
3009
3010
3011void tcp_send_delayed_ack(struct sock *sk)
3012{
3013 struct inet_connection_sock *icsk = inet_csk(sk);
3014 int ato = icsk->icsk_ack.ato;
3015 unsigned long timeout;
3016
3017 if (ato > TCP_DELACK_MIN) {
3018 const struct tcp_sock *tp = tcp_sk(sk);
3019 int max_ato = HZ / 2;
3020
3021 if (icsk->icsk_ack.pingpong ||
3022 (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
3023 max_ato = TCP_DELACK_MAX;
3024
3025
3026
3027
3028
3029
3030
3031 if (tp->srtt) {
3032 int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
3033
3034 if (rtt < max_ato)
3035 max_ato = rtt;
3036 }
3037
3038 ato = min(ato, max_ato);
3039 }
3040
3041
3042 timeout = jiffies + ato;
3043
3044
3045 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
3046
3047
3048
3049 if (icsk->icsk_ack.blocked ||
3050 time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
3051 tcp_send_ack(sk);
3052 return;
3053 }
3054
3055 if (!time_before(timeout, icsk->icsk_ack.timeout))
3056 timeout = icsk->icsk_ack.timeout;
3057 }
3058 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
3059 icsk->icsk_ack.timeout = timeout;
3060 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
3061}
3062
3063
3064void tcp_send_ack(struct sock *sk)
3065{
3066 struct sk_buff *buff;
3067
3068
3069 if (sk->sk_state == TCP_CLOSE)
3070 return;
3071
3072
3073
3074
3075
3076 buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
3077 if (buff == NULL) {
3078 inet_csk_schedule_ack(sk);
3079 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
3080 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
3081 TCP_DELACK_MAX, TCP_RTO_MAX);
3082 return;
3083 }
3084
3085
3086 skb_reserve(buff, MAX_TCP_HEADER);
3087 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
3088
3089
3090 TCP_SKB_CB(buff)->when = tcp_time_stamp;
3091 tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
3092}
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
3106{
3107 struct tcp_sock *tp = tcp_sk(sk);
3108 struct sk_buff *skb;
3109
3110
3111 skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
3112 if (skb == NULL)
3113 return -1;
3114
3115
3116 skb_reserve(skb, MAX_TCP_HEADER);
3117
3118
3119
3120
3121 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
3122 TCP_SKB_CB(skb)->when = tcp_time_stamp;
3123 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
3124}
3125
3126void tcp_send_window_probe(struct sock *sk)
3127{
3128 if (sk->sk_state == TCP_ESTABLISHED) {
3129 tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
3130 tcp_sk(sk)->snd_nxt = tcp_sk(sk)->write_seq;
3131 tcp_xmit_probe_skb(sk, 0);
3132 }
3133}
3134
3135
3136int tcp_write_wakeup(struct sock *sk)
3137{
3138 struct tcp_sock *tp = tcp_sk(sk);
3139 struct sk_buff *skb;
3140
3141 if (sk->sk_state == TCP_CLOSE)
3142 return -1;
3143
3144 if ((skb = tcp_send_head(sk)) != NULL &&
3145 before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
3146 int err;
3147 unsigned int mss = tcp_current_mss(sk);
3148 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
3149
3150 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
3151 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
3152
3153
3154
3155
3156
3157 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
3158 skb->len > mss) {
3159 seg_size = min(seg_size, mss);
3160 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3161 if (tcp_fragment(sk, skb, seg_size, mss))
3162 return -1;
3163 } else if (!tcp_skb_pcount(skb))
3164 tcp_set_skb_tso_segs(sk, skb, mss);
3165
3166 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3167 TCP_SKB_CB(skb)->when = tcp_time_stamp;
3168 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
3169 if (!err)
3170 tcp_event_new_data_sent(sk, skb);
3171 return err;
3172 } else {
3173 if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
3174 tcp_xmit_probe_skb(sk, 1);
3175 return tcp_xmit_probe_skb(sk, 0);
3176 }
3177}
3178
3179
3180
3181
3182void tcp_send_probe0(struct sock *sk)
3183{
3184 struct inet_connection_sock *icsk = inet_csk(sk);
3185 struct tcp_sock *tp = tcp_sk(sk);
3186 int err;
3187
3188 err = tcp_write_wakeup(sk);
3189
3190 if (tp->packets_out || !tcp_send_head(sk)) {
3191
3192 icsk->icsk_probes_out = 0;
3193 icsk->icsk_backoff = 0;
3194 return;
3195 }
3196
3197 if (err <= 0) {
3198 if (icsk->icsk_backoff < sysctl_tcp_retries2)
3199 icsk->icsk_backoff++;
3200 icsk->icsk_probes_out++;
3201 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3202 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
3203 TCP_RTO_MAX);
3204 } else {
3205
3206
3207
3208
3209
3210
3211 if (!icsk->icsk_probes_out)
3212 icsk->icsk_probes_out = 1;
3213 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3214 min(icsk->icsk_rto << icsk->icsk_backoff,
3215 TCP_RESOURCE_PROBE_INTERVAL),
3216 TCP_RTO_MAX);
3217 }
3218}
3219