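/*
 * TCP output engine (tcp_output.c): builds and transmits outgoing TCP
 * segments (data, SYN/SYN-ACK, pure ACKs, RSTs and window probes) and
 * implements the send side of window, MSS/MTU and retransmission
 * handling.
 */
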
#include <net/tcp.h>

#include <linux/compiler.h>
#include <linux/gfp.h>
#include <linux/module.h>

int sysctl_tcp_retrans_collapse __read_mostly = 1;

int sysctl_tcp_workaround_signed_windows __read_mostly = 0;

int sysctl_tcp_tso_win_divisor __read_mostly = 3;

int sysctl_tcp_mtu_probing __read_mostly = 0;
int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;

int sysctl_tcp_slow_start_after_idle __read_mostly = 1;

int sysctl_tcp_cookie_size __read_mostly = 0;
EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);
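
/* Account for a new data segment entering the network: advance the send
 * head and snd_nxt, update packets_out, and arm the retransmit timer
 * when this is the first packet in flight.
 */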
68static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
69{
70 struct tcp_sock *tp = tcp_sk(sk);
71 unsigned int prior_packets = tp->packets_out;
72
73 tcp_advance_send_head(sk, skb);
74 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
75
76
77 if (tp->frto_counter == 2)
78 tp->frto_counter = 3;
79
80 tp->packets_out += tcp_skb_pcount(skb);
81 if (!prior_packets)
82 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
83 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
84}
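
/* Sequence number to place in outgoing pure ACK and RST segments:
 * SND.NXT, unless that already lies beyond the right window edge, in
 * which case the window edge is used instead.
 */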
92static inline __u32 tcp_acceptable_seq(const struct sock *sk)
93{
94 const struct tcp_sock *tp = tcp_sk(sk);
95
96 if (!before(tcp_wnd_end(tp), tp->snd_nxt))
97 return tp->snd_nxt;
98 else
99 return tcp_wnd_end(tp);
100}
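
/* MSS to advertise to the peer in a SYN, starting from tp->advmss and
 * clamped to the route's advmss metric when that is smaller.
 */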
116static __u16 tcp_advertise_mss(struct sock *sk)
117{
118 struct tcp_sock *tp = tcp_sk(sk);
119 const struct dst_entry *dst = __sk_dst_get(sk);
120 int mss = tp->advmss;
121
122 if (dst) {
123 unsigned int metric = dst_metric_advmss(dst);
124
125 if (metric < mss) {
126 mss = metric;
127 tp->advmss = mss;
128 }
129 }
130
131 return (__u16)mss;
132}
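
/* Congestion window restart after an idle period (RFC 2861): halve
 * snd_cwnd once for every RTO that elapsed since the last send, but do
 * not shrink it below the (clamped) restart window.
 */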
136static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst)
137{
138 struct tcp_sock *tp = tcp_sk(sk);
139 s32 delta = tcp_time_stamp - tp->lsndtime;
140 u32 restart_cwnd = tcp_init_cwnd(tp, dst);
141 u32 cwnd = tp->snd_cwnd;
142
143 tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
144
145 tp->snd_ssthresh = tcp_current_ssthresh(sk);
146 restart_cwnd = min(restart_cwnd, cwnd);
147
148 while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
149 cwnd >>= 1;
150 tp->snd_cwnd = max(cwnd, restart_cwnd);
151 tp->snd_cwnd_stamp = tcp_time_stamp;
152 tp->snd_cwnd_used = 0;
153}
154
155
156static void tcp_event_data_sent(struct tcp_sock *tp,
157 struct sock *sk)
158{
159 struct inet_connection_sock *icsk = inet_csk(sk);
160 const u32 now = tcp_time_stamp;
161
162 if (sysctl_tcp_slow_start_after_idle &&
163 (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
164 tcp_cwnd_restart(sk, __sk_dst_get(sk));
165
166 tp->lsndtime = now;
167
168
169
170
171 if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
172 icsk->icsk_ack.pingpong = 1;
173}
174
175
176static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
177{
178 tcp_dec_quickack_mode(sk, pkts);
179 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
180}
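
/* Choose the initial receive window and window scale to offer on a new
 * connection.  The available buffer space is rounded down to a multiple
 * of the MSS, capped by *window_clamp, and optionally limited to a few
 * segments via init_rcv_wnd or the default initial receive window.
 */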
189void tcp_select_initial_window(int __space, __u32 mss,
190 __u32 *rcv_wnd, __u32 *window_clamp,
191 int wscale_ok, __u8 *rcv_wscale,
192 __u32 init_rcv_wnd)
193{
194 unsigned int space = (__space < 0 ? 0 : __space);
195
196
197 if (*window_clamp == 0)
198 (*window_clamp) = (65535 << 14);
199 space = min(*window_clamp, space);
200
201
202 if (space > mss)
203 space = (space / mss) * mss;
204
205
206
207
208
209
210
211
212
213 if (sysctl_tcp_workaround_signed_windows)
214 (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
215 else
216 (*rcv_wnd) = space;
217
218 (*rcv_wscale) = 0;
219 if (wscale_ok) {
220
221
222
223 space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
224 space = min_t(u32, space, *window_clamp);
225 while (space > 65535 && (*rcv_wscale) < 14) {
226 space >>= 1;
227 (*rcv_wscale)++;
228 }
229 }
230
231
232
233
234
235 if (mss > (1 << *rcv_wscale)) {
236 int init_cwnd = TCP_DEFAULT_INIT_RCVWND;
237 if (mss > 1460)
238 init_cwnd =
239 max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
240
241
242
243 if (init_rcv_wnd)
244 *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
245 else
246 *rcv_wnd = min(*rcv_wnd, init_cwnd * mss);
247 }
248
249
250 (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
251}
252EXPORT_SYMBOL(tcp_select_initial_window);
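
/* Pick the receive window to advertise on an outgoing segment.  The
 * offered window is never shrunk relative to what was advertised before
 * (RFC 793 forbids that) and is rounded to the negotiated window scale.
 */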
259static u16 tcp_select_window(struct sock *sk)
260{
261 struct tcp_sock *tp = tcp_sk(sk);
262 u32 cur_win = tcp_receive_window(tp);
263 u32 new_win = __tcp_select_window(sk);
264
265
266 if (new_win < cur_win) {
267
268
269
270
271
272
273
274 new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
275 }
276 tp->rcv_wnd = new_win;
277 tp->rcv_wup = tp->rcv_nxt;
278
279
280
281
282 if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
283 new_win = min(new_win, MAX_TCP_WINDOW);
284 else
285 new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
286
287
288 new_win >>= tp->rx_opt.rcv_wscale;
289
290
291 if (new_win == 0)
292 tp->pred_flags = 0;
293
294 return new_win;
295}
296
297
298static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb)
299{
300 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
301 if (!(tp->ecn_flags & TCP_ECN_OK))
302 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
303}
304
305
306static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
307{
308 struct tcp_sock *tp = tcp_sk(sk);
309
310 tp->ecn_flags = 0;
311 if (sysctl_tcp_ecn == 1) {
312 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
313 tp->ecn_flags = TCP_ECN_OK;
314 }
315}
316
317static __inline__ void
318TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th)
319{
320 if (inet_rsk(req)->ecn_ok)
321 th->ece = 1;
322}
323
324
325
326
327static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
328 int tcp_header_len)
329{
330 struct tcp_sock *tp = tcp_sk(sk);
331
332 if (tp->ecn_flags & TCP_ECN_OK) {
333
334 if (skb->len != tcp_header_len &&
335 !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
336 INET_ECN_xmit(sk);
337 if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
338 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
339 tcp_hdr(skb)->cwr = 1;
340 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
341 }
342 } else {
343
344 INET_ECN_dontxmit(sk);
345 }
346 if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
347 tcp_hdr(skb)->ece = 1;
348 }
349}
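
/* Initialise the TCP control block of an skb that carries no user data
 * (pure ACK, RST, SYN or FIN), including its sequence space; SYN and
 * FIN each consume one sequence number.
 */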
354static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
355{
356 skb->ip_summed = CHECKSUM_PARTIAL;
357 skb->csum = 0;
358
359 TCP_SKB_CB(skb)->tcp_flags = flags;
360 TCP_SKB_CB(skb)->sacked = 0;
361
362 skb_shinfo(skb)->gso_segs = 1;
363 skb_shinfo(skb)->gso_size = 0;
364 skb_shinfo(skb)->gso_type = 0;
365
366 TCP_SKB_CB(skb)->seq = seq;
367 if (flags & (TCPHDR_SYN | TCPHDR_FIN))
368 seq++;
369 TCP_SKB_CB(skb)->end_seq = seq;
370}
371
372static inline int tcp_urg_mode(const struct tcp_sock *tp)
373{
374 return tp->snd_una != tp->snd_up;
375}
376
377#define OPTION_SACK_ADVERTISE (1 << 0)
378#define OPTION_TS (1 << 1)
379#define OPTION_MD5 (1 << 2)
380#define OPTION_WSCALE (1 << 3)
381#define OPTION_COOKIE_EXTENSION (1 << 4)
382
383struct tcp_out_options {
384 u8 options;
385 u8 ws;
386 u8 num_sack_blocks;
387 u8 hash_size;
388 u16 mss;
389 __u32 tsval, tsecr;
390 __u8 *hash_location;
391};
392
393
394
395static u8 tcp_cookie_size_check(u8 desired)
396{
397 int cookie_size;
398
399 if (desired > 0)
400
401 return desired;
402
403 cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
404 if (cookie_size <= 0)
405
406 return 0;
407
408 if (cookie_size <= TCP_COOKIE_MIN)
409
410 return TCP_COOKIE_MIN;
411
412 if (cookie_size >= TCP_COOKIE_MAX)
413
414 return TCP_COOKIE_MAX;
415
416 if (cookie_size & 1)
417
418 cookie_size++;
419
420 return (u8)cookie_size;
421}
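
/* Write previously computed TCP options into the header.  MD5 is
 * emitted first and its hash location recorded so the digest can be
 * filled in once the header is complete; SACK blocks, which change on
 * almost every packet, are written last, with NOPs used to keep 32-bit
 * fields aligned.
 */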
436static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
437 struct tcp_out_options *opts)
438{
439 u8 options = opts->options;
440
441
442
443
444
445
446
447
448
449 if (unlikely(OPTION_MD5 & options)) {
450 if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
451 *ptr++ = htonl((TCPOPT_COOKIE << 24) |
452 (TCPOLEN_COOKIE_BASE << 16) |
453 (TCPOPT_MD5SIG << 8) |
454 TCPOLEN_MD5SIG);
455 } else {
456 *ptr++ = htonl((TCPOPT_NOP << 24) |
457 (TCPOPT_NOP << 16) |
458 (TCPOPT_MD5SIG << 8) |
459 TCPOLEN_MD5SIG);
460 }
461 options &= ~OPTION_COOKIE_EXTENSION;
462
463 opts->hash_location = (__u8 *)ptr;
464 ptr += 4;
465 }
466
467 if (unlikely(opts->mss)) {
468 *ptr++ = htonl((TCPOPT_MSS << 24) |
469 (TCPOLEN_MSS << 16) |
470 opts->mss);
471 }
472
473 if (likely(OPTION_TS & options)) {
474 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
475 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
476 (TCPOLEN_SACK_PERM << 16) |
477 (TCPOPT_TIMESTAMP << 8) |
478 TCPOLEN_TIMESTAMP);
479 options &= ~OPTION_SACK_ADVERTISE;
480 } else {
481 *ptr++ = htonl((TCPOPT_NOP << 24) |
482 (TCPOPT_NOP << 16) |
483 (TCPOPT_TIMESTAMP << 8) |
484 TCPOLEN_TIMESTAMP);
485 }
486 *ptr++ = htonl(opts->tsval);
487 *ptr++ = htonl(opts->tsecr);
488 }
489
490
491
492
493
494
495
496 if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
497 __u8 *cookie_copy = opts->hash_location;
498 u8 cookie_size = opts->hash_size;
499
500
501
502
503 if (0x2 & cookie_size) {
504 __u8 *p = (__u8 *)ptr;
505
506
507 *p++ = TCPOPT_COOKIE;
508 *p++ = TCPOLEN_COOKIE_BASE + cookie_size;
509 *p++ = *cookie_copy++;
510 *p++ = *cookie_copy++;
511 ptr++;
512 cookie_size -= 2;
513 } else {
514
515 *ptr++ = htonl(((TCPOPT_NOP << 24) |
516 (TCPOPT_NOP << 16) |
517 (TCPOPT_COOKIE << 8) |
518 TCPOLEN_COOKIE_BASE) +
519 cookie_size);
520 }
521
522 if (cookie_size > 0) {
523 memcpy(ptr, cookie_copy, cookie_size);
524 ptr += (cookie_size / 4);
525 }
526 }
527
528 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
529 *ptr++ = htonl((TCPOPT_NOP << 24) |
530 (TCPOPT_NOP << 16) |
531 (TCPOPT_SACK_PERM << 8) |
532 TCPOLEN_SACK_PERM);
533 }
534
535 if (unlikely(OPTION_WSCALE & options)) {
536 *ptr++ = htonl((TCPOPT_NOP << 24) |
537 (TCPOPT_WINDOW << 16) |
538 (TCPOLEN_WINDOW << 8) |
539 opts->ws);
540 }
541
542 if (unlikely(opts->num_sack_blocks)) {
543 struct tcp_sack_block *sp = tp->rx_opt.dsack ?
544 tp->duplicate_sack : tp->selective_acks;
545 int this_sack;
546
547 *ptr++ = htonl((TCPOPT_NOP << 24) |
548 (TCPOPT_NOP << 16) |
549 (TCPOPT_SACK << 8) |
550 (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
551 TCPOLEN_SACK_PERBLOCK)));
552
553 for (this_sack = 0; this_sack < opts->num_sack_blocks;
554 ++this_sack) {
555 *ptr++ = htonl(sp[this_sack].start_seq);
556 *ptr++ = htonl(sp[this_sack].end_seq);
557 }
558
559 tp->rx_opt.dsack = 0;
560 }
561}
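
/* Compute TCP options for an outgoing SYN and return the number of
 * option bytes that will be used.
 */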
566static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
567 struct tcp_out_options *opts,
568 struct tcp_md5sig_key **md5)
569{
570 struct tcp_sock *tp = tcp_sk(sk);
571 struct tcp_cookie_values *cvp = tp->cookie_values;
572 unsigned remaining = MAX_TCP_OPTION_SPACE;
573 u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ?
574 tcp_cookie_size_check(cvp->cookie_desired) :
575 0;
576
577#ifdef CONFIG_TCP_MD5SIG
578 *md5 = tp->af_specific->md5_lookup(sk, sk);
579 if (*md5) {
580 opts->options |= OPTION_MD5;
581 remaining -= TCPOLEN_MD5SIG_ALIGNED;
582 }
583#else
584 *md5 = NULL;
585#endif
586
587
588
589
590
591
592
593
594
595
596 opts->mss = tcp_advertise_mss(sk);
597 remaining -= TCPOLEN_MSS_ALIGNED;
598
599 if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
600 opts->options |= OPTION_TS;
601 opts->tsval = TCP_SKB_CB(skb)->when;
602 opts->tsecr = tp->rx_opt.ts_recent;
603 remaining -= TCPOLEN_TSTAMP_ALIGNED;
604 }
605 if (likely(sysctl_tcp_window_scaling)) {
606 opts->ws = tp->rx_opt.rcv_wscale;
607 opts->options |= OPTION_WSCALE;
608 remaining -= TCPOLEN_WSCALE_ALIGNED;
609 }
610 if (likely(sysctl_tcp_sack)) {
611 opts->options |= OPTION_SACK_ADVERTISE;
612 if (unlikely(!(OPTION_TS & opts->options)))
613 remaining -= TCPOLEN_SACKPERM_ALIGNED;
614 }
615
616
617
618
619
620
621
622 if (*md5 == NULL &&
623 (OPTION_TS & opts->options) &&
624 cookie_size > 0) {
625 int need = TCPOLEN_COOKIE_BASE + cookie_size;
626
627 if (0x2 & need) {
628
629 need += 2;
630
631 if (need > remaining) {
632
633 cookie_size -= 2;
634 need -= 4;
635 }
636 }
637 while (need > remaining && TCP_COOKIE_MIN <= cookie_size) {
638 cookie_size -= 4;
639 need -= 4;
640 }
641 if (TCP_COOKIE_MIN <= cookie_size) {
642 opts->options |= OPTION_COOKIE_EXTENSION;
643 opts->hash_location = (__u8 *)&cvp->cookie_pair[0];
644 opts->hash_size = cookie_size;
645
646
647 cvp->cookie_desired = cookie_size;
648
649 if (cvp->cookie_desired != cvp->cookie_pair_size) {
650
651
652
653
654 get_random_bytes(&cvp->cookie_pair[0],
655 cookie_size);
656 cvp->cookie_pair_size = cookie_size;
657 }
658
659 remaining -= need;
660 }
661 }
662 return MAX_TCP_OPTION_SPACE - remaining;
663}
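
/* Compute TCP options for a SYN-ACK built from a request_sock; like
 * tcp_syn_options(), but the peer's capabilities (wscale_ok, tstamp_ok,
 * sack_ok) are taken from the request.
 */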
666static unsigned tcp_synack_options(struct sock *sk,
667 struct request_sock *req,
668 unsigned mss, struct sk_buff *skb,
669 struct tcp_out_options *opts,
670 struct tcp_md5sig_key **md5,
671 struct tcp_extend_values *xvp)
672{
673 struct inet_request_sock *ireq = inet_rsk(req);
674 unsigned remaining = MAX_TCP_OPTION_SPACE;
675 u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ?
676 xvp->cookie_plus :
677 0;
678
679#ifdef CONFIG_TCP_MD5SIG
680 *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
681 if (*md5) {
682 opts->options |= OPTION_MD5;
683 remaining -= TCPOLEN_MD5SIG_ALIGNED;
684
685
686
687
688
689
690 ireq->tstamp_ok &= !ireq->sack_ok;
691 }
692#else
693 *md5 = NULL;
694#endif
695
696
697 opts->mss = mss;
698 remaining -= TCPOLEN_MSS_ALIGNED;
699
700 if (likely(ireq->wscale_ok)) {
701 opts->ws = ireq->rcv_wscale;
702 opts->options |= OPTION_WSCALE;
703 remaining -= TCPOLEN_WSCALE_ALIGNED;
704 }
705 if (likely(ireq->tstamp_ok)) {
706 opts->options |= OPTION_TS;
707 opts->tsval = TCP_SKB_CB(skb)->when;
708 opts->tsecr = req->ts_recent;
709 remaining -= TCPOLEN_TSTAMP_ALIGNED;
710 }
711 if (likely(ireq->sack_ok)) {
712 opts->options |= OPTION_SACK_ADVERTISE;
713 if (unlikely(!ireq->tstamp_ok))
714 remaining -= TCPOLEN_SACKPERM_ALIGNED;
715 }
716
717
718
719
720 if (*md5 == NULL &&
721 ireq->tstamp_ok &&
722 cookie_plus > TCPOLEN_COOKIE_BASE) {
723 int need = cookie_plus;
724
725 if (0x2 & need) {
726
727 need += 2;
728 }
729 if (need <= remaining) {
730 opts->options |= OPTION_COOKIE_EXTENSION;
731 opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE;
732 remaining -= need;
733 } else {
734
735 xvp->cookie_out_never = 1;
736 opts->hash_size = 0;
737 }
738 }
739 return MAX_TCP_OPTION_SPACE - remaining;
740}
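
/* Compute TCP options for established-state segments: timestamps, MD5
 * and as many SACK blocks as fit in the remaining option space.
 * Returns the total option size in bytes.
 */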
745static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
746 struct tcp_out_options *opts,
747 struct tcp_md5sig_key **md5)
748{
749 struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
750 struct tcp_sock *tp = tcp_sk(sk);
751 unsigned size = 0;
752 unsigned int eff_sacks;
753
754#ifdef CONFIG_TCP_MD5SIG
755 *md5 = tp->af_specific->md5_lookup(sk, sk);
756 if (unlikely(*md5)) {
757 opts->options |= OPTION_MD5;
758 size += TCPOLEN_MD5SIG_ALIGNED;
759 }
760#else
761 *md5 = NULL;
762#endif
763
764 if (likely(tp->rx_opt.tstamp_ok)) {
765 opts->options |= OPTION_TS;
766 opts->tsval = tcb ? tcb->when : 0;
767 opts->tsecr = tp->rx_opt.ts_recent;
768 size += TCPOLEN_TSTAMP_ALIGNED;
769 }
770
771 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
772 if (unlikely(eff_sacks)) {
773 const unsigned remaining = MAX_TCP_OPTION_SPACE - size;
774 opts->num_sack_blocks =
775 min_t(unsigned, eff_sacks,
776 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
777 TCPOLEN_SACK_PERBLOCK);
778 size += TCPOLEN_SACK_BASE_ALIGNED +
779 opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
780 }
781
782 return size;
783}
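
/* Build the TCP header for an skb and hand the packet to the IP layer.
 * Used for both initial transmissions and retransmissions; the skb is
 * normally cloned first (clone_it) so that the copy kept on the write
 * queue stays headerless for a possible later retransmission.
 */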
796static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
797 gfp_t gfp_mask)
798{
799 const struct inet_connection_sock *icsk = inet_csk(sk);
800 struct inet_sock *inet;
801 struct tcp_sock *tp;
802 struct tcp_skb_cb *tcb;
803 struct tcp_out_options opts;
804 unsigned tcp_options_size, tcp_header_size;
805 struct tcp_md5sig_key *md5;
806 struct tcphdr *th;
807 int err;
808
809 BUG_ON(!skb || !tcp_skb_pcount(skb));
810
811
812
813
814 if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
815 __net_timestamp(skb);
816
817 if (likely(clone_it)) {
818 if (unlikely(skb_cloned(skb)))
819 skb = pskb_copy(skb, gfp_mask);
820 else
821 skb = skb_clone(skb, gfp_mask);
822 if (unlikely(!skb))
823 return -ENOBUFS;
824 }
825
826 inet = inet_sk(sk);
827 tp = tcp_sk(sk);
828 tcb = TCP_SKB_CB(skb);
829 memset(&opts, 0, sizeof(opts));
830
831 if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
832 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
833 else
834 tcp_options_size = tcp_established_options(sk, skb, &opts,
835 &md5);
836 tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
837
838 if (tcp_packets_in_flight(tp) == 0) {
839 tcp_ca_event(sk, CA_EVENT_TX_START);
840 skb->ooo_okay = 1;
841 } else
842 skb->ooo_okay = 0;
843
844 skb_push(skb, tcp_header_size);
845 skb_reset_transport_header(skb);
846 skb_set_owner_w(skb, sk);
847
848
849 th = tcp_hdr(skb);
850 th->source = inet->inet_sport;
851 th->dest = inet->inet_dport;
852 th->seq = htonl(tcb->seq);
853 th->ack_seq = htonl(tp->rcv_nxt);
854 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
855 tcb->tcp_flags);
856
857 if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
858
859
860
861 th->window = htons(min(tp->rcv_wnd, 65535U));
862 } else {
863 th->window = htons(tcp_select_window(sk));
864 }
865 th->check = 0;
866 th->urg_ptr = 0;
867
868
869 if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
870 if (before(tp->snd_up, tcb->seq + 0x10000)) {
871 th->urg_ptr = htons(tp->snd_up - tcb->seq);
872 th->urg = 1;
873 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
874 th->urg_ptr = htons(0xFFFF);
875 th->urg = 1;
876 }
877 }
878
879 tcp_options_write((__be32 *)(th + 1), tp, &opts);
880 if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
881 TCP_ECN_send(sk, skb, tcp_header_size);
882
883#ifdef CONFIG_TCP_MD5SIG
884
885 if (md5) {
886 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
887 tp->af_specific->calc_md5_hash(opts.hash_location,
888 md5, sk, NULL, skb);
889 }
890#endif
891
892 icsk->icsk_af_ops->send_check(sk, skb);
893
894 if (likely(tcb->tcp_flags & TCPHDR_ACK))
895 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
896
897 if (skb->len != tcp_header_size)
898 tcp_event_data_sent(tp, sk);
899
900 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
901 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
902 tcp_skb_pcount(skb));
903
904 err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
905 if (likely(err <= 0))
906 return err;
907
908 tcp_enter_cwr(sk, 1);
909
910 return net_xmit_eval(err);
911}
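
/* Append a data skb to the write queue and advance write_seq; the
 * actual transmission happens later via tcp_write_xmit()/tcp_push_one().
 */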
918static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
919{
920 struct tcp_sock *tp = tcp_sk(sk);
921
922
923 tp->write_seq = TCP_SKB_CB(skb)->end_seq;
924 skb_header_release(skb);
925 tcp_add_write_queue_tail(sk, skb);
926 sk->sk_wmem_queued += skb->truesize;
927 sk_mem_charge(sk, skb->truesize);
928}
929
930
931static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
932 unsigned int mss_now)
933{
934 if (skb->len <= mss_now || !sk_can_gso(sk) ||
935 skb->ip_summed == CHECKSUM_NONE) {
936
937
938
939 skb_shinfo(skb)->gso_segs = 1;
940 skb_shinfo(skb)->gso_size = 0;
941 skb_shinfo(skb)->gso_type = 0;
942 } else {
943 skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
944 skb_shinfo(skb)->gso_size = mss_now;
945 skb_shinfo(skb)->gso_type = sk->sk_gso_type;
946 }
947}
948
949
950
951
952static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
953 int decr)
954{
955 struct tcp_sock *tp = tcp_sk(sk);
956
957 if (!tp->sacked_out || tcp_is_reno(tp))
958 return;
959
960 if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
961 tp->fackets_out -= decr;
962}
963
964
965
966
967static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
968{
969 struct tcp_sock *tp = tcp_sk(sk);
970
971 tp->packets_out -= decr;
972
973 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
974 tp->sacked_out -= decr;
975 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
976 tp->retrans_out -= decr;
977 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
978 tp->lost_out -= decr;
979
980
981 if (tcp_is_reno(tp) && decr > 0)
982 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
983
984 tcp_adjust_fackets_out(sk, skb, decr);
985
986 if (tp->lost_skb_hint &&
987 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
988 (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
989 tp->lost_cnt_hint -= decr;
990
991 tcp_verify_left_out(tp);
992}
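
/* Split an skb on the write queue into two packets at "len" bytes; the
 * tail becomes a new skb queued right after the original.  Used when
 * only part of a segment may be sent and when trimming segments for
 * retransmission.
 */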
999int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1000 unsigned int mss_now)
1001{
1002 struct tcp_sock *tp = tcp_sk(sk);
1003 struct sk_buff *buff;
1004 int nsize, old_factor;
1005 int nlen;
1006 u8 flags;
1007
1008 if (WARN_ON(len > skb->len))
1009 return -EINVAL;
1010
1011 nsize = skb_headlen(skb) - len;
1012 if (nsize < 0)
1013 nsize = 0;
1014
1015 if (skb_cloned(skb) &&
1016 skb_is_nonlinear(skb) &&
1017 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
1018 return -ENOMEM;
1019
1020
1021 buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
1022 if (buff == NULL)
1023 return -ENOMEM;
1024
1025 sk->sk_wmem_queued += buff->truesize;
1026 sk_mem_charge(sk, buff->truesize);
1027 nlen = skb->len - len - nsize;
1028 buff->truesize += nlen;
1029 skb->truesize -= nlen;
1030
1031
1032 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1033 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1034 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1035
1036
1037 flags = TCP_SKB_CB(skb)->tcp_flags;
1038 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1039 TCP_SKB_CB(buff)->tcp_flags = flags;
1040 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1041
1042 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
1043
1044 buff->csum = csum_partial_copy_nocheck(skb->data + len,
1045 skb_put(buff, nsize),
1046 nsize, 0);
1047
1048 skb_trim(skb, len);
1049
1050 skb->csum = csum_block_sub(skb->csum, buff->csum, len);
1051 } else {
1052 skb->ip_summed = CHECKSUM_PARTIAL;
1053 skb_split(skb, buff, len);
1054 }
1055
1056 buff->ip_summed = skb->ip_summed;
1057
1058
1059
1060
1061 TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
1062 buff->tstamp = skb->tstamp;
1063
1064 old_factor = tcp_skb_pcount(skb);
1065
1066
1067 tcp_set_skb_tso_segs(sk, skb, mss_now);
1068 tcp_set_skb_tso_segs(sk, buff, mss_now);
1069
1070
1071
1072
1073 if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
1074 int diff = old_factor - tcp_skb_pcount(skb) -
1075 tcp_skb_pcount(buff);
1076
1077 if (diff)
1078 tcp_adjust_pcount(sk, skb, diff);
1079 }
1080
1081
1082 skb_header_release(buff);
1083 tcp_insert_write_queue_after(skb, buff, sk);
1084
1085 return 0;
1086}
1087
1088
1089
1090
1091
1092static void __pskb_trim_head(struct sk_buff *skb, int len)
1093{
1094 int i, k, eat;
1095
1096 eat = min_t(int, len, skb_headlen(skb));
1097 if (eat) {
1098 __skb_pull(skb, eat);
1099 len -= eat;
1100 if (!len)
1101 return;
1102 }
1103 eat = len;
1104 k = 0;
1105 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1106 int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
1107
1108 if (size <= eat) {
1109 skb_frag_unref(skb, i);
1110 eat -= size;
1111 } else {
1112 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
1113 if (eat) {
1114 skb_shinfo(skb)->frags[k].page_offset += eat;
1115 skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
1116 eat = 0;
1117 }
1118 k++;
1119 }
1120 }
1121 skb_shinfo(skb)->nr_frags = k;
1122
1123 skb_reset_tail_pointer(skb);
1124 skb->data_len -= len;
1125 skb->len = skb->data_len;
1126}
1127
1128
1129int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1130{
1131 if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
1132 return -ENOMEM;
1133
1134 __pskb_trim_head(skb, len);
1135
1136 TCP_SKB_CB(skb)->seq += len;
1137 skb->ip_summed = CHECKSUM_PARTIAL;
1138
1139 skb->truesize -= len;
1140 sk->sk_wmem_queued -= len;
1141 sk_mem_uncharge(sk, len);
1142 sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
1143
1144
1145 if (tcp_skb_pcount(skb) > 1)
1146 tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
1147
1148 return 0;
1149}
1150
1151
1152int tcp_mtu_to_mss(const struct sock *sk, int pmtu)
1153{
1154 const struct tcp_sock *tp = tcp_sk(sk);
1155 const struct inet_connection_sock *icsk = inet_csk(sk);
1156 int mss_now;
1157
1158
1159
1160
1161 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);
1162
1163
1164 if (mss_now > tp->rx_opt.mss_clamp)
1165 mss_now = tp->rx_opt.mss_clamp;
1166
1167
1168 mss_now -= icsk->icsk_ext_hdr_len;
1169
1170
1171 if (mss_now < 48)
1172 mss_now = 48;
1173
1174
1175 mss_now -= tp->tcp_header_len - sizeof(struct tcphdr);
1176
1177 return mss_now;
1178}
1179
1180
1181int tcp_mss_to_mtu(const struct sock *sk, int mss)
1182{
1183 const struct tcp_sock *tp = tcp_sk(sk);
1184 const struct inet_connection_sock *icsk = inet_csk(sk);
1185 int mtu;
1186
1187 mtu = mss +
1188 tp->tcp_header_len +
1189 icsk->icsk_ext_hdr_len +
1190 icsk->icsk_af_ops->net_header_len;
1191
1192 return mtu;
1193}
1194
1195
1196void tcp_mtup_init(struct sock *sk)
1197{
1198 struct tcp_sock *tp = tcp_sk(sk);
1199 struct inet_connection_sock *icsk = inet_csk(sk);
1200
1201 icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1;
1202 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
1203 icsk->icsk_af_ops->net_header_len;
1204 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
1205 icsk->icsk_mtup.probe_size = 0;
1206}
1207EXPORT_SYMBOL(tcp_mtup_init);
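
/* Synchronise tp->mss_cache with the current path MTU and extension
 * header sizes.  Called whenever the PMTU or the extension headers
 * change; the result is clamped against half of the largest window the
 * peer has advertised and against the MTU-probing search range.
 */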
1231unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
1232{
1233 struct tcp_sock *tp = tcp_sk(sk);
1234 struct inet_connection_sock *icsk = inet_csk(sk);
1235 int mss_now;
1236
1237 if (icsk->icsk_mtup.search_high > pmtu)
1238 icsk->icsk_mtup.search_high = pmtu;
1239
1240 mss_now = tcp_mtu_to_mss(sk, pmtu);
1241 mss_now = tcp_bound_to_half_wnd(tp, mss_now);
1242
1243
1244 icsk->icsk_pmtu_cookie = pmtu;
1245 if (icsk->icsk_mtup.enabled)
1246 mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low));
1247 tp->mss_cache = mss_now;
1248
1249 return mss_now;
1250}
1251EXPORT_SYMBOL(tcp_sync_mss);
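
/* Compute the MSS to use for the segment being built right now:
 * resynchronise with the route MTU if it changed, then subtract any
 * variable-size TCP options (such as SACK blocks) beyond the fixed
 * per-segment header length.
 */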
1256unsigned int tcp_current_mss(struct sock *sk)
1257{
1258 const struct tcp_sock *tp = tcp_sk(sk);
1259 const struct dst_entry *dst = __sk_dst_get(sk);
1260 u32 mss_now;
1261 unsigned header_len;
1262 struct tcp_out_options opts;
1263 struct tcp_md5sig_key *md5;
1264
1265 mss_now = tp->mss_cache;
1266
1267 if (dst) {
1268 u32 mtu = dst_mtu(dst);
1269 if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
1270 mss_now = tcp_sync_mss(sk, mtu);
1271 }
1272
1273 header_len = tcp_established_options(sk, NULL, &opts, &md5) +
1274 sizeof(struct tcphdr);
1275
1276
1277
1278
1279 if (header_len != tp->tcp_header_len) {
1280 int delta = (int) header_len - tp->tcp_header_len;
1281 mss_now -= delta;
1282 }
1283
1284 return mss_now;
1285}
1286
1287
1288static void tcp_cwnd_validate(struct sock *sk)
1289{
1290 struct tcp_sock *tp = tcp_sk(sk);
1291
1292 if (tp->packets_out >= tp->snd_cwnd) {
1293
1294 tp->snd_cwnd_used = 0;
1295 tp->snd_cwnd_stamp = tcp_time_stamp;
1296 } else {
1297
1298 if (tp->packets_out > tp->snd_cwnd_used)
1299 tp->snd_cwnd_used = tp->packets_out;
1300
1301 if (sysctl_tcp_slow_start_after_idle &&
1302 (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
1303 tcp_cwnd_application_limited(sk);
1304 }
1305}
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
1320 unsigned int mss_now, unsigned int cwnd)
1321{
1322 const struct tcp_sock *tp = tcp_sk(sk);
1323 u32 needed, window, cwnd_len;
1324
1325 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1326 cwnd_len = mss_now * cwnd;
1327
1328 if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
1329 return cwnd_len;
1330
1331 needed = min(skb->len, window);
1332
1333 if (cwnd_len <= needed)
1334 return cwnd_len;
1335
1336 return needed - needed % mss_now;
1337}
1338
1339
1340
1341
1342static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
1343 const struct sk_buff *skb)
1344{
1345 u32 in_flight, cwnd;
1346
1347
1348 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
1349 tcp_skb_pcount(skb) == 1)
1350 return 1;
1351
1352 in_flight = tcp_packets_in_flight(tp);
1353 cwnd = tp->snd_cwnd;
1354 if (in_flight < cwnd)
1355 return (cwnd - in_flight);
1356
1357 return 0;
1358}
1359
1360
1361
1362
1363
1364static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
1365 unsigned int mss_now)
1366{
1367 int tso_segs = tcp_skb_pcount(skb);
1368
1369 if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
1370 tcp_set_skb_tso_segs(sk, skb, mss_now);
1371 tso_segs = tcp_skb_pcount(skb);
1372 }
1373 return tso_segs;
1374}
1375
1376
1377static inline int tcp_minshall_check(const struct tcp_sock *tp)
1378{
1379 return after(tp->snd_sml, tp->snd_una) &&
1380 !after(tp->snd_sml, tp->snd_nxt);
1381}
1382
1383
1384
1385
1386
1387
1388
1389
1390static inline int tcp_nagle_check(const struct tcp_sock *tp,
1391 const struct sk_buff *skb,
1392 unsigned mss_now, int nonagle)
1393{
1394 return skb->len < mss_now &&
1395 ((nonagle & TCP_NAGLE_CORK) ||
1396 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1397}
1398
1399
1400
1401
1402static inline int tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
1403 unsigned int cur_mss, int nonagle)
1404{
1405
1406
1407
1408
1409
1410
1411 if (nonagle & TCP_NAGLE_PUSH)
1412 return 1;
1413
1414
1415
1416
1417 if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
1418 (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1419 return 1;
1420
1421 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
1422 return 1;
1423
1424 return 0;
1425}
1426
1427
1428static inline int tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb,
1429 unsigned int cur_mss)
1430{
1431 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
1432
1433 if (skb->len > cur_mss)
1434 end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
1435
1436 return !after(end_seq, tcp_wnd_end(tp));
1437}
1438
1439
1440
1441
1442
1443static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
1444 unsigned int cur_mss, int nonagle)
1445{
1446 const struct tcp_sock *tp = tcp_sk(sk);
1447 unsigned int cwnd_quota;
1448
1449 tcp_init_tso_segs(sk, skb, cur_mss);
1450
1451 if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
1452 return 0;
1453
1454 cwnd_quota = tcp_cwnd_test(tp, skb);
1455 if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
1456 cwnd_quota = 0;
1457
1458 return cwnd_quota;
1459}
1460
1461
1462int tcp_may_send_now(struct sock *sk)
1463{
1464 const struct tcp_sock *tp = tcp_sk(sk);
1465 struct sk_buff *skb = tcp_send_head(sk);
1466
1467 return skb &&
1468 tcp_snd_test(sk, skb, tcp_current_mss(sk),
1469 (tcp_skb_is_last(sk, skb) ?
1470 tp->nonagle : TCP_NAGLE_PUSH));
1471}
1472
1473
1474
1475
1476
1477
1478
1479
1480static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1481 unsigned int mss_now, gfp_t gfp)
1482{
1483 struct sk_buff *buff;
1484 int nlen = skb->len - len;
1485 u8 flags;
1486
1487
1488 if (skb->len != skb->data_len)
1489 return tcp_fragment(sk, skb, len, mss_now);
1490
1491 buff = sk_stream_alloc_skb(sk, 0, gfp);
1492 if (unlikely(buff == NULL))
1493 return -ENOMEM;
1494
1495 sk->sk_wmem_queued += buff->truesize;
1496 sk_mem_charge(sk, buff->truesize);
1497 buff->truesize += nlen;
1498 skb->truesize -= nlen;
1499
1500
1501 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1502 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1503 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1504
1505
1506 flags = TCP_SKB_CB(skb)->tcp_flags;
1507 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1508 TCP_SKB_CB(buff)->tcp_flags = flags;
1509
1510
1511 TCP_SKB_CB(buff)->sacked = 0;
1512
1513 buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
1514 skb_split(skb, buff, len);
1515
1516
1517 tcp_set_skb_tso_segs(sk, skb, mss_now);
1518 tcp_set_skb_tso_segs(sk, buff, mss_now);
1519
1520
1521 skb_header_release(buff);
1522 tcp_insert_write_queue_after(skb, buff, sk);
1523
1524 return 0;
1525}
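
/* Decide whether to hold back a TSO skb in the hope that more data will
 * be queued shortly, so that one larger burst can be sent instead of
 * several small ones.  Returns nonzero to defer, zero to send now.
 */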
1532static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1533{
1534 struct tcp_sock *tp = tcp_sk(sk);
1535 const struct inet_connection_sock *icsk = inet_csk(sk);
1536 u32 send_win, cong_win, limit, in_flight;
1537 int win_divisor;
1538
1539 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1540 goto send_now;
1541
1542 if (icsk->icsk_ca_state != TCP_CA_Open)
1543 goto send_now;
1544
1545
1546 if (tp->tso_deferred &&
1547 (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
1548 goto send_now;
1549
1550 in_flight = tcp_packets_in_flight(tp);
1551
1552 BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));
1553
1554 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1555
1556
1557 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
1558
1559 limit = min(send_win, cong_win);
1560
1561
1562 if (limit >= sk->sk_gso_max_size)
1563 goto send_now;
1564
1565
1566 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
1567 goto send_now;
1568
1569 win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
1570 if (win_divisor) {
1571 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1572
1573
1574
1575
1576 chunk /= win_divisor;
1577 if (limit >= chunk)
1578 goto send_now;
1579 } else {
1580
1581
1582
1583
1584
1585 if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache)
1586 goto send_now;
1587 }
1588
1589
1590 tp->tso_deferred = 1 | (jiffies << 1);
1591
1592 return 1;
1593
1594send_now:
1595 tp->tso_deferred = 0;
1596 return 0;
1597}
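
/* Path MTU probing (RFC 4821): occasionally coalesce queued data into a
 * single oversized probe segment (twice the current MSS) and send it to
 * test whether a larger MTU works.  Returns 1 if a probe was sent, 0 if
 * sending has to wait, and -1 if probing is not possible right now.
 */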
1608static int tcp_mtu_probe(struct sock *sk)
1609{
1610 struct tcp_sock *tp = tcp_sk(sk);
1611 struct inet_connection_sock *icsk = inet_csk(sk);
1612 struct sk_buff *skb, *nskb, *next;
1613 int len;
1614 int probe_size;
1615 int size_needed;
1616 int copy;
1617 int mss_now;
1618
1619
1620
1621
1622
1623 if (!icsk->icsk_mtup.enabled ||
1624 icsk->icsk_mtup.probe_size ||
1625 inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
1626 tp->snd_cwnd < 11 ||
1627 tp->rx_opt.num_sacks || tp->rx_opt.dsack)
1628 return -1;
1629
1630
1631 mss_now = tcp_current_mss(sk);
1632 probe_size = 2 * tp->mss_cache;
1633 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
1634 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
1635
1636 return -1;
1637 }
1638
1639
1640 if (tp->write_seq - tp->snd_nxt < size_needed)
1641 return -1;
1642
1643 if (tp->snd_wnd < size_needed)
1644 return -1;
1645 if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
1646 return 0;
1647
1648
1649 if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
1650 if (!tcp_packets_in_flight(tp))
1651 return -1;
1652 else
1653 return 0;
1654 }
1655
1656
1657 if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
1658 return -1;
1659 sk->sk_wmem_queued += nskb->truesize;
1660 sk_mem_charge(sk, nskb->truesize);
1661
1662 skb = tcp_send_head(sk);
1663
1664 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
1665 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
1666 TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
1667 TCP_SKB_CB(nskb)->sacked = 0;
1668 nskb->csum = 0;
1669 nskb->ip_summed = skb->ip_summed;
1670
1671 tcp_insert_write_queue_before(nskb, skb, sk);
1672
1673 len = 0;
1674 tcp_for_write_queue_from_safe(skb, next, sk) {
1675 copy = min_t(int, skb->len, probe_size - len);
1676 if (nskb->ip_summed)
1677 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
1678 else
1679 nskb->csum = skb_copy_and_csum_bits(skb, 0,
1680 skb_put(nskb, copy),
1681 copy, nskb->csum);
1682
1683 if (skb->len <= copy) {
1684
1685
1686 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1687 tcp_unlink_write_queue(skb, sk);
1688 sk_wmem_free_skb(sk, skb);
1689 } else {
1690 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
1691 ~(TCPHDR_FIN|TCPHDR_PSH);
1692 if (!skb_shinfo(skb)->nr_frags) {
1693 skb_pull(skb, copy);
1694 if (skb->ip_summed != CHECKSUM_PARTIAL)
1695 skb->csum = csum_partial(skb->data,
1696 skb->len, 0);
1697 } else {
1698 __pskb_trim_head(skb, copy);
1699 tcp_set_skb_tso_segs(sk, skb, mss_now);
1700 }
1701 TCP_SKB_CB(skb)->seq += copy;
1702 }
1703
1704 len += copy;
1705
1706 if (len >= probe_size)
1707 break;
1708 }
1709 tcp_init_tso_segs(sk, nskb, nskb->len);
1710
1711
1712
1713 TCP_SKB_CB(nskb)->when = tcp_time_stamp;
1714 if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
1715
1716
1717 tp->snd_cwnd--;
1718 tcp_event_new_data_sent(sk, nskb);
1719
1720 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
1721 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
1722 tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq;
1723
1724 return 1;
1725 }
1726
1727 return -1;
1728}
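
/* Send as much queued data as the congestion window, receive window,
 * Nagle and TSO-deferral rules allow.  Returns nonzero when nothing was
 * sent and nothing is in flight although data is still queued, so the
 * caller can arm the zero-window probe timer.
 */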
1741static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1742 int push_one, gfp_t gfp)
1743{
1744 struct tcp_sock *tp = tcp_sk(sk);
1745 struct sk_buff *skb;
1746 unsigned int tso_segs, sent_pkts;
1747 int cwnd_quota;
1748 int result;
1749
1750 sent_pkts = 0;
1751
1752 if (!push_one) {
1753
1754 result = tcp_mtu_probe(sk);
1755 if (!result) {
1756 return 0;
1757 } else if (result > 0) {
1758 sent_pkts = 1;
1759 }
1760 }
1761
1762 while ((skb = tcp_send_head(sk))) {
1763 unsigned int limit;
1764
1765 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
1766 BUG_ON(!tso_segs);
1767
1768 cwnd_quota = tcp_cwnd_test(tp, skb);
1769 if (!cwnd_quota)
1770 break;
1771
1772 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
1773 break;
1774
1775 if (tso_segs == 1) {
1776 if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
1777 (tcp_skb_is_last(sk, skb) ?
1778 nonagle : TCP_NAGLE_PUSH))))
1779 break;
1780 } else {
1781 if (!push_one && tcp_tso_should_defer(sk, skb))
1782 break;
1783 }
1784
1785 limit = mss_now;
1786 if (tso_segs > 1 && !tcp_urg_mode(tp))
1787 limit = tcp_mss_split_point(sk, skb, mss_now,
1788 cwnd_quota);
1789
1790 if (skb->len > limit &&
1791 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
1792 break;
1793
1794 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1795
1796 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
1797 break;
1798
1799
1800
1801
1802 tcp_event_new_data_sent(sk, skb);
1803
1804 tcp_minshall_update(tp, mss_now, skb);
1805 sent_pkts += tcp_skb_pcount(skb);
1806
1807 if (push_one)
1808 break;
1809 }
1810 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
1811 tp->prr_out += sent_pkts;
1812
1813 if (likely(sent_pkts)) {
1814 tcp_cwnd_validate(sk);
1815 return 0;
1816 }
1817 return !tp->packets_out && tcp_send_head(sk);
1818}
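
/* Push out whatever tcp_write_xmit() allows; if data remains queued but
 * nothing could be sent, arm the probe timer.  Does nothing once the
 * socket has reached TCP_CLOSE.
 */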
1824void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
1825 int nonagle)
1826{
1827
1828
1829
1830
1831 if (unlikely(sk->sk_state == TCP_CLOSE))
1832 return;
1833
1834 if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC))
1835 tcp_check_probe_timer(sk);
1836}
1837
1838
1839
1840
1841void tcp_push_one(struct sock *sk, unsigned int mss_now)
1842{
1843 struct sk_buff *skb = tcp_send_head(sk);
1844
1845 BUG_ON(!skb || skb->len < mss_now);
1846
1847 tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
1848}
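
/* Compute the raw (unscaled) receive window to advertise, applying
 * receiver-side silly window syndrome avoidance: hold the window at
 * zero while less than one full-sized segment (and less than half the
 * buffer) is free, and otherwise round the result to window-scale or
 * MSS granularity so the peer is not enticed into sending tiny
 * segments.
 */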
1902u32 __tcp_select_window(struct sock *sk)
1903{
1904 struct inet_connection_sock *icsk = inet_csk(sk);
1905 struct tcp_sock *tp = tcp_sk(sk);
1906
1907
1908
1909
1910
1911
1912 int mss = icsk->icsk_ack.rcv_mss;
1913 int free_space = tcp_space(sk);
1914 int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
1915 int window;
1916
1917 if (mss > full_space)
1918 mss = full_space;
1919
1920 if (free_space < (full_space >> 1)) {
1921 icsk->icsk_ack.quick = 0;
1922
1923 if (sk_under_memory_pressure(sk))
1924 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
1925 4U * tp->advmss);
1926
1927 if (free_space < mss)
1928 return 0;
1929 }
1930
1931 if (free_space > tp->rcv_ssthresh)
1932 free_space = tp->rcv_ssthresh;
1933
1934
1935
1936
1937 window = tp->rcv_wnd;
1938 if (tp->rx_opt.rcv_wscale) {
1939 window = free_space;
1940
1941
1942
1943
1944
1945 if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
1946 window = (((window >> tp->rx_opt.rcv_wscale) + 1)
1947 << tp->rx_opt.rcv_wscale);
1948 } else {
1949
1950
1951
1952
1953
1954
1955
1956
1957 if (window <= free_space - mss || window > free_space)
1958 window = (free_space / mss) * mss;
1959 else if (mss == full_space &&
1960 free_space > window + (full_space >> 1))
1961 window = free_space;
1962 }
1963
1964 return window;
1965}
1966
1967
1968static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
1969{
1970 struct tcp_sock *tp = tcp_sk(sk);
1971 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
1972 int skb_size, next_skb_size;
1973
1974 skb_size = skb->len;
1975 next_skb_size = next_skb->len;
1976
1977 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
1978
1979 tcp_highest_sack_combine(sk, next_skb, skb);
1980
1981 tcp_unlink_write_queue(next_skb, sk);
1982
1983 skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
1984 next_skb_size);
1985
1986 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
1987 skb->ip_summed = CHECKSUM_PARTIAL;
1988
1989 if (skb->ip_summed != CHECKSUM_PARTIAL)
1990 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
1991
1992
1993 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
1994
1995
1996 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
1997
1998
1999
2000
2001 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
2002
2003
2004 tcp_clear_retrans_hints_partial(tp);
2005 if (next_skb == tp->retransmit_skb_hint)
2006 tp->retransmit_skb_hint = skb;
2007
2008 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
2009
2010 sk_wmem_free_skb(sk, next_skb);
2011}
2012
2013
2014static int tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
2015{
2016 if (tcp_skb_pcount(skb) > 1)
2017 return 0;
2018
2019 if (skb_shinfo(skb)->nr_frags != 0)
2020 return 0;
2021 if (skb_cloned(skb))
2022 return 0;
2023 if (skb == tcp_send_head(sk))
2024 return 0;
2025
2026 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2027 return 0;
2028
2029 return 1;
2030}
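
/* On retransmission, try to collapse consecutive small skbs following
 * "to" into it, as long as the result stays within "space" bytes, fits
 * in the send window and none of the candidates has been SACKed.
 */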
2035static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2036 int space)
2037{
2038 struct tcp_sock *tp = tcp_sk(sk);
2039 struct sk_buff *skb = to, *tmp;
2040 int first = 1;
2041
2042 if (!sysctl_tcp_retrans_collapse)
2043 return;
2044 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2045 return;
2046
2047 tcp_for_write_queue_from_safe(skb, tmp, sk) {
2048 if (!tcp_can_collapse(sk, skb))
2049 break;
2050
2051 space -= skb->len;
2052
2053 if (first) {
2054 first = 0;
2055 continue;
2056 }
2057
2058 if (space < 0)
2059 break;
2060
2061
2062
2063 if (skb->len > skb_tailroom(to))
2064 break;
2065
2066 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
2067 break;
2068
2069 tcp_collapse_retrans(sk, to);
2070 }
2071}
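
/* Retransmit one skb from the write queue.  Anything that changed since
 * the original transmission is handled here: already-ACKed head bytes
 * are trimmed, segments larger than the current MSS are refragmented,
 * neighbouring small segments may be collapsed, and the retransmission
 * counters are updated on success.
 */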
2077int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2078{
2079 struct tcp_sock *tp = tcp_sk(sk);
2080 struct inet_connection_sock *icsk = inet_csk(sk);
2081 unsigned int cur_mss;
2082 int err;
2083
2084
2085 if (icsk->icsk_mtup.probe_size) {
2086 icsk->icsk_mtup.probe_size = 0;
2087 }
2088
2089
2090
2091
2092 if (atomic_read(&sk->sk_wmem_alloc) >
2093 min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
2094 return -EAGAIN;
2095
2096 if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
2097 if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
2098 BUG();
2099 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
2100 return -ENOMEM;
2101 }
2102
2103 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
2104 return -EHOSTUNREACH;
2105
2106 cur_mss = tcp_current_mss(sk);
2107
2108
2109
2110
2111
2112
2113 if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
2114 TCP_SKB_CB(skb)->seq != tp->snd_una)
2115 return -EAGAIN;
2116
2117 if (skb->len > cur_mss) {
2118 if (tcp_fragment(sk, skb, cur_mss, cur_mss))
2119 return -ENOMEM;
2120 } else {
2121 int oldpcount = tcp_skb_pcount(skb);
2122
2123 if (unlikely(oldpcount > 1)) {
2124 tcp_init_tso_segs(sk, skb, cur_mss);
2125 tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
2126 }
2127 }
2128
2129 tcp_retrans_try_collapse(sk, skb, cur_mss);
2130
2131
2132
2133
2134
2135 if (skb->len > 0 &&
2136 (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
2137 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
2138 if (!pskb_trim(skb, 0)) {
2139
2140 tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
2141 TCP_SKB_CB(skb)->tcp_flags);
2142 skb->ip_summed = CHECKSUM_NONE;
2143 }
2144 }
2145
2146
2147
2148
2149 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2150
2151
2152 if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) {
2153 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
2154 GFP_ATOMIC);
2155 err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2156 -ENOBUFS;
2157 } else {
2158 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2159 }
2160
2161 if (err == 0) {
2162
2163 TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
2164
2165 tp->total_retrans++;
2166
2167#if FASTRETRANS_DEBUG > 0
2168 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2169 if (net_ratelimit())
2170 printk(KERN_DEBUG "retrans_out leaked.\n");
2171 }
2172#endif
2173 if (!tp->retrans_out)
2174 tp->lost_retrans_low = tp->snd_nxt;
2175 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
2176 tp->retrans_out += tcp_skb_pcount(skb);
2177
2178
2179 if (!tp->retrans_stamp)
2180 tp->retrans_stamp = TCP_SKB_CB(skb)->when;
2181
2182 tp->undo_retrans += tcp_skb_pcount(skb);
2183
2184
2185
2186
2187 TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
2188 }
2189 return err;
2190}
2191
2192
2193
2194
2195static int tcp_can_forward_retransmit(struct sock *sk)
2196{
2197 const struct inet_connection_sock *icsk = inet_csk(sk);
2198 const struct tcp_sock *tp = tcp_sk(sk);
2199
2200
2201 if (icsk->icsk_ca_state != TCP_CA_Recovery)
2202 return 0;
2203
2204
2205 if (tcp_is_reno(tp))
2206 return 0;
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216 if (tcp_may_send_now(sk))
2217 return 0;
2218
2219 return 1;
2220}
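
/* Walk the retransmit queue and resend segments marked lost (plus,
 * when permitted, forward retransmissions of not-yet-lost segments),
 * subject to the congestion window.
 */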
2230void tcp_xmit_retransmit_queue(struct sock *sk)
2231{
2232 const struct inet_connection_sock *icsk = inet_csk(sk);
2233 struct tcp_sock *tp = tcp_sk(sk);
2234 struct sk_buff *skb;
2235 struct sk_buff *hole = NULL;
2236 u32 last_lost;
2237 int mib_idx;
2238 int fwd_rexmitting = 0;
2239
2240 if (!tp->packets_out)
2241 return;
2242
2243 if (!tp->lost_out)
2244 tp->retransmit_high = tp->snd_una;
2245
2246 if (tp->retransmit_skb_hint) {
2247 skb = tp->retransmit_skb_hint;
2248 last_lost = TCP_SKB_CB(skb)->end_seq;
2249 if (after(last_lost, tp->retransmit_high))
2250 last_lost = tp->retransmit_high;
2251 } else {
2252 skb = tcp_write_queue_head(sk);
2253 last_lost = tp->snd_una;
2254 }
2255
2256 tcp_for_write_queue_from(skb, sk) {
2257 __u8 sacked = TCP_SKB_CB(skb)->sacked;
2258
2259 if (skb == tcp_send_head(sk))
2260 break;
2261
2262 if (hole == NULL)
2263 tp->retransmit_skb_hint = skb;
2264
2265
2266
2267
2268
2269
2270
2271
2272 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
2273 return;
2274
2275 if (fwd_rexmitting) {
2276begin_fwd:
2277 if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
2278 break;
2279 mib_idx = LINUX_MIB_TCPFORWARDRETRANS;
2280
2281 } else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) {
2282 tp->retransmit_high = last_lost;
2283 if (!tcp_can_forward_retransmit(sk))
2284 break;
2285
2286 if (hole != NULL) {
2287 skb = hole;
2288 hole = NULL;
2289 }
2290 fwd_rexmitting = 1;
2291 goto begin_fwd;
2292
2293 } else if (!(sacked & TCPCB_LOST)) {
2294 if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
2295 hole = skb;
2296 continue;
2297
2298 } else {
2299 last_lost = TCP_SKB_CB(skb)->end_seq;
2300 if (icsk->icsk_ca_state != TCP_CA_Loss)
2301 mib_idx = LINUX_MIB_TCPFASTRETRANS;
2302 else
2303 mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
2304 }
2305
2306 if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
2307 continue;
2308
2309 if (tcp_retransmit_skb(sk, skb))
2310 return;
2311 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2312
2313 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
2314 tp->prr_out += tcp_skb_pcount(skb);
2315
2316 if (skb == tcp_write_queue_head(sk))
2317 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2318 inet_csk(sk)->icsk_rto,
2319 TCP_RTO_MAX);
2320 }
2321}
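
/* Send a FIN.  The FIN flag is piggy-backed on the last segment still
 * sitting on the write queue when there is one; otherwise a fresh
 * data-less skb is allocated (retrying until the allocation succeeds)
 * and queued.
 */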
2326void tcp_send_fin(struct sock *sk)
2327{
2328 struct tcp_sock *tp = tcp_sk(sk);
2329 struct sk_buff *skb = tcp_write_queue_tail(sk);
2330 int mss_now;
2331
2332
2333
2334
2335
2336 mss_now = tcp_current_mss(sk);
2337
2338 if (tcp_send_head(sk) != NULL) {
2339 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
2340 TCP_SKB_CB(skb)->end_seq++;
2341 tp->write_seq++;
2342 } else {
2343
2344 for (;;) {
2345 skb = alloc_skb_fclone(MAX_TCP_HEADER,
2346 sk->sk_allocation);
2347 if (skb)
2348 break;
2349 yield();
2350 }
2351
2352
2353 skb_reserve(skb, MAX_TCP_HEADER);
2354
2355 tcp_init_nondata_skb(skb, tp->write_seq,
2356 TCPHDR_ACK | TCPHDR_FIN);
2357 tcp_queue_skb(sk, skb);
2358 }
2359 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
2360}
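
/* Send an active reset (RST) to abort the connection.  If no skb can be
 * allocated the abort is only counted as failed.
 */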
2367void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2368{
2369 struct sk_buff *skb;
2370
2371
2372 skb = alloc_skb(MAX_TCP_HEADER, priority);
2373 if (!skb) {
2374 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2375 return;
2376 }
2377
2378
2379 skb_reserve(skb, MAX_TCP_HEADER);
2380 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
2381 TCPHDR_ACK | TCPHDR_RST);
2382
2383 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2384 if (tcp_transmit_skb(sk, skb, 0, priority))
2385 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2386
2387 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
2388}
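
/* Send (or resend) the SYN-ACK sitting at the head of the write queue,
 * first setting the ACK flag and the ECN SYN-ACK bits on it if needed.
 */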
2396int tcp_send_synack(struct sock *sk)
2397{
2398 struct sk_buff *skb;
2399
2400 skb = tcp_write_queue_head(sk);
2401 if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
2402 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
2403 return -EFAULT;
2404 }
2405 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
2406 if (skb_cloned(skb)) {
2407 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2408 if (nskb == NULL)
2409 return -ENOMEM;
2410 tcp_unlink_write_queue(skb, sk);
2411 skb_header_release(nskb);
2412 __tcp_add_write_queue_head(sk, nskb);
2413 sk_wmem_free_skb(sk, skb);
2414 sk->sk_wmem_queued += nskb->truesize;
2415 sk_mem_charge(sk, nskb->truesize);
2416 skb = nskb;
2417 }
2418
2419 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
2420 TCP_ECN_send_synack(tcp_sk(sk), skb);
2421 }
2422 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2423 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2424}
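
/* Build a SYN-ACK for the given connection request.  Used by both the
 * normal passive-open path and the SYN-cookie path; the resulting skb
 * is not queued, the caller transmits it.
 */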
2427struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2428 struct request_sock *req,
2429 struct request_values *rvp)
2430{
2431 struct tcp_out_options opts;
2432 struct tcp_extend_values *xvp = tcp_xv(rvp);
2433 struct inet_request_sock *ireq = inet_rsk(req);
2434 struct tcp_sock *tp = tcp_sk(sk);
2435 const struct tcp_cookie_values *cvp = tp->cookie_values;
2436 struct tcphdr *th;
2437 struct sk_buff *skb;
2438 struct tcp_md5sig_key *md5;
2439 int tcp_header_size;
2440 int mss;
2441 int s_data_desired = 0;
2442
2443 if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
2444 s_data_desired = cvp->s_data_desired;
2445 skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15 + s_data_desired, 1, GFP_ATOMIC);
2446 if (skb == NULL)
2447 return NULL;
2448
2449
2450 skb_reserve(skb, MAX_TCP_HEADER);
2451
2452 skb_dst_set(skb, dst_clone(dst));
2453
2454 mss = dst_metric_advmss(dst);
2455 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
2456 mss = tp->rx_opt.user_mss;
2457
2458 if (req->rcv_wnd == 0) {
2459 __u8 rcv_wscale;
2460
2461 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
2462
2463
2464 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
2465 (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
2466 req->window_clamp = tcp_full_space(sk);
2467
2468
2469 tcp_select_initial_window(tcp_full_space(sk),
2470 mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
2471 &req->rcv_wnd,
2472 &req->window_clamp,
2473 ireq->wscale_ok,
2474 &rcv_wscale,
2475 dst_metric(dst, RTAX_INITRWND));
2476 ireq->rcv_wscale = rcv_wscale;
2477 }
2478
2479 memset(&opts, 0, sizeof(opts));
2480#ifdef CONFIG_SYN_COOKIES
2481 if (unlikely(req->cookie_ts))
2482 TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
2483 else
2484#endif
2485 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2486 tcp_header_size = tcp_synack_options(sk, req, mss,
2487 skb, &opts, &md5, xvp)
2488 + sizeof(*th);
2489
2490 skb_push(skb, tcp_header_size);
2491 skb_reset_transport_header(skb);
2492
2493 th = tcp_hdr(skb);
2494 memset(th, 0, sizeof(struct tcphdr));
2495 th->syn = 1;
2496 th->ack = 1;
2497 TCP_ECN_make_synack(req, th);
2498 th->source = ireq->loc_port;
2499 th->dest = ireq->rmt_port;
2500
2501
2502
2503 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
2504 TCPHDR_SYN | TCPHDR_ACK);
2505
2506 if (OPTION_COOKIE_EXTENSION & opts.options) {
2507 if (s_data_desired) {
2508 u8 *buf = skb_put(skb, s_data_desired);
2509
2510
2511 memcpy(buf, cvp->s_data_payload, s_data_desired);
2512 TCP_SKB_CB(skb)->end_seq += s_data_desired;
2513 }
2514
2515 if (opts.hash_size > 0) {
2516 __u32 workspace[SHA_WORKSPACE_WORDS];
2517 u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS];
2518 u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1];
2519
2520
2521
2522
2523
2524 *tail-- ^= opts.tsval;
2525 *tail-- ^= tcp_rsk(req)->rcv_isn + 1;
2526 *tail-- ^= TCP_SKB_CB(skb)->seq + 1;
2527
2528
2529 *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source);
2530 *tail-- ^= (u32)(unsigned long)cvp;
2531
2532 sha_transform((__u32 *)&xvp->cookie_bakery[0],
2533 (char *)mess,
2534 &workspace[0]);
2535 opts.hash_location =
2536 (__u8 *)&xvp->cookie_bakery[0];
2537 }
2538 }
2539
2540 th->seq = htonl(TCP_SKB_CB(skb)->seq);
2541 th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
2542
2543
2544 th->window = htons(min(req->rcv_wnd, 65535U));
2545 tcp_options_write((__be32 *)(th + 1), tp, &opts);
2546 th->doff = (tcp_header_size >> 2);
2547 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb));
2548
2549#ifdef CONFIG_TCP_MD5SIG
2550
2551 if (md5) {
2552 tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
2553 md5, NULL, req, skb);
2554 }
2555#endif
2556
2557 return skb;
2558}
2559EXPORT_SYMBOL(tcp_make_synack);
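
/* Initialise per-connection state before the first SYN is sent: header
 * length, MSS and MTU-probing state, the initial receive window and
 * window scale, and the sequence number bookkeeping.
 */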
2562static void tcp_connect_init(struct sock *sk)
2563{
2564 const struct dst_entry *dst = __sk_dst_get(sk);
2565 struct tcp_sock *tp = tcp_sk(sk);
2566 __u8 rcv_wscale;
2567
2568
2569
2570
2571 tp->tcp_header_len = sizeof(struct tcphdr) +
2572 (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
2573
2574#ifdef CONFIG_TCP_MD5SIG
2575 if (tp->af_specific->md5_lookup(sk, sk) != NULL)
2576 tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
2577#endif
2578
2579
2580 if (tp->rx_opt.user_mss)
2581 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
2582 tp->max_window = 0;
2583 tcp_mtup_init(sk);
2584 tcp_sync_mss(sk, dst_mtu(dst));
2585
2586 if (!tp->window_clamp)
2587 tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
2588 tp->advmss = dst_metric_advmss(dst);
2589 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
2590 tp->advmss = tp->rx_opt.user_mss;
2591
2592 tcp_initialize_rcv_mss(sk);
2593
2594
2595 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
2596 (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
2597 tp->window_clamp = tcp_full_space(sk);
2598
2599 tcp_select_initial_window(tcp_full_space(sk),
2600 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
2601 &tp->rcv_wnd,
2602 &tp->window_clamp,
2603 sysctl_tcp_window_scaling,
2604 &rcv_wscale,
2605 dst_metric(dst, RTAX_INITRWND));
2606
2607 tp->rx_opt.rcv_wscale = rcv_wscale;
2608 tp->rcv_ssthresh = tp->rcv_wnd;
2609
2610 sk->sk_err = 0;
2611 sock_reset_flag(sk, SOCK_DONE);
2612 tp->snd_wnd = 0;
2613 tcp_init_wl(tp, 0);
2614 tp->snd_una = tp->write_seq;
2615 tp->snd_sml = tp->write_seq;
2616 tp->snd_up = tp->write_seq;
2617 tp->rcv_nxt = 0;
2618 tp->rcv_wup = 0;
2619 tp->copied_seq = 0;
2620
2621 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
2622 inet_csk(sk)->icsk_retransmits = 0;
2623 tcp_clear_retrans(tp);
2624}
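
/* Build and send the initial SYN for an active open, queue it for
 * retransmission and arm the retransmit timer.
 */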
2627int tcp_connect(struct sock *sk)
2628{
2629 struct tcp_sock *tp = tcp_sk(sk);
2630 struct sk_buff *buff;
2631 int err;
2632
2633 tcp_connect_init(sk);
2634
2635 buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
2636 if (unlikely(buff == NULL))
2637 return -ENOBUFS;
2638
2639
2640 skb_reserve(buff, MAX_TCP_HEADER);
2641
2642 tp->snd_nxt = tp->write_seq;
2643 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
2644 TCP_ECN_send_syn(sk, buff);
2645
2646
2647 TCP_SKB_CB(buff)->when = tcp_time_stamp;
2648 tp->retrans_stamp = TCP_SKB_CB(buff)->when;
2649 skb_header_release(buff);
2650 __tcp_add_write_queue_tail(sk, buff);
2651 sk->sk_wmem_queued += buff->truesize;
2652 sk_mem_charge(sk, buff->truesize);
2653 tp->packets_out += tcp_skb_pcount(buff);
2654 err = tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
2655 if (err == -ECONNREFUSED)
2656 return err;
2657
2658
2659
2660
2661 tp->snd_nxt = tp->write_seq;
2662 tp->pushed_seq = tp->write_seq;
2663 TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
2664
2665
2666 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2667 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
2668 return 0;
2669}
2670EXPORT_SYMBOL(tcp_connect);
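
/* Schedule a delayed ACK: bound the ACK timeout by the measured RTT and
 * the delayed-ACK limits, then either arm the delack timer or, if the
 * existing timer would fire very soon anyway, send the ACK immediately.
 */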
2676void tcp_send_delayed_ack(struct sock *sk)
2677{
2678 struct inet_connection_sock *icsk = inet_csk(sk);
2679 int ato = icsk->icsk_ack.ato;
2680 unsigned long timeout;
2681
2682 if (ato > TCP_DELACK_MIN) {
2683 const struct tcp_sock *tp = tcp_sk(sk);
2684 int max_ato = HZ / 2;
2685
2686 if (icsk->icsk_ack.pingpong ||
2687 (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
2688 max_ato = TCP_DELACK_MAX;
2689
2690
2691
2692
2693
2694
2695
2696 if (tp->srtt) {
2697 int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
2698
2699 if (rtt < max_ato)
2700 max_ato = rtt;
2701 }
2702
2703 ato = min(ato, max_ato);
2704 }
2705
2706
2707 timeout = jiffies + ato;
2708
2709
2710 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
2711
2712
2713
2714 if (icsk->icsk_ack.blocked ||
2715 time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
2716 tcp_send_ack(sk);
2717 return;
2718 }
2719
2720 if (!time_before(timeout, icsk->icsk_ack.timeout))
2721 timeout = icsk->icsk_ack.timeout;
2722 }
2723 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
2724 icsk->icsk_ack.timeout = timeout;
2725 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
2726}
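
/* Send an ACK right now; if no skb can be allocated, fall back to the
 * delayed-ACK timer so the ACK is not lost.
 */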
2729void tcp_send_ack(struct sock *sk)
2730{
2731 struct sk_buff *buff;
2732
2733
2734 if (sk->sk_state == TCP_CLOSE)
2735 return;
2736
2737
2738
2739
2740
2741 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
2742 if (buff == NULL) {
2743 inet_csk_schedule_ack(sk);
2744 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
2745 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
2746 TCP_DELACK_MAX, TCP_RTO_MAX);
2747 return;
2748 }
2749
2750
2751 skb_reserve(buff, MAX_TCP_HEADER);
2752 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
2753
2754
2755 TCP_SKB_CB(buff)->when = tcp_time_stamp;
2756 tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
2757}
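
/* Send a data-less segment carrying a sequence number the receiver will
 * not accept as new data (snd_una - 1, or snd_una in urgent mode) but
 * should still acknowledge; this is what zero-window probes and
 * keepalives rely on.
 */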
2770static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
2771{
2772 struct tcp_sock *tp = tcp_sk(sk);
2773 struct sk_buff *skb;
2774
2775
2776 skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
2777 if (skb == NULL)
2778 return -1;
2779
2780
2781 skb_reserve(skb, MAX_TCP_HEADER);
2782
2783
2784
2785
2786 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
2787 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2788 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
2789}
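
/* Window-probe transmit path: if some queued data fits inside the
 * window, force one segment out (fragmenting it to fit if necessary);
 * otherwise send a pure out-of-window probe.
 */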
2792int tcp_write_wakeup(struct sock *sk)
2793{
2794 struct tcp_sock *tp = tcp_sk(sk);
2795 struct sk_buff *skb;
2796
2797 if (sk->sk_state == TCP_CLOSE)
2798 return -1;
2799
2800 if ((skb = tcp_send_head(sk)) != NULL &&
2801 before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
2802 int err;
2803 unsigned int mss = tcp_current_mss(sk);
2804 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
2805
2806 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
2807 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
2808
2809
2810
2811
2812
2813 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
2814 skb->len > mss) {
2815 seg_size = min(seg_size, mss);
2816 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
2817 if (tcp_fragment(sk, skb, seg_size, mss))
2818 return -1;
2819 } else if (!tcp_skb_pcount(skb))
2820 tcp_set_skb_tso_segs(sk, skb, mss);
2821
2822 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
2823 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2824 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2825 if (!err)
2826 tcp_event_new_data_sent(sk, skb);
2827 return err;
2828 } else {
2829 if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
2830 tcp_xmit_probe_skb(sk, 1);
2831 return tcp_xmit_probe_skb(sk, 0);
2832 }
2833}
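
/* Zero-window probe timer: send a probe and either clear the probe
 * state (when it is no longer needed) or back the timer off; local
 * resource shortages rearm the timer without increasing the backoff.
 */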
2838void tcp_send_probe0(struct sock *sk)
2839{
2840 struct inet_connection_sock *icsk = inet_csk(sk);
2841 struct tcp_sock *tp = tcp_sk(sk);
2842 int err;
2843
2844 err = tcp_write_wakeup(sk);
2845
2846 if (tp->packets_out || !tcp_send_head(sk)) {
2847
2848 icsk->icsk_probes_out = 0;
2849 icsk->icsk_backoff = 0;
2850 return;
2851 }
2852
2853 if (err <= 0) {
2854 if (icsk->icsk_backoff < sysctl_tcp_retries2)
2855 icsk->icsk_backoff++;
2856 icsk->icsk_probes_out++;
2857 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
2858 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
2859 TCP_RTO_MAX);
2860 } else {
2861
2862
2863
2864
2865
2866
2867 if (!icsk->icsk_probes_out)
2868 icsk->icsk_probes_out = 1;
2869 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
2870 min(icsk->icsk_rto << icsk->icsk_backoff,
2871 TCP_RESOURCE_PROBE_INTERVAL),
2872 TCP_RTO_MAX);
2873 }
2874}
2875