1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39#include <net/tcp.h>
40
41#include <linux/compiler.h>
42#include <linux/module.h>
43
44
45int sysctl_tcp_retrans_collapse __read_mostly = 1;
46
47
48
49
50int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
51
52
53
54
55
56int sysctl_tcp_tso_win_divisor __read_mostly = 3;
57
58int sysctl_tcp_mtu_probing __read_mostly = 0;
59int sysctl_tcp_base_mss __read_mostly = 512;
60
61
62int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
63
64static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
65{
66 struct tcp_sock *tp = tcp_sk(sk);
67 unsigned int prior_packets = tp->packets_out;
68
69 tcp_advance_send_head(sk, skb);
70 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
71
72
73 if (tp->frto_counter == 2)
74 tp->frto_counter = 3;
75
76 tp->packets_out += tcp_skb_pcount(skb);
77 if (!prior_packets)
78 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
79 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
80}
81
82
83
84
85
86
87
88static inline __u32 tcp_acceptable_seq(struct sock *sk)
89{
90 struct tcp_sock *tp = tcp_sk(sk);
91
92 if (!before(tcp_wnd_end(tp), tp->snd_nxt))
93 return tp->snd_nxt;
94 else
95 return tcp_wnd_end(tp);
96}
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112static __u16 tcp_advertise_mss(struct sock *sk)
113{
114 struct tcp_sock *tp = tcp_sk(sk);
115 struct dst_entry *dst = __sk_dst_get(sk);
116 int mss = tp->advmss;
117
118 if (dst && dst_metric(dst, RTAX_ADVMSS) < mss) {
119 mss = dst_metric(dst, RTAX_ADVMSS);
120 tp->advmss = mss;
121 }
122
123 return (__u16)mss;
124}
125
126
127
128static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
129{
130 struct tcp_sock *tp = tcp_sk(sk);
131 s32 delta = tcp_time_stamp - tp->lsndtime;
132 u32 restart_cwnd = tcp_init_cwnd(tp, dst);
133 u32 cwnd = tp->snd_cwnd;
134
135 tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
136
137 tp->snd_ssthresh = tcp_current_ssthresh(sk);
138 restart_cwnd = min(restart_cwnd, cwnd);
139
140 while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
141 cwnd >>= 1;
142 tp->snd_cwnd = max(cwnd, restart_cwnd);
143 tp->snd_cwnd_stamp = tcp_time_stamp;
144 tp->snd_cwnd_used = 0;
145}
146
147static void tcp_event_data_sent(struct tcp_sock *tp,
148 struct sk_buff *skb, struct sock *sk)
149{
150 struct inet_connection_sock *icsk = inet_csk(sk);
151 const u32 now = tcp_time_stamp;
152
153 if (sysctl_tcp_slow_start_after_idle &&
154 (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
155 tcp_cwnd_restart(sk, __sk_dst_get(sk));
156
157 tp->lsndtime = now;
158
159
160
161
162 if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
163 icsk->icsk_ack.pingpong = 1;
164}
165
166static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
167{
168 tcp_dec_quickack_mode(sk, pkts);
169 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
170}
171
172
173
174
175
176
177
178
179void tcp_select_initial_window(int __space, __u32 mss,
180 __u32 *rcv_wnd, __u32 *window_clamp,
181 int wscale_ok, __u8 *rcv_wscale)
182{
183 unsigned int space = (__space < 0 ? 0 : __space);
184
185
186 if (*window_clamp == 0)
187 (*window_clamp) = (65535 << 14);
188 space = min(*window_clamp, space);
189
190
191 if (space > mss)
192 space = (space / mss) * mss;
193
194
195
196
197
198
199
200
201
202 if (sysctl_tcp_workaround_signed_windows)
203 (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
204 else
205 (*rcv_wnd) = space;
206
207 (*rcv_wscale) = 0;
208 if (wscale_ok) {
209
210
211
212 space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
213 space = min_t(u32, space, *window_clamp);
214 while (space > 65535 && (*rcv_wscale) < 14) {
215 space >>= 1;
216 (*rcv_wscale)++;
217 }
218 }
219
220
221
222
223
224 if (mss > (1 << *rcv_wscale)) {
225 int init_cwnd = 4;
226 if (mss > 1460 * 3)
227 init_cwnd = 2;
228 else if (mss > 1460)
229 init_cwnd = 3;
230 if (*rcv_wnd > init_cwnd * mss)
231 *rcv_wnd = init_cwnd * mss;
232 }
233
234
235 (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
236}
237
238
239
240
241
242
243static u16 tcp_select_window(struct sock *sk)
244{
245 struct tcp_sock *tp = tcp_sk(sk);
246 u32 cur_win = tcp_receive_window(tp);
247 u32 new_win = __tcp_select_window(sk);
248
249
250 if (new_win < cur_win) {
251
252
253
254
255
256
257
258 new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
259 }
260 tp->rcv_wnd = new_win;
261 tp->rcv_wup = tp->rcv_nxt;
262
263
264
265
266 if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
267 new_win = min(new_win, MAX_TCP_WINDOW);
268 else
269 new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
270
271
272 new_win >>= tp->rx_opt.rcv_wscale;
273
274
275 if (new_win == 0)
276 tp->pred_flags = 0;
277
278 return new_win;
279}
280
281static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
282{
283 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR;
284 if (!(tp->ecn_flags & TCP_ECN_OK))
285 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE;
286}
287
288static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
289{
290 struct tcp_sock *tp = tcp_sk(sk);
291
292 tp->ecn_flags = 0;
293 if (sysctl_tcp_ecn) {
294 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR;
295 tp->ecn_flags = TCP_ECN_OK;
296 }
297}
298
299static __inline__ void
300TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th)
301{
302 if (inet_rsk(req)->ecn_ok)
303 th->ece = 1;
304}
305
306static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
307 int tcp_header_len)
308{
309 struct tcp_sock *tp = tcp_sk(sk);
310
311 if (tp->ecn_flags & TCP_ECN_OK) {
312
313 if (skb->len != tcp_header_len &&
314 !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
315 INET_ECN_xmit(sk);
316 if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
317 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
318 tcp_hdr(skb)->cwr = 1;
319 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
320 }
321 } else {
322
323 INET_ECN_dontxmit(sk);
324 }
325 if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
326 tcp_hdr(skb)->ece = 1;
327 }
328}
329
330
331
332
333static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
334{
335 skb->csum = 0;
336
337 TCP_SKB_CB(skb)->flags = flags;
338 TCP_SKB_CB(skb)->sacked = 0;
339
340 skb_shinfo(skb)->gso_segs = 1;
341 skb_shinfo(skb)->gso_size = 0;
342 skb_shinfo(skb)->gso_type = 0;
343
344 TCP_SKB_CB(skb)->seq = seq;
345 if (flags & (TCPCB_FLAG_SYN | TCPCB_FLAG_FIN))
346 seq++;
347 TCP_SKB_CB(skb)->end_seq = seq;
348}
349
350static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
351 __u32 tstamp, __u8 **md5_hash)
352{
353 if (tp->rx_opt.tstamp_ok) {
354 *ptr++ = htonl((TCPOPT_NOP << 24) |
355 (TCPOPT_NOP << 16) |
356 (TCPOPT_TIMESTAMP << 8) |
357 TCPOLEN_TIMESTAMP);
358 *ptr++ = htonl(tstamp);
359 *ptr++ = htonl(tp->rx_opt.ts_recent);
360 }
361 if (tp->rx_opt.eff_sacks) {
362 struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks;
363 int this_sack;
364
365 *ptr++ = htonl((TCPOPT_NOP << 24) |
366 (TCPOPT_NOP << 16) |
367 (TCPOPT_SACK << 8) |
368 (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks *
369 TCPOLEN_SACK_PERBLOCK)));
370
371 for (this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
372 *ptr++ = htonl(sp[this_sack].start_seq);
373 *ptr++ = htonl(sp[this_sack].end_seq);
374 }
375
376 if (tp->rx_opt.dsack) {
377 tp->rx_opt.dsack = 0;
378 tp->rx_opt.eff_sacks--;
379 }
380 }
381#ifdef CONFIG_TCP_MD5SIG
382 if (md5_hash) {
383 *ptr++ = htonl((TCPOPT_NOP << 24) |
384 (TCPOPT_NOP << 16) |
385 (TCPOPT_MD5SIG << 8) |
386 TCPOLEN_MD5SIG);
387 *md5_hash = (__u8 *)ptr;
388 }
389#endif
390}
391
392
393
394
395
396
397
398
399
400
401static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
402 int offer_wscale, int wscale, __u32 tstamp,
403 __u32 ts_recent, __u8 **md5_hash)
404{
405
406
407
408
409
410
411
412
413
414
415
416
417
418 *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
419 if (ts) {
420 if (sack)
421 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
422 (TCPOLEN_SACK_PERM << 16) |
423 (TCPOPT_TIMESTAMP << 8) |
424 TCPOLEN_TIMESTAMP);
425 else
426 *ptr++ = htonl((TCPOPT_NOP << 24) |
427 (TCPOPT_NOP << 16) |
428 (TCPOPT_TIMESTAMP << 8) |
429 TCPOLEN_TIMESTAMP);
430 *ptr++ = htonl(tstamp);
431 *ptr++ = htonl(ts_recent);
432 } else if (sack)
433 *ptr++ = htonl((TCPOPT_NOP << 24) |
434 (TCPOPT_NOP << 16) |
435 (TCPOPT_SACK_PERM << 8) |
436 TCPOLEN_SACK_PERM);
437 if (offer_wscale)
438 *ptr++ = htonl((TCPOPT_NOP << 24) |
439 (TCPOPT_WINDOW << 16) |
440 (TCPOLEN_WINDOW << 8) |
441 (wscale));
442#ifdef CONFIG_TCP_MD5SIG
443
444
445
446
447
448 if (md5_hash) {
449 *ptr++ = htonl((TCPOPT_NOP << 24) |
450 (TCPOPT_NOP << 16) |
451 (TCPOPT_MD5SIG << 8) |
452 TCPOLEN_MD5SIG);
453 *md5_hash = (__u8 *)ptr;
454 }
455#endif
456}
457
458
459
460
461
462
463
464
465
466
467
468
469static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
470 gfp_t gfp_mask)
471{
472 const struct inet_connection_sock *icsk = inet_csk(sk);
473 struct inet_sock *inet;
474 struct tcp_sock *tp;
475 struct tcp_skb_cb *tcb;
476 int tcp_header_size;
477#ifdef CONFIG_TCP_MD5SIG
478 struct tcp_md5sig_key *md5;
479 __u8 *md5_hash_location;
480#endif
481 struct tcphdr *th;
482 int sysctl_flags;
483 int err;
484
485 BUG_ON(!skb || !tcp_skb_pcount(skb));
486
487
488
489
490 if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
491 __net_timestamp(skb);
492
493 if (likely(clone_it)) {
494 if (unlikely(skb_cloned(skb)))
495 skb = pskb_copy(skb, gfp_mask);
496 else
497 skb = skb_clone(skb, gfp_mask);
498 if (unlikely(!skb))
499 return -ENOBUFS;
500 }
501
502 inet = inet_sk(sk);
503 tp = tcp_sk(sk);
504 tcb = TCP_SKB_CB(skb);
505 tcp_header_size = tp->tcp_header_len;
506
507#define SYSCTL_FLAG_TSTAMPS 0x1
508#define SYSCTL_FLAG_WSCALE 0x2
509#define SYSCTL_FLAG_SACK 0x4
510
511 sysctl_flags = 0;
512 if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
513 tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
514 if (sysctl_tcp_timestamps) {
515 tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
516 sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
517 }
518 if (sysctl_tcp_window_scaling) {
519 tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
520 sysctl_flags |= SYSCTL_FLAG_WSCALE;
521 }
522 if (sysctl_tcp_sack) {
523 sysctl_flags |= SYSCTL_FLAG_SACK;
524 if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
525 tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
526 }
527 } else if (unlikely(tp->rx_opt.eff_sacks)) {
528
529
530
531 tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
532 (tp->rx_opt.eff_sacks *
533 TCPOLEN_SACK_PERBLOCK));
534 }
535
536 if (tcp_packets_in_flight(tp) == 0)
537 tcp_ca_event(sk, CA_EVENT_TX_START);
538
539#ifdef CONFIG_TCP_MD5SIG
540
541
542
543
544 md5 = tp->af_specific->md5_lookup(sk, sk);
545 if (md5)
546 tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
547#endif
548
549 skb_push(skb, tcp_header_size);
550 skb_reset_transport_header(skb);
551 skb_set_owner_w(skb, sk);
552
553
554 th = tcp_hdr(skb);
555 th->source = inet->sport;
556 th->dest = inet->dport;
557 th->seq = htonl(tcb->seq);
558 th->ack_seq = htonl(tp->rcv_nxt);
559 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
560 tcb->flags);
561
562 if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
563
564
565
566 th->window = htons(min(tp->rcv_wnd, 65535U));
567 } else {
568 th->window = htons(tcp_select_window(sk));
569 }
570 th->check = 0;
571 th->urg_ptr = 0;
572
573 if (unlikely(tp->urg_mode &&
574 between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) {
575 th->urg_ptr = htons(tp->snd_up - tcb->seq);
576 th->urg = 1;
577 }
578
579 if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
580 tcp_syn_build_options((__be32 *)(th + 1),
581 tcp_advertise_mss(sk),
582 (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
583 (sysctl_flags & SYSCTL_FLAG_SACK),
584 (sysctl_flags & SYSCTL_FLAG_WSCALE),
585 tp->rx_opt.rcv_wscale,
586 tcb->when,
587 tp->rx_opt.ts_recent,
588
589#ifdef CONFIG_TCP_MD5SIG
590 md5 ? &md5_hash_location :
591#endif
592 NULL);
593 } else {
594 tcp_build_and_update_options((__be32 *)(th + 1),
595 tp, tcb->when,
596#ifdef CONFIG_TCP_MD5SIG
597 md5 ? &md5_hash_location :
598#endif
599 NULL);
600 TCP_ECN_send(sk, skb, tcp_header_size);
601 }
602
603#ifdef CONFIG_TCP_MD5SIG
604
605 if (md5) {
606 tp->af_specific->calc_md5_hash(md5_hash_location,
607 md5,
608 sk, NULL, NULL,
609 tcp_hdr(skb),
610 sk->sk_protocol,
611 skb->len);
612 }
613#endif
614
615 icsk->icsk_af_ops->send_check(sk, skb->len, skb);
616
617 if (likely(tcb->flags & TCPCB_FLAG_ACK))
618 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
619
620 if (skb->len != tcp_header_size)
621 tcp_event_data_sent(tp, skb, sk);
622
623 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
624 TCP_INC_STATS(TCP_MIB_OUTSEGS);
625
626 err = icsk->icsk_af_ops->queue_xmit(skb, 0);
627 if (likely(err <= 0))
628 return err;
629
630 tcp_enter_cwr(sk, 1);
631
632 return net_xmit_eval(err);
633
634#undef SYSCTL_FLAG_TSTAMPS
635#undef SYSCTL_FLAG_WSCALE
636#undef SYSCTL_FLAG_SACK
637}
638
639
640
641
642
643
644static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
645{
646 struct tcp_sock *tp = tcp_sk(sk);
647
648
649 tp->write_seq = TCP_SKB_CB(skb)->end_seq;
650 skb_header_release(skb);
651 tcp_add_write_queue_tail(sk, skb);
652 sk->sk_wmem_queued += skb->truesize;
653 sk_mem_charge(sk, skb->truesize);
654}
655
656static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb,
657 unsigned int mss_now)
658{
659 if (skb->len <= mss_now || !sk_can_gso(sk)) {
660
661
662
663 skb_shinfo(skb)->gso_segs = 1;
664 skb_shinfo(skb)->gso_size = 0;
665 skb_shinfo(skb)->gso_type = 0;
666 } else {
667 skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
668 skb_shinfo(skb)->gso_size = mss_now;
669 skb_shinfo(skb)->gso_type = sk->sk_gso_type;
670 }
671}
672
673
674
675
676static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
677 int decr)
678{
679 struct tcp_sock *tp = tcp_sk(sk);
680
681 if (!tp->sacked_out || tcp_is_reno(tp))
682 return;
683
684 if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
685 tp->fackets_out -= decr;
686}
687
688
689
690
691
692
693int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
694 unsigned int mss_now)
695{
696 struct tcp_sock *tp = tcp_sk(sk);
697 struct sk_buff *buff;
698 int nsize, old_factor;
699 int nlen;
700 u16 flags;
701
702 BUG_ON(len > skb->len);
703
704 tcp_clear_retrans_hints_partial(tp);
705 nsize = skb_headlen(skb) - len;
706 if (nsize < 0)
707 nsize = 0;
708
709 if (skb_cloned(skb) &&
710 skb_is_nonlinear(skb) &&
711 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
712 return -ENOMEM;
713
714
715 buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
716 if (buff == NULL)
717 return -ENOMEM;
718
719 sk->sk_wmem_queued += buff->truesize;
720 sk_mem_charge(sk, buff->truesize);
721 nlen = skb->len - len - nsize;
722 buff->truesize += nlen;
723 skb->truesize -= nlen;
724
725
726 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
727 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
728 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
729
730
731 flags = TCP_SKB_CB(skb)->flags;
732 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
733 TCP_SKB_CB(buff)->flags = flags;
734 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
735
736 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
737
738 buff->csum = csum_partial_copy_nocheck(skb->data + len,
739 skb_put(buff, nsize),
740 nsize, 0);
741
742 skb_trim(skb, len);
743
744 skb->csum = csum_block_sub(skb->csum, buff->csum, len);
745 } else {
746 skb->ip_summed = CHECKSUM_PARTIAL;
747 skb_split(skb, buff, len);
748 }
749
750 buff->ip_summed = skb->ip_summed;
751
752
753
754
755 TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
756 buff->tstamp = skb->tstamp;
757
758 old_factor = tcp_skb_pcount(skb);
759
760
761 tcp_set_skb_tso_segs(sk, skb, mss_now);
762 tcp_set_skb_tso_segs(sk, buff, mss_now);
763
764
765
766
767 if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
768 int diff = old_factor - tcp_skb_pcount(skb) -
769 tcp_skb_pcount(buff);
770
771 tp->packets_out -= diff;
772
773 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
774 tp->sacked_out -= diff;
775 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
776 tp->retrans_out -= diff;
777
778 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
779 tp->lost_out -= diff;
780
781
782 if (tcp_is_reno(tp) && diff > 0) {
783 tcp_dec_pcount_approx_int(&tp->sacked_out, diff);
784 tcp_verify_left_out(tp);
785 }
786 tcp_adjust_fackets_out(sk, skb, diff);
787 }
788
789
790 skb_header_release(buff);
791 tcp_insert_write_queue_after(skb, buff, sk);
792
793 return 0;
794}
795
796
797
798
799
800static void __pskb_trim_head(struct sk_buff *skb, int len)
801{
802 int i, k, eat;
803
804 eat = len;
805 k = 0;
806 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
807 if (skb_shinfo(skb)->frags[i].size <= eat) {
808 put_page(skb_shinfo(skb)->frags[i].page);
809 eat -= skb_shinfo(skb)->frags[i].size;
810 } else {
811 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
812 if (eat) {
813 skb_shinfo(skb)->frags[k].page_offset += eat;
814 skb_shinfo(skb)->frags[k].size -= eat;
815 eat = 0;
816 }
817 k++;
818 }
819 }
820 skb_shinfo(skb)->nr_frags = k;
821
822 skb_reset_tail_pointer(skb);
823 skb->data_len -= len;
824 skb->len = skb->data_len;
825}
826
827int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
828{
829 if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
830 return -ENOMEM;
831
832
833 if (unlikely(len < skb_headlen(skb)))
834 __skb_pull(skb, len);
835 else
836 __pskb_trim_head(skb, len - skb_headlen(skb));
837
838 TCP_SKB_CB(skb)->seq += len;
839 skb->ip_summed = CHECKSUM_PARTIAL;
840
841 skb->truesize -= len;
842 sk->sk_wmem_queued -= len;
843 sk_mem_uncharge(sk, len);
844 sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
845
846
847
848
849 if (tcp_skb_pcount(skb) > 1)
850 tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk, 1));
851
852 return 0;
853}
854
855
856int tcp_mtu_to_mss(struct sock *sk, int pmtu)
857{
858 struct tcp_sock *tp = tcp_sk(sk);
859 struct inet_connection_sock *icsk = inet_csk(sk);
860 int mss_now;
861
862
863
864
865 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);
866
867
868 if (mss_now > tp->rx_opt.mss_clamp)
869 mss_now = tp->rx_opt.mss_clamp;
870
871
872 mss_now -= icsk->icsk_ext_hdr_len;
873
874
875 if (mss_now < 48)
876 mss_now = 48;
877
878
879 mss_now -= tp->tcp_header_len - sizeof(struct tcphdr);
880
881 return mss_now;
882}
883
884
885int tcp_mss_to_mtu(struct sock *sk, int mss)
886{
887 struct tcp_sock *tp = tcp_sk(sk);
888 struct inet_connection_sock *icsk = inet_csk(sk);
889 int mtu;
890
891 mtu = mss +
892 tp->tcp_header_len +
893 icsk->icsk_ext_hdr_len +
894 icsk->icsk_af_ops->net_header_len;
895
896 return mtu;
897}
898
899void tcp_mtup_init(struct sock *sk)
900{
901 struct tcp_sock *tp = tcp_sk(sk);
902 struct inet_connection_sock *icsk = inet_csk(sk);
903
904 icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1;
905 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
906 icsk->icsk_af_ops->net_header_len;
907 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
908 icsk->icsk_mtup.probe_size = 0;
909}
910
911
912static int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
913{
914 if (tp->max_window && pktsize > (tp->max_window >> 1))
915 return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
916 else
917 return pktsize;
918}
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
943{
944 struct tcp_sock *tp = tcp_sk(sk);
945 struct inet_connection_sock *icsk = inet_csk(sk);
946 int mss_now;
947
948 if (icsk->icsk_mtup.search_high > pmtu)
949 icsk->icsk_mtup.search_high = pmtu;
950
951 mss_now = tcp_mtu_to_mss(sk, pmtu);
952 mss_now = tcp_bound_to_half_wnd(tp, mss_now);
953
954
955 icsk->icsk_pmtu_cookie = pmtu;
956 if (icsk->icsk_mtup.enabled)
957 mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low));
958 tp->mss_cache = mss_now;
959
960 return mss_now;
961}
962
963
964
965
966
967
968
969
970unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
971{
972 struct tcp_sock *tp = tcp_sk(sk);
973 struct dst_entry *dst = __sk_dst_get(sk);
974 u32 mss_now;
975 u16 xmit_size_goal;
976 int doing_tso = 0;
977
978 mss_now = tp->mss_cache;
979
980 if (large_allowed && sk_can_gso(sk) && !tp->urg_mode)
981 doing_tso = 1;
982
983 if (dst) {
984 u32 mtu = dst_mtu(dst);
985 if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
986 mss_now = tcp_sync_mss(sk, mtu);
987 }
988
989 if (tp->rx_opt.eff_sacks)
990 mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
991 (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
992
993#ifdef CONFIG_TCP_MD5SIG
994 if (tp->af_specific->md5_lookup(sk, sk))
995 mss_now -= TCPOLEN_MD5SIG_ALIGNED;
996#endif
997
998 xmit_size_goal = mss_now;
999
1000 if (doing_tso) {
1001 xmit_size_goal = (65535 -
1002 inet_csk(sk)->icsk_af_ops->net_header_len -
1003 inet_csk(sk)->icsk_ext_hdr_len -
1004 tp->tcp_header_len);
1005
1006 xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
1007 xmit_size_goal -= (xmit_size_goal % mss_now);
1008 }
1009 tp->xmit_size_goal = xmit_size_goal;
1010
1011 return mss_now;
1012}
1013
1014
1015static void tcp_cwnd_validate(struct sock *sk)
1016{
1017 struct tcp_sock *tp = tcp_sk(sk);
1018
1019 if (tp->packets_out >= tp->snd_cwnd) {
1020
1021 tp->snd_cwnd_used = 0;
1022 tp->snd_cwnd_stamp = tcp_time_stamp;
1023 } else {
1024
1025 if (tp->packets_out > tp->snd_cwnd_used)
1026 tp->snd_cwnd_used = tp->packets_out;
1027
1028 if (sysctl_tcp_slow_start_after_idle &&
1029 (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
1030 tcp_cwnd_application_limited(sk);
1031 }
1032}
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
1047 unsigned int mss_now, unsigned int cwnd)
1048{
1049 struct tcp_sock *tp = tcp_sk(sk);
1050 u32 needed, window, cwnd_len;
1051
1052 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1053 cwnd_len = mss_now * cwnd;
1054
1055 if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
1056 return cwnd_len;
1057
1058 needed = min(skb->len, window);
1059
1060 if (skb == tcp_write_queue_tail(sk) && cwnd_len <= needed)
1061 return cwnd_len;
1062
1063 return needed - needed % mss_now;
1064}
1065
1066
1067
1068
1069static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
1070 struct sk_buff *skb)
1071{
1072 u32 in_flight, cwnd;
1073
1074
1075 if ((TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
1076 tcp_skb_pcount(skb) == 1)
1077 return 1;
1078
1079 in_flight = tcp_packets_in_flight(tp);
1080 cwnd = tp->snd_cwnd;
1081 if (in_flight < cwnd)
1082 return (cwnd - in_flight);
1083
1084 return 0;
1085}
1086
1087
1088
1089
/*
 * Make sure @skb's segment count is set and, for multi-segment skbs,
 * matches @mss_now.  Returns the (possibly recomputed) segment count.
 */
static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb,
			     unsigned int mss_now)
{
	int tso_segs = tcp_skb_pcount(skb);

	/* Already set, and either single-segment or built for this MSS. */
	if (tso_segs && !(tso_segs > 1 && tcp_skb_mss(skb) != mss_now))
		return tso_segs;

	tcp_set_skb_tso_segs(sk, skb, mss_now);

	return tcp_skb_pcount(skb);
}
1101
1102static inline int tcp_minshall_check(const struct tcp_sock *tp)
1103{
1104 return after(tp->snd_sml,tp->snd_una) &&
1105 !after(tp->snd_sml, tp->snd_nxt);
1106}
1107
1108
1109
1110
1111
1112
1113
1114
1115static inline int tcp_nagle_check(const struct tcp_sock *tp,
1116 const struct sk_buff *skb,
1117 unsigned mss_now, int nonagle)
1118{
1119 return (skb->len < mss_now &&
1120 ((nonagle & TCP_NAGLE_CORK) ||
1121 (!nonagle && tp->packets_out && tcp_minshall_check(tp))));
1122}
1123
1124
1125
1126
1127static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
1128 unsigned int cur_mss, int nonagle)
1129{
1130
1131
1132
1133
1134
1135
1136 if (nonagle & TCP_NAGLE_PUSH)
1137 return 1;
1138
1139
1140
1141
1142 if (tp->urg_mode || (tp->frto_counter == 2) ||
1143 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
1144 return 1;
1145
1146 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
1147 return 1;
1148
1149 return 0;
1150}
1151
1152
1153static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb,
1154 unsigned int cur_mss)
1155{
1156 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
1157
1158 if (skb->len > cur_mss)
1159 end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
1160
1161 return !after(end_seq, tcp_wnd_end(tp));
1162}
1163
1164
1165
1166
1167
/*
 * Combined Nagle, congestion-window and send-window test for @skb.
 * Returns the congestion-window quota in segments, or 0 if any of the
 * three tests forbids sending.  Also initialises the skb's segment
 * count for @cur_mss.
 */
static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
				 unsigned int cur_mss, int nonagle)
{
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int cwnd_quota;

	tcp_init_tso_segs(sk, skb, cur_mss);

	if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
		return 0;

	cwnd_quota = tcp_cwnd_test(tp, skb);
	if (!cwnd_quota)
		return cwnd_quota;

	if (!tcp_snd_wnd_test(tp, skb, cur_mss))
		cwnd_quota = 0;

	return cwnd_quota;
}
1185
1186int tcp_may_send_now(struct sock *sk)
1187{
1188 struct tcp_sock *tp = tcp_sk(sk);
1189 struct sk_buff *skb = tcp_send_head(sk);
1190
1191 return (skb &&
1192 tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
1193 (tcp_skb_is_last(sk, skb) ?
1194 tp->nonagle : TCP_NAGLE_PUSH)));
1195}
1196
1197
1198
1199
1200
1201
1202
1203
1204static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1205 unsigned int mss_now)
1206{
1207 struct sk_buff *buff;
1208 int nlen = skb->len - len;
1209 u16 flags;
1210
1211
1212 if (skb->len != skb->data_len)
1213 return tcp_fragment(sk, skb, len, mss_now);
1214
1215 buff = sk_stream_alloc_skb(sk, 0, GFP_ATOMIC);
1216 if (unlikely(buff == NULL))
1217 return -ENOMEM;
1218
1219 sk->sk_wmem_queued += buff->truesize;
1220 sk_mem_charge(sk, buff->truesize);
1221 buff->truesize += nlen;
1222 skb->truesize -= nlen;
1223
1224
1225 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1226 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1227 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1228
1229
1230 flags = TCP_SKB_CB(skb)->flags;
1231 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
1232 TCP_SKB_CB(buff)->flags = flags;
1233
1234
1235 TCP_SKB_CB(buff)->sacked = 0;
1236
1237 buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
1238 skb_split(skb, buff, len);
1239
1240
1241 tcp_set_skb_tso_segs(sk, skb, mss_now);
1242 tcp_set_skb_tso_segs(sk, buff, mss_now);
1243
1244
1245 skb_header_release(buff);
1246 tcp_insert_write_queue_after(skb, buff, sk);
1247
1248 return 0;
1249}
1250
1251
1252
1253
1254
1255
1256static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1257{
1258 struct tcp_sock *tp = tcp_sk(sk);
1259 const struct inet_connection_sock *icsk = inet_csk(sk);
1260 u32 send_win, cong_win, limit, in_flight;
1261
1262 if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
1263 goto send_now;
1264
1265 if (icsk->icsk_ca_state != TCP_CA_Open)
1266 goto send_now;
1267
1268
1269 if (tp->tso_deferred &&
1270 ((jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
1271 goto send_now;
1272
1273 in_flight = tcp_packets_in_flight(tp);
1274
1275 BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));
1276
1277 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1278
1279
1280 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
1281
1282 limit = min(send_win, cong_win);
1283
1284
1285 if (limit >= 65536)
1286 goto send_now;
1287
1288 if (sysctl_tcp_tso_win_divisor) {
1289 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1290
1291
1292
1293
1294 chunk /= sysctl_tcp_tso_win_divisor;
1295 if (limit >= chunk)
1296 goto send_now;
1297 } else {
1298
1299
1300
1301
1302
1303 if (limit > tcp_max_burst(tp) * tp->mss_cache)
1304 goto send_now;
1305 }
1306
1307
1308 tp->tso_deferred = 1 | (jiffies << 1);
1309
1310 return 1;
1311
1312send_now:
1313 tp->tso_deferred = 0;
1314 return 0;
1315}
1316
1317
1318
1319
1320
1321
1322static int tcp_mtu_probe(struct sock *sk)
1323{
1324 struct tcp_sock *tp = tcp_sk(sk);
1325 struct inet_connection_sock *icsk = inet_csk(sk);
1326 struct sk_buff *skb, *nskb, *next;
1327 int len;
1328 int probe_size;
1329 int size_needed;
1330 int copy;
1331 int mss_now;
1332
1333
1334
1335
1336
1337 if (!icsk->icsk_mtup.enabled ||
1338 icsk->icsk_mtup.probe_size ||
1339 inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
1340 tp->snd_cwnd < 11 ||
1341 tp->rx_opt.eff_sacks)
1342 return -1;
1343
1344
1345 mss_now = tcp_current_mss(sk, 0);
1346 probe_size = 2 * tp->mss_cache;
1347 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
1348 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
1349
1350 return -1;
1351 }
1352
1353
1354 if (tp->write_seq - tp->snd_nxt < size_needed)
1355 return -1;
1356
1357 if (tp->snd_wnd < size_needed)
1358 return -1;
1359 if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
1360 return 0;
1361
1362
1363 if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
1364 if (!tcp_packets_in_flight(tp))
1365 return -1;
1366 else
1367 return 0;
1368 }
1369
1370
1371 if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
1372 return -1;
1373 sk->sk_wmem_queued += nskb->truesize;
1374 sk_mem_charge(sk, nskb->truesize);
1375
1376 skb = tcp_send_head(sk);
1377
1378 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
1379 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
1380 TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK;
1381 TCP_SKB_CB(nskb)->sacked = 0;
1382 nskb->csum = 0;
1383 nskb->ip_summed = skb->ip_summed;
1384
1385 tcp_insert_write_queue_before(nskb, skb, sk);
1386
1387 len = 0;
1388 tcp_for_write_queue_from_safe(skb, next, sk) {
1389 copy = min_t(int, skb->len, probe_size - len);
1390 if (nskb->ip_summed)
1391 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
1392 else
1393 nskb->csum = skb_copy_and_csum_bits(skb, 0,
1394 skb_put(nskb, copy),
1395 copy, nskb->csum);
1396
1397 if (skb->len <= copy) {
1398
1399
1400 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags;
1401 tcp_unlink_write_queue(skb, sk);
1402 sk_wmem_free_skb(sk, skb);
1403 } else {
1404 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
1405 ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
1406 if (!skb_shinfo(skb)->nr_frags) {
1407 skb_pull(skb, copy);
1408 if (skb->ip_summed != CHECKSUM_PARTIAL)
1409 skb->csum = csum_partial(skb->data,
1410 skb->len, 0);
1411 } else {
1412 __pskb_trim_head(skb, copy);
1413 tcp_set_skb_tso_segs(sk, skb, mss_now);
1414 }
1415 TCP_SKB_CB(skb)->seq += copy;
1416 }
1417
1418 len += copy;
1419
1420 if (len >= probe_size)
1421 break;
1422 }
1423 tcp_init_tso_segs(sk, nskb, nskb->len);
1424
1425
1426
1427 TCP_SKB_CB(nskb)->when = tcp_time_stamp;
1428 if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
1429
1430
1431 tp->snd_cwnd--;
1432 tcp_event_new_data_sent(sk, nskb);
1433
1434 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
1435 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
1436 tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq;
1437
1438 return 1;
1439 }
1440
1441 return -1;
1442}
1443
1444
1445
1446
1447
1448
1449
1450
/*
 * Send queued segments starting at the send head until one of the
 * per-segment checks fails or a transmit attempt reports an error.
 *
 * @sk:      socket whose pending data is to be transmitted
 * @mss_now: segment size used for TSO segment counting, the send-window
 *           and Nagle tests, and as the default split limit
 * @nonagle: Nagle flags; applied only when the skb under test is the last
 *           one in the write queue, otherwise TCP_NAGLE_PUSH is used
 *
 * Returns 0 when the socket is in TCP_CLOSE, when tcp_mtu_probe() returns 0,
 * or when at least one packet was sent.  Otherwise returns non-zero only if
 * no packets are outstanding while data is still queued at the send head.
 */
static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	unsigned int tso_segs, sent_pkts;
	int cwnd_quota;
	int result;

	/* Nothing is transmitted on a closed socket. */
	if (unlikely(sk->sk_state == TCP_CLOSE))
		return 0;

	sent_pkts = 0;

	/*
	 * Give MTU probing the first chance:
	 *   0  -> return immediately without sending regular data,
	 *  >0  -> a probe went out; count it as one sent packet,
	 *  <0  -> fall through to normal transmission.
	 */
	if ((result = tcp_mtu_probe(sk)) == 0) {
		return 0;
	} else if (result > 0) {
		sent_pkts = 1;
	}

	while ((skb = tcp_send_head(sk))) {
		unsigned int limit;

		tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
		BUG_ON(!tso_segs);

		/* Stop when the congestion window leaves no quota. */
		cwnd_quota = tcp_cwnd_test(tp, skb);
		if (!cwnd_quota)
			break;

		/* Stop when the send-window test fails for this skb. */
		if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
			break;

		/*
		 * Single-segment skbs go through the Nagle test; multi-segment
		 * (TSO) skbs go through the deferral check instead.
		 */
		if (tso_segs == 1) {
			if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
						     (tcp_skb_is_last(sk, skb) ?
						      nonagle : TCP_NAGLE_PUSH))))
				break;
		} else {
			if (tcp_tso_should_defer(sk, skb))
				break;
		}

		/*
		 * Pick how many bytes may go out now; split the skb if it is
		 * longer than that.  A failed split ends the loop.
		 */
		limit = mss_now;
		if (tso_segs > 1)
			limit = tcp_mss_split_point(sk, skb, mss_now,
						    cwnd_quota);

		if (skb->len > limit &&
		    unlikely(tso_fragment(sk, skb, limit, mss_now)))
			break;

		/* Record the send time before handing the skb down. */
		TCP_SKB_CB(skb)->when = tcp_time_stamp;

		if (unlikely(tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC)))
			break;

		/*
		 * The transmit succeeded: advance the send head and update
		 * the sender state and packet accounting.
		 */
		tcp_event_new_data_sent(sk, skb);

		tcp_minshall_update(tp, mss_now, skb);
		sent_pkts++;
	}

	if (likely(sent_pkts)) {
		tcp_cwnd_validate(sk);
		return 0;
	}
	/* Nothing sent: report whether data is stuck with nothing in flight. */
	return !tp->packets_out && tcp_send_head(sk);
}
1527
1528
1529
1530
1531
/*
 * Push out whatever is queued at the send head.  If tcp_write_xmit()
 * returns non-zero, tcp_check_probe_timer() is called afterwards.
 *
 * @sk:      socket to push
 * @cur_mss: segment size forwarded to tcp_write_xmit()
 * @nonagle: Nagle flags forwarded to tcp_write_xmit()
 */
void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
			       int nonagle)
{
	/* Empty send head: there is nothing to push. */
	if (!tcp_send_head(sk))
		return;

	if (tcp_write_xmit(sk, cur_mss, nonagle))
		tcp_check_probe_timer(sk);
}
1542
1543
1544
1545
1546void tcp_push_one(struct sock *sk, unsigned int mss_now)
1547{
1548 struct sk_buff *skb = tcp_send_head(sk);
1549 unsigned int tso_segs, cwnd_quota;
1550
1551 BUG_ON(!skb || skb->len < mss_now);
1552
1553 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
1554 cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH);
1555
1556 if (likely(cwnd_quota)) {
1557 unsigned int limit;
1558
1559 BUG_ON(!tso_segs);
1560
1561 limit = mss_now;
1562 if (tso_segs > 1)
1563 limit = tcp_mss_split_point(sk, skb, mss_now,
1564 cwnd_quota);
1565
1566 if (skb->len > limit &&
1567 unlikely(tso_fragment(sk, skb, limit, mss_now)))
1568 return;
1569
1570
1571 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1572
1573 if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
1574 tcp_event_new_data_sent(sk, skb);
1575 tcp_cwnd_validate(sk);
1576 return;
1577 }
1578 }
1579}
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633u32 __tcp_select_window(struct sock *sk)
1634{
1635 struct inet_connection_sock *icsk = inet_csk(sk);
1636 struct tcp_sock *tp = tcp_sk(sk);
1637
1638
1639
1640
1641
1642
1643 int mss = icsk->icsk_ack.rcv_mss;
1644 int free_space = tcp_space(sk);
1645 int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
1646 int window;
1647
1648 if (mss > full_space)
1649 mss = full_space;
1650
1651 if (free_space < (full_space >> 1)) {
1652 icsk->icsk_ack.quick = 0;
1653
1654 if (tcp_memory_pressure)
1655 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
1656 4U * tp->advmss);
1657
1658 if (free_space < mss)
1659 return 0;
1660 }
1661
1662 if (free_space > tp->rcv_ssthresh)
1663 free_space = tp->rcv_ssthresh;
1664
1665
1666
1667
1668 window = tp->rcv_wnd;
1669 if (tp->rx_opt.rcv_wscale) {
1670 window = free_space;
1671
1672
1673
1674
1675
1676 if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
1677 window = (((window >> tp->rx_opt.rcv_wscale) + 1)
1678 << tp->rx_opt.rcv_wscale);
1679 } else {
1680
1681
1682
1683
1684
1685
1686
1687
1688 if (window <= free_space - mss || window > free_space)
1689 window = (free_space / mss) * mss;
1690 else if (mss == full_space &&
1691 free_space > window + (full_space >> 1))
1692 window = free_space;
1693 }
1694
1695 return window;
1696}
1697
1698
/*
 * Try to merge the skb that follows @skb in the write queue into @skb,
 * so that one retransmission carries both payloads.
 *
 * @sk:      socket owning the write queue
 * @skb:     segment that absorbs its successor
 * @mss_now: upper bound for the combined payload length
 *
 * The function returns without changing anything if any precondition
 * below fails.  On success the successor is unlinked, its data, flags,
 * checksum and sequence range are folded into @skb, the per-socket packet
 * counters are adjusted, and the successor is freed.
 */
static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb,
				     int mss_now)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
	int skb_size, next_skb_size;
	u16 flags;

	/* Neither buffer may be cloned. */
	if (skb_cloned(skb) || skb_cloned(next_skb))
		return;

	skb_size = skb->len;
	next_skb_size = next_skb->len;
	flags = TCP_SKB_CB(skb)->flags;

	/* Do not absorb a segment that has already been SACKed. */
	if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
		return;

	/* The merged segment must not reach past the end of the send window. */
	if (after(TCP_SKB_CB(next_skb)->end_seq, tcp_wnd_end(tp)))
		return;

	/*
	 * The successor's payload has to fit into @skb's tailroom, and the
	 * combined length must not exceed mss_now.
	 */
	if ((next_skb_size > skb_tailroom(skb)) ||
	    ((skb_size + next_skb_size) > mss_now))
		return;

	BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);

	tcp_highest_sack_combine(sk, next_skb, skb);

	/* Take the successor off the queue and append its linear data. */
	tcp_unlink_write_queue(next_skb, sk);

	skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
				  next_skb_size);

	/* CHECKSUM_PARTIAL on the absorbed skb carries over to the result. */
	if (next_skb->ip_summed == CHECKSUM_PARTIAL)
		skb->ip_summed = CHECKSUM_PARTIAL;

	/* Otherwise fold the successor's checksum in at offset skb_size. */
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);

	/* The merged segment now ends where the successor ended. */
	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;

	/* Union of both segments' TCP flags. */
	flags |= TCP_SKB_CB(next_skb)->flags;
	TCP_SKB_CB(skb)->flags = flags;

	/*
	 * Inherit only the "ever retransmitted" bit from the successor, and
	 * remove the successor from the retrans_out / lost_out counts it
	 * contributed to.
	 */
	TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
	if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_RETRANS)
		tp->retrans_out -= tcp_skb_pcount(next_skb);
	if (TCP_SKB_CB(next_skb)->sacked & TCPCB_LOST)
		tp->lost_out -= tcp_skb_pcount(next_skb);

	/* For Reno with a non-zero sacked_out, reduce that count as well. */
	if (tcp_is_reno(tp) && tp->sacked_out)
		tcp_dec_pcount_approx(&tp->sacked_out, next_skb);

	tcp_adjust_fackets_out(sk, next_skb, tcp_skb_pcount(next_skb));
	tp->packets_out -= tcp_skb_pcount(next_skb);

	/* NOTE(review): presumably cached hints may reference next_skb - confirm. */
	tcp_clear_retrans_hints_partial(tp);

	sk_wmem_free_skb(sk, next_skb);
}
1776
1777
1778
1779
1780
1781void tcp_simple_retransmit(struct sock *sk)
1782{
1783 const struct inet_connection_sock *icsk = inet_csk(sk);
1784 struct tcp_sock *tp = tcp_sk(sk);
1785 struct sk_buff *skb;
1786 unsigned int mss = tcp_current_mss(sk, 0);
1787 int lost = 0;
1788
1789 tcp_for_write_queue(skb, sk) {
1790 if (skb == tcp_send_head(sk))
1791 break;
1792 if (skb->len > mss &&
1793 !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
1794 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
1795 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1796 tp->retrans_out -= tcp_skb_pcount(skb);
1797 }
1798 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST)) {
1799 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1800 tp->lost_out += tcp_skb_pcount(skb);
1801 lost = 1;
1802 }
1803 }
1804 }
1805
1806 tcp_clear_all_retrans_hints(tp);
1807
1808 if (!lost)
1809 return;
1810
1811 if (tcp_is_reno(tp))
1812 tcp_limit_reno_sacked(tp);
1813
1814 tcp_verify_left_out(tp);
1815
1816
1817
1818
1819
1820
1821 if (icsk->icsk_ca_state != TCP_CA_Loss) {
1822 tp->high_seq = tp->snd_nxt;
1823 tp->snd_ssthresh = tcp_current_ssthresh(sk);
1824 tp->prior_ssthresh = 0;
1825 tp->undo_marker = 0;
1826 tcp_set_ca_state(sk, TCP_CA_Loss);
1827 }
1828 tcp_xmit_retransmit_queue(sk);
1829}
1830
1831
1832
1833
1834
/*
 * Retransmit one segment from the write queue.
 *
 * @sk:  socket the segment belongs to
 * @skb: segment to retransmit
 *
 * Returns 0 on success.  Failure codes produced here are -EAGAIN (too much
 * write memory in use, or the segment lies outside the send window),
 * -ENOMEM (trimming or fragmenting failed) and -EHOSTUNREACH (header
 * rebuild failed); otherwise the result of tcp_transmit_skb() is returned.
 *
 * Before sending, the skb may be trimmed at the front, split down to the
 * current MSS, merged with its successor, or reduced to a bare FIN.
 */
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	unsigned int cur_mss = tcp_current_mss(sk, 0);
	int err;

	/* Any retransmission resets a recorded MTU probe size to zero. */
	if (icsk->icsk_mtup.probe_size) {
		icsk->icsk_mtup.probe_size = 0;
	}

	/*
	 * Back off while the write memory already allocated exceeds the
	 * smaller of 1.25 * sk_wmem_queued and sk_sndbuf.
	 */
	if (atomic_read(&sk->sk_wmem_alloc) >
	    min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
		return -EAGAIN;

	/*
	 * The segment starts before snd_una: drop the leading part up to
	 * snd_una.  A segment ending before snd_una is treated as a bug.
	 */
	if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
		if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
			BUG();
		if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
			return -ENOMEM;
	}

	/*
	 * A segment starting at or beyond the end of the send window is not
	 * sent, unless it starts exactly at snd_una.
	 */
	if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))
	    && TCP_SKB_CB(skb)->seq != tp->snd_una)
		return -EAGAIN;

	/* Split off anything beyond the current MSS. */
	if (skb->len > cur_mss) {
		if (tcp_fragment(sk, skb, cur_mss, cur_mss))
			return -ENOMEM;
	}

	/*
	 * Try to merge with the following segment when: this is not a SYN,
	 * it is shorter than half an MSS, the successor exists and is not
	 * the send head, both skbs have no page fragments and count as one
	 * packet each, and the tcp_retrans_collapse sysctl is enabled.
	 */
	if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
	    (skb->len < (cur_mss >> 1)) &&
	    (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
	    (!tcp_skb_is_last(sk, skb)) &&
	    (skb_shinfo(skb)->nr_frags == 0 &&
	     skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
	    (tcp_skb_pcount(skb) == 1 &&
	     tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
	    (sysctl_tcp_retrans_collapse != 0))
		tcp_retrans_try_collapse(sk, skb, cur_mss);

	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
		return -EHOSTUNREACH;

	/*
	 * The skb carries data plus a FIN and snd_una equals end_seq - 1:
	 * strip the payload and turn the skb into a data-less segment at
	 * sequence end_seq - 1 with the same flags.
	 */
	if (skb->len > 0 &&
	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
	    tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
		if (!pskb_trim(skb, 0)) {
			/* Re-initialise only if the trim succeeded. */
			tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
					     TCP_SKB_CB(skb)->flags);
			skb->ip_summed = CHECKSUM_NONE;
		}
	}

	/* Record the (re)transmission time. */
	TCP_SKB_CB(skb)->when = tcp_time_stamp;

	err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);

	if (err == 0) {
		/* Account for the retransmitted segment. */
		TCP_INC_STATS(TCP_MIB_RETRANSSEGS);

		tp->total_retrans++;

#if FASTRETRANS_DEBUG > 0
		/* The skb was already flagged as retransmitted and in flight. */
		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
			if (net_ratelimit())
				printk(KERN_DEBUG "retrans_out leaked.\n");
		}
#endif
		/* First outstanding retransmission: remember snd_nxt. */
		if (!tp->retrans_out)
			tp->lost_retrans_low = tp->snd_nxt;
		TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
		tp->retrans_out += tcp_skb_pcount(skb);

		/* Keep the timestamp of the first retransmission only. */
		if (!tp->retrans_stamp)
			tp->retrans_stamp = TCP_SKB_CB(skb)->when;

		tp->undo_retrans++;

		/* Remember snd_nxt at the time of this retransmission. */
		TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
	}
	return err;
}
1942
1943
1944
1945
1946
1947
1948
1949
1950
/*
 * Walk the write queue and retransmit what the current state calls for.
 *
 * @sk: socket whose write queue is processed
 *
 * Pass 1 (only if tp->lost_out is non-zero): retransmit segments tagged
 * TCPCB_LOST that are neither SACKed nor already retransmitted, resuming
 * from the cached retransmit hint when one exists.
 *
 * Pass 2 (only in TCP_CA_Recovery, for non-Reno sockets, when no new data
 * can be sent and something has been SACKed): retransmit untagged segments
 * that start below the highest SACKed sequence.
 *
 * Both passes stop at the send head and whenever the number of packets in
 * flight reaches snd_cwnd.
 */
void tcp_xmit_retransmit_queue(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int packet_cnt;

	/* Resume from the cached position, or start at the queue head. */
	if (tp->retransmit_skb_hint) {
		skb = tp->retransmit_skb_hint;
		packet_cnt = tp->retransmit_cnt_hint;
	} else {
		skb = tcp_write_queue_head(sk);
		packet_cnt = 0;
	}

	/* Pass 1: segments marked lost. */
	if (tp->lost_out) {
		tcp_for_write_queue_from(skb, sk) {
			__u8 sacked = TCP_SKB_CB(skb)->sacked;

			if (skb == tcp_send_head(sk))
				break;

			/* Cache the position so a later call can resume here. */
			tp->retransmit_skb_hint = skb;
			tp->retransmit_cnt_hint = packet_cnt;

			/*
			 * Congestion window is full: leave the function
			 * entirely, skipping pass 2 as well.
			 */
			if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
				return;

			if (sacked & TCPCB_LOST) {
				if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
					/* On failure drop the hint and give up. */
					if (tcp_retransmit_skb(sk, skb)) {
						tp->retransmit_skb_hint = NULL;
						return;
					}
					if (icsk->icsk_ca_state != TCP_CA_Loss)
						NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
					else
						NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);

					/* Re-arm the RTO timer when the queue head was resent. */
					if (skb == tcp_write_queue_head(sk))
						inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
									  inet_csk(sk)->icsk_rto,
									  TCP_RTO_MAX);
				}

				/* Stop once every lost packet has been visited. */
				packet_cnt += tcp_skb_pcount(skb);
				if (packet_cnt >= tp->lost_out)
					break;
			}
		}
	}

	/* Pass 2 applies only while in the Recovery state ... */
	if (icsk->icsk_ca_state != TCP_CA_Recovery)
		return;

	/* ... and never for Reno sockets. */
	if (tcp_is_reno(tp))
		return;

	/* If new data may be sent now, do not forward-retransmit. */
	if (tcp_may_send_now(sk))
		return;

	/* Nothing SACKed: the highest-SACK test below would be meaningless. */
	if (!tp->sacked_out)
		return;

	if (tp->forward_skb_hint)
		skb = tp->forward_skb_hint;
	else
		skb = tcp_write_queue_head(sk);

	tcp_for_write_queue_from(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;
		tp->forward_skb_hint = skb;

		/* Only segments below the highest SACKed sequence qualify. */
		if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
			break;

		if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
			break;

		/* Skip segments that already carry any tag bit. */
		if (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS)
			continue;

		/* On failure drop the hint and stop. */
		if (tcp_retransmit_skb(sk, skb)) {
			tp->forward_skb_hint = NULL;
			break;
		}

		if (skb == tcp_write_queue_head(sk))
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  inet_csk(sk)->icsk_rto,
						  TCP_RTO_MAX);

		NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS);
	}
}
2069
2070
2071
2072
2073void tcp_send_fin(struct sock *sk)
2074{
2075 struct tcp_sock *tp = tcp_sk(sk);
2076 struct sk_buff *skb = tcp_write_queue_tail(sk);
2077 int mss_now;
2078
2079
2080
2081
2082
2083 mss_now = tcp_current_mss(sk, 1);
2084
2085 if (tcp_send_head(sk) != NULL) {
2086 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
2087 TCP_SKB_CB(skb)->end_seq++;
2088 tp->write_seq++;
2089 } else {
2090
2091 for (;;) {
2092 skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL);
2093 if (skb)
2094 break;
2095 yield();
2096 }
2097
2098
2099 skb_reserve(skb, MAX_TCP_HEADER);
2100
2101 tcp_init_nondata_skb(skb, tp->write_seq,
2102 TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
2103 tcp_queue_skb(sk, skb);
2104 }
2105 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
2106}
2107
2108
2109
2110
2111
2112
2113void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2114{
2115 struct sk_buff *skb;
2116
2117
2118 skb = alloc_skb(MAX_TCP_HEADER, priority);
2119 if (!skb) {
2120 NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
2121 return;
2122 }
2123
2124
2125 skb_reserve(skb, MAX_TCP_HEADER);
2126 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
2127 TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
2128
2129 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2130 if (tcp_transmit_skb(sk, skb, 0, priority))
2131 NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
2132}
2133
2134
2135
2136
2137
2138
2139int tcp_send_synack(struct sock *sk)
2140{
2141 struct sk_buff *skb;
2142
2143 skb = tcp_write_queue_head(sk);
2144 if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)) {
2145 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
2146 return -EFAULT;
2147 }
2148 if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_ACK)) {
2149 if (skb_cloned(skb)) {
2150 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2151 if (nskb == NULL)
2152 return -ENOMEM;
2153 tcp_unlink_write_queue(skb, sk);
2154 skb_header_release(nskb);
2155 __tcp_add_write_queue_head(sk, nskb);
2156 sk_wmem_free_skb(sk, skb);
2157 sk->sk_wmem_queued += nskb->truesize;
2158 sk_mem_charge(sk, nskb->truesize);
2159 skb = nskb;
2160 }
2161
2162 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK;
2163 TCP_ECN_send_synack(tcp_sk(sk), skb);
2164 }
2165 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2166 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2167}
2168
2169
2170
2171
/*
 * Build a SYN-ACK segment answering the connection request @req.
 *
 * @sk:  listening socket the request arrived on
 * @dst: route for the reply; a reference is taken with dst_clone()
 * @req: request sock holding the negotiated options and sequence numbers
 *
 * Returns the prepared skb (TCP header and options filled in), or NULL if
 * the skb could not be allocated.  The function does not transmit the skb.
 */
struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
				struct request_sock *req)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcphdr *th;
	int tcp_header_size;
	struct sk_buff *skb;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *md5;
	__u8 *md5_hash_location;
#endif

	skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
	if (skb == NULL)
		return NULL;

	/* Leave headroom for the protocol headers. */
	skb_reserve(skb, MAX_TCP_HEADER);

	skb->dst = dst_clone(dst);

	/*
	 * Header size: base header plus MSS option, plus timestamp and
	 * window-scale options when negotiated.  SACK-permitted gets its own
	 * aligned slot only when timestamps are not in use.
	 */
	tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS +
			   (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
			   (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
			   /* NOTE(review): presumably SACK-permitted shares the timestamp slot otherwise - confirm. */
			   ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));

#ifdef CONFIG_TCP_MD5SIG
	/* Reserve room for the MD5 signature option if a key is configured. */
	md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
	if (md5)
		tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
#endif
	skb_push(skb, tcp_header_size);
	skb_reset_transport_header(skb);

	th = tcp_hdr(skb);
	memset(th, 0, sizeof(struct tcphdr));
	th->syn = 1;
	th->ack = 1;
	TCP_ECN_make_synack(req, th);
	th->source = inet_sk(sk)->sport;
	th->dest = ireq->rmt_port;

	/*
	 * Sequence numbers: our initial sequence number from the request,
	 * acknowledging the peer's initial sequence number plus one.
	 */
	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
			     TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
	th->seq = htonl(TCP_SKB_CB(skb)->seq);
	th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
	/* Choose the initial window only if none is recorded for this request. */
	if (req->rcv_wnd == 0) {
		__u8 rcv_wscale;

		/* Fall back to the route's window metric if the socket has no clamp. */
		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);

		tcp_select_initial_window(tcp_full_space(sk),
			dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
			&req->rcv_wnd,
			&req->window_clamp,
			ireq->wscale_ok,
			&rcv_wscale);
		ireq->rcv_wscale = rcv_wscale;
	}

	/* The header's window field is 16 bits wide: cap at 65535. */
	th->window = htons(min(req->rcv_wnd, 65535U));

	TCP_SKB_CB(skb)->when = tcp_time_stamp;
	/* Write the options directly behind the fixed header. */
	tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
			      ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale,
			      TCP_SKB_CB(skb)->when,
			      req->ts_recent,
			      (
#ifdef CONFIG_TCP_MD5SIG
			       md5 ? &md5_hash_location :
#endif
			       NULL)
			      );

	/* Data offset is expressed in 32-bit words. */
	th->doff = (tcp_header_size >> 2);
	TCP_INC_STATS(TCP_MIB_OUTSEGS);

#ifdef CONFIG_TCP_MD5SIG
	/* Compute the signature into the location handed back above. */
	if (md5) {
		tp->af_specific->calc_md5_hash(md5_hash_location,
					       md5,
					       NULL, dst, req,
					       tcp_hdr(skb), sk->sk_protocol,
					       skb->len);
	}
#endif

	return skb;
}
2268
2269
2270
2271
2272static void tcp_connect_init(struct sock *sk)
2273{
2274 struct dst_entry *dst = __sk_dst_get(sk);
2275 struct tcp_sock *tp = tcp_sk(sk);
2276 __u8 rcv_wscale;
2277
2278
2279
2280
2281 tp->tcp_header_len = sizeof(struct tcphdr) +
2282 (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
2283
2284#ifdef CONFIG_TCP_MD5SIG
2285 if (tp->af_specific->md5_lookup(sk, sk) != NULL)
2286 tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
2287#endif
2288
2289
2290 if (tp->rx_opt.user_mss)
2291 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
2292 tp->max_window = 0;
2293 tcp_mtup_init(sk);
2294 tcp_sync_mss(sk, dst_mtu(dst));
2295
2296 if (!tp->window_clamp)
2297 tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
2298 tp->advmss = dst_metric(dst, RTAX_ADVMSS);
2299 tcp_initialize_rcv_mss(sk);
2300
2301 tcp_select_initial_window(tcp_full_space(sk),
2302 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
2303 &tp->rcv_wnd,
2304 &tp->window_clamp,
2305 sysctl_tcp_window_scaling,
2306 &rcv_wscale);
2307
2308 tp->rx_opt.rcv_wscale = rcv_wscale;
2309 tp->rcv_ssthresh = tp->rcv_wnd;
2310
2311 sk->sk_err = 0;
2312 sock_reset_flag(sk, SOCK_DONE);
2313 tp->snd_wnd = 0;
2314 tcp_init_wl(tp, tp->write_seq, 0);
2315 tp->snd_una = tp->write_seq;
2316 tp->snd_sml = tp->write_seq;
2317 tp->rcv_nxt = 0;
2318 tp->rcv_wup = 0;
2319 tp->copied_seq = 0;
2320
2321 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
2322 inet_csk(sk)->icsk_retransmits = 0;
2323 tcp_clear_retrans(tp);
2324}
2325
2326
2327
2328
2329int tcp_connect(struct sock *sk)
2330{
2331 struct tcp_sock *tp = tcp_sk(sk);
2332 struct sk_buff *buff;
2333
2334 tcp_connect_init(sk);
2335
2336 buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
2337 if (unlikely(buff == NULL))
2338 return -ENOBUFS;
2339
2340
2341 skb_reserve(buff, MAX_TCP_HEADER);
2342
2343 tp->snd_nxt = tp->write_seq;
2344 tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN);
2345 TCP_ECN_send_syn(sk, buff);
2346
2347
2348 TCP_SKB_CB(buff)->when = tcp_time_stamp;
2349 tp->retrans_stamp = TCP_SKB_CB(buff)->when;
2350 skb_header_release(buff);
2351 __tcp_add_write_queue_tail(sk, buff);
2352 sk->sk_wmem_queued += buff->truesize;
2353 sk_mem_charge(sk, buff->truesize);
2354 tp->packets_out += tcp_skb_pcount(buff);
2355 tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
2356
2357
2358
2359
2360 tp->snd_nxt = tp->write_seq;
2361 tp->pushed_seq = tp->write_seq;
2362 TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
2363
2364
2365 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2366 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
2367 return 0;
2368}
2369
2370
2371
2372
2373
2374void tcp_send_delayed_ack(struct sock *sk)
2375{
2376 struct inet_connection_sock *icsk = inet_csk(sk);
2377 int ato = icsk->icsk_ack.ato;
2378 unsigned long timeout;
2379
2380 if (ato > TCP_DELACK_MIN) {
2381 const struct tcp_sock *tp = tcp_sk(sk);
2382 int max_ato = HZ / 2;
2383
2384 if (icsk->icsk_ack.pingpong ||
2385 (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
2386 max_ato = TCP_DELACK_MAX;
2387
2388
2389
2390
2391
2392
2393
2394 if (tp->srtt) {
2395 int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
2396
2397 if (rtt < max_ato)
2398 max_ato = rtt;
2399 }
2400
2401 ato = min(ato, max_ato);
2402 }
2403
2404
2405 timeout = jiffies + ato;
2406
2407
2408 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
2409
2410
2411
2412 if (icsk->icsk_ack.blocked ||
2413 time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
2414 tcp_send_ack(sk);
2415 return;
2416 }
2417
2418 if (!time_before(timeout, icsk->icsk_ack.timeout))
2419 timeout = icsk->icsk_ack.timeout;
2420 }
2421 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
2422 icsk->icsk_ack.timeout = timeout;
2423 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
2424}
2425
2426
2427void tcp_send_ack(struct sock *sk)
2428{
2429 struct sk_buff *buff;
2430
2431
2432 if (sk->sk_state == TCP_CLOSE)
2433 return;
2434
2435
2436
2437
2438
2439 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
2440 if (buff == NULL) {
2441 inet_csk_schedule_ack(sk);
2442 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
2443 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
2444 TCP_DELACK_MAX, TCP_RTO_MAX);
2445 return;
2446 }
2447
2448
2449 skb_reserve(buff, MAX_TCP_HEADER);
2450 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPCB_FLAG_ACK);
2451
2452
2453 TCP_SKB_CB(buff)->when = tcp_time_stamp;
2454 tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
2455}
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
2469{
2470 struct tcp_sock *tp = tcp_sk(sk);
2471 struct sk_buff *skb;
2472
2473
2474 skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
2475 if (skb == NULL)
2476 return -1;
2477
2478
2479 skb_reserve(skb, MAX_TCP_HEADER);
2480
2481
2482
2483
2484 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPCB_FLAG_ACK);
2485 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2486 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
2487}
2488
2489int tcp_write_wakeup(struct sock *sk)
2490{
2491 struct tcp_sock *tp = tcp_sk(sk);
2492 struct sk_buff *skb;
2493
2494 if (sk->sk_state == TCP_CLOSE)
2495 return -1;
2496
2497 if ((skb = tcp_send_head(sk)) != NULL &&
2498 before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
2499 int err;
2500 unsigned int mss = tcp_current_mss(sk, 0);
2501 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
2502
2503 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
2504 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
2505
2506
2507
2508
2509
2510 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
2511 skb->len > mss) {
2512 seg_size = min(seg_size, mss);
2513 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
2514 if (tcp_fragment(sk, skb, seg_size, mss))
2515 return -1;
2516 } else if (!tcp_skb_pcount(skb))
2517 tcp_set_skb_tso_segs(sk, skb, mss);
2518
2519 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
2520 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2521 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2522 if (!err)
2523 tcp_event_new_data_sent(sk, skb);
2524 return err;
2525 } else {
2526 if (tp->urg_mode &&
2527 between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
2528 tcp_xmit_probe_skb(sk, 1);
2529 return tcp_xmit_probe_skb(sk, 0);
2530 }
2531}
2532
2533
2534
2535
2536void tcp_send_probe0(struct sock *sk)
2537{
2538 struct inet_connection_sock *icsk = inet_csk(sk);
2539 struct tcp_sock *tp = tcp_sk(sk);
2540 int err;
2541
2542 err = tcp_write_wakeup(sk);
2543
2544 if (tp->packets_out || !tcp_send_head(sk)) {
2545
2546 icsk->icsk_probes_out = 0;
2547 icsk->icsk_backoff = 0;
2548 return;
2549 }
2550
2551 if (err <= 0) {
2552 if (icsk->icsk_backoff < sysctl_tcp_retries2)
2553 icsk->icsk_backoff++;
2554 icsk->icsk_probes_out++;
2555 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
2556 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
2557 TCP_RTO_MAX);
2558 } else {
2559
2560
2561
2562
2563
2564
2565 if (!icsk->icsk_probes_out)
2566 icsk->icsk_probes_out = 1;
2567 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
2568 min(icsk->icsk_rto << icsk->icsk_backoff,
2569 TCP_RESOURCE_PROBE_INTERVAL),
2570 TCP_RTO_MAX);
2571 }
2572}
2573
/* Symbols from this file exported via EXPORT_SYMBOL(). */
EXPORT_SYMBOL(tcp_connect);
EXPORT_SYMBOL(tcp_make_synack);
EXPORT_SYMBOL(tcp_simple_retransmit);
EXPORT_SYMBOL(tcp_sync_mss);
EXPORT_SYMBOL(tcp_mtup_init);
2579