1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39#include <net/tcp.h>
40
41#include <linux/compiler.h>
42#include <linux/module.h>
43#include <linux/smp_lock.h>
44
45
/* Tunable, initialised to 1.  tcp_retransmit_skb() only attempts to merge a
 * small segment with its successor (tcp_retrans_try_collapse()) while this
 * value is non-zero.
 */
46int sysctl_tcp_retrans_collapse = 1;
47
48static __inline__
49void update_send_head(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
50{
51 tp->send_head = skb->next;
52 if (tp->send_head == (struct sk_buff *)&sk->sk_write_queue)
53 tp->send_head = NULL;
54 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
55 if (tp->packets_out++ == 0)
56 tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
57}
58
59
60
61
62
63
64
65static __inline__ __u32 tcp_acceptable_seq(struct sock *sk, struct tcp_opt *tp)
66{
67 if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt))
68 return tp->snd_nxt;
69 else
70 return tp->snd_una+tp->snd_wnd;
71}
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87static __u16 tcp_advertise_mss(struct sock *sk)
88{
89 struct tcp_opt *tp = tcp_sk(sk);
90 struct dst_entry *dst = __sk_dst_get(sk);
91 int mss = tp->advmss;
92
93 if (dst && dst_metric(dst, RTAX_ADVMSS) < mss) {
94 mss = dst_metric(dst, RTAX_ADVMSS);
95 tp->advmss = mss;
96 }
97
98 return (__u16)mss;
99}
100
101
102
103static void tcp_cwnd_restart(struct tcp_opt *tp, struct dst_entry *dst)
104{
105 s32 delta = tcp_time_stamp - tp->lsndtime;
106 u32 restart_cwnd = tcp_init_cwnd(tp, dst);
107 u32 cwnd = tp->snd_cwnd;
108
109 tp->snd_ssthresh = tcp_current_ssthresh(tp);
110 restart_cwnd = min(restart_cwnd, cwnd);
111
112 while ((delta -= tp->rto) > 0 && cwnd > restart_cwnd)
113 cwnd >>= 1;
114 tp->snd_cwnd = max(cwnd, restart_cwnd);
115 tp->snd_cwnd_stamp = tcp_time_stamp;
116 tp->snd_cwnd_used = 0;
117}
118
119static __inline__ void tcp_event_data_sent(struct tcp_opt *tp, struct sk_buff *skb, struct sock *sk)
120{
121 u32 now = tcp_time_stamp;
122
123 if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto)
124 tcp_cwnd_restart(tp, __sk_dst_get(sk));
125
126 tp->lsndtime = now;
127
128
129
130
131 if ((u32)(now - tp->ack.lrcvtime) < tp->ack.ato)
132 tp->ack.pingpong = 1;
133}
134
135static __inline__ void tcp_event_ack_sent(struct sock *sk)
136{
137 struct tcp_opt *tp = tcp_sk(sk);
138
139 tcp_dec_quickack_mode(tp);
140 tcp_clear_xmit_timer(sk, TCP_TIME_DACK);
141}
142
143
144
145
146
147
148static __inline__ u16 tcp_select_window(struct sock *sk)
149{
150 struct tcp_opt *tp = tcp_sk(sk);
151 u32 cur_win = tcp_receive_window(tp);
152 u32 new_win = __tcp_select_window(sk);
153
154
155 if(new_win < cur_win) {
156
157
158
159
160
161
162
163 new_win = cur_win;
164 }
165 tp->rcv_wnd = new_win;
166 tp->rcv_wup = tp->rcv_nxt;
167
168
169 new_win >>= tp->rcv_wscale;
170
171
172 if (new_win == 0)
173 tp->pred_flags = 0;
174
175 return new_win;
176}
177
178
179
180
181
182
183
184
185
186
187
188
189
190int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
191{
192 if(skb != NULL) {
193 struct inet_opt *inet = inet_sk(sk);
194 struct tcp_opt *tp = tcp_sk(sk);
195 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
196 int tcp_header_size = tp->tcp_header_len;
197 struct tcphdr *th;
198 int sysctl_flags;
199 int err;
200
201#define SYSCTL_FLAG_TSTAMPS 0x1
202#define SYSCTL_FLAG_WSCALE 0x2
203#define SYSCTL_FLAG_SACK 0x4
204
205 sysctl_flags = 0;
206 if (tcb->flags & TCPCB_FLAG_SYN) {
207 tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
208 if(sysctl_tcp_timestamps) {
209 tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
210 sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
211 }
212 if(sysctl_tcp_window_scaling) {
213 tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
214 sysctl_flags |= SYSCTL_FLAG_WSCALE;
215 }
216 if(sysctl_tcp_sack) {
217 sysctl_flags |= SYSCTL_FLAG_SACK;
218 if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
219 tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
220 }
221 } else if (tp->eff_sacks) {
222
223
224
225 tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
226 (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
227 }
228 th = (struct tcphdr *) skb_push(skb, tcp_header_size);
229 skb->h.th = th;
230 skb_set_owner_w(skb, sk);
231
232
233 th->source = inet->sport;
234 th->dest = inet->dport;
235 th->seq = htonl(tcb->seq);
236 th->ack_seq = htonl(tp->rcv_nxt);
237 *(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->flags);
238 if (tcb->flags & TCPCB_FLAG_SYN) {
239
240
241
242 th->window = htons(tp->rcv_wnd);
243 } else {
244 th->window = htons(tcp_select_window(sk));
245 }
246 th->check = 0;
247 th->urg_ptr = 0;
248
249 if (tp->urg_mode &&
250 between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF)) {
251 th->urg_ptr = htons(tp->snd_up-tcb->seq);
252 th->urg = 1;
253 }
254
255 if (tcb->flags & TCPCB_FLAG_SYN) {
256 tcp_syn_build_options((__u32 *)(th + 1),
257 tcp_advertise_mss(sk),
258 (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
259 (sysctl_flags & SYSCTL_FLAG_SACK),
260 (sysctl_flags & SYSCTL_FLAG_WSCALE),
261 tp->rcv_wscale,
262 tcb->when,
263 tp->ts_recent);
264 } else {
265 tcp_build_and_update_options((__u32 *)(th + 1),
266 tp, tcb->when);
267
268 TCP_ECN_send(sk, tp, skb, tcp_header_size);
269 }
270 tp->af_specific->send_check(sk, th, skb->len, skb);
271
272 if (tcb->flags & TCPCB_FLAG_ACK)
273 tcp_event_ack_sent(sk);
274
275 if (skb->len != tcp_header_size)
276 tcp_event_data_sent(tp, skb, sk);
277
278 TCP_INC_STATS(TcpOutSegs);
279
280 err = tp->af_specific->queue_xmit(skb, 0);
281 if (err <= 0)
282 return err;
283
284 tcp_enter_cwr(tp);
285
286
287
288
289
290
291
292 return err == NET_XMIT_CN ? 0 : err;
293 }
294 return -ENOBUFS;
295#undef SYSCTL_FLAG_TSTAMPS
296#undef SYSCTL_FLAG_WSCALE
297#undef SYSCTL_FLAG_SACK
298}
299
300
301
302
303
304
305
306
307void tcp_send_skb(struct sock *sk, struct sk_buff *skb, int force_queue, unsigned cur_mss)
308{
309 struct tcp_opt *tp = tcp_sk(sk);
310
311
312 tp->write_seq = TCP_SKB_CB(skb)->end_seq;
313 __skb_queue_tail(&sk->sk_write_queue, skb);
314 tcp_charge_skb(sk, skb);
315
316 if (!force_queue && tp->send_head == NULL && tcp_snd_test(tp, skb, cur_mss, tp->nonagle)) {
317
318 TCP_SKB_CB(skb)->when = tcp_time_stamp;
319 if (!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation))) {
320 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
321 tcp_minshall_update(tp, cur_mss, skb);
322 if (tp->packets_out++ == 0)
323 tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
324 return;
325 }
326 }
327
328 if (tp->send_head == NULL)
329 tp->send_head = skb;
330}
331
332
333
334
335void tcp_push_one(struct sock *sk, unsigned cur_mss)
336{
337 struct tcp_opt *tp = tcp_sk(sk);
338 struct sk_buff *skb = tp->send_head;
339
340 if (tcp_snd_test(tp, skb, cur_mss, TCP_NAGLE_PUSH)) {
341
342 TCP_SKB_CB(skb)->when = tcp_time_stamp;
343 if (!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation))) {
344 tp->send_head = NULL;
345 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
346 if (tp->packets_out++ == 0)
347 tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
348 return;
349 }
350 }
351}
352
353
354
355static void skb_split(struct sk_buff *skb, struct sk_buff *skb1, u32 len)
356{
357 int i;
358 int pos = skb_headlen(skb);
359
360 if (len < pos) {
361
362 memcpy(skb_put(skb1, pos-len), skb->data + len, pos-len);
363
364
365 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
366 skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
367
368 skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
369 skb_shinfo(skb)->nr_frags = 0;
370
371 skb1->data_len = skb->data_len;
372 skb1->len += skb1->data_len;
373 skb->data_len = 0;
374 skb->len = len;
375 skb->tail = skb->data+len;
376 } else {
377 int k = 0;
378 int nfrags = skb_shinfo(skb)->nr_frags;
379
380
381
382 skb_shinfo(skb)->nr_frags = 0;
383 skb1->len = skb1->data_len = skb->len - len;
384 skb->len = len;
385 skb->data_len = len - pos;
386
387 for (i=0; i<nfrags; i++) {
388 int size = skb_shinfo(skb)->frags[i].size;
389 if (pos + size > len) {
390 skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
391
392 if (pos < len) {
393
394
395
396
397
398
399
400
401 get_page(skb_shinfo(skb)->frags[i].page);
402 skb_shinfo(skb1)->frags[0].page_offset += (len-pos);
403 skb_shinfo(skb1)->frags[0].size -= (len-pos);
404 skb_shinfo(skb)->frags[i].size = len-pos;
405 skb_shinfo(skb)->nr_frags++;
406 }
407 k++;
408 } else {
409 skb_shinfo(skb)->nr_frags++;
410 }
411 pos += size;
412 }
413 skb_shinfo(skb1)->nr_frags = k;
414 }
415}
416
417
418
419
420
421
422static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
423{
424 struct tcp_opt *tp = tcp_sk(sk);
425 struct sk_buff *buff;
426 int nsize = skb->len - len;
427 u16 flags;
428
429 if (skb_cloned(skb) &&
430 skb_is_nonlinear(skb) &&
431 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
432 return -ENOMEM;
433
434
435 buff = tcp_alloc_skb(sk, nsize, GFP_ATOMIC);
436 if (buff == NULL)
437 return -ENOMEM;
438 tcp_charge_skb(sk, buff);
439
440
441 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
442 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
443 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
444
445
446 flags = TCP_SKB_CB(skb)->flags;
447 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
448 TCP_SKB_CB(buff)->flags = flags;
449 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
450 if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) {
451 tp->lost_out++;
452 tp->left_out++;
453 }
454 TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
455
456 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) {
457
458 buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize),
459 nsize, 0);
460
461 skb_trim(skb, len);
462
463 skb->csum = csum_block_sub(skb->csum, buff->csum, len);
464 } else {
465 skb->ip_summed = CHECKSUM_HW;
466 skb_split(skb, buff, len);
467 }
468
469 buff->ip_summed = skb->ip_summed;
470
471
472
473
474 TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
475
476
477 __skb_append(skb, buff);
478
479 return 0;
480}
481
482
483
484
485
486unsigned char * __pskb_trim_head(struct sk_buff *skb, int len)
487{
488 int i, k, eat;
489
490 eat = len;
491 k = 0;
492 for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
493 if (skb_shinfo(skb)->frags[i].size <= eat) {
494 put_page(skb_shinfo(skb)->frags[i].page);
495 eat -= skb_shinfo(skb)->frags[i].size;
496 } else {
497 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
498 if (eat) {
499 skb_shinfo(skb)->frags[k].page_offset += eat;
500 skb_shinfo(skb)->frags[k].size -= eat;
501 eat = 0;
502 }
503 k++;
504 }
505 }
506 skb_shinfo(skb)->nr_frags = k;
507
508 skb->tail = skb->data;
509 skb->data_len -= len;
510 skb->len = skb->data_len;
511 return skb->tail;
512}
513
514static int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
515{
516 if (skb_cloned(skb) &&
517 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
518 return -ENOMEM;
519
520 if (len <= skb_headlen(skb)) {
521 __skb_pull(skb, len);
522 } else {
523 if (__pskb_trim_head(skb, len-skb_headlen(skb)) == NULL)
524 return -ENOMEM;
525 }
526
527 TCP_SKB_CB(skb)->seq += len;
528 skb->ip_summed = CHECKSUM_HW;
529 return 0;
530}
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555int tcp_sync_mss(struct sock *sk, u32 pmtu)
556{
557 struct tcp_opt *tp = tcp_sk(sk);
558 struct dst_entry *dst = __sk_dst_get(sk);
559 int mss_now;
560
561 if (dst && dst->ops->get_mss)
562 pmtu = dst->ops->get_mss(dst, pmtu);
563
564
565
566
567 mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr);
568
569
570 if (mss_now > tp->mss_clamp)
571 mss_now = tp->mss_clamp;
572
573
574 mss_now -= tp->ext_header_len + tp->ext2_header_len;
575
576
577 if (mss_now < 48)
578 mss_now = 48;
579
580
581 mss_now -= tp->tcp_header_len - sizeof(struct tcphdr);
582
583
584 if (tp->max_window && mss_now > (tp->max_window>>1))
585 mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len);
586
587
588 tp->pmtu_cookie = pmtu;
589 tp->mss_cache = tp->mss_cache_std = mss_now;
590
591 if (sk->sk_route_caps & NETIF_F_TSO) {
592 int large_mss;
593
594 large_mss = 65535 - tp->af_specific->net_header_len -
595 tp->ext_header_len - tp->ext2_header_len - tp->tcp_header_len;
596
597 if (tp->max_window && large_mss > (tp->max_window>>1))
598 large_mss = max((tp->max_window>>1), 68U - tp->tcp_header_len);
599
600
601 tp->mss_cache = mss_now*(large_mss/mss_now);
602 }
603
604 return mss_now;
605}
606
607
608
609
610
611
612
613
614
615int tcp_write_xmit(struct sock *sk, int nonagle)
616{
617 struct tcp_opt *tp = tcp_sk(sk);
618 unsigned int mss_now;
619
620
621
622
623
624 if (sk->sk_state != TCP_CLOSE) {
625 struct sk_buff *skb;
626 int sent_pkts = 0;
627
628
629
630
631
632
633 mss_now = tcp_current_mss(sk, 1);
634
635 while((skb = tp->send_head) &&
636 tcp_snd_test(tp, skb, mss_now, tcp_skb_is_last(sk, skb) ? nonagle : TCP_NAGLE_PUSH)) {
637 if (skb->len > mss_now) {
638 if (tcp_fragment(sk, skb, mss_now))
639 break;
640 }
641
642 TCP_SKB_CB(skb)->when = tcp_time_stamp;
643 if (tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)))
644 break;
645
646 update_send_head(sk, tp, skb);
647 tcp_minshall_update(tp, mss_now, skb);
648 sent_pkts = 1;
649 }
650
651 if (sent_pkts) {
652 tcp_cwnd_validate(sk, tp);
653 return 0;
654 }
655
656 return !tp->packets_out && tp->send_head;
657 }
658 return 0;
659}
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713u32 __tcp_select_window(struct sock *sk)
714{
715 struct tcp_opt *tp = tcp_sk(sk);
716
717
718
719
720
721
722 int mss = tp->ack.rcv_mss;
723 int free_space = tcp_space(sk);
724 int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
725 int window;
726
727 if (mss > full_space)
728 mss = full_space;
729
730 if (free_space < full_space/2) {
731 tp->ack.quick = 0;
732
733 if (tcp_memory_pressure)
734 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);
735
736 if (free_space < mss)
737 return 0;
738 }
739
740 if (free_space > tp->rcv_ssthresh)
741 free_space = tp->rcv_ssthresh;
742
743
744
745
746
747
748
749
750
751 window = tp->rcv_wnd;
752 if (window <= free_space - mss || window > free_space)
753 window = (free_space/mss)*mss;
754
755 return window;
756}
757
758
759static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now)
760{
761 struct tcp_opt *tp = tcp_sk(sk);
762 struct sk_buff *next_skb = skb->next;
763
764
765
766
767 if(!skb_cloned(skb) && !skb_cloned(next_skb)) {
768 int skb_size = skb->len, next_skb_size = next_skb->len;
769 u16 flags = TCP_SKB_CB(skb)->flags;
770
771
772 if(TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
773 return;
774
775
776 if (after(TCP_SKB_CB(next_skb)->end_seq, tp->snd_una+tp->snd_wnd))
777 return;
778
779
780
781
782
783 if ((next_skb_size > skb_tailroom(skb)) ||
784 ((skb_size + next_skb_size) > mss_now))
785 return;
786
787
788 __skb_unlink(next_skb, next_skb->list);
789
790 memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
791
792 if (next_skb->ip_summed == CHECKSUM_HW)
793 skb->ip_summed = CHECKSUM_HW;
794
795 if (skb->ip_summed != CHECKSUM_HW)
796 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
797
798
799 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
800
801
802 flags |= TCP_SKB_CB(next_skb)->flags;
803 TCP_SKB_CB(skb)->flags = flags;
804
805
806
807
808 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
809 if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS)
810 tp->retrans_out--;
811 if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) {
812 tp->lost_out--;
813 tp->left_out--;
814 }
815
816 if (!tp->sack_ok && tp->sacked_out) {
817 tp->sacked_out--;
818 tp->left_out--;
819 }
820
821
822
823
824 if (tp->fackets_out)
825 tp->fackets_out--;
826 tcp_free_skb(sk, next_skb);
827 tp->packets_out--;
828 }
829}
830
831
832
833
834
835void tcp_simple_retransmit(struct sock *sk)
836{
837 struct tcp_opt *tp = tcp_sk(sk);
838 struct sk_buff *skb;
839 unsigned int mss = tcp_current_mss(sk, 0);
840 int lost = 0;
841
842 for_retrans_queue(skb, sk, tp) {
843 if (skb->len > mss &&
844 !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
845 if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
846 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
847 tp->retrans_out--;
848 }
849 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) {
850 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
851 tp->lost_out++;
852 lost = 1;
853 }
854 }
855 }
856
857 if (!lost)
858 return;
859
860 tcp_sync_left_out(tp);
861
862
863
864
865
866
867 if (tp->ca_state != TCP_CA_Loss) {
868 tp->high_seq = tp->snd_nxt;
869 tp->snd_ssthresh = tcp_current_ssthresh(tp);
870 tp->prior_ssthresh = 0;
871 tp->undo_marker = 0;
872 tp->ca_state = TCP_CA_Loss;
873 }
874 tcp_xmit_retransmit_queue(sk);
875}
876
877
878
879
880
881int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
882{
883 struct tcp_opt *tp = tcp_sk(sk);
884 unsigned int cur_mss = tcp_current_mss(sk, 0);
885 int err;
886
887
888
889
890 if (atomic_read(&sk->sk_wmem_alloc) >
891 min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
892 return -EAGAIN;
893
894 if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
895 if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
896 BUG();
897
898 if (sk->sk_route_caps & NETIF_F_TSO) {
899 sk->sk_route_caps &= ~NETIF_F_TSO;
900 sk->sk_no_largesend = 1;
901 tp->mss_cache = tp->mss_cache_std;
902 }
903
904 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
905 return -ENOMEM;
906 }
907
908
909
910
911
912
913 if (!before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)
914 && TCP_SKB_CB(skb)->seq != tp->snd_una)
915 return -EAGAIN;
916
917 if(skb->len > cur_mss) {
918 if(tcp_fragment(sk, skb, cur_mss))
919 return -ENOMEM;
920
921
922 tp->packets_out++;
923 }
924
925
926 if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
927 (skb->len < (cur_mss >> 1)) &&
928 (skb->next != tp->send_head) &&
929 (skb->next != (struct sk_buff *)&sk->sk_write_queue) &&
930 (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) &&
931 (sysctl_tcp_retrans_collapse != 0))
932 tcp_retrans_try_collapse(sk, skb, cur_mss);
933
934 if(tp->af_specific->rebuild_header(sk))
935 return -EHOSTUNREACH;
936
937
938
939
940
941 if(skb->len > 0 &&
942 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
943 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
944 if (!pskb_trim(skb, 0)) {
945 TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
946 skb->ip_summed = CHECKSUM_NONE;
947 skb->csum = 0;
948 }
949 }
950
951
952
953
954 TCP_SKB_CB(skb)->when = tcp_time_stamp;
955
956 err = tcp_transmit_skb(sk, (skb_cloned(skb) ?
957 pskb_copy(skb, GFP_ATOMIC):
958 skb_clone(skb, GFP_ATOMIC)));
959
960 if (err == 0) {
961
962 TCP_INC_STATS(TcpRetransSegs);
963
964#if FASTRETRANS_DEBUG > 0
965 if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
966 if (net_ratelimit())
967 printk(KERN_DEBUG "retrans_out leaked.\n");
968 }
969#endif
970 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
971 tp->retrans_out++;
972
973
974 if (!tp->retrans_stamp)
975 tp->retrans_stamp = TCP_SKB_CB(skb)->when;
976
977 tp->undo_retrans++;
978
979
980
981
982 TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
983 }
984 return err;
985}
986
987
988
989
990
991
992
993
994
995void tcp_xmit_retransmit_queue(struct sock *sk)
996{
997 struct tcp_opt *tp = tcp_sk(sk);
998 struct sk_buff *skb;
999 int packet_cnt = tp->lost_out;
1000
1001
1002 if (packet_cnt) {
1003 for_retrans_queue(skb, sk, tp) {
1004 __u8 sacked = TCP_SKB_CB(skb)->sacked;
1005
1006 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
1007 return;
1008
1009 if (sacked&TCPCB_LOST) {
1010 if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
1011 if (tcp_retransmit_skb(sk, skb))
1012 return;
1013 if (tp->ca_state != TCP_CA_Loss)
1014 NET_INC_STATS_BH(TCPFastRetrans);
1015 else
1016 NET_INC_STATS_BH(TCPSlowStartRetrans);
1017
1018 if (skb ==
1019 skb_peek(&sk->sk_write_queue))
1020 tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
1021 }
1022
1023 if (--packet_cnt <= 0)
1024 break;
1025 }
1026 }
1027 }
1028
1029
1030
1031
1032 if (tp->ca_state != TCP_CA_Recovery)
1033 return;
1034
1035
1036 if (!tp->sack_ok)
1037 return;
1038
1039
1040
1041
1042
1043
1044
1045
1046 if (tcp_may_send_now(sk, tp))
1047 return;
1048
1049 packet_cnt = 0;
1050
1051 for_retrans_queue(skb, sk, tp) {
1052 if(++packet_cnt > tp->fackets_out)
1053 break;
1054
1055 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
1056 break;
1057
1058 if(TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS)
1059 continue;
1060
1061
1062 if(tcp_retransmit_skb(sk, skb))
1063 break;
1064
1065 if (skb == skb_peek(&sk->sk_write_queue))
1066 tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
1067
1068 NET_INC_STATS_BH(TCPForwardRetrans);
1069 }
1070}
1071
1072
1073
1074
1075
1076void tcp_send_fin(struct sock *sk)
1077{
1078 struct tcp_opt *tp = tcp_sk(sk);
1079 struct sk_buff *skb = skb_peek_tail(&sk->sk_write_queue);
1080 unsigned int mss_now;
1081
1082
1083
1084
1085
1086 mss_now = tcp_current_mss(sk, 1);
1087
1088 if(tp->send_head != NULL) {
1089 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
1090 TCP_SKB_CB(skb)->end_seq++;
1091 tp->write_seq++;
1092 } else {
1093
1094 for (;;) {
1095 skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL);
1096 if (skb)
1097 break;
1098 yield();
1099 }
1100
1101
1102 skb_reserve(skb, MAX_TCP_HEADER);
1103 skb->csum = 0;
1104 TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
1105 TCP_SKB_CB(skb)->sacked = 0;
1106
1107
1108 TCP_SKB_CB(skb)->seq = tp->write_seq;
1109 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
1110 tcp_send_skb(sk, skb, 1, mss_now);
1111 }
1112 __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_OFF);
1113}
1114
1115
1116
1117
1118
1119
1120void tcp_send_active_reset(struct sock *sk, int priority)
1121{
1122 struct tcp_opt *tp = tcp_sk(sk);
1123 struct sk_buff *skb;
1124
1125
1126 skb = alloc_skb(MAX_TCP_HEADER, priority);
1127 if (!skb) {
1128 NET_INC_STATS(TCPAbortFailed);
1129 return;
1130 }
1131
1132
1133 skb_reserve(skb, MAX_TCP_HEADER);
1134 skb->csum = 0;
1135 TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
1136 TCP_SKB_CB(skb)->sacked = 0;
1137
1138
1139 TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
1140 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
1141 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1142 if (tcp_transmit_skb(sk, skb))
1143 NET_INC_STATS(TCPAbortFailed);
1144}
1145
1146
1147
1148
1149
1150
1151int tcp_send_synack(struct sock *sk)
1152{
1153 struct sk_buff* skb;
1154
1155 skb = skb_peek(&sk->sk_write_queue);
1156 if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) {
1157 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
1158 return -EFAULT;
1159 }
1160 if (!(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_ACK)) {
1161 if (skb_cloned(skb)) {
1162 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
1163 if (nskb == NULL)
1164 return -ENOMEM;
1165 __skb_unlink(skb, &sk->sk_write_queue);
1166 __skb_queue_head(&sk->sk_write_queue, nskb);
1167 tcp_free_skb(sk, skb);
1168 tcp_charge_skb(sk, nskb);
1169 skb = nskb;
1170 }
1171
1172 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK;
1173 TCP_ECN_send_synack(tcp_sk(sk), skb);
1174 }
1175 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1176 return tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
1177}
1178
1179
1180
1181
1182struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
1183 struct open_request *req)
1184{
1185 struct tcp_opt *tp = tcp_sk(sk);
1186 struct tcphdr *th;
1187 int tcp_header_size;
1188 struct sk_buff *skb;
1189
1190 skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
1191 if (skb == NULL)
1192 return NULL;
1193
1194
1195 skb_reserve(skb, MAX_TCP_HEADER);
1196
1197 skb->dst = dst_clone(dst);
1198
1199 tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS +
1200 (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
1201 (req->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
1202
1203 ((req->sack_ok && !req->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));
1204 skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size);
1205
1206 memset(th, 0, sizeof(struct tcphdr));
1207 th->syn = 1;
1208 th->ack = 1;
1209 if (dst->dev->features&NETIF_F_TSO)
1210 req->ecn_ok = 0;
1211 TCP_ECN_make_synack(req, th);
1212 th->source = inet_sk(sk)->sport;
1213 th->dest = req->rmt_port;
1214 TCP_SKB_CB(skb)->seq = req->snt_isn;
1215 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
1216 th->seq = htonl(TCP_SKB_CB(skb)->seq);
1217 th->ack_seq = htonl(req->rcv_isn + 1);
1218 if (req->rcv_wnd == 0) {
1219 __u8 rcv_wscale;
1220
1221 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
1222
1223 tcp_select_initial_window(tcp_full_space(sk),
1224 dst_metric(dst, RTAX_ADVMSS) - (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
1225 &req->rcv_wnd,
1226 &req->window_clamp,
1227 req->wscale_ok,
1228 &rcv_wscale);
1229 req->rcv_wscale = rcv_wscale;
1230 }
1231
1232
1233 th->window = htons(req->rcv_wnd);
1234
1235 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1236 tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), req->tstamp_ok,
1237 req->sack_ok, req->wscale_ok, req->rcv_wscale,
1238 TCP_SKB_CB(skb)->when,
1239 req->ts_recent);
1240
1241 skb->csum = 0;
1242 th->doff = (tcp_header_size >> 2);
1243 TCP_INC_STATS(TcpOutSegs);
1244 return skb;
1245}
1246
1247
1248
1249
1250static inline void tcp_connect_init(struct sock *sk)
1251{
1252 struct dst_entry *dst = __sk_dst_get(sk);
1253 struct tcp_opt *tp = tcp_sk(sk);
1254
1255
1256
1257
1258 tp->tcp_header_len = sizeof(struct tcphdr) +
1259 (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
1260
1261
1262 if (tp->user_mss)
1263 tp->mss_clamp = tp->user_mss;
1264 tp->max_window = 0;
1265 tcp_sync_mss(sk, dst_pmtu(dst));
1266
1267 if (!tp->window_clamp)
1268 tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
1269 tp->advmss = dst_metric(dst, RTAX_ADVMSS);
1270 tcp_initialize_rcv_mss(sk);
1271
1272 tcp_select_initial_window(tcp_full_space(sk),
1273 tp->advmss - (tp->ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
1274 &tp->rcv_wnd,
1275 &tp->window_clamp,
1276 sysctl_tcp_window_scaling,
1277 &tp->rcv_wscale);
1278
1279 tp->rcv_ssthresh = tp->rcv_wnd;
1280
1281 sk->sk_err = 0;
1282 sock_reset_flag(sk, SOCK_DONE);
1283 tp->snd_wnd = 0;
1284 tcp_init_wl(tp, tp->write_seq, 0);
1285 tp->snd_una = tp->write_seq;
1286 tp->snd_sml = tp->write_seq;
1287 tp->rcv_nxt = 0;
1288 tp->rcv_wup = 0;
1289 tp->copied_seq = 0;
1290
1291 tp->rto = TCP_TIMEOUT_INIT;
1292 tp->retransmits = 0;
1293 tcp_clear_retrans(tp);
1294}
1295
1296
1297
1298
1299int tcp_connect(struct sock *sk)
1300{
1301 struct tcp_opt *tp = tcp_sk(sk);
1302 struct sk_buff *buff;
1303
1304 tcp_connect_init(sk);
1305
1306 buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation);
1307 if (unlikely(buff == NULL))
1308 return -ENOBUFS;
1309
1310
1311 skb_reserve(buff, MAX_TCP_HEADER);
1312
1313 TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
1314 TCP_ECN_send_syn(sk, tp, buff);
1315 TCP_SKB_CB(buff)->sacked = 0;
1316 buff->csum = 0;
1317 TCP_SKB_CB(buff)->seq = tp->write_seq++;
1318 TCP_SKB_CB(buff)->end_seq = tp->write_seq;
1319 tp->snd_nxt = tp->write_seq;
1320 tp->pushed_seq = tp->write_seq;
1321
1322
1323 TCP_SKB_CB(buff)->when = tcp_time_stamp;
1324 tp->retrans_stamp = TCP_SKB_CB(buff)->when;
1325 __skb_queue_tail(&sk->sk_write_queue, buff);
1326 tcp_charge_skb(sk, buff);
1327 tp->packets_out++;
1328 tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
1329 TCP_INC_STATS(TcpActiveOpens);
1330
1331
1332 tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
1333 return 0;
1334}
1335
1336
1337
1338
1339
1340void tcp_send_delayed_ack(struct sock *sk)
1341{
1342 struct tcp_opt *tp = tcp_sk(sk);
1343 int ato = tp->ack.ato;
1344 unsigned long timeout;
1345
1346 if (ato > TCP_DELACK_MIN) {
1347 int max_ato = HZ/2;
1348
1349 if (tp->ack.pingpong || (tp->ack.pending&TCP_ACK_PUSHED))
1350 max_ato = TCP_DELACK_MAX;
1351
1352
1353
1354
1355
1356
1357
1358 if (tp->srtt) {
1359 int rtt = max(tp->srtt>>3, TCP_DELACK_MIN);
1360
1361 if (rtt < max_ato)
1362 max_ato = rtt;
1363 }
1364
1365 ato = min(ato, max_ato);
1366 }
1367
1368
1369 timeout = jiffies + ato;
1370
1371
1372 if (tp->ack.pending&TCP_ACK_TIMER) {
1373
1374
1375
1376 if (tp->ack.blocked || time_before_eq(tp->ack.timeout, jiffies+(ato>>2))) {
1377 tcp_send_ack(sk);
1378 return;
1379 }
1380
1381 if (!time_before(timeout, tp->ack.timeout))
1382 timeout = tp->ack.timeout;
1383 }
1384 tp->ack.pending |= TCP_ACK_SCHED|TCP_ACK_TIMER;
1385 tp->ack.timeout = timeout;
1386 if (!mod_timer(&tp->delack_timer, timeout))
1387 sock_hold(sk);
1388}
1389
1390
1391void tcp_send_ack(struct sock *sk)
1392{
1393
1394 if (sk->sk_state != TCP_CLOSE) {
1395 struct tcp_opt *tp = tcp_sk(sk);
1396 struct sk_buff *buff;
1397
1398
1399
1400
1401
1402 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
1403 if (buff == NULL) {
1404 tcp_schedule_ack(tp);
1405 tp->ack.ato = TCP_ATO_MIN;
1406 tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX);
1407 return;
1408 }
1409
1410
1411 skb_reserve(buff, MAX_TCP_HEADER);
1412 buff->csum = 0;
1413 TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
1414 TCP_SKB_CB(buff)->sacked = 0;
1415
1416
1417 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
1418 TCP_SKB_CB(buff)->when = tcp_time_stamp;
1419 tcp_transmit_skb(sk, buff);
1420 }
1421}
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
1435{
1436 struct tcp_opt *tp = tcp_sk(sk);
1437 struct sk_buff *skb;
1438
1439
1440 skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
1441 if (skb == NULL)
1442 return -1;
1443
1444
1445 skb_reserve(skb, MAX_TCP_HEADER);
1446 skb->csum = 0;
1447 TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
1448 TCP_SKB_CB(skb)->sacked = urgent;
1449
1450
1451
1452
1453
1454 TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1;
1455 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
1456 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1457 return tcp_transmit_skb(sk, skb);
1458}
1459
1460int tcp_write_wakeup(struct sock *sk)
1461{
1462 if (sk->sk_state != TCP_CLOSE) {
1463 struct tcp_opt *tp = tcp_sk(sk);
1464 struct sk_buff *skb;
1465
1466 if ((skb = tp->send_head) != NULL &&
1467 before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
1468 int err;
1469 int mss = tcp_current_mss(sk, 0);
1470 int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq;
1471
1472 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
1473 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
1474
1475
1476
1477
1478
1479 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
1480 skb->len > mss) {
1481 seg_size = min(seg_size, mss);
1482 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
1483 if (tcp_fragment(sk, skb, seg_size))
1484 return -1;
1485
1486
1487 if (sk->sk_route_caps & NETIF_F_TSO) {
1488 sk->sk_no_largesend = 1;
1489 sk->sk_route_caps &= ~NETIF_F_TSO;
1490 tp->mss_cache = tp->mss_cache_std;
1491 }
1492 }
1493 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
1494 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1495 err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
1496 if (!err) {
1497 update_send_head(sk, tp, skb);
1498 }
1499 return err;
1500 } else {
1501 if (tp->urg_mode &&
1502 between(tp->snd_up, tp->snd_una+1, tp->snd_una+0xFFFF))
1503 tcp_xmit_probe_skb(sk, TCPCB_URG);
1504 return tcp_xmit_probe_skb(sk, 0);
1505 }
1506 }
1507 return -1;
1508}
1509
1510
1511
1512
1513void tcp_send_probe0(struct sock *sk)
1514{
1515 struct tcp_opt *tp = tcp_sk(sk);
1516 int err;
1517
1518 err = tcp_write_wakeup(sk);
1519
1520 if (tp->packets_out || !tp->send_head) {
1521
1522 tp->probes_out = 0;
1523 tp->backoff = 0;
1524 return;
1525 }
1526
1527 if (err <= 0) {
1528 if (tp->backoff < sysctl_tcp_retries2)
1529 tp->backoff++;
1530 tp->probes_out++;
1531 tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0,
1532 min(tp->rto << tp->backoff, TCP_RTO_MAX));
1533 } else {
1534
1535
1536
1537
1538
1539
1540 if (!tp->probes_out)
1541 tp->probes_out=1;
1542 tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0,
1543 min(tp->rto << tp->backoff, TCP_RESOURCE_PROBE_INTERVAL));
1544 }
1545}
1546
/* Symbols from this file made available through EXPORT_SYMBOL.
 * NOTE(review): tcp_acceptable_seq and tcp_connect_init are defined
 * "static inline" in this file - confirm that exporting them is intended.
 */
1547EXPORT_SYMBOL(tcp_acceptable_seq);
1548EXPORT_SYMBOL(tcp_connect);
1549EXPORT_SYMBOL(tcp_connect_init);
1550EXPORT_SYMBOL(tcp_make_synack);
1551EXPORT_SYMBOL(tcp_send_synack);
1552EXPORT_SYMBOL(tcp_simple_retransmit);
1553EXPORT_SYMBOL(tcp_sync_mss);
1554EXPORT_SYMBOL(tcp_transmit_skb);
1555EXPORT_SYMBOL(tcp_write_wakeup);
1556EXPORT_SYMBOL(tcp_write_xmit);
1557